-#include <stack>
#include <limits>
-#include <msp/core/except.h>
+#include <list>
+#include <stack>
#include "format.h"
#include "regex.h"
namespace Msp {
+// Composes the exception message from the description w, a newline, and the
+// position marker text that make_where produces for expression e and
+// iterator i.
+bad_regex::bad_regex(const string &w, const string &e, const string::const_iterator &i):
+	logic_error((w+"\n").append(make_where(e, i)))
+{ }
+
+// Produces a two-line position marker: an excerpt of at most 60 characters of
+// the expression e, followed by a line of spaces ending in a caret that sits
+// under the character i points to.  When i is more than 40 characters into the
+// expression, the excerpt starts 40 characters before i so the caret stays at
+// column 40.
+string bad_regex::make_where(const string &e, const string::const_iterator &i)
+{
+	const string::size_type lead = 40;
+	const string::size_type width = 60;
+
+	string::size_type column = i-e.begin();
+	string::size_type first = 0;
+	if(column>lead)
+	{
+		// Slide the window forward and pin the caret to the lead column
+		first = column-lead;
+		column = lead;
+	}
+
+	string where = e.substr(first, width);
+	where += '\n';
+	where += string(column, ' ');
+	where += '^';
+	return where;
+}
+
+
Regex::Regex(const string &expr)
{
n_groups = 0;
Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, unsigned &group, bool branch)
{
bool has_branches = false;
- unsigned level = 0;
+ stack<string::const_iterator> parens;
bool escape = false;
unsigned bracket = 0;
string::const_iterator end;
else if(*end=='\\')
escape = true;
else if(*end=='(')
- ++level;
+ parens.push(end);
else if(*end==')')
{
- if(level==0)
+ if(parens.empty())
{
if(group==0)
- throw InvalidParameterValue("Unexpected )");
+ throw bad_regex("unmatched ')'", expr, end);
else
break;
}
- --level;
+ parens.pop();
}
- else if(*end=='|' && level==0)
+ else if(*end=='|' && parens.empty())
{
if(branch)
break;
bracket = 1;
}
- if(level>0)
- throw InvalidParameterValue("Unmatched (");
+ if(!parens.empty())
+ throw bad_regex("unmatched '('", expr, parens.top());
Code result;
Count repeat_min = 1;
Count repeat_max = 1;
- parse_repeat(i, repeat_min, repeat_max);
+ parse_repeat(expr, i, repeat_min, repeat_max);
for(unsigned j=0; j<repeat_min; ++j)
result += atom;
if(*i=='\\')
{
if(++i==expr.end())
- throw InvalidParameterValue("Stray backslash");
+ throw bad_regex("stray backslash", expr, i-1);
flag = true;
}
if(!flag)
{
if(*i=='*' || *i=='{' || *i=='}' || *i=='+' || *i=='?' || *i=='|' || *i==')')
- throw InvalidParameterValue("Invalid atom");
+ throw bad_regex("invalid atom", expr, i);
else if(*i=='[')
return parse_brackets(expr, i);
else if(*i=='.')
return result;
}
-bool Regex::parse_repeat(string::const_iterator &i, Count &rmin, Count &rmax)
+bool Regex::parse_repeat(const string &expr, string::const_iterator &i, Count &rmin, Count &rmax)
{
if(*i!='*' && *i!='+' && *i!='?' && *i!='{')
return false;
rmin = 0;
if(*i=='{')
{
+ string::const_iterator begin = i;
+
rmin = 0;
for(++i; isdigit(*i); ++i)
rmin = rmin*10+(*i-'0');
for(; isdigit(*i); ++i)
rmax = rmax*10+(*i-'0');
if(rmax<rmin)
- throw InvalidParameterValue("Invalid bound");
+ throw bad_regex("invalid bound", expr, begin);
}
else
rmax = numeric_limits<Count>::max();
else
rmax = rmin;
if(*i!='}')
- throw InvalidParameterValue("Invalid bound");
+ throw bad_regex("invalid bound", expr, begin);
}
++i;
Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &iter)
{
+ string::const_iterator begin = iter;
Code result;
++iter;
string::const_iterator end = iter;
for(; (end!=str.end() && (end==iter || *end!=']')); ++end) ;
if(end==str.end())
- throw InvalidParameterValue("Unmatched '['");
+ throw bad_regex("unmatched '['", str, begin);
unsigned char mask[32] = {0};
unsigned type = 0;
input_consumed = true;
}
else
- throw Exception("Invalid instruction");
+ throw logic_error("invalid instruction in regex bytecode");
if(match_result==negate_match)
terminate = true;
#ifndef MSP_STRINGS_REGEX_H_
#define MSP_STRINGS_REGEX_H_
+#include <stdexcept>
#include <string>
#include "regmatch.h"
namespace Msp {
+/**
+Thrown when a regular expression can not be compiled.  The message consists of
+a description of the problem, followed by an excerpt of the expression and a
+caret marking the offending position.
+*/
+class bad_regex: public std::logic_error
+{
+public:
+	/** Constructs the exception from a description of the problem, the
+	expression being compiled and an iterator pointing to the location of the
+	problem within that expression. */
+	bad_regex(const std::string &, const std::string &, const std::string::const_iterator &);
+	virtual ~bad_regex() throw() { }
+
+private:
+	// Static because the constructor calls this while initializing the
+	// logic_error base; invoking a non-static member function before the
+	// base classes have been initialized is undefined behavior.  The function
+	// only uses its arguments.
+	static std::string make_where(const std::string &, const std::string::const_iterator &);
+};
+
+
/**
This class provides regular expression matching. It supports a subset of
POSIX.2 extended regex syntax. Character classes, equivalence classes and
Code parse_atom(const std::string &, std::string::const_iterator &i, unsigned &);
Code parse_brackets(const std::string &, std::string::const_iterator &);
- bool parse_repeat(std::string::const_iterator &, Count &, Count &);
+ bool parse_repeat(const std::string &, std::string::const_iterator &, Count &, Count &);
public:
/** Matches the regex against a string. Refer to RegMatch documentation for