-#include <stack>
#include <limits>
-#include <msp/core/except.h>
+#include <list>
+#include <stack>
#include "format.h"
#include "regex.h"
void write_int(T n, Msp::Regex::Code &code)
{
for(unsigned i=0; i<sizeof(T); ++i)
- code += (n>>i*8)&0xFF;
+ code += (n>>(i*8))&0xFF;
}
-/** Reads an integer from a Regex code stream, in little-endian order. */
+/** Reads an integer of type T from a Regex code string, in little-endian
+order.  Consumes sizeof(T) elements through the iterator c, leaving it just
+past the last one read, and returns their combined value.
+
+Each element is converted to unsigned char before shifting, so that values of
+0x80 and above do not sign-extend if the code's element type is a signed char
+(NOTE(review): Code's element type is not visible here; confirm).  The byte is
+then widened to T so that the shift is not limited to the width of int. */
template<typename T>
T read_int(Msp::Regex::Code::const_iterator &c)
{
T result = 0;
for(unsigned i=0; i<sizeof(T); ++i)
- result += (*c++)<<i*8;
+ result += static_cast<T>(static_cast<unsigned char>(*c++))<<(i*8);
return result;
}
namespace Msp {
+/** Constructs the exception.  The message passed to logic_error consists of
+the description w, a newline, and the position marker that make_where builds
+from the expression e and the iterator i. */
+bad_regex::bad_regex(const string &w, const string &e, const string::const_iterator &i):
+	logic_error(w+"\n"+make_where(e, i))
+{
+}
+
+/** Builds a two-line position marker for an error message.  The first line is
+an excerpt of at most 60 characters of the expression e; the second line is a
+'^' preceded by enough spaces to put it under the character i refers to.  When
+i is more than 40 characters into e, the excerpt starts 40 characters before
+that position; otherwise it starts at the beginning of e. */
+string bad_regex::make_where(const string &e, const string::const_iterator &i)
+{
+	string::size_type caret = i-e.begin();
+	string::size_type start = 0;
+	if(caret>40)
+	{
+		// Far into the expression: slide the excerpt so the caret sits at column 40.
+		start = caret-40;
+		caret = 40;
+	}
+
+	string where = e.substr(start, 60);
+	where += '\n';
+	where.append(caret, ' ');
+	where += '^';
+	return where;
+}
+
+
Regex::Regex(const string &expr)
{
n_groups = 0;
Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, unsigned &group, bool branch)
{
bool has_branches = false;
- unsigned level = 0;
+ stack<string::const_iterator> parens;
bool escape = false;
unsigned bracket = 0;
string::const_iterator end;
else if(*end=='\\')
escape = true;
else if(*end=='(')
- ++level;
+ parens.push(end);
else if(*end==')')
{
- if(level==0)
+ if(parens.empty())
{
if(group==0)
- throw InvalidParameterValue("Unexpected )");
+ throw bad_regex("unmatched ')'", expr, end);
else
break;
}
- --level;
+ parens.pop();
}
- else if(*end=='|' && level==0)
+ else if(*end=='|' && parens.empty())
{
if(branch)
break;
bracket = 1;
}
- if(level>0)
- throw InvalidParameterValue("Unmatched (");
+ if(!parens.empty())
+ throw bad_regex("unmatched '('", expr, parens.top());
Code result;
Count repeat_min = 1;
Count repeat_max = 1;
- parse_repeat(i, repeat_min, repeat_max);
+ parse_repeat(expr, i, repeat_min, repeat_max);
for(unsigned j=0; j<repeat_min; ++j)
result += atom;
if(*i=='\\')
{
if(++i==expr.end())
- throw InvalidParameterValue("Stray backslash");
+ throw bad_regex("stray backslash", expr, i-1);
flag = true;
}
if(!flag)
{
if(*i=='*' || *i=='{' || *i=='}' || *i=='+' || *i=='?' || *i=='|' || *i==')')
- throw InvalidParameterValue("Invalid atom");
+ throw bad_regex("invalid atom", expr, i);
else if(*i=='[')
return parse_brackets(expr, i);
else if(*i=='.')
return result;
}
-bool Regex::parse_repeat(string::const_iterator &i, Count &rmin, Count &rmax)
+bool Regex::parse_repeat(const string &expr, string::const_iterator &i, Count &rmin, Count &rmax)
{
if(*i!='*' && *i!='+' && *i!='?' && *i!='{')
return false;
rmin = 0;
if(*i=='{')
{
+ string::const_iterator begin = i;
+
rmin = 0;
for(++i; isdigit(*i); ++i)
rmin = rmin*10+(*i-'0');
for(; isdigit(*i); ++i)
rmax = rmax*10+(*i-'0');
if(rmax<rmin)
- throw InvalidParameterValue("Invalid bound");
+ throw bad_regex("invalid bound", expr, begin);
}
else
rmax = numeric_limits<Count>::max();
else
rmax = rmin;
if(*i!='}')
- throw InvalidParameterValue("Invalid bound");
+ throw bad_regex("invalid bound", expr, begin);
}
++i;
Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &iter)
{
+ string::const_iterator begin = iter;
Code result;
++iter;
string::const_iterator end = iter;
for(; (end!=str.end() && (end==iter || *end!=']')); ++end) ;
if(end==str.end())
- throw InvalidParameterValue("Unmatched '['");
+ throw bad_regex("unmatched '['", str, begin);
unsigned char mask[32] = {0};
unsigned type = 0;
bool range = false;
- unsigned char first=0, last = 0;
+ unsigned char first = 0, last = 0;
for(string::const_iterator i=iter; i!=end; ++i)
{
unsigned char c = *i;
input_consumed = true;
}
else
- throw Exception("Invalid instruction");
+ throw logic_error("invalid instruction in regex bytecode");
if(match_result==negate_match)
terminate = true;