X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fstrings%2Fregex.cpp;fp=source%2Fstrings%2Fregex.cpp;h=5b047647a1d0b8f9a1b640c63bf2462567503832;hp=bfd25c7036bffc88fbe5ca4a7db0a8057295c6de;hb=30a1bd63b44a9d59f02231ed7b013164e957da52;hpb=fc1475d88018934a61df890c192a404a105308fd diff --git a/source/strings/regex.cpp b/source/strings/regex.cpp index bfd25c7..5b04764 100644 --- a/source/strings/regex.cpp +++ b/source/strings/regex.cpp @@ -1,6 +1,6 @@ -#include #include -#include +#include +#include #include "format.h" #include "regex.h" @@ -31,6 +31,28 @@ T read_int(Msp::Regex::Code::const_iterator &c) namespace Msp { +bad_regex::bad_regex(const string &w, const string &e, const string::const_iterator &i): + logic_error(w+"\n"+make_where(e, i)) +{ } + +string bad_regex::make_where(const string &e, const string::const_iterator &i) +{ + string result; + string::size_type offset = i-e.begin(); + if(offset>40) + { + result = e.substr(offset-40, 60); + offset = 40; + } + else + result = e.substr(0, 60); + result += '\n'; + result.append(offset, ' '); + result += '^'; + return result; +} + + Regex::Regex(const string &expr) { n_groups = 0; @@ -42,7 +64,7 @@ Regex::Regex(const string &expr) Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, unsigned &group, bool branch) { bool has_branches = false; - unsigned level = 0; + stack parens; bool escape = false; unsigned bracket = 0; string::const_iterator end; @@ -62,19 +84,19 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns else if(*end=='\\') escape = true; else if(*end=='(') - ++level; + parens.push(end); else if(*end==')') { - if(level==0) + if(parens.empty()) { if(group==0) - throw InvalidParameterValue("Unexpected )"); + throw bad_regex("unmatched ')'", expr, end); else break; } - --level; + parens.pop(); } - else if(*end=='|' && level==0) + else if(*end=='|' && parens.empty()) { if(branch) break; @@ -85,8 +107,8 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns bracket = 1; } - if(level>0) - throw InvalidParameterValue("Unmatched ("); + if(!parens.empty()) + throw bad_regex("unmatched '('", expr, parens.top()); Code result; @@ -107,7 +129,7 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns Count repeat_min = 1; Count repeat_max = 1; - parse_repeat(i, repeat_min, repeat_max); + parse_repeat(expr, i, repeat_min, repeat_max); for(unsigned j=0; j::max(); @@ -257,7 +281,7 @@ bool Regex::parse_repeat(string::const_iterator &i, Count &rmin, Count &rmax) else rmax = rmin; if(*i!='}') - throw InvalidParameterValue("Invalid bound"); + throw bad_regex("invalid bound", expr, begin); } ++i; @@ -267,6 +291,7 @@ bool Regex::parse_repeat(string::const_iterator &i, Count &rmin, Count &rmax) Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &iter) { + string::const_iterator begin = iter; Code result; ++iter; @@ -280,7 +305,7 @@ Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &ite string::const_iterator end = iter; for(; (end!=str.end() && (end==iter || *end!=']')); ++end) ; if(end==str.end()) - throw InvalidParameterValue("Unmatched '['"); + throw bad_regex("unmatched '['", str, begin); unsigned char mask[32] = {0}; unsigned type = 0; @@ -451,7 +476,7 @@ bool Regex::run(const string &str, const string::const_iterator &begin, RegMatch input_consumed = true; } else - throw Exception("Invalid instruction"); + throw logic_error("invalid instruction in regex bytecode"); if(match_result==negate_match) terminate = true;