X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fstrings%2Fregex.cpp;h=67157c9b185e434adf995ec75eeb3f122869ea9a;hp=bfd25c7036bffc88fbe5ca4a7db0a8057295c6de;hb=dce2985e07e6184f000ef176451150710e21ee35;hpb=00e36d6bf092b98dfa413578d9af58d61cedbc13 diff --git a/source/strings/regex.cpp b/source/strings/regex.cpp index bfd25c7..67157c9 100644 --- a/source/strings/regex.cpp +++ b/source/strings/regex.cpp @@ -1,6 +1,6 @@ -#include #include -#include +#include +#include #include "format.h" #include "regex.h" @@ -13,16 +13,16 @@ template void write_int(T n, Msp::Regex::Code &code) { for(unsigned i=0; i>i*8)&0xFF; + code += (n>>(i*8))&0xFF; } -/** Reads an integer from a Regex code stream, in little-endian order. */ +/** Reads an integer from a Regex code string, in little-endian order. */ template T read_int(Msp::Regex::Code::const_iterator &c) { T result = 0; for(unsigned i=0; i40) + { + result = e.substr(offset-40, 60); + offset = 40; + } + else + result = e.substr(0, 60); + result += '\n'; + result.append(offset, ' '); + result += '^'; + return result; +} + + Regex::Regex(const string &expr) { n_groups = 0; @@ -42,7 +64,7 @@ Regex::Regex(const string &expr) Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, unsigned &group, bool branch) { bool has_branches = false; - unsigned level = 0; + stack parens; bool escape = false; unsigned bracket = 0; string::const_iterator end; @@ -62,19 +84,19 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns else if(*end=='\\') escape = true; else if(*end=='(') - ++level; + parens.push(end); else if(*end==')') { - if(level==0) + if(parens.empty()) { if(group==0) - throw InvalidParameterValue("Unexpected )"); + throw bad_regex("unmatched ')'", expr, end); else break; } - --level; + parens.pop(); } - else if(*end=='|' && level==0) + else if(*end=='|' && parens.empty()) { if(branch) break; @@ -85,8 +107,8 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns bracket = 1; } - if(level>0) - throw InvalidParameterValue("Unmatched ("); + if(!parens.empty()) + throw bad_regex("unmatched '('", expr, parens.top()); Code result; @@ -107,7 +129,7 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns Count repeat_min = 1; Count repeat_max = 1; - parse_repeat(i, repeat_min, repeat_max); + parse_repeat(expr, i, repeat_min, repeat_max); for(unsigned j=0; j::max(); @@ -257,7 +281,7 @@ bool Regex::parse_repeat(string::const_iterator &i, Count &rmin, Count &rmax) else rmax = rmin; if(*i!='}') - throw InvalidParameterValue("Invalid bound"); + throw bad_regex("invalid bound", expr, begin); } ++i; @@ -267,6 +291,7 @@ bool Regex::parse_repeat(string::const_iterator &i, Count &rmin, Count &rmax) Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &iter) { + string::const_iterator begin = iter; Code result; ++iter; @@ -280,12 +305,12 @@ Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &ite string::const_iterator end = iter; for(; (end!=str.end() && (end==iter || *end!=']')); ++end) ; if(end==str.end()) - throw InvalidParameterValue("Unmatched '['"); + throw bad_regex("unmatched '['", str, begin); unsigned char mask[32] = {0}; unsigned type = 0; bool range = false; - unsigned char first=0, last = 0; + unsigned char first = 0, last = 0; for(string::const_iterator i=iter; i!=end; ++i) { unsigned char c = *i; @@ -451,7 +476,7 @@ bool Regex::run(const string &str, const string::const_iterator &begin, RegMatch input_consumed = true; } else - throw Exception("Invalid instruction"); + throw logic_error("invalid instruction in regex bytecode"); if(match_result==negate_match) terminate = true;