X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fstrings%2Fregex.cpp;h=67157c9b185e434adf995ec75eeb3f122869ea9a;hp=5ee2fcc5cf926d76e10b3a1b46a7912092e46d9c;hb=dce2985e07e6184f000ef176451150710e21ee35;hpb=967785734be5c3fc6f75da122c2d93ebbb338271 diff --git a/source/strings/regex.cpp b/source/strings/regex.cpp index 5ee2fcc..67157c9 100644 --- a/source/strings/regex.cpp +++ b/source/strings/regex.cpp @@ -1,6 +1,6 @@ -#include #include -#include +#include +#include #include "format.h" #include "regex.h" @@ -13,16 +13,16 @@ template void write_int(T n, Msp::Regex::Code &code) { for(unsigned i=0; i>i*8)&0xFF; + code += (n>>(i*8))&0xFF; } -/** Reads an integer from a Regex code stream, in little-endian order. */ +/** Reads an integer from a Regex code string, in little-endian order. */ template T read_int(Msp::Regex::Code::const_iterator &c) { T result = 0; for(unsigned i=0; i(*c++)<40) + { + result = e.substr(offset-40, 60); + offset = 40; + } + else + result = e.substr(0, 60); + result += '\n'; + result.append(offset, ' '); + result += '^'; + return result; +} + + Regex::Regex(const string &expr) { n_groups = 0; @@ -42,7 +64,7 @@ Regex::Regex(const string &expr) Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, unsigned &group, bool branch) { bool has_branches = false; - unsigned level = 0; + stack parens; bool escape = false; unsigned bracket = 0; string::const_iterator end; @@ -62,19 +84,19 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns else if(*end=='\\') escape = true; else if(*end=='(') - ++level; + parens.push(end); else if(*end==')') { - if(level==0) + if(parens.empty()) { if(group==0) - throw InvalidParameterValue("Unexpected )"); + throw bad_regex("unmatched ')'", expr, end); else break; } - --level; + parens.pop(); } - else if(*end=='|' && level==0) + else if(*end=='|' && parens.empty()) { if(branch) break; @@ -85,8 +107,8 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns bracket = 1; } - if(level>0) - throw InvalidParameterValue("Unmatched ("); + if(!parens.empty()) + throw bad_regex("unmatched '('", expr, parens.top()); Code result; @@ -107,7 +129,7 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns Count repeat_min = 1; Count repeat_max = 1; - parse_repeat(i, repeat_min, repeat_max); + parse_repeat(expr, i, repeat_min, repeat_max); for(unsigned j=0; j::max(); @@ -257,7 +281,7 @@ bool Regex::parse_repeat(string::const_iterator &i, Count &rmin, Count &rmax) else rmax = rmin; if(*i!='}') - throw InvalidParameterValue("Invalid bound"); + throw bad_regex("invalid bound", expr, begin); } ++i; @@ -267,6 +291,7 @@ bool Regex::parse_repeat(string::const_iterator &i, Count &rmin, Count &rmax) Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &iter) { + string::const_iterator begin = iter; Code result; ++iter; @@ -280,12 +305,12 @@ Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &ite string::const_iterator end = iter; for(; (end!=str.end() && (end==iter || *end!=']')); ++end) ; if(end==str.end()) - throw InvalidParameterValue("Unmatched '['"); + throw bad_regex("unmatched '['", str, begin); unsigned char mask[32] = {0}; unsigned type = 0; bool range = false; - unsigned char first=0, last = 0; + unsigned char first = 0, last = 0; for(string::const_iterator i=iter; i!=end; ++i) { unsigned char c = *i; @@ -328,7 +353,7 @@ Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &ite else { result += MATCH_MASK; - result.append(reinterpret_cast(mask), 32); + result.append(mask, 32); } iter = end; @@ -360,7 +385,7 @@ bool Regex::run(const string &str, const string::const_iterator &begin, RegMatch { int c; if(i!=str.end()) - c = static_cast(*i); + c = *i; else c = -1; @@ -451,7 +476,7 @@ bool Regex::run(const string &str, const string::const_iterator &begin, RegMatch input_consumed = true; } else - throw Exception("Invalid instruction"); + throw logic_error("invalid instruction in regex bytecode"); if(match_result==negate_match) terminate = true; @@ -497,7 +522,7 @@ bool Regex::group_compare(const RegMatch::Group &g1, const RegMatch::Group &g2) string Regex::disassemble() const { - ostringstream ss; + string result; for(Code::const_iterator i=code.begin(); i!=code.end();) { @@ -506,77 +531,70 @@ string Regex::disassemble() const string decompiled = disassemble_instruction(i); string bytes; for(; j!=i; ++j) - bytes += format(" %02X", static_cast(*j)&0xFF); - ss<9) - ss<<"\n"<(*i++); - ostringstream result; switch(instr) { case JUMP: { Offset offset = read_int(i); - result<<"JUMP "<(i); - result<<"ND_JUMP "<(i); - break; + return format("GROUP_BEGIN %d", read_int(i)); case GROUP_END: - result<<"GROUP_END "<(i); - break; + return format("GROUP_END %d", read_int(i)); case NEGATE: - result<<"NEGATE"; - break; + return "NEGATE"; case MATCH_BEGIN: - result<<"MATCH_BEGIN"; - break; + return "MATCH_BEGIN"; case MATCH_END: - result<<"MATCH_END"; - break; + return "MATCH_END"; case MATCH_CHAR: { - char c = *i++; - result<<"MATCH_CHAR "; + unsigned char c = *i++; if(c>=0x20 && c<=0x7E) - result<<'\''<(c)&0xFF); + return format("MATCH_CHAR %d", c); } break; case MATCH_RANGE: - result<<"MATCH_RANGE "<<(static_cast(*i++)&0xFF); - result<<'-'<<(static_cast(*i++)&0xFF); - break; + { + int begin = *i++; + int end = *i++; + return format("MATCH_RANGE %d-%d", begin, end); + } case MATCH_MASK: - result<<"MATCH_MASK"; - for(unsigned j=0; j<32; ++j) - result<<' '<(*i++)&0xFF); - break; + { + string result = "MATCH_MASK"; + for(unsigned j=0; j<32; ++j) + result += format(" %02X", *i++); + return result; + } case MATCH_ANY: - result<<"MATCH_ANY"; - break; + return "MATCH_ANY"; default: - result<<"UNKNOWN "<