X-Git-Url: http://git.tdb.fi/?a=blobdiff_plain;f=source%2Fstrings%2Fregex.cpp;h=5307c6990286ba373291281fb4ee230ff81d2d07;hb=HEAD;hp=6122fe40c557c9716cca6e88e7b2926408ad29ef;hpb=b537410dcd5b7e661625b51ee72b15e40045fd18;p=libs%2Fcore.git diff --git a/source/strings/regex.cpp b/source/strings/regex.cpp index 6122fe4..5307c69 100644 --- a/source/strings/regex.cpp +++ b/source/strings/regex.cpp @@ -1,6 +1,8 @@ #include #include #include +#include +#include #include "format.h" #include "regex.h" @@ -55,8 +57,7 @@ string bad_regex::make_where(const string &e, const string::const_iterator &i) Regex::Regex(const string &expr) { - n_groups = 0; - string::const_iterator iter = expr.begin(); + auto iter = expr.begin(); code = compile(expr, iter, n_groups, false); ++n_groups; } @@ -123,13 +124,14 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns if(!has_branches) { - for(string::const_iterator i=iter; i!=end;) + for(auto i=iter; i!=end;) { Code atom = parse_atom(expr, i, group); Count repeat_min = 1; Count repeat_max = 1; - parse_repeat(expr, i, repeat_min, repeat_max); + if(i!=end) + parse_repeat(expr, i, repeat_min, repeat_max); for(unsigned j=0; j(-(atom.size()+jump_size), result); + write_int(-static_cast(atom.size()+jump_size), result); } else if(repeat_max>repeat_min) { @@ -157,8 +159,8 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns } else { - list branches; - for(string::const_iterator i=iter;;) + vector branches; + for(auto i=iter;;) { branches.push_back(compile(expr, i, group, true)); if(i==end) @@ -169,14 +171,14 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns unsigned n_branches = branches.size(); Offset offset = (n_branches-1)*jump_size+branches.front().size(); - for(list::iterator i=++branches.begin(); i!=branches.end(); ++i) + for(auto i=++branches.begin(); i!=branches.end(); ++i) { result += ND_JUMP; write_int(offset, result); offset += i->size(); } - for(list::iterator i=branches.begin(); i!=branches.end();) + for(auto i=branches.begin(); i!=branches.end();) { result += *i; offset -= i->size()+jump_size; @@ -258,7 +260,7 @@ bool Regex::parse_repeat(const string &expr, string::const_iterator &i, Count &r rmin = 0; if(*i=='{') { - string::const_iterator begin = i; + auto begin = i; rmin = 0; for(++i; isdigit(*i); ++i) @@ -291,7 +293,7 @@ bool Regex::parse_repeat(const string &expr, string::const_iterator &i, Count &r Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &iter) { - string::const_iterator begin = iter; + auto begin = iter; Code result; ++iter; @@ -302,7 +304,7 @@ Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &ite ++iter; } - string::const_iterator end = iter; + auto end = iter; for(; (end!=str.end() && (end==iter || *end!=']')); ++end) ; if(end==str.end()) throw bad_regex("unmatched '['", str, begin); @@ -311,7 +313,7 @@ Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &ite unsigned type = 0; bool range = false; unsigned char first = 0, last = 0; - for(string::const_iterator i=iter; i!=end; ++i) + for(auto i=iter; i!=end; ++i) { unsigned char c = *i; if(range) @@ -364,16 +366,16 @@ Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &ite RegMatch Regex::match(const string &str) const { - RegMatch::GroupArray groups(n_groups); + vector groups(n_groups); - for(string::const_iterator i=str.begin(); i!=str.end(); ++i) + for(auto i=str.begin(); i!=str.end(); ++i) if(run(str, i, groups)) return RegMatch(str, groups); return RegMatch(); } -bool Regex::run(const string &str, const string::const_iterator &begin, RegMatch::GroupArray &groups) const +bool Regex::run(const string &str, const string::const_iterator &begin, vector &groups) const { bool result = false; list ctx; @@ -381,7 +383,7 @@ bool Regex::run(const string &str, const string::const_iterator &begin, RegMatch ctx.front().citer = code.begin(); ctx.front().groups.resize(groups.size()); - for(string::const_iterator i=begin;;) + for(auto i=begin;;) { int c; if(i!=str.end()) @@ -389,7 +391,7 @@ bool Regex::run(const string &str, const string::const_iterator &begin, RegMatch else c = -1; - for(list::iterator j=ctx.begin(); j!=ctx.end();) + for(auto j=ctx.begin(); j!=ctx.end();) { bool terminate = false; bool negate_match = false; @@ -476,7 +478,7 @@ bool Regex::run(const string &str, const string::const_iterator &begin, RegMatch input_consumed = true; } else - throw logic_error("invalid instruction in regex bytecode"); + throw internal_error("invalid instruction in regex bytecode"); if(match_result==negate_match) terminate = true; @@ -513,7 +515,7 @@ bool Regex::group_compare(const RegMatch::Group &g1, const RegMatch::Group &g2) // Earlier match is better if(g1.beging2.begin) + if(g1.begin>g2.begin) return false; // Longer match at same position is better @@ -524,9 +526,9 @@ string Regex::disassemble() const { string result; - for(Code::const_iterator i=code.begin(); i!=code.end();) + for(auto i=code.begin(); i!=code.end();) { - Code::const_iterator j = i; + auto j = i; Offset offset = i-code.begin(); string decompiled = disassemble_instruction(i); string bytes;