#include <limits>
#include <list>
#include <stack>
+#include <vector>
+#include <msp/core/except.h>
#include "format.h"
#include "regex.h"
/** Writes an integer to a Regex code string, in little-endian order. */
template<typename T>
-void write_int(T n, Msp::Regex::Code &code)
+void write_int(T n, basic_string<unsigned char> &code)
{
for(unsigned i=0; i<sizeof(T); ++i)
code += (n>>(i*8))&0xFF;
/** Reads an integer from a Regex code string, in little-endian order. */
template<typename T>
-T read_int(Msp::Regex::Code::const_iterator &c)
+T read_int(basic_string<unsigned char>::const_iterator &c)
{
T result = 0;
for(unsigned i=0; i<sizeof(T); ++i)
// Constructs a Regex from the expression string expr by compiling it and
// storing the result in the member `code`.
// (Patch excerpt: '-' lines are the old text, '+' lines are their replacement.)
Regex::Regex(const string &expr)
{
- n_groups = 0;
- string::const_iterator iter = expr.begin();
+ auto iter = expr.begin();
// NOTE(review): with the explicit `n_groups = 0` removed, n_groups must be
// initialized elsewhere (presumably in the class declaration) - confirm.
// compile() is given the parse position, the group counter and `false` as its
// last argument; nested branch compilation elsewhere in the file passes `true`.
code = compile(expr, iter, n_groups, false);
// One extra group is counted after compilation - presumably the implicit
// group covering the whole match; TODO confirm against RegMatch.
++n_groups;
}
if(!has_branches)
{
- for(string::const_iterator i=iter; i!=end;)
+ for(auto i=iter; i!=end;)
{
Code atom = parse_atom(expr, i, group);
Count repeat_min = 1;
Count repeat_max = 1;
- parse_repeat(expr, i, repeat_min, repeat_max);
+ if(i!=end)
+ parse_repeat(expr, i, repeat_min, repeat_max);
for(unsigned j=0; j<repeat_min; ++j)
result += atom;
result += atom;
}
result += ND_JUMP;
- write_int<Offset>(-(atom.size()+jump_size), result);
+ write_int<Offset>(-static_cast<Offset>(atom.size()+jump_size), result);
}
else if(repeat_max>repeat_min)
{
}
else
{
- list<Code> branches;
- for(string::const_iterator i=iter;;)
+ vector<Code> branches;
+ for(auto i=iter;;)
{
branches.push_back(compile(expr, i, group, true));
if(i==end)
unsigned n_branches = branches.size();
Offset offset = (n_branches-1)*jump_size+branches.front().size();
- for(list<Code>::iterator i=++branches.begin(); i!=branches.end(); ++i)
+ for(auto i=++branches.begin(); i!=branches.end(); ++i)
{
result += ND_JUMP;
write_int<Offset>(offset, result);
offset += i->size();
}
- for(list<Code>::iterator i=branches.begin(); i!=branches.end();)
+ for(auto i=branches.begin(); i!=branches.end();)
{
result += *i;
offset -= i->size()+jump_size;
rmin = 0;
if(*i=='{')
{
- string::const_iterator begin = i;
+ auto begin = i;
rmin = 0;
for(++i; isdigit(*i); ++i)
Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &iter)
{
- string::const_iterator begin = iter;
+ auto begin = iter;
Code result;
++iter;
++iter;
}
- string::const_iterator end = iter;
+ auto end = iter;
for(; (end!=str.end() && (end==iter || *end!=']')); ++end) ;
if(end==str.end())
throw bad_regex("unmatched '['", str, begin);
unsigned type = 0;
bool range = false;
unsigned char first = 0, last = 0;
- for(string::const_iterator i=iter; i!=end; ++i)
+ for(auto i=iter; i!=end; ++i)
{
unsigned char c = *i;
if(range)
// Searches str for a match of this regex.
// Calls run() at each start position from str.begin() up to, but not
// including, str.end(). Returns a RegMatch built from str and the group array
// for the first position where run() returns true, or a default-constructed
// RegMatch if no position succeeds.
// (Patch excerpt: '-' lines are the old text, '+' lines are their replacement.)
RegMatch Regex::match(const string &str) const
{
// One group slot per counted group; passed to run() by reference.
- RegMatch::GroupArray groups(n_groups);
+ vector<RegMatch::Group> groups(n_groups);
// NOTE(review): run() is never tried at str.end(), so an empty str always
// yields the default RegMatch, even for an expression that could match the
// empty string - confirm this is intended.
- for(string::const_iterator i=str.begin(); i!=str.end(); ++i)
+ for(auto i=str.begin(); i!=str.end(); ++i)
if(run(str, i, groups))
return RegMatch(str, groups);
return RegMatch();
}
-bool Regex::run(const string &str, const string::const_iterator &begin, RegMatch::GroupArray &groups) const
+bool Regex::run(const string &str, const string::const_iterator &begin, vector<RegMatch::Group> &groups) const
{
bool result = false;
list<RunContext> ctx;
ctx.front().citer = code.begin();
ctx.front().groups.resize(groups.size());
- for(string::const_iterator i=begin;;)
+ for(auto i=begin;;)
{
int c;
if(i!=str.end())
else
c = -1;
- for(list<RunContext>::iterator j=ctx.begin(); j!=ctx.end();)
+ for(auto j=ctx.begin(); j!=ctx.end();)
{
bool terminate = false;
bool negate_match = false;
input_consumed = true;
}
else
- throw logic_error("invalid instruction in regex bytecode");
+ throw internal_error("invalid instruction in regex bytecode");
if(match_result==negate_match)
terminate = true;
// Earlier match is better
if(g1.begin<g2.begin)
return true;
- if(g2.begin>g2.begin)
+ if(g1.begin>g2.begin)
return false;
// Longer match at same position is better
{
string result;
- for(Code::const_iterator i=code.begin(); i!=code.end();)
+ for(auto i=code.begin(); i!=code.end();)
{
- Code::const_iterator j = i;
+ auto j = i;
Offset offset = i-code.begin();
string decompiled = disassemble_instruction(i);
string bytes;