return RegMatch();
}
// Regex::run: executes the compiled bytecode in `code` against `str`, starting
// at position `begin`, by advancing a set of execution contexts in lockstep
// over the input.
//
// Returns true if any context closed group 0 (see GROUP_END below).  Before
// returning, the group set selected as best is copied into out_groups;
// out_groups.size() determines how many groups each context tracks.
//
// NOTE: this text is a diff fragment.  Lines starting with '-' belong to the
// old std::list based implementation, lines starting with '+' to the new
// std::vector based one, and some unchanged lines are not shown at all.  The
// comments describe the new ('+') version.
-bool Regex::run(const string &str, const string::const_iterator &begin, vector<RegMatch::Group> &groups) const
+bool Regex::run(const string &str, const string::const_iterator &begin, vector<RegMatch::Group> &out_groups) const
{
bool result = false;
- list<RunContext> ctx;
- ctx.push_back(RunContext());
- ctx.front().citer = code.begin();
- ctx.front().groups.resize(groups.size());
// Contexts live in a vector.  Only the first ctx_count entries are active;
// entries beyond that are kept around so a later fork can reuse them.
+ vector<RunContext> ctx(1);
+ ctx.front().code_iter = code.begin();
// Group arrays of all contexts share this one buffer.  Each context uses
// out_groups.size() consecutive entries starting at its groups_index.
// Presumably the first context's groups_index defaults to 0; RunContext's
// definition is not visible here, so confirm.
+ vector<RegMatch::Group> groups(out_groups.size());
+ size_t ctx_count = 1;
// Offset into `groups` of the group set currently considered the best match.
+ size_t best_groups = 0;
- for(auto i=begin;;)
// One iteration per input position.  The increment now lives in the loop
// header instead of at the end of the body.  The body also runs once with
// i==str.end() (c is -1 then) before the loop is left.
+ for(auto i=begin;; ++i)
{
int c;
if(i!=str.end())
else
c = -1;
- for(auto j=ctx.begin(); j!=ctx.end();)
// Advance every active context.  Contexts forked by ND_JUMP are placed at
// index ctx_count and ctx_count is incremented, so this same loop reaches
// them as well.  ctx is always accessed by index because emplace_back below
// may reallocate the vector.
+ for(size_t j=0; j<ctx_count; ++j)
{
- bool terminate = false;
bool negate_match = false;
- for(; j->citer!=code.end();)
+ while(ctx[j].code_iter!=code.end())
{
- Instruction instr = static_cast<Instruction>(*j->citer++);
+ Instruction instr = static_cast<Instruction>(*ctx[j].code_iter++);
if(instr==NEGATE)
negate_match = true;
else if(instr==JUMP)
{
- Offset offset = read_int<Offset>(j->citer);
- j->citer += offset;
+ Offset offset = read_int<Offset>(ctx[j].code_iter);
+ ctx[j].code_iter += offset;
}
else if(instr==ND_JUMP)
{
- Offset offset = read_int<Offset>(j->citer);
- ctx.push_back(*j);
- ctx.back().citer += offset;
// Fork: a second context continues at the jump target with a copy of this
// context's groups, while this context carries on with the next instruction.
+ Offset offset = read_int<Offset>(ctx[j].code_iter);
// No spare slot left: add a context and a matching region at the end of
// the groups buffer.  Otherwise the slot at ctx_count is reused as is,
// including its groups_index.
+ if(ctx_count>=ctx.size())
+ {
+ ctx.emplace_back();
+ ctx[ctx_count].groups_index = groups.size();
+ groups.resize(groups.size()+out_groups.size());
+ }
+ ctx[ctx_count].code_iter = ctx[j].code_iter+offset;
// The pointer is taken after the possible resize above, so it refers to
// the current storage of `groups`.
+ RegMatch::Group *groups_ptr = groups.data()+ctx[j].groups_index;
+ copy(groups_ptr, groups_ptr+out_groups.size(), groups.data()+ctx[ctx_count].groups_index);
+ ++ctx_count;
}
else if(instr==GROUP_BEGIN)
{
- Index n = read_int<Index>(j->citer);
- if(!j->groups[n].match)
- j->groups[n].begin = i-str.begin();
// Record the start offset of group n unless that group has already matched.
+ RegMatch::Group *groups_ptr = groups.data()+ctx[j].groups_index;
+ Index n = read_int<Index>(ctx[j].code_iter);
+ if(!groups_ptr[n].match)
+ groups_ptr[n].begin = i-str.begin();
}
else if(instr==GROUP_END)
{
- Index n = read_int<Index>(j->citer);
- if(!j->groups[n].match)
// Close group n the first time its end is reached, storing end and length.
+ RegMatch::Group *groups_ptr = groups.data()+ctx[j].groups_index;
+ Index n = read_int<Index>(ctx[j].code_iter);
+ if(!groups_ptr[n].match)
{
- j->groups[n].match = true;
- j->groups[n].end = i-str.begin();
- j->groups[n].length = j->groups[n].end-j->groups[n].begin;
+ groups_ptr[n].match = true;
+ groups_ptr[n].end = i-str.begin();
+ groups_ptr[n].length = groups_ptr[n].end-groups_ptr[n].begin;
}
if(n==0)
{
// Closing group 0 makes the function return true.  This context's groups
// are compared with the current best one group at a time; the scan stops
// at the first group where either side wins.  group_compare is defined
// elsewhere; presumably it returns true when its first argument is the
// preferred one.
result = true;
bool better = false;
- for(unsigned k=0; (k<groups.size() && !better); ++k)
+ const RegMatch::Group *best_ptr = groups.data()+best_groups;
+ for(unsigned k=0; (k<out_groups.size() && !better); ++k)
{
- better = group_compare(j->groups[k], groups[k]);
- if(group_compare(groups[k], j->groups[k]))
+ better = group_compare(groups_ptr[k], best_ptr[k]);
+ if(group_compare(best_ptr[k], groups_ptr[k]))
break;
}
if(better)
- groups = j->groups;
// Only the offset is remembered; unlike the old code no copy is made here.
// NOTE(review): once this context finishes, the compaction loop below moves
// it past ctx_count and a later ND_JUMP may reuse its slot, overwriting the
// groups at this offset.  Confirm the recorded best match cannot be
// clobbered before the final copy to out_groups.  Also note best_groups
// starts at 0, i.e. at the first context's live groups rather than at an
// empty set.
+ best_groups = ctx[j].groups_index;
}
}
else
match_result = (i==str.end());
else if(instr==MATCH_CHAR)
{
- match_result = (c==*j->citer++);
+ match_result = (c==*ctx[j].code_iter++);
input_consumed = true;
}
else if(instr==MATCH_RANGE)
{
- unsigned char first = *j->citer++;
- unsigned char last = *j->citer++;
+ unsigned char first = *ctx[j].code_iter++;
+ unsigned char last = *ctx[j].code_iter++;
match_result = (c>=first && c<=last);
input_consumed = true;
}
{
if(c>=0 && c<=0xFF)
{
// 32 bytes of bytecode form a 256-bit mask: byte c>>3, bit c&7.
- unsigned char m = *(j->citer+(c>>3));
+ unsigned char m = *(ctx[j].code_iter+(c>>3));
match_result = m&(1<<(c&7));
}
input_consumed = true;
- j->citer += 32;
+ ctx[j].code_iter += 32;
}
else if(instr==MATCH_ANY)
{
throw internal_error("invalid instruction in regex bytecode");
if(match_result==negate_match)
- terminate = true;
// A failed match is signalled by moving code_iter to code.end(), which
// replaces the old `terminate` flag: it ends the while loop above and
// marks the context for removal in the compaction loop below.
+ ctx[j].code_iter = code.end();
negate_match = false;
- if(input_consumed || terminate)
+ if(input_consumed)
break;
}
}
+ }
- if(terminate || j->citer==code.end())
- j = ctx.erase(j);
// Compaction: drop finished contexts by swapping each with the last active
// one and shrinking ctx_count.  j is not advanced after a removal so that
// the context swapped into position j gets checked too.
+ for(size_t j=0; j<ctx_count; )
+ {
+ if(ctx[j].code_iter==code.end())
+ {
+ if(j!=ctx_count-1)
+ swap(ctx[j], ctx[ctx_count-1]);
+ --ctx_count;
+ }
else
++j;
}
- if(i==str.end() || ctx.empty())
// Stop at the end of input or when no active contexts remain.
+ if(i==str.end() || !ctx_count)
break;
- ++i;
}
// Hand the selected group set back to the caller.
+ const RegMatch::Group *best_ptr = groups.data()+best_groups;
+ copy(best_ptr, best_ptr+out_groups.size(), out_groups.begin());
+
return result;
}