X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fregex.cpp;h=9ba089d09008c7fd1c23a9d700051984eab11362;hp=ae3576ce5423930c4cfb225a51fbe42f649d3141;hb=cc69674bf670178a8076f2ce9740c2e60c4644c9;hpb=79d472ad3fde75de2eba2487579b047d35e56978 diff --git a/source/regex.cpp b/source/regex.cpp index ae3576c..9ba089d 100644 --- a/source/regex.cpp +++ b/source/regex.cpp @@ -4,15 +4,15 @@ This file is part of libmspstrings Copyright © 2007 Mikko Rasa Distributed under the LGPL */ + #include -#include +#include +#include #include "formatter.h" #include "regex.h" using namespace std; -#include - namespace { /** @@ -86,11 +86,21 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns bool has_branches=false; unsigned level=0; bool escape=false; + unsigned bracket=0; string::const_iterator end; for(end=iter; end!=expr.end(); ++end) { if(escape) escape=false; + else if(bracket) + { + if(bracket==3 && *end==']') + bracket=0; + else if(bracket==1 && *end=='^') + bracket=2; + else + bracket=3; + } else if(*end=='\\') escape=true; else if(*end=='(') @@ -106,13 +116,15 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns } --level; } - else if(*end=='|') + else if(*end=='|' && level==0) { if(branch) break; - else if(level==0) + else has_branches=true; } + else if(*end=='[') + bracket=1; } if(level>0) @@ -246,8 +258,7 @@ Regex::Code Regex::parse_atom(const string &expr, string::const_iterator &i, uns if(flag) { - if(static_cast(*i)<=LAST_INSTRUCTION_) - result+=MATCH_CHAR; + result+=MATCH_CHAR; result+=*i; } @@ -309,14 +320,14 @@ Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &ite } string::const_iterator end=iter; - for(; (end!=str.end() && *end!=']'); ++end); + for(; (end!=str.end() && (end==iter || *end!=']')); ++end) ; if(end==str.end()) throw InvalidParameterValue("Unmatched '['"); - uint8_t mask[32]={0}; + unsigned char mask[32]={0}; unsigned type=0; bool range=false; - unsigned char first, last; + unsigned char first=0, last=0; for(string::const_iterator i=iter; i!=end; ++i) { unsigned char c=*i; @@ -390,11 +401,7 @@ bool Regex::run(const string &str, const string::const_iterator &begin, RegMatch bool negate_match=false; for(; j->citer!=code.end();) { - Instruction instr=static_cast(*j->citer); - if(instr>LAST_INSTRUCTION_) - instr=MATCH_CHAR; - else - ++j->citer; + Instruction instr=static_cast(*j->citer++); if(instr==NEGATE) negate_match=true; @@ -461,11 +468,13 @@ bool Regex::run(const string &str, const string::const_iterator &begin, RegMatch } else if(instr==MATCH_MASK) { - uint8_t mask[32]; - for(unsigned k=0; k<32; ++k) - mask[k]=*j->citer++; - match_result=mask[c>>3]&(1<<(c&7)); - input_consumed=true; + if(c>=0 && c<=0xFF) + { + unsigned char m=*(j->citer+(c>>3)); + match_result=m&(1<<(c&7)); + input_consumed=true; + j->citer+=32; + } } else if(instr==MATCH_ANY) { @@ -502,7 +511,7 @@ bool Regex::group_compare(const RegMatch::Group &g1, const RegMatch::Group &g2) { if(!g1.match) return false; - + // Any match is better than no match if(!g2.match) return true; @@ -519,11 +528,7 @@ bool Regex::group_compare(const RegMatch::Group &g1, const RegMatch::Group &g2) string Regex::disassemble_instruction(Code::const_iterator &i) const { - Instruction instr=static_cast(*i); - if(instr>=LAST_INSTRUCTION_) - instr=MATCH_CHAR; - else - ++i; + Instruction instr=static_cast(*i++); ostringstream result; switch(instr) @@ -577,8 +582,7 @@ string Regex::disassemble_instruction(Code::const_iterator &i) const case MATCH_ANY: result<<"MATCH_ANY"; break; - case FIRST_INSTRUCTION_: - case LAST_INSTRUCTION_: + default: result<<"UNKNOWN "<