X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fregex.cpp;h=9ba089d09008c7fd1c23a9d700051984eab11362;hp=320705e8e33dc1cecbca03a26adab78e9d498805;hb=cc69674bf670178a8076f2ce9740c2e60c4644c9;hpb=df1a66180655104af15f9bb6067b4eaa1421f5ff diff --git a/source/regex.cpp b/source/regex.cpp index 320705e..9ba089d 100644 --- a/source/regex.cpp +++ b/source/regex.cpp @@ -4,8 +4,10 @@ This file is part of libmspstrings Copyright © 2007 Mikko Rasa Distributed under the LGPL */ + #include -#include +#include +#include #include "formatter.h" #include "regex.h" @@ -84,11 +86,21 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns bool has_branches=false; unsigned level=0; bool escape=false; + unsigned bracket=0; string::const_iterator end; for(end=iter; end!=expr.end(); ++end) { if(escape) escape=false; + else if(bracket) + { + if(bracket==3 && *end==']') + bracket=0; + else if(bracket==1 && *end=='^') + bracket=2; + else + bracket=3; + } else if(*end=='\\') escape=true; else if(*end=='(') @@ -104,13 +116,15 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns } --level; } - else if(*end=='|') + else if(*end=='|' && level==0) { if(branch) break; - else if(level==0) + else has_branches=true; } + else if(*end=='[') + bracket=1; } if(level>0) @@ -244,8 +258,7 @@ Regex::Code Regex::parse_atom(const string &expr, string::const_iterator &i, uns if(flag) { - if(static_cast(*i)<=LAST_INSTRUCTION_) - result+=MATCH_CHAR; + result+=MATCH_CHAR; result+=*i; } @@ -307,11 +320,11 @@ Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &ite } string::const_iterator end=iter; - for(; (end!=str.end() && (end==iter || *end!=']')); ++end); + for(; (end!=str.end() && (end==iter || *end!=']')); ++end) ; if(end==str.end()) throw InvalidParameterValue("Unmatched '['"); - uint8_t mask[32]={0}; + unsigned char mask[32]={0}; unsigned type=0; bool range=false; unsigned char first=0, last=0; @@ -388,11 +401,7 @@ bool Regex::run(const string &str, const string::const_iterator &begin, RegMatch bool negate_match=false; for(; j->citer!=code.end();) { - Instruction instr=static_cast(*j->citer); - if(instr>LAST_INSTRUCTION_) - instr=MATCH_CHAR; - else - ++j->citer; + Instruction instr=static_cast(*j->citer++); if(instr==NEGATE) negate_match=true; @@ -459,11 +468,13 @@ bool Regex::run(const string &str, const string::const_iterator &begin, RegMatch } else if(instr==MATCH_MASK) { - uint8_t mask[32]; - for(unsigned k=0; k<32; ++k) - mask[k]=*j->citer++; - match_result=mask[c>>3]&(1<<(c&7)); - input_consumed=true; + if(c>=0 && c<=0xFF) + { + unsigned char m=*(j->citer+(c>>3)); + match_result=m&(1<<(c&7)); + input_consumed=true; + j->citer+=32; + } } else if(instr==MATCH_ANY) { @@ -517,11 +528,7 @@ bool Regex::group_compare(const RegMatch::Group &g1, const RegMatch::Group &g2) string Regex::disassemble_instruction(Code::const_iterator &i) const { - Instruction instr=static_cast(*i); - if(instr>=LAST_INSTRUCTION_) - instr=MATCH_CHAR; - else - ++i; + Instruction instr=static_cast(*i++); ostringstream result; switch(instr) @@ -575,8 +582,7 @@ string Regex::disassemble_instruction(Code::const_iterator &i) const case MATCH_ANY: result<<"MATCH_ANY"; break; - case FIRST_INSTRUCTION_: - case LAST_INSTRUCTION_: + default: result<<"UNKNOWN "<