git.tdb.fi Git - libs/core.git/blobdiff - source/regex.cpp
Fix octal escape generation in c_escape
[libs/core.git] / source / regex.cpp
index ae3576ce5423930c4cfb225a51fbe42f649d3141..9ba089d09008c7fd1c23a9d700051984eab11362 100644 (file)
@@ -4,15 +4,15 @@ This file is part of libmspstrings
 Copyright © 2007 Mikko Rasa
 Distributed under the LGPL
 */
+
 #include <stack>
-#include <msp/core/error.h>
+#include <limits>
+#include <msp/core/except.h>
 #include "formatter.h"
 #include "regex.h"
 
 using namespace std;
 
-#include <iostream>
-
 namespace {
 
 /**
@@ -86,11 +86,21 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns
        bool has_branches=false;
        unsigned level=0;
        bool escape=false;
+       unsigned bracket=0;
        string::const_iterator end;
        for(end=iter; end!=expr.end(); ++end)
        {
                if(escape)
                        escape=false;
+               else if(bracket)
+               {
+                       if(bracket==3 && *end==']')
+                               bracket=0;
+                       else if(bracket==1 && *end=='^')
+                               bracket=2;
+                       else
+                               bracket=3;
+               }
                else if(*end=='\\')
                        escape=true;
                else if(*end=='(')
@@ -106,13 +116,15 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns
                        }
                        --level;
                }
-               else if(*end=='|')
+               else if(*end=='|' && level==0)
                {
                        if(branch)
                                break;
-                       else if(level==0)
+                       else
                                has_branches=true;
                }
+               else if(*end=='[')
+                       bracket=1;
        }
 
        if(level>0)
@@ -246,8 +258,7 @@ Regex::Code Regex::parse_atom(const string &expr, string::const_iterator &i, uns
 
        if(flag)
        {
-               if(static_cast<unsigned char>(*i)<=LAST_INSTRUCTION_)
-                       result+=MATCH_CHAR;
+               result+=MATCH_CHAR;
                result+=*i;
        }
 
@@ -309,14 +320,14 @@ Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &ite
        }
 
        string::const_iterator end=iter;
-       for(; (end!=str.end() && *end!=']'); ++end);
+       for(; (end!=str.end() && (end==iter || *end!=']')); ++end) ;
        if(end==str.end())
                throw InvalidParameterValue("Unmatched '['");
 
-       uint8_t mask[32]={0};
+       unsigned char mask[32]={0};
        unsigned type=0;
        bool range=false;
-       unsigned char first, last;
+       unsigned char first=0, last=0;
        for(string::const_iterator i=iter; i!=end; ++i)
        {
                unsigned char c=*i;
@@ -390,11 +401,7 @@ bool Regex::run(const string &str, const string::const_iterator &begin, RegMatch
                        bool negate_match=false;
                        for(; j->citer!=code.end();)
                        {
-                               Instruction instr=static_cast<Instruction>(*j->citer);
-                               if(instr>LAST_INSTRUCTION_)
-                                       instr=MATCH_CHAR;
-                               else
-                                       ++j->citer;
+                               Instruction instr=static_cast<Instruction>(*j->citer++);
 
                                if(instr==NEGATE)
                                        negate_match=true;
@@ -461,11 +468,13 @@ bool Regex::run(const string &str, const string::const_iterator &begin, RegMatch
                                        }
                                        else if(instr==MATCH_MASK)
                                        {
-                                               uint8_t mask[32];
-                                               for(unsigned k=0; k<32; ++k)
-                                                       mask[k]=*j->citer++;
-                                               match_result=mask[c>>3]&(1<<(c&7));
-                                               input_consumed=true;
+                                               if(c>=0 && c<=0xFF)
+                                               {
+                                                       unsigned char m=*(j->citer+(c>>3));
+                                                       match_result=m&(1<<(c&7));
+                                                       input_consumed=true;
+                                                       j->citer+=32;
+                                               }
                                        }
                                        else if(instr==MATCH_ANY)
                                        {
@@ -502,7 +511,7 @@ bool Regex::group_compare(const RegMatch::Group &g1, const RegMatch::Group &g2)
 {
        if(!g1.match)
                return false;
-       
+
        // Any match is better than no match
        if(!g2.match)
                return true;
@@ -519,11 +528,7 @@ bool Regex::group_compare(const RegMatch::Group &g1, const RegMatch::Group &g2)
 
 string Regex::disassemble_instruction(Code::const_iterator &i) const
 {
-       Instruction instr=static_cast<Instruction>(*i);
-       if(instr>=LAST_INSTRUCTION_)
-               instr=MATCH_CHAR;
-       else
-               ++i;
+       Instruction instr=static_cast<Instruction>(*i++);
 
        ostringstream result;
        switch(instr)
@@ -577,8 +582,7 @@ string Regex::disassemble_instruction(Code::const_iterator &i) const
        case MATCH_ANY:
                result<<"MATCH_ANY";
                break;
-       case FIRST_INSTRUCTION_:
-       case LAST_INSTRUCTION_:
+       default:
                result<<"UNKNOWN "<<instr;
        }