git.tdb.fi Git - libs/core.git/commitdiff
Improve exceptions for Regex and RegMatch
author Mikko Rasa <tdb@tdb.fi>
Wed, 8 Jun 2011 13:07:31 +0000 (16:07 +0300)
committer Mikko Rasa <tdb@tdb.fi>
Wed, 8 Jun 2011 13:07:31 +0000 (16:07 +0300)
source/strings/regex.cpp
source/strings/regex.h
source/strings/regmatch.cpp

index bfd25c7036bffc88fbe5ca4a7db0a8057295c6de..5b047647a1d0b8f9a1b640c63bf2462567503832 100644 (file)
@@ -1,6 +1,6 @@
-#include <stack>
 #include <limits>
-#include <msp/core/except.h>
+#include <list>
+#include <stack>
 #include "format.h"
 #include "regex.h"
 
@@ -31,6 +31,28 @@ T read_int(Msp::Regex::Code::const_iterator &c)
 
 namespace Msp {
 
+bad_regex::bad_regex(const string &w, const string &e, const string::const_iterator &i):  // w: error description, e: the expression, i: error position within e
+       logic_error(w+"\n"+make_where(e, i))  // message = description, newline, then the excerpt with a caret line from make_where
+{ }
+
+string bad_regex::make_where(const string &e, const string::const_iterator &i)
+{
+       // Show at most 60 characters of the expression, keeping up to 40 of
+       // them before the error position, and put a caret under that position.
+       const string::size_type max_lead = 40;
+       const string::size_type max_width = 60;
+       const string::size_type pos = i-e.begin();
+       const string::size_type start = (pos>max_lead ? pos-max_lead : 0);
+
+       string where = e.substr(start, max_width);
+       where += '\n';
+       // The caret column is relative to the start of the excerpt.
+       where.append(pos-start, ' ');
+       where += '^';
+       return where;
+}
+
+
 Regex::Regex(const string &expr)
 {
        n_groups = 0;
@@ -42,7 +64,7 @@ Regex::Regex(const string &expr)
 Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, unsigned &group, bool branch)
 {
        bool has_branches = false;
-       unsigned level = 0;
+       stack<string::const_iterator> parens;
        bool escape = false;
        unsigned bracket = 0;
        string::const_iterator end;
@@ -62,19 +84,19 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns
                else if(*end=='\\')
                        escape = true;
                else if(*end=='(')
-                       ++level;
+                       parens.push(end);
                else if(*end==')')
                {
-                       if(level==0)
+                       if(parens.empty())
                        {
                                if(group==0)
-                                       throw InvalidParameterValue("Unexpected )");
+                                       throw bad_regex("unmatched ')'", expr, end);
                                else
                                        break;
                        }
-                       --level;
+                       parens.pop();
                }
-               else if(*end=='|' && level==0)
+               else if(*end=='|' && parens.empty())
                {
                        if(branch)
                                break;
@@ -85,8 +107,8 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns
                        bracket = 1;
        }
 
-       if(level>0)
-               throw InvalidParameterValue("Unmatched (");
+       if(!parens.empty())
+               throw bad_regex("unmatched '('", expr, parens.top());
 
        Code result;
 
@@ -107,7 +129,7 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns
 
                        Count repeat_min = 1;
                        Count repeat_max = 1;
-                       parse_repeat(i, repeat_min, repeat_max);
+                       parse_repeat(expr, i, repeat_min, repeat_max);
 
                        for(unsigned j=0; j<repeat_min; ++j)
                                result += atom;
@@ -189,14 +211,14 @@ Regex::Code Regex::parse_atom(const string &expr, string::const_iterator &i, uns
        if(*i=='\\')
        {
                if(++i==expr.end())
-                       throw InvalidParameterValue("Stray backslash");
+                       throw bad_regex("stray backslash", expr, i-1);
                flag = true;
        }
 
        if(!flag)
        {
                if(*i=='*' || *i=='{' || *i=='}' || *i=='+' || *i=='?' || *i=='|' || *i==')')
-                       throw InvalidParameterValue("Invalid atom");
+                       throw bad_regex("invalid atom", expr, i);
                else if(*i=='[')
                        return parse_brackets(expr, i);
                else if(*i=='.')
@@ -225,7 +247,7 @@ Regex::Code Regex::parse_atom(const string &expr, string::const_iterator &i, uns
        return result;
 }
 
-bool Regex::parse_repeat(string::const_iterator &i, Count &rmin, Count &rmax)
+bool Regex::parse_repeat(const string &expr, string::const_iterator &i, Count &rmin, Count &rmax)
 {
        if(*i!='*' && *i!='+' && *i!='?' && *i!='{')
                return false;
@@ -236,6 +258,8 @@ bool Regex::parse_repeat(string::const_iterator &i, Count &rmin, Count &rmax)
                rmin = 0;
        if(*i=='{')
        {
+               string::const_iterator begin = i;
+
                rmin = 0;
                for(++i; isdigit(*i); ++i)
                        rmin = rmin*10+(*i-'0');
@@ -249,7 +273,7 @@ bool Regex::parse_repeat(string::const_iterator &i, Count &rmin, Count &rmax)
                                for(; isdigit(*i); ++i)
                                        rmax = rmax*10+(*i-'0');
                                if(rmax<rmin)
-                                       throw InvalidParameterValue("Invalid bound");
+                                       throw bad_regex("invalid bound", expr, begin);
                        }
                        else
                                rmax = numeric_limits<Count>::max();
@@ -257,7 +281,7 @@ bool Regex::parse_repeat(string::const_iterator &i, Count &rmin, Count &rmax)
                else
                        rmax = rmin;
                if(*i!='}')
-                       throw InvalidParameterValue("Invalid bound");
+                       throw bad_regex("invalid bound", expr, begin);
        }
 
        ++i;
@@ -267,6 +291,7 @@ bool Regex::parse_repeat(string::const_iterator &i, Count &rmin, Count &rmax)
 
 Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &iter)
 {
+       string::const_iterator begin = iter;
        Code result;
 
        ++iter;
@@ -280,7 +305,7 @@ Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &ite
        string::const_iterator end = iter;
        for(; (end!=str.end() && (end==iter || *end!=']')); ++end) ;
        if(end==str.end())
-               throw InvalidParameterValue("Unmatched '['");
+               throw bad_regex("unmatched '['", str, begin);
 
        unsigned char mask[32] = {0};
        unsigned type = 0;
@@ -451,7 +476,7 @@ bool Regex::run(const string &str, const string::const_iterator &begin, RegMatch
                                                input_consumed = true;
                                        }
                                        else
-                                               throw Exception("Invalid instruction");
+                                               throw logic_error("invalid instruction in regex bytecode");
 
                                        if(match_result==negate_match)
                                                terminate = true;
index 5b809ab3c363415382a66bbbcf855140a47c5329..67925023e544e76cd3f594addd0693d2b860d2c4 100644 (file)
@@ -1,11 +1,23 @@
 #ifndef MSP_STRINGS_REGEX_H_
 #define MSP_STRINGS_REGEX_H_
 
+#include <stdexcept>
 #include <string>
 #include "regmatch.h"
 
 namespace Msp {
 
+class bad_regex: public std::logic_error  // Thrown by Regex when an expression fails to compile.
+{
+public:
+       bad_regex(const std::string &, const std::string &, const std::string::const_iterator &);  // description, expression, error position in expression
+       virtual ~bad_regex() throw() { }
+
+private:  // make_where is static: the constructor calls it while initializing the logic_error base, before any member function may be called on *this
+       static std::string make_where(const std::string &, const std::string::const_iterator &);  // returns an excerpt of the expression plus a caret line
+};
+
+
 /**
 This class provides regular expression matching.  It supports a subset of
 POSIX.2 extended regex syntax.  Character classes, equivalence classes and
@@ -108,7 +120,7 @@ private:
 
        Code parse_atom(const std::string &, std::string::const_iterator &i, unsigned &);
        Code parse_brackets(const std::string &, std::string::const_iterator &);
-       bool parse_repeat(std::string::const_iterator &, Count &, Count &);
+       bool parse_repeat(const std::string &, std::string::const_iterator &, Count &, Count &);
 
 public:
        /** Matches the regex against a string.  Refer to RegMatch documentation for
index 61bed84d459f01f4343f75362a5b488ec1bd0334..4799cfbd4e536b1634ba48b5d5bf625e8da06eb6 100644 (file)
@@ -1,4 +1,4 @@
-#include <msp/core/except.h>
+#include <stdexcept>
 #include "regmatch.h"
 
 using namespace std;
@@ -19,7 +19,7 @@ RegMatch::RegMatch(const string &str, const GroupArray &g):
 const RegMatch::Group &RegMatch::group(unsigned i) const
 {
        if(i>=groups.size())
-               throw InvalidParameterValue("Group index out of range");
+               throw out_of_range("RegMatch::group");
        return groups[i];
 }