From 30a1bd63b44a9d59f02231ed7b013164e957da52 Mon Sep 17 00:00:00 2001 From: Mikko Rasa Date: Wed, 8 Jun 2011 16:07:31 +0300 Subject: [PATCH] Improve exceptions for Regex and RegMatch --- source/strings/regex.cpp | 61 ++++++++++++++++++++++++++----------- source/strings/regex.h | 14 ++++++++- source/strings/regmatch.cpp | 4 +-- 3 files changed, 58 insertions(+), 21 deletions(-) diff --git a/source/strings/regex.cpp b/source/strings/regex.cpp index bfd25c7..5b04764 100644 --- a/source/strings/regex.cpp +++ b/source/strings/regex.cpp @@ -1,6 +1,6 @@ -#include #include -#include +#include +#include #include "format.h" #include "regex.h" @@ -31,6 +31,28 @@ T read_int(Msp::Regex::Code::const_iterator &c) namespace Msp { +bad_regex::bad_regex(const string &w, const string &e, const string::const_iterator &i): + logic_error(w+"\n"+make_where(e, i)) +{ } + +string bad_regex::make_where(const string &e, const string::const_iterator &i) +{ + string result; + string::size_type offset = i-e.begin(); + if(offset>40) + { + result = e.substr(offset-40, 60); + offset = 40; + } + else + result = e.substr(0, 60); + result += '\n'; + result.append(offset, ' '); + result += '^'; + return result; +} + + Regex::Regex(const string &expr) { n_groups = 0; @@ -42,7 +64,7 @@ Regex::Regex(const string &expr) Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, unsigned &group, bool branch) { bool has_branches = false; - unsigned level = 0; + stack parens; bool escape = false; unsigned bracket = 0; string::const_iterator end; @@ -62,19 +84,19 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns else if(*end=='\\') escape = true; else if(*end=='(') - ++level; + parens.push(end); else if(*end==')') { - if(level==0) + if(parens.empty()) { if(group==0) - throw InvalidParameterValue("Unexpected )"); + throw bad_regex("unmatched ')'", expr, end); else break; } - --level; + parens.pop(); } - else if(*end=='|' && level==0) + else if(*end=='|' && parens.empty()) { if(branch) break; @@ -85,8 +107,8 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns bracket = 1; } - if(level>0) - throw InvalidParameterValue("Unmatched ("); + if(!parens.empty()) + throw bad_regex("unmatched '('", expr, parens.top()); Code result; @@ -107,7 +129,7 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns Count repeat_min = 1; Count repeat_max = 1; - parse_repeat(i, repeat_min, repeat_max); + parse_repeat(expr, i, repeat_min, repeat_max); for(unsigned j=0; j::max(); @@ -257,7 +281,7 @@ bool Regex::parse_repeat(string::const_iterator &i, Count &rmin, Count &rmax) else rmax = rmin; if(*i!='}') - throw InvalidParameterValue("Invalid bound"); + throw bad_regex("invalid bound", expr, begin); } ++i; @@ -267,6 +291,7 @@ bool Regex::parse_repeat(string::const_iterator &i, Count &rmin, Count &rmax) Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &iter) { + string::const_iterator begin = iter; Code result; ++iter; @@ -280,7 +305,7 @@ Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &ite string::const_iterator end = iter; for(; (end!=str.end() && (end==iter || *end!=']')); ++end) ; if(end==str.end()) - throw InvalidParameterValue("Unmatched '['"); + throw bad_regex("unmatched '['", str, begin); unsigned char mask[32] = {0}; unsigned type = 0; @@ -451,7 +476,7 @@ bool Regex::run(const string &str, const string::const_iterator &begin, RegMatch input_consumed = true; } else - throw Exception("Invalid instruction"); + throw logic_error("invalid instruction in regex bytecode"); if(match_result==negate_match) terminate = true; diff --git a/source/strings/regex.h b/source/strings/regex.h index 5b809ab..6792502 100644 --- a/source/strings/regex.h +++ b/source/strings/regex.h @@ -1,11 +1,23 @@ #ifndef MSP_STRINGS_REGEX_H_ #define MSP_STRINGS_REGEX_H_ +#include #include #include "regmatch.h" namespace Msp { +class bad_regex: public std::logic_error +{ +public: + bad_regex(const std::string &, const std::string &, const std::string::const_iterator &); + virtual ~bad_regex() throw() { } + +private: + std::string make_where(const std::string &, const std::string::const_iterator &); +}; + + /** This class provides regular expression matching. It supports a subset of POSIX.2 extended regex syntax. Character classes, equivalence classes and @@ -108,7 +120,7 @@ private: Code parse_atom(const std::string &, std::string::const_iterator &i, unsigned &); Code parse_brackets(const std::string &, std::string::const_iterator &); - bool parse_repeat(std::string::const_iterator &, Count &, Count &); + bool parse_repeat(const std::string &, std::string::const_iterator &, Count &, Count &); public: /** Matches the regex against a string. Refer to RegMatch documentation for diff --git a/source/strings/regmatch.cpp b/source/strings/regmatch.cpp index 61bed84..4799cfb 100644 --- a/source/strings/regmatch.cpp +++ b/source/strings/regmatch.cpp @@ -1,4 +1,4 @@ -#include +#include #include "regmatch.h" using namespace std; @@ -19,7 +19,7 @@ RegMatch::RegMatch(const string &str, const GroupArray &g): const RegMatch::Group &RegMatch::group(unsigned i) const { if(i>=groups.size()) - throw InvalidParameterValue("Group index out of range"); + throw out_of_range("RegMatch::group"); return groups[i]; } -- 2.45.2