X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fregex.h;fp=source%2Fregex.h;h=70f9ebb7f592a4e161c9b5df45430a676e738466;hp=3750025331e73a7601a76bb6b08a5176cc9c9379;hb=5b1368cb791cab043f0435628cacbaff36e39b7b;hpb=36f9e78ae75f5e14b132f37d249340ad3480b8ce diff --git a/source/regex.h b/source/regex.h index 3750025..70f9ebb 100644 --- a/source/regex.h +++ b/source/regex.h @@ -66,23 +66,6 @@ The MATCH_ANY instruction consumes the input character and always succeeds. */ class Regex { -public: - /** - Constructs a new Regex object. - */ - Regex(const std::string &expr); - - /** - Matches the regex against a string. Refer to RegMatch documentation for - more information on the resulting object. - */ - RegMatch match(const std::string &str) const; - - /** - Returns a disassembled representation of the NFA bytecode. For debugging - purposes. - */ - std::string disassemble() const; private: typedef std::string Code; typedef unsigned short Count; @@ -91,7 +74,7 @@ private: enum Instruction { - FIRST_INSTRUCTION_=0, + FIRST_INSTRUCTION_ = 0, JUMP, ND_JUMP, @@ -108,7 +91,7 @@ private: MATCH_MASK, MATCH_ANY, - LAST_INSTRUCTION_=31 + LAST_INSTRUCTION_ = 31 }; struct RunContext @@ -120,25 +103,33 @@ private: Code code; unsigned n_groups; - /** - Compiles a regular expression into NFA bytecode. When compiling a whole - regex, \a group should be set to 0. When the function returns, \a group will - be the index of the last subexpression and \a iter will point to the first - unused character in the expression. - - \param expr Expression to be compiled - \param begin Iterator into the expression - \param group Group counter, gets incremented for each subregex - \param branch Whether we are compiling a branch +public: + /** Constructs a new Regex object from a string representation. */ + Regex(const std::string &expr); - \return Compiled NFA bytecode - */ +private: + /** Compiles a regular expression into NFA bytecode. 
The iterator will be + advanced to the first unused character in the string. */ Code compile(const std::string &expr, std::string::const_iterator &iter, unsigned &group, bool branch); + Code parse_atom(const std::string &, std::string::const_iterator &i, unsigned &); Code parse_brackets(const std::string &, std::string::const_iterator &); bool parse_repeat(std::string::const_iterator &, Count &, Count &); + +public: + /** Matches the regex against a string. Refer to RegMatch documentation for + more information on the resulting object. */ + RegMatch match(const std::string &str) const; + +private: bool run(const std::string &, const std::string::const_iterator &, RegMatch::GroupArray &) const; bool group_compare(const RegMatch::Group &, const RegMatch::Group &) const; + +public: + /** Returns a disassembled representation of the NFA bytecode. For debugging + purposes. */ + std::string disassemble() const; +private: std::string disassemble_instruction(Code::const_iterator &) const; };