+/* $Id$
+
+This file is part of libmspstrings
+Copyright © 2007 Mikko Rasa
+Distributed under the LGPL
+*/
#ifndef MSP_STRINGS_REGEX_H_
#define MSP_STRINGS_REGEX_H_
The MATCH_CHAR instruction consumes the input character and matches it against
a single character. Since regexes often match sequences of printable characters,
-a match for such a character may be encoded as the character itself.
+a match for a non-opcode character may be encoded as the character itself.
The MATCH_RANGE instruction consumes the input character and matches it against
an inclusive character range.
*/
class Regex
{
-public:
- /**
- Constructs a new Regex object.
- */
- Regex(const std::string &expr);
-
- /**
- Matches the regex against a string. Refer to RegMatch documentation for
- more information on the resulting object.
- */
- RegMatch match(const std::string &str) const;
-
- /**
- Returns a disassembled representation of the NFA bytecode. For debugging
- purposes.
- */
- std::string disassemble() const;
private:
typedef std::string Code;
typedef unsigned short Count;
enum Instruction
{
- FIRST_INSTRUCTION_=0,
+ FIRST_INSTRUCTION_ = 0,
JUMP,
ND_JUMP,
MATCH_MASK,
MATCH_ANY,
- LAST_INSTRUCTION_=31
+ LAST_INSTRUCTION_ = 31
};
struct RunContext
Code code;
unsigned n_groups;
- /**
- Compiles a regular expression into NFA bytecode. When compiling a whole
- regex, \a group should be set to 0. When the function returns, \a group will
- be the index of the last subexpression and \a iter will point to the first
- unused character in the expression.
-
- \param expr Expression to be compiled
- \param begin Iterator into the expression
- \param group Group counter, gets incremented for each subregex
- \param branch Whether we are compiling a branch
+public:
+ /** Constructs a new Regex object from a string representation. */
+ Regex(const std::string &expr);
- \return Compiled NFA bytecode
- */
+private:
+ /** Compiles a regular expression into NFA bytecode. The iterator will be
+ advanced to the first unused character in the string. */
Code compile(const std::string &expr, std::string::const_iterator &iter, unsigned &group, bool branch);
+
Code parse_atom(const std::string &, std::string::const_iterator &i, unsigned &);
Code parse_brackets(const std::string &, std::string::const_iterator &);
bool parse_repeat(std::string::const_iterator &, Count &, Count &);
+
+public:
+ /** Matches the regex against a string. Refer to RegMatch documentation for
+ more information on the resulting object. */
+ RegMatch match(const std::string &str) const;
+
+private:
bool run(const std::string &, const std::string::const_iterator &, RegMatch::GroupArray &) const;
bool group_compare(const RegMatch::Group &, const RegMatch::Group &) const;
+
+public:
+ /** Returns a disassembled representation of the NFA bytecode. For debugging
+ purposes. */
+ std::string disassemble() const;
+private:
std::string disassemble_instruction(Code::const_iterator &) const;
};