int main(int argc, char **argv)
{
- bool debug=false;
+ bool debug = false;
GetOpt getopt;
getopt.add_option('d', "debug", debug, GetOpt::NO_ARG);
getopt(argc, argv);
- const vector<string> &args=getopt.get_args();
+ const vector<string> &args = getopt.get_args();
if(args.empty())
{
string line;
while(getline(cin, line))
{
- if(RegMatch match=regex.match(line))
+ if(RegMatch match = regex.match(line))
cout<<line<<'\n';
}
if(ch<0 || ch>0x7F)
return error(ch, buf, "Can't express character in ASCII");
- buf+=ch;
+ buf += ch;
}
void Ascii::Encoder::transliterate(UnicodeChar ch, string &buf)
{
if(ch>=0xC0 && ch<=0xC5)
- buf+='A';
+ buf += 'A';
else if(ch==0xC6)
- buf+="AE";
+ buf += "AE";
else if(ch==0xC7)
- buf+='C';
+ buf += 'C';
else if(ch>=0xC8 && ch<=0xCB)
- buf+='E';
+ buf += 'E';
else if(ch>=0xCC && ch<=0xCF)
- buf+='I';
+ buf += 'I';
else if(ch==0xD0)
- buf+='D';
+ buf += 'D';
else if(ch==0xD1)
- buf+='N';
+ buf += 'N';
else if((ch>=0xD2 && ch<=0xD7) || ch==0xD9)
- buf+='O';
+ buf += 'O';
else if(ch==0xD8)
- buf+='x';
+ buf += 'x';
else if(ch>=0xDA && ch<=0xDC)
- buf+='U';
+ buf += 'U';
else if(ch==0xDD)
- buf+='Y';
+ buf += 'Y';
else if(ch==0xDE)
- buf+='T';
+ buf += 'T';
else if(ch==0xDF)
- buf+="ss";
+ buf += "ss";
else if(ch>=0xE0 && ch<=0xE5)
- buf+='a';
+ buf += 'a';
else if(ch==0xE6)
- buf+="ae";
+ buf += "ae";
else if(ch==0xE7)
- buf+='c';
+ buf += 'c';
else if(ch>=0xE8 && ch<=0xEB)
- buf+='e';
+ buf += 'e';
else if(ch>=0xEC && ch<=0xEF)
- buf+='i';
+ buf += 'i';
else if(ch==0xF0)
- buf+='d';
+ buf += 'd';
else if(ch==0xF1)
- buf+='n';
+ buf += 'n';
else if((ch>=0xF2 && ch<=0xF7) || ch==0xF9)
- buf+='o';
+ buf += 'o';
else if(ch==0xF8)
- buf+='/';
+ buf += '/';
else if(ch>=0xFA && ch<=0xFC)
- buf+='u';
+ buf += 'u';
else if(ch==0xFD)
- buf+='y';
+ buf += 'y';
else if(ch==0xFE)
- buf+='t';
+ buf += 't';
else if(ch==0xFF)
- buf+='y';
+ buf += 'y';
else
- buf+='?';
+ buf += '?';
}
return error("No input");
else if(*i&0x80)
{
- UnicodeChar result=error("Undefined ASCII character");
+ UnicodeChar result = error("Undefined ASCII character");
++i;
return result;
}
class Encoder: public Codec::Encoder
{
public:
- Encoder(ErrorMode em=THROW_ON_ERROR): Codec::Encoder(em) { }
+ Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { }
+
virtual void encode_char(UnicodeChar, std::string &);
private:
virtual void transliterate(UnicodeChar, std::string &);
class Decoder: public Codec::Decoder
{
public:
- Decoder(ErrorMode em=THROW_ON_ERROR): Codec::Decoder(em) { }
+ Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { }
+
virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
};
virtual const char *get_name() const { return "ASCII"; }
- virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
- virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); }
+ virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); }
};
} // namespace Codecs
bool Codec::detect(const string &str) const
{
- Decoder *dec=create_decoder();
- bool result=true;
+ Decoder *dec = create_decoder();
+ bool result = true;
try
{
for(string::const_iterator i=str.begin(); i!=str.end(); )
}
catch(const CodecError &)
{
- result=false;
+ result = false;
}
delete dec;
{
for(string::const_iterator i=str.begin(); i!=str.end();)
{
- UnicodeChar c=decode_char(str, i);
+ UnicodeChar c = decode_char(str, i);
if(c!=-1)
- buf+=c;
+ buf += c;
}
}
for(string::const_iterator i=n.begin(); i!=n.end(); ++i)
{
if(isupper(*i))
- name+=tolower(*i);
+ name += tolower(*i);
else if(islower(*i) || isdigit(*i))
- name+=*i;
+ name += *i;
}
if(name=="ascii") return new Ascii;
Codec *detect_codec(const string &str)
{
- bool is_utf8=true;
- bool is_ascii=true;
- bool is_latin1=true;
- unsigned utf8_mb=0;
+ bool is_utf8 = true;
+ bool is_ascii = true;
+ bool is_latin1 = true;
+ unsigned utf8_mb = 0;
for(string::const_iterator i=str.begin(); i!=str.end(); ++i)
{
- unsigned char c=*i;
+ unsigned char c = *i;
if(c&0x80)
{
- is_ascii=false;
+ is_ascii = false;
if((c&0xC0)==0x80)
{
if((c&0xE0)==0x80)
- is_latin1=false;
+ is_latin1 = false;
if(utf8_mb)
--utf8_mb;
else
- is_utf8=false;
+ is_utf8 = false;
}
else if((c&0xC0)==0xC0)
{
if(utf8_mb)
{
- is_utf8=false;
- utf8_mb=0;
+ is_utf8 = false;
+ utf8_mb = 0;
}
else
{
}
else if(utf8_mb)
{
- is_utf8=false;
- utf8_mb=0;
+ is_utf8 = false;
+ utf8_mb = 0;
}
}
*/
class Encoder
{
+ protected:
+ ErrorMode err_mode;
+
+ Encoder(ErrorMode em): err_mode(em) { }
public:
virtual ~Encoder() { }
- /**
- Encodes a single unicode character. If the character can't be
- represented in this encoding, behavior depends on the error mode
- specified for the encoder:
-
- For THROW_ON_ERROR, an exception is thrown.
-
- For IGNORE_ERRORS, nothing is done.
-
- For TRANSLITERATE, the encoder attempts to select a character or a string
- or characters that closely approximates the non-representable character.
- */
- virtual void encode_char(UnicodeChar ch, std::string &buf) =0;
+ /** Encodes a single unicode character. If the character can't be
+ represented in this encoding, error() should be called. */
+ virtual void encode_char(UnicodeChar ch, std::string &buf) = 0;
- /**
- Encodes a unicode string. This is equivalent to callind encode_char for
- each character in the string with the same buffer.
- */
+ /** Encodes a unicode string. This is equivalent to calling encode_char
+ for each character in the string with the same buffer. */
virtual void encode(const ustring &str, std::string &buf);
std::string encode(const ustring &);
- /**
- Procuces a sequence of bytes that will bring the encoder back to the
- initial state.
- */
+ /** Produces a sequence of bytes that will bring the encoder back to the
+ initial state. */
virtual void sync(std::string &buf) { (void)buf; }
- /**
- Resets the encoder to the initial state without producing output.
- */
+ /** Resets the encoder to the initial state without producing output. */
virtual void reset() { }
- protected:
- ErrorMode err_mode;
-
- Encoder(ErrorMode em): err_mode(em) { }
-
- /**
- Handles an error depending on the error mode.
-
- For THROW_ON_ERROR, throws CodecError(msg).
- For IGNORE_ERROR, does nothing.
+ protected:
+ /** Handles an error depending on the error mode.
- For TRANSLITERATE, calls transliterate(ch, buf).
- */
+ THROW_ON_ERROR: throws CodecError(msg)
+ IGNORE_ERRORS: does nothing
+ TRANSLITERATE: calls transliterate(ch, buf) */
void error(UnicodeChar ch, std::string &buf, const std::string &msg);
- /**
- Attempts to produce an alternative encoding for a unicode character.
- Typically this includes dropping accent marks or romanizing letters.
- */
- virtual void transliterate(UnicodeChar ch, std::string &buf) =0;
+ /** Attempts to produce an alternative encoding for a unicode character.
+ Typically this includes dropping accent marks or romanizing letters. */
+ virtual void transliterate(UnicodeChar ch, std::string &buf) = 0;
};
/**
*/
class Decoder
{
+ protected:
+ ErrorMode err_mode;
+
+ Decoder(ErrorMode em): err_mode(em) { }
public:
virtual ~Decoder() { }
- /**
- Decodes a single character from a string. The iterator is advanced to
- the next character. For stateful codecs, -1 may be returned if a state
- change sequence was decoded but no character followed it. In case a
- decoding error occurs, behavior depends on the error mode specified for
- the decoder:
-
- For THROW_ON_ERROR, an exception is thrown and the iterator is left at
- the erroneous character.
+ /** Decodes a single character from a string. The iterator is advanced
+ to the next character. For stateful codecs, -1 may be returned if a
+ state change sequence was decoded but no character followed it. If
+ invalid input is encountered, the error() function should be called and
+ the iterator advanced only if it doesn't throw. */
+ virtual UnicodeChar decode_char(const std::string &str, std::string::const_iterator &i) = 0;
- For IGNORE_ERRORS, -1 is returned and the iterator is advanced.
-
- For TRANSLITERATE, 0xFFFE is returned and the iterator is advanced.
- */
- virtual UnicodeChar decode_char(const std::string &str, std::string::const_iterator &i) =0;
-
- /**
- Decodes a string.
- */
+ /** Decodes a string. */
virtual void decode(const std::string &str, ustring &buf);
ustring decode(const std::string &);
- /**
- Resets the decoder to the initial state.
- */
+ /** Resets the decoder to the initial state. */
virtual void reset() { }
- protected:
- ErrorMode err_mode;
-
- Decoder(ErrorMode em): err_mode(em) { }
- /**
- Handles an error depending on the error mode.
- */
- UnicodeChar error(const std::string &);
+ protected:
+ /** Handles an error depending on the error mode. The return value is
+ suitable for returning from decode_char.
+
+ THROW_ON_ERROR: throws CodecError(msg)
+ IGNORE_ERRORS: returns -1
+ TRANSLITERATE: returns 0xFFFE */
+ UnicodeChar error(const std::string &msg);
};
+protected:
+ Codec() { }
+public:
virtual ~Codec() { }
- /**
- Returns the name of the encoding handled by this codec.
- */
- virtual const char *get_name() const =0;
+ /** Returns the name of the encoding handled by this codec. */
+ virtual const char *get_name() const = 0;
- /**
- Creates an encoder for this codec.
- */
- virtual Encoder *create_encoder(ErrorMode err_mode=THROW_ON_ERROR) const =0;
+ /** Creates an encoder for this codec. */
+ virtual Encoder *create_encoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0;
- /**
- Creates a decoder for this codec.
- */
- virtual Decoder *create_decoder(ErrorMode err_mode=THROW_ON_ERROR) const =0;
+ /** Creates a decoder for this codec. */
+ virtual Decoder *create_decoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0;
- /**
- Determines whether the given string can be successfully decoded with this
- codec. Note that this function returning true does not guarantee that the
- string was actually encoded with this codec. In particular, many 8-bit
- encodings are indistinguishable.
- */
+ /** Determines whether the given string can be successfully decoded with
+ this codec. Note that this function returning true does not guarantee that
+ the string was actually encoded with this codec. In particular, many 8-bit
+ encodings are indistinguishable. */
virtual bool detect(const std::string &) const;
-protected:
- Codec() { }
};
typedef Codec::Encoder Encoder;
typedef Codec::Decoder Decoder;
-/**
-Convenience function that decodes a string.
-*/
+/** Convenience function that decodes a string. */
template<class C>
ustring decode(const std::string &s)
{
return result;
}
-/**
-Convenience function that encodes a string.
-*/
+/** Convenience function that encodes a string. */
template<class C>
std::string encode(const ustring &s)
{
return result;
}
-/**
-Convenience function that transcodes a string from one codec to another.
-*/
+/** Convenience function that transcodes a string from one codec to another. */
template<class F, class T>
std::string transcode(const std::string &s)
{
return result;
}
-/**
-Creates a codec for an encoding by name. The caller is responsible for
-deleting the codec when it's no longer needed.
-*/
+/** Creates a codec for an encoding by name. The caller is responsible for
+deleting the codec when it's no longer needed. */
Codec *create_codec(const std::string &);
-/**
-Automatically detects the encoding of a string and creates a codec for it.
-The codec must be deleted when it's no longer needed.
-*/
+/** Automatically detects the encoding of a string and creates a codec for it.
+The codec must be deleted when it's no longer needed. */
Codec *detect_codec(const std::string &);
} // namespace Codecs
namespace Msp {
-/**
-Resets the format to the default. Mainly used by constructors.
-*/
-Fmt &Fmt::reset()
-{
- wd=0;
- prec=6;
- spos=false;
- fillc=' ';
- base=DEC;
- sbase=false;
- fmode=AUTOFLT;
- spoint=false;
- align=RIGHT;
- ucase=false;
- type=STR;
-
- return *this;
-}
-
-/**
-Applies the format to the given ostream. All existing formatting information
-is overwritten.
-*/
-void Fmt::apply(ostream &out) const
-{
- out.flags(((base==HEX) ? ios_base::hex : (base==OCT) ? ios_base::oct : ios_base::dec)
- | ((fmode==SCI) ? ios_base::scientific : (fmode==FIXED) ? ios_base::fixed : ios_base::fmtflags(0))
- | (fillc=='0' ? ios_base::internal : (align==LEFT) ? ios_base::left : ios_base::right)
- | (sbase ? ios_base::showbase : ios_base::fmtflags(0))
- | (spoint ? ios_base::showpoint : ios_base::fmtflags(0))
- | (spos ? ios_base::showpos : ios_base::fmtflags(0))
- | (ucase ? ios_base::uppercase : ios_base::fmtflags(0)));
- out.fill(fillc);
- out.width(wd);
- out.precision(prec);
-}
-
-/**
-Parses a printf-style conversion specification. Called from constructors.
-*/
void Fmt::parse(const char *f)
{
if(*f=='%') ++f;
{
if(*f=='#')
{
- sbase=true;
- spoint=true;
+ sbase = true;
+ spoint = true;
}
else if(*f=='0')
- fillc='0';
+ fillc = '0';
else if(*f=='-')
- align=LEFT;
+ align = LEFT;
else if(*f=='+')
- spos=true;
+ spos = true;
else
break;
}
- wd=0;
+ wd = 0;
for(; *f; ++f)
{
if(*f>='0' && *f<='9')
- wd=wd*10+(*f-'0');
+ wd = wd*10+(*f-'0');
else
break;
}
if(*f=='.')
{
++f;
- prec=0;
+ prec = 0;
for(; *f; ++f)
{
if(*f>='0' && *f<='9')
- prec=prec*10+(*f-'0');
+ prec = prec*10+(*f-'0');
else
break;
}
}
- type=NUM;
+ type = NUM;
if(*f=='d' || *f=='u')
- base=DEC;
+ base = DEC;
else if(*f=='x' || *f=='X')
- base=HEX;
+ base = HEX;
else if(*f=='o')
- base=OCT;
+ base = OCT;
else if(*f=='b')
- base=BIN;
+ base = BIN;
else if(*f=='e' || *f=='E')
- fmode=SCI;
+ fmode = SCI;
else if(*f=='f' || *f=='F')
- fmode=FIXED;
+ fmode = FIXED;
else if(*f=='g' || *f=='G')
- fmode=AUTOFLT;
+ fmode = AUTOFLT;
else if(*f=='p' || *f=='P')
{
- base=HEX;
- sbase=true;
+ base = HEX;
+ sbase = true;
}
else if(*f=='c')
- type=CHAR;
+ type = CHAR;
else if(*f=='s')
- type=STR;
+ type = STR;
else if(*f=='i')
- base=AUTOBASE;
+ base = AUTOBASE;
else
throw InvalidParameterValue("Invalid conversion specifier");
if(*f=='E' || *f=='F' || *f=='G' || *f=='X' || *f=='P')
- ucase=true;
+ ucase = true;
++f;
throw InvalidParameterValue("Extra characters in conversion specification");
}
+Fmt &Fmt::reset()
+{
+ wd = 0;
+ prec = 6;
+ spos = false;
+ fillc = ' ';
+ base = DEC;
+ sbase = false;
+ fmode = AUTOFLT;
+ spoint = false;
+ align = RIGHT;
+ ucase = false;
+ type = STR;
+
+ return *this;
+}
+
+void Fmt::apply(ostream &out) const
+{
+ out.flags(((base==HEX) ? ios_base::hex : (base==OCT) ? ios_base::oct : ios_base::dec)
+ | ((fmode==SCI) ? ios_base::scientific : (fmode==FIXED) ? ios_base::fixed : ios_base::fmtflags(0))
+ | (fillc=='0' ? ios_base::internal : (align==LEFT) ? ios_base::left : ios_base::right)
+ | (sbase ? ios_base::showbase : ios_base::fmtflags(0))
+ | (spoint ? ios_base::showpoint : ios_base::fmtflags(0))
+ | (spos ? ios_base::showpos : ios_base::fmtflags(0))
+ | (ucase ? ios_base::uppercase : ios_base::fmtflags(0)));
+ out.fill(fillc);
+ out.width(wd);
+ out.precision(prec);
+}
+
} // namespace Msp
};
private:
- unsigned wd;
- unsigned prec;
- bool spos;
- wchar_t fillc;
- Base base;
- bool sbase;
+ unsigned wd;
+ unsigned prec;
+ bool spos;
+ wchar_t fillc;
+ Base base;
+ bool sbase;
FloatMode fmode;
- bool spoint;
- Align align;
- bool ucase;
- Type type;
+ bool spoint;
+ Align align;
+ bool ucase;
+ Type type;
public:
- Fmt() { reset(); }
- Fmt(const char *f) { reset(); parse(f); }
+ Fmt() { reset(); }
+ Fmt(const char *f) { reset(); parse(f); }
Fmt(const std::string &f) { reset(); parse(f.c_str()); }
- Fmt &width(unsigned w) { wd=w; return *this; }
- Fmt &precision(unsigned p) { prec=p; return *this; }
- Fmt &showpos(bool s=true) { spos=s; return *this; }
- Fmt &fill(wchar_t f) { fillc=f; return *this; }
- Fmt &fixed() { fmode=FIXED; return *this; }
- Fmt &scientific() { fmode=SCI; return *this; }
- Fmt &showpoint(bool s=true) { spoint=s; return *this; }
- Fmt &showbase(bool s=true) { sbase=s; return *this; }
- Fmt &left() { align=LEFT; return *this; }
- Fmt &right() { align=RIGHT; return *this; }
- Fmt &dec() { base=DEC; return *this; }
- Fmt &hex() { base=HEX; return *this; }
- Fmt &oct() { base=OCT; return *this; }
- Fmt &bin() { base=BIN; return *this; }
- Fmt &uppercase(bool u=true) { ucase=u; return *this; }
- Fmt &numeric() { type=NUM; return *this; }
- Fmt &character() { type=CHAR; return *this; }
- Fmt &string() { type=STR; return *this; }
+private:
+ void parse(const char *);
+
+public:
+ Fmt &width(unsigned w) { wd = w; return *this; }
+ Fmt &precision(unsigned p) { prec = p; return *this; }
+ Fmt &showpos(bool s=true) { spos = s; return *this; }
+ Fmt &fill(wchar_t f) { fillc = f; return *this; }
+ Fmt &fixed() { fmode = FIXED; return *this; }
+ Fmt &scientific() { fmode = SCI; return *this; }
+ Fmt &showpoint(bool s=true) { spoint = s; return *this; }
+ Fmt &showbase(bool s=true) { sbase = s; return *this; }
+ Fmt &left() { align = LEFT; return *this; }
+ Fmt &right() { align = RIGHT; return *this; }
+ Fmt &dec() { base = DEC; return *this; }
+ Fmt &hex() { base = HEX; return *this; }
+ Fmt &oct() { base = OCT; return *this; }
+ Fmt &bin() { base = BIN; return *this; }
+ Fmt &uppercase(bool u=true) { ucase = u; return *this; }
+ Fmt &numeric() { type = NUM; return *this; }
+ Fmt &character() { type = CHAR; return *this; }
+ Fmt &string() { type = STR; return *this; }
Fmt &reset();
- unsigned get_width() const { return wd; }
- unsigned get_precision() const { return prec; }
- bool get_showpos() const { return spos; }
- wchar_t get_fill() const { return fillc; }
- Base get_base() const { return base; }
- bool get_showbase() const { return sbase; }
+ unsigned get_width() const { return wd; }
+ unsigned get_precision() const { return prec; }
+ bool get_showpos() const { return spos; }
+ wchar_t get_fill() const { return fillc; }
+ Base get_base() const { return base; }
+ bool get_showbase() const { return sbase; }
FloatMode get_floatmode() const { return fmode; }
- bool get_showpoint() const { return spoint; }
- Align get_align() const { return align; }
- bool get_uppercase() const { return ucase; }
- Type get_type() const { return type; }
+ bool get_showpoint() const { return spoint; }
+ Align get_align() const { return align; }
+ bool get_uppercase() const { return ucase; }
+ Type get_type() const { return type; }
void apply(std::ostream &) const;
-private:
- void parse(const char *);
};
inline std::ostream &operator<<(std::ostream &o, const Fmt &f)
break;
}
- result+=*pos;
+ result += *pos;
}
}
if(pos==fmt.end())
throw Exception("Too many arguments for format");
- string::iterator i=pos;
+ string::iterator i = pos;
for(; i!=fmt.end(); ++i)
if(isalpha(*i))
break;
++i;
string c(pos, i);
- pos=i;
+ pos = i;
return Fmt(c);
}
*/
class Formatter
{
+private:
+ std::string fmt;
+ std::string::iterator pos;
+ std::string result;
+
public:
Formatter(const std::string &);
- /**
- Extracts the next conversion from the format string and formats the given
- value with it. Will throw if no more conversions are found.
- */
+ /** Extracts the next conversion from the format string and formats the
+ given value with it. Will throw if no more conversions are found. */
template<typename T>
Formatter &operator()(const T &a)
{
- result+=lexical_cast(a, get_conversion());
+ result += lexical_cast(a, get_conversion());
advance();
return *this;
}
const std::string &str() const;
private:
- std::string fmt;
- std::string::iterator pos;
- std::string result;
-
void advance();
- Fmt get_conversion();
+ Fmt get_conversion();
};
inline Formatter format(const std::string &f)
Copyright © 2007 Mikko Rasa
Distributed under the LGPL
*/
+
#include "glob.h"
using namespace std;
Copyright © 2007 Mikko Rasa
Distributed under the LGPL
*/
+
#ifndef MSP_STRINGS_GLOB_H_
#define MSP_STRINGS_GLOB_H_
{
if(mode!=ASCII && mode!=JISX0201)
switch_mode(ASCII, buf);
- buf+=ch;
+ buf += ch;
}
else if(ch==0x5C || ch==0x7E)
{
if(mode!=ASCII)
switch_mode(ASCII, buf);
- buf+=ch;
+ buf += ch;
}
else if(ch==0xA5 || ch==0x203E)
{
if(mode!=JISX0201)
switch_mode(JISX0201, buf);
if(ch==0xA5)
- buf+=0x5C;
+ buf += 0x5C;
else if(ch==0x203E)
- buf+=0x7E;
+ buf += 0x7E;
}
else
{
- Kuten jis=ucs_to_jisx0208(ch);
+ Kuten jis = ucs_to_jisx0208(ch);
if(!jis)
return error(ch, buf, "Can't express character in ISO-2022-JP");
if(mode!=JISX0208)
switch_mode(JISX0208, buf);
- char jbuf[2]={jis.ku+0x20, jis.ten+0x20};
+ char jbuf[2] = {jis.ku+0x20, jis.ten+0x20};
buf.append(jbuf, 2);
}
}
void Iso2022Jp::Encoder::reset()
{
- mode=ASCII;
+ mode = ASCII;
}
void Iso2022Jp::Encoder::switch_mode(Mode m, string &buf)
{
- mode=m;
+ mode = m;
switch(mode)
{
case ASCII: buf.append("\033(B", 3); break;
{
if(mode!=ASCII)
switch_mode(ASCII, buf);
- buf+='?';
+ buf += '?';
}
while(i!=str.end())
{
- string::const_iterator j=i;
+ string::const_iterator j = i;
- UnicodeChar result=-1;
+ UnicodeChar result = -1;
if(*j==033)
{
- unsigned escape=0;
+ unsigned escape = 0;
for(++j; j!=str.end(); ++j)
{
- escape=escape<<8 | static_cast<unsigned char>(*j);
+ escape = escape<<8 | static_cast<unsigned char>(*j);
if(*j>='@' && *j<='Z')
break;
}
- bool ok=true;
+ bool ok = true;
switch(escape)
{
case 0x2842: switch_mode(ASCII); break; // ESC ( B
case 0x284A: switch_mode(JISX0201); break; // ESC ( J
case 0x2440: // ESC $ @
case 0x2442: switch_mode(JISX0208); break; // ESC $ B
- default: ok=false;
+ default: ok = false;
}
if(ok)
- i=j;
+ i = j;
else
- result=*i;
+ result = *i;
++i;
}
else if(dec)
void Iso2022Jp::Decoder::reset()
{
delete dec;
- mode=ASCII;
- dec=new Ascii::Decoder;
+ mode = ASCII;
+ dec = new Ascii::Decoder;
}
void Iso2022Jp::Decoder::switch_mode(Mode m)
{
delete dec;
- mode=m;
+ mode = m;
switch(mode)
{
- case ASCII: dec=new Ascii::Decoder; break;
- case JISX0201: dec=new JisX0201::Decoder; break;
- case JISX0208: dec=new JisX0208::Decoder; break;
+ case ASCII: dec = new Ascii::Decoder; break;
+ case JISX0201: dec = new JisX0201::Decoder; break;
+ case JISX0208: dec = new JisX0208::Decoder; break;
}
}
class Encoder: public Codec::Encoder
{
+ private:
+ Mode mode;
+
public:
- Encoder(ErrorMode em=THROW_ON_ERROR): Codec::Encoder(em), mode(ASCII) { }
+ Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em), mode(ASCII) { }
+
virtual void encode_char(UnicodeChar, std::string &);
virtual void sync(std::string &);
virtual void reset();
private:
- Mode mode;
-
void switch_mode(Mode, std::string &);
virtual void transliterate(UnicodeChar, std::string &);
};
class Decoder: public Codec::Decoder
{
+ private:
+ Mode mode;
+ Codec::Decoder *dec;
+
public:
Decoder(ErrorMode =THROW_ON_ERROR);
+
virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
virtual void reset();
private:
- Mode mode;
- Codec::Decoder *dec;
-
- virtual void switch_mode(Mode);
+ void switch_mode(Mode);
};
virtual const char *get_name() const { return "ISO-2022-JP"; }
- virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
- virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); }
+ virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); }
};
} // namespace Codecs
namespace {
-const unsigned map_size=9;
+const unsigned map_size = 9;
const int mapping[map_size*2]=
{
0xFC, 0x7E
};
-} // namespace
+}
+
namespace Msp {
namespace Codecs {
void Iso646Fi::Encoder::encode_char(UnicodeChar ch, string &buf)
{
- int tch=transform_mapping_or_direct(mapping, map_size, ch, false);
+ int tch = transform_mapping_or_direct(mapping, map_size, ch, false);
if(tch<0 || tch>0x7F)
error(ch, buf, "Can't express character in ISO-646-FI");
else
- buf+=tch;
+ buf += tch;
}
void Iso646Fi::Encoder::transliterate(UnicodeChar, string &buf)
{
- buf+='?';
+ buf += '?';
}
if(i==str.end())
return error("No input");
- unsigned char ch=*i;
- int tch=(ch<=0x7F ? transform_mapping_or_direct(mapping, map_size, ch, true) : -1);
+ unsigned char ch = *i;
+ int tch = (ch<=0x7F ? transform_mapping_or_direct(mapping, map_size, ch, true) : -1);
UnicodeChar result;
if(tch==-1)
- result=error("Undefined ISO-646-FI character");
+ result = error("Undefined ISO-646-FI character");
else
- result=tch;
+ result = tch;
++i;
return result;
class Encoder: public Codec::Encoder
{
public:
- Encoder(ErrorMode em=THROW_ON_ERROR): Codec::Encoder(em) { }
+ Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { }
+
virtual void encode_char(UnicodeChar, std::string &);
private:
virtual void transliterate(UnicodeChar, std::string &);
class Decoder: public Codec::Decoder
{
public:
- Decoder(ErrorMode em=THROW_ON_ERROR): Codec::Decoder(em) { }
+ Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { }
+
virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
};
virtual const char *get_name() const { return "ISO-646-FI"; }
- virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
- virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); }
+ virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); }
};
} // namespace Codecs
if(ch<0 || ch>0xFF)
return error(ch, buf, "Can't express character in ISO-8859-1");
- buf+=ch;
+ buf += ch;
}
void Iso88591::Encoder::transliterate(UnicodeChar, string &buf)
{
- buf+='?';
+ buf += '?';
}
class Encoder: public Codec::Encoder
{
public:
- Encoder(ErrorMode em=THROW_ON_ERROR): Codec::Encoder(em) { }
+ Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { }
+
virtual void encode_char(UnicodeChar, std::string &);
private:
virtual void transliterate(UnicodeChar, std::string &);
class Decoder: public Codec::Decoder
{
public:
- Decoder(ErrorMode em=THROW_ON_ERROR): Codec::Decoder(em) { }
+ Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { }
+
virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
};
virtual const char *get_name() const { return "ISO-8859-1"; }
- virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
- virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); }
+ virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); }
};
} // namespace Codecs
namespace {
-const unsigned map_size=8;
+const unsigned map_size = 8;
const int mapping[map_size*2]=
{
}
+
namespace Msp {
namespace Codecs {
void Iso885915::Encoder::encode_char(UnicodeChar ch, string &buf)
{
- int tch=transform_mapping_or_direct(mapping, map_size, ch, false);
+ int tch = transform_mapping_or_direct(mapping, map_size, ch, false);
if(tch<0 || tch>0xFF)
error(ch, buf, "Can't express character in ISO-8859-15");
else
- buf+=tch;
+ buf += tch;
}
void Iso885915::Encoder::transliterate(UnicodeChar, string &buf)
{
- buf+='?';
+ buf += '?';
}
if(i==str.end())
return error("No input");
- unsigned char ch=*i;
- int tch=transform_mapping_or_direct(mapping, map_size, ch, true);
+ unsigned char ch = *i;
+ int tch = transform_mapping_or_direct(mapping, map_size, ch, true);
UnicodeChar result;
if(tch==-1)
- result=error("Undefined ISO-8859-15 character");
+ result = error("Undefined ISO-8859-15 character");
else
- result=tch;
+ result = tch;
++i;
return result;
class Encoder: public Codec::Encoder
{
public:
- Encoder(ErrorMode em=THROW_ON_ERROR): Codec::Encoder(em) { }
+ Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { }
+
virtual void encode_char(UnicodeChar, std::string &);
private:
virtual void transliterate(UnicodeChar, std::string &);
class Decoder: public Codec::Decoder
{
public:
- Decoder(ErrorMode em=THROW_ON_ERROR): Codec::Decoder(em) { }
+ Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { }
+
virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
};
virtual const char *get_name() const { return "ISO-8859-15"; }
- virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
- virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); }
+ virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); }
};
} // namespace Codecs
void JisX0201::Encoder::encode_char(UnicodeChar ch, string &buf)
{
if(ch>=0 && ch<=0x7F && ch!=0x5C && ch!=0x7E)
- buf+=ch;
+ buf += ch;
else if(ch==0xA5)
- buf+=0x5C;
+ buf += 0x5C;
else if(ch==0x203E)
- buf+=0x7E;
+ buf += 0x7E;
else if(ch>=0xFF61 && ch<=0xFF9F)
- buf+=ch-0xFEC0;
+ buf += ch-0xFEC0;
else
error(ch, buf, "Can't express character in JIS X 0201");
}
void JisX0201::Encoder::transliterate(UnicodeChar, string &buf)
{
- buf+='?';
+ buf += '?';
}
if(i==str.end())
return error("No input");
- unsigned char ch=*i;
+ unsigned char ch = *i;
UnicodeChar result;
if(ch==0x5C)
- result=0xA5;
+ result = 0xA5;
else if(ch==0x7E)
- result=0x203E;
+ result = 0x203E;
else if(ch<=0x7F)
- result=ch;
+ result = ch;
else if(ch>=0xA1 && ch<=0xDF)
- result=ch+0xFEC0;
+ result = ch+0xFEC0;
else
- result=error("Undefined JIS X 0201 character");
+ result = error("Undefined JIS X 0201 character");
++i;
return result;
class Encoder: public Codec::Encoder
{
public:
- Encoder(ErrorMode em=THROW_ON_ERROR): Codec::Encoder(em) { }
+ Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { }
+
virtual void encode_char(UnicodeChar, std::string &);
private:
virtual void transliterate(UnicodeChar, std::string &);
class Decoder: public Codec::Decoder
{
public:
- Decoder(ErrorMode em=THROW_ON_ERROR): Codec::Decoder(em) { }
+ Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { }
+
virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
};
virtual const char *get_name() const { return "JIS X 0201"; }
- virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
- virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); }
+ virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); }
};
} // namespace Codecs
void JisX0208::Encoder::encode_char(UnicodeChar ucs, string &buf)
{
- unsigned short jis=ucs_to_jisx0208(ucs);
+ unsigned short jis = ucs_to_jisx0208(ucs);
if(jis)
{
- char jbuf[2]={jis>>8, jis};
+ char jbuf[2] = {jis>>8, jis};
buf.append(jbuf, 2);
}
else
if(i==str.end())
return error("No input");
- string::const_iterator j=i;
+ string::const_iterator j = i;
Kuten jis;
- jis.ku=*j++-0x20;
+ jis.ku = *j++-0x20;
UnicodeChar result;
if(j==str.end())
- result=error("Incomplete JIS X 0208 character");
+ result = error("Incomplete JIS X 0208 character");
else
{
- jis.ten=*j++-0x20;
- result=jisx0208_to_ucs(jis);
+ jis.ten = *j++-0x20;
+ result = jisx0208_to_ucs(jis);
if(result==0)
- result=error("Undefined JIS X 0208 character");
+ result = error("Undefined JIS X 0208 character");
}
- i=j;
+ i = j;
return result;
}
if(c<0 || c>0xFFFF)
return Kuten();
- unsigned i=0;
+ unsigned i = 0;
for(unsigned bit=0x1000; bit; bit>>=1)
{
if(i+bit>=ucs_to_jisx0208_table_size)
continue;
if(ucs_to_jisx0208_table[i+bit].ucs<=static_cast<unsigned short>(c))
- i+=bit;
+ i += bit;
}
Kuten result;
if(ucs_to_jisx0208_table[i].ucs==static_cast<unsigned short>(c))
{
- result.ku=(ucs_to_jisx0208_table[i].jis>>8)+1;
- result.ten=ucs_to_jisx0208_table[i].jis+1;
+ result.ku = (ucs_to_jisx0208_table[i].jis>>8)+1;
+ result.ten = ucs_to_jisx0208_table[i].jis+1;
}
return result;
class Encoder: public Codec::Encoder
{
public:
- Encoder(ErrorMode em=THROW_ON_ERROR): Codec::Encoder(em) { }
+ Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { }
+
virtual void encode_char(UnicodeChar, std::string &);
private:
virtual void transliterate(UnicodeChar, std::string &);
class Decoder: public Codec::Decoder
{
public:
- Decoder(ErrorMode em=THROW_ON_ERROR): Codec::Decoder(em) { }
+ Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { }
+
virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
};
virtual const char *get_name() const { return "JIS X 0208"; }
- virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
- virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); }
+ virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); }
};
+
struct Kuten
{
unsigned short ku;
unsigned short ten;
Kuten(): ku(0), ten(0) { }
+
operator bool() { return ku!=0 && ten!=0; }
};
template<typename T>
struct IsSigned
-{ enum { result=!(static_cast<T>(-1)>0) }; };
+{ enum { result = !(static_cast<T>(-1)>0) }; };
-template<typename T, bool f=(sizeof(T)>sizeof(unsigned long))>
+template<typename T, bool f = (sizeof(T)>sizeof(unsigned long))>
struct Temporary
{ typedef unsigned long Type; };
};
/* Helper to avoid warnings about an unsigned type never being < 0 */
-template<typename T, bool f=IsSigned<T>::result>
+template<typename T, bool f = IsSigned<T>::result>
struct IsNegative
{ static bool eval(T v) { return v<0; } };
/* Helper to avoid errors about ambiguous function calls since there are no
overloads of abs for unsigned types */
-template<typename T, bool f=IsSigned<T>::result>
+template<typename T, bool f = IsSigned<T>::result>
struct Absolute
{ static T eval(T v) { return v<0 ? -v : v; } };
/*** Integer conversions ***/
-const char udigits[]="0123456789ABCDEF";
-const char ldigits[]="0123456789abcdef";
+const char udigits[] = "0123456789ABCDEF";
+const char ldigits[] = "0123456789abcdef";
template<typename T>
char *int_to_str(T v, const Fmt &f, char *end)
{
if(f.get_type()==Fmt::CHAR)
{
- *--end=v;
+ *--end = v;
return end;
}
- char *ptr=end;
+ char *ptr = end;
// Find out the base to use
- unsigned base=f.get_base();
+ unsigned base = f.get_base();
if(!base)
- base=10;
+ base = 10;
// Format the number, starting from the least significant digit
- const char *digits=(f.get_uppercase() ? udigits : ldigits);
+ const char *digits = (f.get_uppercase() ? udigits : ldigits);
if(v)
{
- typename Temporary<T>::Type w=Absolute<T>::eval(v);
+ typename Temporary<T>::Type w = Absolute<T>::eval(v);
while(w)
{
- *--ptr=digits[w%base];
- w/=base;
+ *--ptr = digits[w%base];
+ w /= base;
}
}
else
- *--ptr=digits[0];
+ *--ptr = digits[0];
- char sign=(IsNegative<T>::eval(v) ? '-' : f.get_showpos() ? '+' : 0);
+ char sign = (IsNegative<T>::eval(v) ? '-' : f.get_showpos() ? '+' : 0);
if(f.get_fill()=='0')
{
/* Zero-fill, taking base/sign size into account. The expression is a
bit ugly, but saves having to write code for creating the prefix both
ways. */
- unsigned pfxsize=((f.get_showbase() && base!=10) ? base==8 ? 1 : 2 : 0) + (sign!=0);
+ unsigned pfxsize = ((f.get_showbase() && base!=10) ? base==8 ? 1 : 2 : 0) + (sign!=0);
for(unsigned i=(end-ptr)+pfxsize; i<f.get_width(); ++i)
- *--ptr='0';
+ *--ptr = '0';
}
if(f.get_showbase() && v!=0)
{
// Add base indicator
if(base==2)
- *--ptr=(f.get_uppercase() ? 'B' : 'b');
+ *--ptr = (f.get_uppercase() ? 'B' : 'b');
else if(base==16)
- *--ptr=(f.get_uppercase() ? 'X' : 'x');
+ *--ptr = (f.get_uppercase() ? 'X' : 'x');
if(base!=10)
- *--ptr='0';
+ *--ptr = '0';
}
if(sign)
- *--ptr=sign;
+ *--ptr = sign;
return ptr;
}
template<typename T>
string int_to_str(T v, const Fmt &f)
{
- unsigned size=max(f.get_width(), max<unsigned>(f.get_precision(), sizeof(T)*8+3));
- char *buf=new char[size];
+ unsigned size = max(f.get_width(), max<unsigned>(f.get_precision(), sizeof(T)*8+3));
+ char *buf = new char[size];
string result(int_to_str(v, f, buf+size), buf+size);
delete[] buf;
return result;
if(s.empty())
throw LexicalError("Empty input in integer conversion");
- std::string::const_iterator i=s.begin();
+ std::string::const_iterator i = s.begin();
// See if the input starts with a sign
- bool neg=false;
+ bool neg = false;
if(*i=='-')
{
if(!IsSigned<T>::result)
throw LexicalError("Negative sign in unsigned integer conversion");
- neg=true;
+ neg = true;
++i;
}
else if(*i=='+')
if(i==s.end())
throw LexicalError("Missing digits in integer conversion");
- T base=f.get_base();
+ T base = f.get_base();
if(!base && i!=s.end())
{
// Automatic base detection requested, figure it out
{
if(*i=='x' || *i=='X')
{
- base=16;
+ base = 16;
++i;
}
else if(*i=='b' || *i=='B')
{
- base=2;
+ base = 2;
++i;
}
else
- base=8;
+ base = 8;
}
else
- base=10;
+ base = 10;
}
// Parse the digits
- T result=0;
+ T result = 0;
for(; i!=s.end(); ++i)
{
- T digit=base;
+ T digit = base;
if(*i>='0' && *i<='9')
- digit=*i-'0';
+ digit = *i-'0';
else if(*i>='A' && *i<='F')
- digit=*i-'A'+10;
+ digit = *i-'A'+10;
else if(*i>='a' && *i<='f')
- digit=*i-'a'+10;
+ digit = *i-'a'+10;
if(digit>=base)
throw LexicalError("Invalid digit in integer conversion");
- T next=result*base+digit;
+ T next = result*base+digit;
if(next/base!=result)
throw LexicalError("Overflow in integer conversion");
- result=next;
+ result = next;
}
if(neg)
- result=-result;
+ result = -result;
return result;
}
if(f.get_type()==Fmt::CHAR)
throw LexicalError("Character format in floating-point conversion");
- Fmt::FloatMode mode=f.get_floatmode();
- long double w=abs(v);
- char sign=(v<0 ? '-' : f.get_showpos() ? '+' : 0);
+ Fmt::FloatMode mode = f.get_floatmode();
+ long double w = abs(v);
+ char sign = (v<0 ? '-' : f.get_showpos() ? '+' : 0);
// Handle infinity and not-a-number as special cases
if(!(w+w>w) && w!=0)
{
string result;
if(sign)
- result+=sign;
+ result += sign;
if(!(w>=0))
- result+=(f.get_uppercase() ? "NAN" : "nan");
+ result += (f.get_uppercase() ? "NAN" : "nan");
else
- result+=(f.get_uppercase() ? "INF" : "inf");
+ result += (f.get_uppercase() ? "INF" : "inf");
if(result.size()<f.get_width())
- result=string(f.get_width()-result.size(), ' ')+result;
+ result = string(f.get_width()-result.size(), ' ')+result;
return result;
}
/* Find out the base-10 exponent. Building up the multiplier / divisor
first helps with accuracy in some cases. */
- int exp=0;
+ int exp = 0;
if(w>=10)
{
- long double div=1;
+ long double div = 1;
while(div*10<w)
{
++exp;
- div*=10;
+ div *= 10;
}
- w/=div;
+ w /= div;
}
else if(mode!=Fmt::FIXED && w<1 && w!=0)
{
- long double mul=1;
+ long double mul = 1;
while(w*mul<1)
{
--exp;
- mul*=10;
+ mul *= 10;
}
- w*=mul;
+ w *= mul;
}
// Decide how to format the number
unsigned digits;
- unsigned point=1;
- bool showexp=false;
+ unsigned point = 1;
+ bool showexp = false;
if(mode==Fmt::FIXED)
{
- point=exp+1;
- digits=point+f.get_precision();
+ point = exp+1;
+ digits = point+f.get_precision();
}
else if(mode==Fmt::SCI)
{
- digits=f.get_precision()+1;
- showexp=true;
+ digits = f.get_precision()+1;
+ showexp = true;
}
else
{
- digits=max(f.get_precision(), 1U);
+ digits = max(f.get_precision(), 1U);
if(exp<-4 || exp>=static_cast<int>(digits))
{
- point=1;
- showexp=true;
+ point = 1;
+ showexp = true;
}
else
{
- point=max(exp, 0)+1;
+ point = max(exp, 0)+1;
if(exp<0)
- digits+=-exp;
+ digits += -exp;
}
}
// Apply rounding
- w+=5.0l/pow(10.0l, static_cast<long double>(digits));
+ w += 5.0l/pow(10.0l, static_cast<long double>(digits));
if(w>10)
{
// Rounding bumped us to the next exponent, deal with it
- w/=10;
+ w /= 10;
if(mode==Fmt::AUTOFLT && exp+1==static_cast<int>(digits))
{
- point=1;
- showexp=true;
+ point = 1;
+ showexp = true;
}
if(!showexp)
{
}
// Create a buffer and start from the end
- unsigned size=max(f.get_width(), digits+8);
- char *buf=new char[size];
- char *end=buf+size;
- char *ptr=end;
+ unsigned size = max(f.get_width(), digits+8);
+ char *buf = new char[size];
+ char *end = buf+size;
+ char *ptr = end;
// Format exponent
if(showexp)
{
- ptr=int_to_str(exp, Fmt().showpos().fill('0').width(3), ptr);
- *--ptr=(f.get_uppercase() ? 'E' : 'e');
+ ptr = int_to_str(exp, Fmt().showpos().fill('0').width(3), ptr);
+ *--ptr = (f.get_uppercase() ? 'E' : 'e');
}
// Format mantissa left-to-right
- char *eptr=ptr;
- ptr-=digits+(point<digits || f.get_showpoint());
- char *mptr=ptr;
+ char *eptr = ptr;
+ ptr -= digits+(point<digits || f.get_showpoint());
+ char *mptr = ptr;
for(unsigned i=0; i<digits; ++i)
{
if(i==point)
- *mptr++='.';
+ *mptr++ = '.';
if(showexp || static_cast<int>(i)>=-exp)
{
- int digit=static_cast<int>(w);
- *mptr++='0'+digit;
- w=(w-digit)*10;
+ int digit = static_cast<int>(w);
+ *mptr++ = '0'+digit;
+ w = (w-digit)*10;
}
else
- *mptr++='0';
+ *mptr++ = '0';
}
if(f.get_showpoint())
{
// Radix point requested but not displayed yet, add it
if(digits<=point)
- *mptr++='.';
+ *mptr++ = '.';
}
else if(mode==Fmt::AUTOFLT && digits>point)
{
if(mptr!=eptr)
{
while(mptr!=ptr)
- *--eptr=*--mptr;
- ptr=eptr;
+ *--eptr = *--mptr;
+ ptr = eptr;
}
}
// Add filling and sign
if(f.get_fill()=='0')
{
- unsigned pfxlen=(sign!=0);
+ unsigned pfxlen = (sign!=0);
while(end-ptr+pfxlen<f.get_width())
- *--ptr='0';
+ *--ptr = '0';
}
if(sign)
- *--ptr=sign;
+ *--ptr = sign;
string result(ptr, end);
delete[] buf;
if(s.empty())
throw LexicalError("Empty input in floating-point conversion");
- std::string::const_iterator i=s.begin();
+ std::string::const_iterator i = s.begin();
// See if the input starts with a sign
- bool neg=false;
+ bool neg = false;
if(*i=='-')
{
- neg=true;
+ neg = true;
++i;
}
else if(*i=='+')
if(i==s.end())
throw LexicalError("Missing digits in floating-point conversion");
- long double v=0;
- int exp=0;
+ long double v = 0;
+ int exp = 0;
// Parse mantissa
- bool point_seen=false;
+ bool point_seen = false;
for(; i!=s.end(); ++i)
{
if(*i=='.')
{
if(point_seen)
throw LexicalError("Extra point in floating-point conversion");
- point_seen=true;
+ point_seen = true;
}
else if(*i>='0' && *i<='9')
{
- v=v*10+(*i-'0');
+ v = v*10+(*i-'0');
if(point_seen)
--exp;
}
// We have an exponent
++i;
- exp+=str_to_int<int>(string(i, s.end()), Fmt());
+ exp += str_to_int<int>(string(i, s.end()), Fmt());
// str_to_int has eaten the rest of the input or thrown
break;
}
// Scale and negate the result as needed
while(exp>0)
{
- v*=10;
+ v *= 10;
--exp;
}
while(exp<0)
{
- v/=10;
+ v /= 10;
++exp;
}
if(neg)
- v=-v;
+ v = -v;
return v;
}
if(s.size()<fmt.get_width())
{
if(fmt.get_align()==Fmt::RIGHT)
- buf=string(fmt.get_width()-s.size(), fmt.get_fill())+s;
+ buf = string(fmt.get_width()-s.size(), fmt.get_fill())+s;
else
- buf=s+string(fmt.get_width()-s.size(), fmt.get_fill());
+ buf = s+string(fmt.get_width()-s.size(), fmt.get_fill());
}
else
- buf=s;
+ buf = s;
}
void operator<<(LexicalConverter &c, char v)
{
- Fmt::Type type=c.get_fmt().get_type();
+ Fmt::Type type = c.get_fmt().get_type();
if(type==Fmt::NUM)
c.result(int_to_str(v, c.get_fmt()));
else
void operator>>(const LexicalConverter &c, char &v)
{
if(c.get_fmt().get_type()==Fmt::NUM)
- v=str_to_int<char>(c.get(), c.get_fmt());
+ v = str_to_int<char>(c.get(), c.get_fmt());
else
{
- const std::string &s=c.get();
+ const std::string &s = c.get();
if(s.empty())
throw LexicalError("Empty input in character conversion");
if(s.size()>1)
throw LexicalError("Extra input in character conversion");
- v=s[0];
+ v = s[0];
}
}
void operator>>(const LexicalConverter &c, signed char &v)
-{ v=str_to_int<signed char>(c.get(), c.get_fmt()); }
+{ v = str_to_int<signed char>(c.get(), c.get_fmt()); }
void operator>>(const LexicalConverter &c, short &v)
-{ v=str_to_int<short>(c.get(), c.get_fmt()); }
+{ v = str_to_int<short>(c.get(), c.get_fmt()); }
void operator>>(const LexicalConverter &c, int &v)
-{ v=str_to_int<int>(c.get(), c.get_fmt()); }
+{ v = str_to_int<int>(c.get(), c.get_fmt()); }
void operator>>(const LexicalConverter &c, long &v)
-{ v=str_to_int<long>(c.get(), c.get_fmt()); }
+{ v = str_to_int<long>(c.get(), c.get_fmt()); }
void operator>>(const LexicalConverter &c, unsigned char &v)
-{ v=str_to_int<unsigned char>(c.get(), c.get_fmt()); }
+{ v = str_to_int<unsigned char>(c.get(), c.get_fmt()); }
void operator>>(const LexicalConverter &c, unsigned short &v)
-{ v=str_to_int<unsigned short>(c.get(), c.get_fmt()); }
+{ v = str_to_int<unsigned short>(c.get(), c.get_fmt()); }
void operator>>(const LexicalConverter &c, unsigned int &v)
-{ v=str_to_int<unsigned int>(c.get(), c.get_fmt()); }
+{ v = str_to_int<unsigned int>(c.get(), c.get_fmt()); }
void operator>>(const LexicalConverter &c, unsigned long &v)
-{ v=str_to_int<unsigned long>(c.get(), c.get_fmt()); }
+{ v = str_to_int<unsigned long>(c.get(), c.get_fmt()); }
#ifdef __GNUC__
void operator>>(const LexicalConverter &c, long long &v)
-{ v=str_to_int<long long>(c.get(), c.get_fmt()); }
+{ v = str_to_int<long long>(c.get(), c.get_fmt()); }
void operator>>(const LexicalConverter &c, unsigned long long &v)
-{ v=str_to_int<unsigned long long>(c.get(), c.get_fmt()); }
+{ v = str_to_int<unsigned long long>(c.get(), c.get_fmt()); }
#endif
void operator>>(const LexicalConverter &c, bool &v)
-{ v=str_to_bool(c.get()); }
+{ v = str_to_bool(c.get()); }
void operator>>(const LexicalConverter &c, float &v)
-{ v=str_to_flt<float>(c.get(), c.get_fmt()); }
+{ v = str_to_flt<float>(c.get(), c.get_fmt()); }
void operator>>(const LexicalConverter &c, double &v)
-{ v=str_to_flt<double>(c.get(), c.get_fmt()); }
+{ v = str_to_flt<double>(c.get(), c.get_fmt()); }
void operator>>(const LexicalConverter &c, long double &v)
-{ v=str_to_flt<long double>(c.get(), c.get_fmt()); }
+{ v = str_to_flt<long double>(c.get(), c.get_fmt()); }
void operator>>(const LexicalConverter &c, string &s)
-{ s=str_to_str(c.get(), c.get_fmt()); }
+{ s = str_to_str(c.get(), c.get_fmt()); }
} // namespace Msp
LexicalError(const std::string &w_): Exception(w_) { }
};
+
/**
Helper class for lexical_cast to facilitate operator overloading.
*/
void result(const std::string &);
};
+
void operator<<(LexicalConverter &, char);
void operator<<(LexicalConverter &, signed char);
void operator<<(LexicalConverter &, short);
// The main interface to the lexical conversion machinery
template<typename T>
-inline T lexical_cast(const std::string &s, const Fmt &f=Fmt())
+inline T lexical_cast(const std::string &s, const Fmt &f = Fmt())
{
LexicalConverter conv(s, f);
T result;
}
template<typename T>
-inline std::string lexical_cast(const T &v, const Fmt &f=Fmt())
+inline std::string lexical_cast(const T &v, const Fmt &f = Fmt())
{
LexicalConverter conv(f);
conv<<v;
namespace {
-/**
-Writes an integer to a Regex code string, in little-endian order.
-*/
+/** Writes an integer to a Regex code string, in little-endian order. */
template<typename T>
void write_int(T n, Msp::Regex::Code &code)
{
for(unsigned i=0; i<sizeof(T); ++i)
- code+=(n>>i*8)&0xFF;
+ code += (n>>i*8)&0xFF;
}
-/**
-Reads an integer from a Regex code stream, in little-endian order.
-*/
+/** Reads an integer from a Regex code stream, in little-endian order. */
template<typename T>
T read_int(Msp::Regex::Code::const_iterator &c)
{
- T result=0;
+ T result = 0;
for(unsigned i=0; i<sizeof(T); ++i)
- result+=static_cast<unsigned char>(*c++)<<i*8;
+ result += static_cast<unsigned char>(*c++)<<i*8;
return result;
}
}
+
namespace Msp {
Regex::Regex(const string &expr)
{
- n_groups=0;
- string::const_iterator iter=expr.begin();
- code=compile(expr, iter, n_groups, false);
+ n_groups = 0;
+ string::const_iterator iter = expr.begin();
+ code = compile(expr, iter, n_groups, false);
++n_groups;
}
-RegMatch Regex::match(const string &str) const
-{
- RegMatch::GroupArray groups(n_groups);
-
- for(string::const_iterator i=str.begin(); i!=str.end(); ++i)
- if(run(str, i, groups))
- return RegMatch(str, groups);
-
- return RegMatch();
-}
-
-string Regex::disassemble() const
-{
- ostringstream ss;
-
- for(Code::const_iterator i=code.begin(); i!=code.end();)
- {
- Code::const_iterator j=i;
- Offset offset=i-code.begin();
- string decompiled=disassemble_instruction(i);
- string bytes;
- for(; j!=i; ++j)
- bytes+=format(" %02X", static_cast<int>(*j)&0xFF);
- ss<<Fmt("%3d")<<offset<<':'<<Fmt("%-9s")<<bytes;
- if(bytes.size()>9)
- ss<<"\n"<<Fmt("%15s");
- ss<<" "<<decompiled<<'\n';
- }
-
- return ss.str();
-}
-
Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, unsigned &group, bool branch)
{
- bool has_branches=false;
- unsigned level=0;
- bool escape=false;
- unsigned bracket=0;
+ bool has_branches = false;
+ unsigned level = 0;
+ bool escape = false;
+ unsigned bracket = 0;
string::const_iterator end;
for(end=iter; end!=expr.end(); ++end)
{
if(escape)
- escape=false;
+ escape = false;
else if(bracket)
{
if(bracket==3 && *end==']')
- bracket=0;
+ bracket = 0;
else if(bracket==1 && *end=='^')
- bracket=2;
+ bracket = 2;
else
- bracket=3;
+ bracket = 3;
}
else if(*end=='\\')
- escape=true;
+ escape = true;
else if(*end=='(')
++level;
else if(*end==')')
if(branch)
break;
else
- has_branches=true;
+ has_branches = true;
}
else if(*end=='[')
- bracket=1;
+ bracket = 1;
}
if(level>0)
Code result;
- unsigned this_group=group;
+ unsigned this_group = group;
if(!branch)
{
- result+=GROUP_BEGIN;
+ result += GROUP_BEGIN;
write_int<Index>(this_group, result);
}
- const unsigned jump_size=1+sizeof(Offset);
+ const unsigned jump_size = 1+sizeof(Offset);
if(!has_branches)
{
for(string::const_iterator i=iter; i!=end;)
{
- Code atom=parse_atom(expr, i, group);
+ Code atom = parse_atom(expr, i, group);
- Count repeat_min=1;
- Count repeat_max=1;
+ Count repeat_min = 1;
+ Count repeat_max = 1;
parse_repeat(i, repeat_min, repeat_max);
for(unsigned j=0; j<repeat_min; ++j)
- result+=atom;
+ result += atom;
if(repeat_max==numeric_limits<Count>::max())
{
if(repeat_min==0)
{
- result+=ND_JUMP;
+ result += ND_JUMP;
write_int<Offset>(atom.size()+jump_size, result);
- result+=atom;
+ result += atom;
}
- result+=ND_JUMP;
+ result += ND_JUMP;
write_int<Offset>(-(atom.size()+jump_size), result);
}
else if(repeat_max>repeat_min)
{
for(unsigned j=repeat_min; j<repeat_max; ++j)
{
- result+=ND_JUMP;
+ result += ND_JUMP;
write_int<Offset>((repeat_max-j)*(atom.size()+jump_size)-jump_size, result);
- result+=atom;
+ result += atom;
}
}
}
++i;
}
- unsigned n_branches=branches.size();
+ unsigned n_branches = branches.size();
- Offset offset=(n_branches-1)*jump_size+branches.front().size();
+ Offset offset = (n_branches-1)*jump_size+branches.front().size();
for(list<Code>::iterator i=++branches.begin(); i!=branches.end(); ++i)
{
- result+=ND_JUMP;
+ result += ND_JUMP;
write_int<Offset>(offset, result);
- offset+=i->size();
+ offset += i->size();
}
for(list<Code>::iterator i=branches.begin(); i!=branches.end();)
{
- result+=*i;
- offset-=i->size()+jump_size;
+ result += *i;
+ offset -= i->size()+jump_size;
++i;
if(i!=branches.end())
{
- result+=JUMP;
+ result += JUMP;
write_int<Offset>(offset, result);
}
}
if(!branch)
{
- result+=GROUP_END;
+ result += GROUP_END;
write_int<Index>(this_group, result);
}
- iter=end;
+ iter = end;
return result;
}
if(i==expr.end())
return result;
- bool flag=false;
+ bool flag = false;
if(*i=='\\')
{
if(++i==expr.end())
throw InvalidParameterValue("Stray backslash");
- flag=true;
+ flag = true;
}
if(!flag)
else if(*i=='[')
return parse_brackets(expr, i);
else if(*i=='.')
- result+=MATCH_ANY;
+ result += MATCH_ANY;
else if(*i=='^')
- result+=MATCH_BEGIN;
+ result += MATCH_BEGIN;
else if(*i=='$')
- result+=MATCH_END;
+ result += MATCH_END;
else if(*i=='(')
{
++group;
- result=compile(expr, ++i, group, false);
+ result = compile(expr, ++i, group, false);
}
else
- flag=true;
+ flag = true;
}
if(flag)
{
- result+=MATCH_CHAR;
- result+=*i;
+ result += MATCH_CHAR;
+ result += *i;
}
++i;
return false;
if(*i=='*' || *i=='+')
- rmax=numeric_limits<Count>::max();
+ rmax = numeric_limits<Count>::max();
if(*i=='*' || *i=='?')
- rmin=0;
+ rmin = 0;
if(*i=='{')
{
- rmin=0;
+ rmin = 0;
for(++i; isdigit(*i); ++i)
- rmin=rmin*10+(*i-'0');
+ rmin = rmin*10+(*i-'0');
if(*i==',')
{
++i;
if(*i!='}')
{
- rmax=0;
+ rmax = 0;
for(; isdigit(*i); ++i)
- rmax=rmax*10+(*i-'0');
+ rmax = rmax*10+(*i-'0');
if(rmax<rmin)
throw InvalidParameterValue("Invalid bound");
}
else
- rmax=numeric_limits<Count>::max();
+ rmax = numeric_limits<Count>::max();
}
else
- rmax=rmin;
+ rmax = rmin;
if(*i!='}')
throw InvalidParameterValue("Invalid bound");
}
Code result;
++iter;
- bool neg=false;
+ bool neg = false;
if(*iter=='^')
{
- neg=true;
+ neg = true;
++iter;
}
- string::const_iterator end=iter;
+ string::const_iterator end = iter;
for(; (end!=str.end() && (end==iter || *end!=']')); ++end) ;
if(end==str.end())
throw InvalidParameterValue("Unmatched '['");
- unsigned char mask[32]={0};
- unsigned type=0;
- bool range=false;
- unsigned char first=0, last=0;
+ unsigned char mask[32] = {0};
+ unsigned type = 0;
+ bool range = false;
+ unsigned char first=0, last = 0;
for(string::const_iterator i=iter; i!=end; ++i)
{
- unsigned char c=*i;
+ unsigned char c = *i;
if(range)
{
- last=c;
+ last = c;
for(unsigned j=first; j<=c; ++j)
- mask[j>>3]|=1<<(j&7);
- range=false;
+ mask[j>>3] |= 1<<(j&7);
+ range = false;
if(type<2)
- type=2;
+ type = 2;
}
else if(c=='-' && i!=iter && end-i>1)
- range=true;
+ range = true;
else
{
- first=c;
- mask[c>>3]|=1<<(c&7);
+ first = c;
+ mask[c>>3] |= 1<<(c&7);
if(type==0)
- type=1;
+ type = 1;
else
- type=3;
+ type = 3;
}
}
if(neg)
- result+=NEGATE;
+ result += NEGATE;
if(type==1)
{
- result+=MATCH_CHAR;
- result+=first;
+ result += MATCH_CHAR;
+ result += first;
}
else if(type==2)
{
- result+=MATCH_RANGE;
- result+=first;
- result+=last;
+ result += MATCH_RANGE;
+ result += first;
+ result += last;
}
else
{
- result+=MATCH_MASK;
+ result += MATCH_MASK;
result.append(reinterpret_cast<char *>(mask), 32);
}
- iter=end;
+ iter = end;
++iter;
return result;
}
+RegMatch Regex::match(const string &str) const
+{
+ RegMatch::GroupArray groups(n_groups);
+
+ for(string::const_iterator i=str.begin(); i!=str.end(); ++i)
+ if(run(str, i, groups))
+ return RegMatch(str, groups);
+
+ return RegMatch();
+}
+
bool Regex::run(const string &str, const string::const_iterator &begin, RegMatch::GroupArray &groups) const
{
- bool result=false;
+ bool result = false;
list<RunContext> ctx;
ctx.push_back(RunContext());
- ctx.front().citer=code.begin();
+ ctx.front().citer = code.begin();
ctx.front().groups.resize(groups.size());
for(string::const_iterator i=begin;;)
{
int c;
if(i!=str.end())
- c=static_cast<unsigned char>(*i);
+ c = static_cast<unsigned char>(*i);
else
- c=-1;
+ c = -1;
for(list<RunContext>::iterator j=ctx.begin(); j!=ctx.end();)
{
- bool terminate=false;
- bool negate_match=false;
+ bool terminate = false;
+ bool negate_match = false;
for(; j->citer!=code.end();)
{
- Instruction instr=static_cast<Instruction>(*j->citer++);
+ Instruction instr = static_cast<Instruction>(*j->citer++);
if(instr==NEGATE)
- negate_match=true;
+ negate_match = true;
else if(instr==JUMP)
{
- Offset offset=read_int<Offset>(j->citer);
- j->citer+=offset;
+ Offset offset = read_int<Offset>(j->citer);
+ j->citer += offset;
}
else if(instr==ND_JUMP)
{
- Offset offset=read_int<Offset>(j->citer);
+ Offset offset = read_int<Offset>(j->citer);
ctx.push_back(*j);
- ctx.back().citer+=offset;
+ ctx.back().citer += offset;
}
else if(instr==GROUP_BEGIN)
{
- Index n=read_int<Index>(j->citer);
+ Index n = read_int<Index>(j->citer);
if(!j->groups[n].match)
- j->groups[n].begin=i-str.begin();
+ j->groups[n].begin = i-str.begin();
}
else if(instr==GROUP_END)
{
- Index n=read_int<Index>(j->citer);
+ Index n = read_int<Index>(j->citer);
if(!j->groups[n].match)
{
- j->groups[n].match=true;
- j->groups[n].end=i-str.begin();
- j->groups[n].length=j->groups[n].end-j->groups[n].begin;
+ j->groups[n].match = true;
+ j->groups[n].end = i-str.begin();
+ j->groups[n].length = j->groups[n].end-j->groups[n].begin;
}
if(n==0)
{
- result=true;
- bool better=false;
+ result = true;
+ bool better = false;
for(unsigned k=0; (k<groups.size() && !better); ++k)
{
- better=group_compare(j->groups[k], groups[k]);
+ better = group_compare(j->groups[k], groups[k]);
if(group_compare(groups[k], j->groups[k]))
break;
}
if(better)
- groups=j->groups;
+ groups = j->groups;
}
}
else
{
- bool match_result=false;
- bool input_consumed=false;
+ bool match_result = false;
+ bool input_consumed = false;
if(instr==MATCH_BEGIN)
- match_result=(i==str.begin());
+ match_result = (i==str.begin());
else if(instr==MATCH_END)
- match_result=(i==str.end());
+ match_result = (i==str.end());
else if(instr==MATCH_CHAR)
{
- match_result=(c==*j->citer++);
- input_consumed=true;
+ match_result = (c==*j->citer++);
+ input_consumed = true;
}
else if(instr==MATCH_RANGE)
{
- unsigned char first=*j->citer++;
- unsigned char last=*j->citer++;
- match_result=(c>=first && c<=last);
- input_consumed=true;
+ unsigned char first = *j->citer++;
+ unsigned char last = *j->citer++;
+ match_result = (c>=first && c<=last);
+ input_consumed = true;
}
else if(instr==MATCH_MASK)
{
if(c>=0 && c<=0xFF)
{
- unsigned char m=*(j->citer+(c>>3));
- match_result=m&(1<<(c&7));
+ unsigned char m = *(j->citer+(c>>3));
+ match_result = m&(1<<(c&7));
}
- input_consumed=true;
- j->citer+=32;
+ input_consumed = true;
+ j->citer += 32;
}
else if(instr==MATCH_ANY)
{
- match_result=true;
- input_consumed=true;
+ match_result = true;
+ input_consumed = true;
}
else
throw Exception("Invalid instruction");
if(match_result==negate_match)
- terminate=true;
- negate_match=false;
+ terminate = true;
+ negate_match = false;
if(input_consumed || terminate)
break;
}
if(terminate || j->citer==code.end())
- j=ctx.erase(j);
+ j = ctx.erase(j);
else
++j;
}
return g1.end>g2.end;
}
+string Regex::disassemble() const
+{
+ ostringstream ss;
+
+ for(Code::const_iterator i=code.begin(); i!=code.end();)
+ {
+ Code::const_iterator j = i;
+ Offset offset = i-code.begin();
+ string decompiled = disassemble_instruction(i);
+ string bytes;
+ for(; j!=i; ++j)
+ bytes += format(" %02X", static_cast<int>(*j)&0xFF);
+ ss<<Fmt("%3d")<<offset<<':'<<Fmt("%-9s")<<bytes;
+ if(bytes.size()>9)
+ ss<<"\n"<<Fmt("%15s");
+ ss<<" "<<decompiled<<'\n';
+ }
+
+ return ss.str();
+}
+
string Regex::disassemble_instruction(Code::const_iterator &i) const
{
- Instruction instr=static_cast<Instruction>(*i++);
+ Instruction instr = static_cast<Instruction>(*i++);
ostringstream result;
switch(instr)
{
case JUMP:
{
- Offset offset=read_int<Offset>(i);
+ Offset offset = read_int<Offset>(i);
result<<"JUMP "<<Fmt("%+d")<<offset<<" ("<<Fmt("%d")<<i-code.begin()+offset<<')';
}
break;
case ND_JUMP:
{
- Offset offset=read_int<Offset>(i);
+ Offset offset = read_int<Offset>(i);
result<<"ND_JUMP "<<Fmt("%+d")<<offset<<" ("<<Fmt("%d")<<i-code.begin()+offset<<')';
}
break;
break;
case MATCH_CHAR:
{
- char c=*i++;
+ char c = *i++;
result<<"MATCH_CHAR ";
if(c>=0x20 && c<=0x7E)
result<<'\''<<c<<'\'';
*/
class Regex
{
-public:
- /**
- Constructs a new Regex object.
- */
- Regex(const std::string &expr);
-
- /**
- Matches the regex against a string. Refer to RegMatch documentation for
- more information on the resulting object.
- */
- RegMatch match(const std::string &str) const;
-
- /**
- Returns a disassembled representation of the NFA bytecode. For debugging
- purposes.
- */
- std::string disassemble() const;
private:
typedef std::string Code;
typedef unsigned short Count;
enum Instruction
{
- FIRST_INSTRUCTION_=0,
+ FIRST_INSTRUCTION_ = 0,
JUMP,
ND_JUMP,
MATCH_MASK,
MATCH_ANY,
- LAST_INSTRUCTION_=31
+ LAST_INSTRUCTION_ = 31
};
struct RunContext
Code code;
unsigned n_groups;
- /**
- Compiles a regular expression into NFA bytecode. When compiling a whole
- regex, \a group should be set to 0. When the function returns, \a group will
- be the index of the last subexpression and \a iter will point to the first
- unused character in the expression.
-
- \param expr Expression to be compiled
- \param begin Iterator into the expression
- \param group Group counter, gets incremented for each subregex
- \param branch Whether we are compiling a branch
+public:
+ /** Constructs a new Regex object from a string representation. */
+ Regex(const std::string &expr);
- \return Compiled NFA bytecode
- */
+private:
+ /** Compiles a regular expression into NFA bytecode. The iterator will be
+ advanced to the first unused character in the string. */
Code compile(const std::string &expr, std::string::const_iterator &iter, unsigned &group, bool branch);
+
Code parse_atom(const std::string &, std::string::const_iterator &i, unsigned &);
Code parse_brackets(const std::string &, std::string::const_iterator &);
bool parse_repeat(std::string::const_iterator &, Count &, Count &);
+
+public:
+ /** Matches the regex against a string. Refer to RegMatch documentation for
+ more information on the resulting object. */
+ RegMatch match(const std::string &str) const;
+
+private:
bool run(const std::string &, const std::string::const_iterator &, RegMatch::GroupArray &) const;
bool group_compare(const RegMatch::Group &, const RegMatch::Group &) const;
+
+public:
+ /** Returns a disassembled representation of the NFA bytecode. For debugging
+ purposes. */
+ std::string disassemble() const;
+private:
std::string disassemble_instruction(Code::const_iterator &) const;
};
Copyright © 2007 Mikko Rasa
Distributed under the LGPL
*/
+
#include <msp/core/except.h>
#include "regmatch.h"
for(GroupArray::iterator i=groups.begin(); i!=groups.end(); ++i)
if(i->match)
{
- i->length=i->end-i->begin;
- i->str=str.substr(i->begin, i->length);
+ i->length = i->end-i->begin;
+ i->str = str.substr(i->begin, i->length);
}
}
Copyright © 2007 Mikko Rasa
Distributed under the LGPL
*/
+
#ifndef MSP_STRINGS_REGMATCH_H_
#define MSP_STRINGS_REGMATCH_H_
/**
This class stores the result of a Regex being matched against a string. If the
-match was successful, the RegMatch object evaluates to true, allowing it to be
-used in constructs like \code if(RegMatch match=regex.match("foo")) \endcode.
+match was successful, the RegMatch object evaluates to true.
A RegMatch representing a successful match has one or more groups, indicating
matching parts of the string. The first group (with index 0) indicates the
-part matched by the whol regex. Further groups, if present, indicate parts
+part matched by the whole regex. Further groups, if present, indicate parts
matched by subregexes. These are ordered from left to right, by the opening
parenthesis of the subregex.
*/
Group(): match(false) { }
operator bool() const { return match; }
};
+
typedef std::vector<Group> GroupArray;
- /**
- Constructs a RegMatch representig a non-match. Used by Regex.
- */
+private:
+ GroupArray groups;
+
+public:
+ /** Constructs a RegMatch representing a non-match. */
RegMatch() { }
- /**
- Constructs a new RegMatch from a string and groups. The length and str members
- of each group are computed and need not be set. Used by Regex.
- */
+ /** Constructs a new RegMatch from a string and groups. The length and str
+ members of each group are computed and need not be set. Intended to be used
+ by the Regex class. */
RegMatch(const std::string &, const std::vector<Group> &);
- /**
- Returns a reference to a single group in the match. An exception is thrown
- if the requested group does not exist.
- */
+ /** Returns a reference to a single group in the match. */
const Group &group(unsigned) const;
- /**
- Returns true if the RegMatch object represents a non-match.
- */
+ /** Returns true if the RegMatch object represents a non-match. */
bool empty() const { return groups.empty(); }
- /**
- Returns the number of groups in this match.
- */
+ /** Returns the number of groups in this match. */
unsigned size() const { return groups.size(); }
- /**
- Returns the begin offset of the whole match.
- */
- unsigned begin() const { return groups.empty()?0:groups[0].begin; }
+ /** Returns the begin offset of the whole match. */
+ unsigned begin() const { return groups.empty() ? 0 : groups[0].begin; }
- /**
- Returns the end offset of the whole match.
- */
- unsigned end() const { return groups.empty()?0:groups[0].end; }
+ /** Returns the end offset of the whole match. */
+ unsigned end() const { return groups.empty() ? 0 : groups[0].end; }
- /**
- Shortcut for the group() function.
- */
+ /** Shorthand for the group() function. */
const Group &operator[](unsigned i) const { return group(i); }
operator bool() const { return !empty(); }
-private:
- std::vector<Group> groups;
};
} // namespace Msp
if(ch<0 || ch>0x10FFFF)
return error(ch, buf, "Can't express character in UTF-8");
- unsigned bytes=1;
+ unsigned bytes = 1;
if(ch>0xFFFF)
- bytes=4;
+ bytes = 4;
else if(ch>0x7FF)
- bytes=3;
+ bytes = 3;
else if(ch>0x7F)
- bytes=2;
+ bytes = 2;
if(bytes==1)
- buf+=ch;
+ buf += ch;
else
{
char utf[4];
- utf[0]=0xFF<<(8-bytes) | ch>>(bytes*6-6);
+ utf[0] = 0xFF<<(8-bytes) | ch>>(bytes*6-6);
for(unsigned j=bytes-1; j>0; --j)
{
- utf[j]=0x80 | (ch&0x3F);
- ch>>=6;
+ utf[j] = 0x80 | (ch&0x3F);
+ ch >>= 6;
}
buf.append(utf, bytes);
if((*i&0xC0)==0x80)
{
- UnicodeChar result=error("UTF-8 tail byte found when expecting head");
+ UnicodeChar result = error("UTF-8 tail byte found when expecting head");
++i;
return result;
}
else if(*i&0x80)
{
- unsigned bytes=2;
- unsigned mask=0x20;
+ unsigned bytes = 2;
+ unsigned mask = 0x20;
for(; *i&mask; mask>>=1)
++bytes;
- string::const_iterator j=i;
+ string::const_iterator j = i;
- UnicodeChar result=(*j++)&(mask-1);
+ UnicodeChar result = (*j++)&(mask-1);
unsigned k;
for(k=1; (k<bytes && j!=str.end() && (*j&0xC0)==0x80); ++k)
- result=(result<<6) | ((*j++)&0x3F);
+ result = (result<<6) | ((*j++)&0x3F);
if(k<bytes)
- result=error("Incomplete UTF-8 character");
+ result = error("Incomplete UTF-8 character");
else if(!(result>>(bytes*5-4)) || !(result>>7))
- result=error("Denormalized UTF-8 multibyte sequence");
+ result = error("Denormalized UTF-8 multibyte sequence");
else if(result>0x10FFFF || (result>=0xD800 && result<=0xDFFF))
- result=error("Invalid Unicode code point");
+ result = error("Invalid Unicode code point");
- i=j;
+ i = j;
return result;
}
else
class Encoder: public Codec::Encoder
{
public:
- Encoder(ErrorMode em=THROW_ON_ERROR): Codec::Encoder(em) { }
+ Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { }
+
virtual void encode_char(UnicodeChar, std::string &);
private:
virtual void transliterate(UnicodeChar, std::string &);
class Decoder: public Codec::Decoder
{
public:
- Decoder(ErrorMode em=THROW_ON_ERROR): Codec::Decoder(em) { }
+ Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { }
+
virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
};
virtual const char *get_name() const { return "UTF-8"; }
- virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
- virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); }
+ virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); }
};
} // namespace Codecs
{
vector<string> result;
- unsigned start=0;
+ unsigned start = 0;
while(start<str.size())
{
- unsigned end=long_sep ? str.find(sep, start) : str.find_first_of(sep, start);
+ unsigned end = long_sep ? str.find(sep, start) : str.find_first_of(sep, start);
if(end!=start || allow_empty)
{
if(max_split>=0 && result.size()==static_cast<unsigned>(max_split))
if(end>str.size())
break;
- start=end+(long_sep ? sep.size() : 1);
+ start = end+(long_sep ? sep.size() : 1);
if(allow_empty && start==str.size())
result.push_back(string());
}
+
namespace Msp {
int strcasecmp(const string &s1, const string &s2)
{
- string::const_iterator i1=s1.begin();
- string::const_iterator i2=s2.begin();
+ string::const_iterator i1 = s1.begin();
+ string::const_iterator i2 = s2.begin();
for(; (i1!=s1.end() && i2!=s2.end()); ++i1, ++i2)
{
- const char c1=::tolower(*i1);
- const char c2=::tolower(*i2);
+ const char c1 = ::tolower(*i1);
+ const char c2 = ::tolower(*i2);
if(c1!=c2) return c1-c2;
}
if(i1!=s1.end()) return *i1;
string strip(const string &s)
{
- string result=s;
+ string result = s;
if(!result.erase(0, result.find_first_not_of(" \t\r\n")).empty())
result.erase(result.find_last_not_of(" \t\r\n")+1);
return result;
string c_unescape(const std::string &str)
{
- bool escape=false;
- unsigned numeric_type=0;
- unsigned numeric_pos=0;
- unsigned numeric_value=0;
+ bool escape = false;
+ unsigned numeric_type = 0;
+ unsigned numeric_pos = 0;
+ unsigned numeric_value = 0;
string result;
for(string::const_iterator i=str.begin(); i!=str.end(); ++i)
{
if(numeric_type==16)
{
- unsigned digit=0;
+ unsigned digit = 0;
if(*i>='0' && *i<='9')
- digit=*i-'0';
+ digit = *i-'0';
else if(*i>='a' && *i<='f')
- digit=*i-'a'+10;
+ digit = *i-'a'+10;
else if(*i>='A' && *i<='F')
- digit=*i-'A'+10;
+ digit = *i-'A'+10;
else
throw InvalidParameterValue("Invalid hexadecimal digit");
- numeric_value=(numeric_value<<4 | digit);
+ numeric_value = (numeric_value<<4 | digit);
++numeric_pos;
if(numeric_pos==2)
{
- result+=numeric_value;
- numeric_type=0;
+ result += numeric_value;
+ numeric_type = 0;
}
}
else if(numeric_type==8)
{
- unsigned digit=0;
+ unsigned digit = 0;
if(*i>='0' && *i<='7')
- digit=*i-'0';
+ digit = *i-'0';
else
throw InvalidParameterValue("Invalid octal digit");
- numeric_value=(numeric_value<<3 | digit);
+ numeric_value = (numeric_value<<3 | digit);
++numeric_pos;
if(numeric_pos==3)
{
- result+=numeric_value;
- numeric_type=0;
+ result += numeric_value;
+ numeric_type = 0;
}
}
else if(escape)
{
if(*i=='x')
{
- numeric_type=16;
- numeric_pos=0;
- numeric_value=0;
+ numeric_type = 16;
+ numeric_pos = 0;
+ numeric_value = 0;
}
else if(*i>='0' && *i<='3')
{
- numeric_type=8;
- numeric_pos=1;
- numeric_value=*i-'0';
+ numeric_type = 8;
+ numeric_pos = 1;
+ numeric_value = *i-'0';
}
else if(*i=='n')
- result+='\n';
+ result += '\n';
else if(*i=='t')
- result+='\t';
+ result += '\t';
else if(*i=='r')
- result+='\r';
+ result += '\r';
else if(*i=='b')
- result+='\b';
+ result += '\b';
else if(*i=='v')
- result+='\v';
+ result += '\v';
else if(*i=='a')
- result+='\a';
+ result += '\a';
else if(*i=='f')
- result+='\f';
+ result += '\f';
else if(*i=='\"')
- result+='\"';
+ result += '\"';
else if(*i=='\'')
- result+='\'';
+ result += '\'';
else if(*i=='\\')
- result+='\\';
+ result += '\\';
else
throw InvalidParameterValue("Invalid escape sequence");
- escape=false;
+ escape = false;
}
else if(*i=='\\')
- escape=true;
+ escape = true;
else
- result+=*i;
+ result += *i;
}
if(escape)
for(string::const_iterator i=str.begin(); i!=str.end(); ++i)
{
if(*i=='\n')
- result+="\\n";
+ result += "\\n";
else if(*i=='\t')
- result+="\\t";
+ result += "\\t";
else if(*i=='\r')
- result+="\\r";
+ result += "\\r";
else if(*i=='\b')
- result+="\\b";
+ result += "\\b";
else if(*i=='\v')
- result+="\\v";
+ result += "\\v";
else if(*i=='\a')
- result+="\\a";
+ result += "\\a";
else if(*i=='\f')
- result+="\\f";
+ result += "\\f";
else if(*i=='\"')
- result+="\\\"";
+ result += "\\\"";
else if(*i=='\'')
- result+="\\\'";
+ result += "\\\'";
else if(*i=='\\')
- result+="\\\\";
+ result += "\\\\";
else if(static_cast<unsigned char>(*i)<' ' || (escape_8bit && (*i&0x80)))
{
- char buf[4]={'\\', '0'+((*i>>6)&3), '0'+((*i>>3)&7), '0'+(*i&7)};
+ char buf[4] = {'\\', '0'+((*i>>6)&3), '0'+((*i>>3)&7), '0'+(*i&7)};
result.append(buf, 4);
}
else
- result+=*i;
+ result += *i;
}
return result;
namespace Msp {
-/**
-Compares two strings, ignoring upper/lower case.
-
-@param s1 First string
-@param s2 Second string
-
-@return -1 if s1<s2, 0 if s1==s2, 1 if s1>s2
-*/
+/** Compares two strings, ignoring upper/lower case. Returns an integer less
+than, equal to or greater than zero depending on whether the first string
+lexicographically precedes, is equal to or follows the second one,
+respectively. */
int strcasecmp(const std::string &s1, const std::string &s2);
-/**
-Converts a string to lower case.
-*/
+/** Converts a string to lower case. */
std::string tolower(const std::string &);
-/**
-Converts a string to upper case.
-*/
+/** Converts a string to upper case. */
std::string toupper(const std::string &);
-/**
-Checks whether a string consists of digits only.
-*/
+/** Checks whether a string consists of digits only. */
bool isnumrc(const std::string &);
-/**
-Checks whether a string consists of alphabetic characters only.
-*/
+/** Checks whether a string consists of alphabetic characters only. */
bool isalpha(const std::string &);
-/**
-Checks whether a string consists of alphanumeric characters only.
-*/
+/** Checks whether a string consists of alphanumeric characters only. */
bool isalnum(const std::string &);
/* These are required to make the standard version work from inside the Msp
using std::isalpha;
using std::isalnum;
-/**
-Splits a string at occurrences of any of the characters in sep. If max_split
-is non-negative, at most that many split will be performed, i.e. the resulting
-vector will contain at most max_split+1 elements. Two or more consecutive
-separator characters will be treated as a single separator.
-
-@param str A string
-@param sep Separator characters
-@param max_split Maximum number of splits to perform
-*/
-std::vector<std::string> split(const std::string &str, const std::string &sep=" \t\r\n", int max_split=-1);
+/** Splits a string at occurrences of any of the characters in sep. Default
+is to split at whitespace. Two or more consecutive separator characters will
+be treated as a single separator.
-/**
-Splits a string on occurrences of a single character.
-*/
-std::vector<std::string> split(const std::string &str, char sep, int max_split=-1);
+If max_split is non-negative, at most that many splits will be performed, i.e.
+the resulting vector will contain at most max_split+1 elements. */
+std::vector<std::string> split(const std::string &str, const std::string &sep = " \t\r\n", int max_split = -1);
-/**
-Splits a string on occurrences of another string.
-*/
-std::vector<std::string> split_long(const std::string &str, const std::string &sep, int max_split=-1);
+/** Splits a string on occurrences of a single character. */
+std::vector<std::string> split(const std::string &str, char sep, int max_split = -1);
-/**
-Splits a string on occurrences of another string. Two consecutive separators
-will cause an empty string to be placed in the result.
-*/
-std::vector<std::string> split_fields(const std::string &str, const std::string &sep, int max_split=-1);
+/** Splits a string on occurrences of another string. */
+std::vector<std::string> split_long(const std::string &str, const std::string &sep, int max_split = -1);
-/**
-Splits a string on occurrences of a single character. Two consecutive
-separators will cause an empty string to be placed in the result.
-*/
-std::vector<std::string> split_fields(const std::string &str, char sep, int max_split=-1);
+/** Splits a string on occurrences of another string. Two consecutive
+separators will cause an empty string to be placed in the result. */
+std::vector<std::string> split_fields(const std::string &str, const std::string &sep, int max_split = -1);
-/**
-Concatenates strings from an iterator range.
+/** Splits a string on occurrences of a single character. Two consecutive
+separators will cause an empty string to be placed in the result. */
+std::vector<std::string> split_fields(const std::string &str, char sep, int max_split = -1);
-@param begin First iterator
-@param end Last iterator
-@param sep Separator to be inserted between strings
-*/
+/** Concatenates strings from an iterator range. */
template<typename Iter>
-std::string join(Iter begin, Iter end, const std::string &sep=" ")
+std::string join(Iter begin, Iter end, const std::string &sep = " ")
{
std::string result;
for(Iter i=begin; i!=end; ++i)
{
if(i!=begin)
- result+=sep;
- result+=*i;
+ result += sep;
+ result += *i;
}
return result;
}
-/**
-Strips leading and trailing whitespace from a string.
-*/
+/** Strips leading and trailing whitespace from a string. */
std::string strip(const std::string &);
-/**
-Unescapes a string with C escape sequences.
-*/
+/** Unescapes a string with C escape sequences. */
std::string c_unescape(const std::string &str);
-/**
-Escapes any non-printable characters in a string with C escape sequences.
-
-@param str A string
-@param escape_8bit If true, consider characters with high bit set as
- non-printable
-
-@return An escaped version of the string
-*/
-std::string c_escape(const std::string &str, bool escape_8bit=true);
+/** Escapes any non-printable characters in a string with C escape sequences.
+Optionally, any characters with the high bit set can be escaped as well. */
+std::string c_escape(const std::string &str, bool escape_8bit = true);
} // namespace Msp
}
+
namespace Msp {
namespace Codecs {
void Windows1252::Encoder::encode_char(UnicodeChar ch, string &buf)
{
if((ch>=0 && ch<=0x7F) || (ch>=0xA0 && ch<=0xFF))
- buf+=ch;
+ buf += ch;
else
{
for(unsigned i=0; i<32; ++i)
if(table[i]==ch)
{
- buf+=ch;
+ buf += ch;
return;
}
void Windows1252::Encoder::transliterate(UnicodeChar, string &buf)
{
- buf+='?';
+ buf += '?';
}
if(i==str.end())
return error("No input");
- int ch=static_cast<unsigned char>(*i);
+ int ch = static_cast<unsigned char>(*i);
UnicodeChar result;
if(ch>=0x80 && ch<=0x9F)
{
- result=table[ch-0x80];
+ result = table[ch-0x80];
if(result==0)
- result=error("Undefined Windows-1252 character");
+ result = error("Undefined Windows-1252 character");
}
else
- result=ch;
+ result = ch;
++i;
return result;
class Encoder: public Codec::Encoder
{
public:
- Encoder(ErrorMode em=THROW_ON_ERROR): Codec::Encoder(em) { }
+ Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { }
+
virtual void encode_char(UnicodeChar, std::string &);
private:
virtual void transliterate(UnicodeChar, std::string &);
class Decoder: public Codec::Decoder
{
public:
- Decoder(ErrorMode em=THROW_ON_ERROR): Codec::Decoder(em) { }
+ Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { }
+
virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
};
virtual const char *get_name() const { return "Windows-1252"; }
- virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
- virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); }
+ virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); }
};
} // namespace Codecs
return 1;
}
- Codecs::Codec *from=Codecs::create_codec(argv[1]);
- Codecs::Codec *to=Codecs::create_codec(argv[2]);
+ Codecs::Codec *from = Codecs::create_codec(argv[1]);
+ Codecs::Codec *to = Codecs::create_codec(argv[2]);
- Codecs::Decoder *from_dec=from->create_decoder(Codecs::TRANSLITERATE);
- Codecs::Encoder *to_enc=to->create_encoder(Codecs::TRANSLITERATE);
+ Codecs::Decoder *from_dec = from->create_decoder(Codecs::TRANSLITERATE);
+ Codecs::Encoder *to_enc = to->create_encoder(Codecs::TRANSLITERATE);
string line;
while(getline(cin, line))
{
- line+='\n';
+ line += '\n';
Codecs::ustring ustr;
from_dec->decode(line, ustr);
string result;