From: Mikko Rasa Date: Wed, 25 May 2011 07:08:45 +0000 (+0300) Subject: Further style and comment adjustments X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=commitdiff_plain;h=5b1368cb791cab043f0435628cacbaff36e39b7b Further style and comment adjustments --- diff --git a/grep.cpp b/grep.cpp index 0a3cd8d..68ecf87 100644 --- a/grep.cpp +++ b/grep.cpp @@ -9,12 +9,12 @@ using namespace Msp; int main(int argc, char **argv) { - bool debug=false; + bool debug = false; GetOpt getopt; getopt.add_option('d', "debug", debug, GetOpt::NO_ARG); getopt(argc, argv); - const vector &args=getopt.get_args(); + const vector &args = getopt.get_args(); if(args.empty()) { @@ -28,7 +28,7 @@ int main(int argc, char **argv) string line; while(getline(cin, line)) { - if(RegMatch match=regex.match(line)) + if(RegMatch match = regex.match(line)) cout<0x7F) return error(ch, buf, "Can't express character in ASCII"); - buf+=ch; + buf += ch; } void Ascii::Encoder::transliterate(UnicodeChar ch, string &buf) { if(ch>=0xC0 && ch<=0xC5) - buf+='A'; + buf += 'A'; else if(ch==0xC6) - buf+="AE"; + buf += "AE"; else if(ch==0xC7) - buf+='C'; + buf += 'C'; else if(ch>=0xC8 && ch<=0xCB) - buf+='E'; + buf += 'E'; else if(ch>=0xCC && ch<=0xCF) - buf+='I'; + buf += 'I'; else if(ch==0xD0) - buf+='D'; + buf += 'D'; else if(ch==0xD1) - buf+='N'; + buf += 'N'; else if((ch>=0xD2 && ch<=0xD7) || ch==0xD9) - buf+='O'; + buf += 'O'; else if(ch==0xD8) - buf+='x'; + buf += 'x'; else if(ch>=0xDA && ch<=0xDC) - buf+='U'; + buf += 'U'; else if(ch==0xDD) - buf+='Y'; + buf += 'Y'; else if(ch==0xDE) - buf+='T'; + buf += 'T'; else if(ch==0xDF) - buf+="ss"; + buf += "ss"; else if(ch>=0xE0 && ch<=0xE5) - buf+='a'; + buf += 'a'; else if(ch==0xE6) - buf+="ae"; + buf += "ae"; else if(ch==0xE7) - buf+='c'; + buf += 'c'; else if(ch>=0xE8 && ch<=0xEB) - buf+='e'; + buf += 'e'; else if(ch>=0xEC && ch<=0xEF) - buf+='i'; + buf += 'i'; else if(ch==0xF0) - buf+='d'; + buf += 'd'; else if(ch==0xF1) - buf+='n'; + buf += 'n'; else if((ch>=0xF2 && ch<=0xF7) || ch==0xF9) - buf+='o'; + buf += 'o'; else if(ch==0xF8) - buf+='/'; + buf += '/'; else if(ch>=0xFA && ch<=0xFC) - buf+='u'; + buf += 'u'; else if(ch==0xFD) - buf+='y'; + buf += 'y'; else if(ch==0xFE) - buf+='t'; + buf += 't'; else if(ch==0xFF) - buf+='y'; + buf += 'y'; else - buf+='?'; + buf += '?'; } @@ -101,7 +101,7 @@ UnicodeChar Ascii::Decoder::decode_char(const string &str, string::const_iterato return error("No input"); else if(*i&0x80) { - UnicodeChar result=error("Undefined ASCII character"); + UnicodeChar result = error("Undefined ASCII character"); ++i; return result; } diff --git a/source/ascii.h b/source/ascii.h index ecff0db..0fee50e 100644 --- a/source/ascii.h +++ b/source/ascii.h @@ -19,7 +19,8 @@ public: class Encoder: public Codec::Encoder { public: - Encoder(ErrorMode em=THROW_ON_ERROR): Codec::Encoder(em) { } + Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { } + virtual void encode_char(UnicodeChar, std::string &); private: virtual void transliterate(UnicodeChar, std::string &); @@ -28,14 +29,15 @@ public: class Decoder: public Codec::Decoder { public: - Decoder(ErrorMode em=THROW_ON_ERROR): Codec::Decoder(em) { } + Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { } + virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &); }; virtual const char *get_name() const { return "ASCII"; } - virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); } - virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); } + virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); } + virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); } }; } // namespace Codecs diff --git a/source/codec.cpp b/source/codec.cpp index 42315e5..0014847 100644 --- a/source/codec.cpp +++ b/source/codec.cpp @@ -23,8 +23,8 @@ namespace Codecs { bool Codec::detect(const string &str) const { - Decoder *dec=create_decoder(); - bool result=true; + Decoder *dec = create_decoder(); + bool result = true; try { for(string::const_iterator i=str.begin(); i!=str.end(); ) @@ -32,7 +32,7 @@ bool Codec::detect(const string &str) const } catch(const CodecError &) { - result=false; + result = false; } delete dec; @@ -72,9 +72,9 @@ void Codec::Decoder::decode(const string &str, ustring &buf) { for(string::const_iterator i=str.begin(); i!=str.end();) { - UnicodeChar c=decode_char(str, i); + UnicodeChar c = decode_char(str, i); if(c!=-1) - buf+=c; + buf += c; } } @@ -104,9 +104,9 @@ Codec *create_codec(const string &n) for(string::const_iterator i=n.begin(); i!=n.end(); ++i) { if(isupper(*i)) - name+=tolower(*i); + name += tolower(*i); else if(islower(*i) || isdigit(*i)) - name+=*i; + name += *i; } if(name=="ascii") return new Ascii; @@ -123,32 +123,32 @@ Codec *create_codec(const string &n) Codec *detect_codec(const string &str) { - bool is_utf8=true; - bool is_ascii=true; - bool is_latin1=true; - unsigned utf8_mb=0; + bool is_utf8 = true; + bool is_ascii = true; + bool is_latin1 = true; + unsigned utf8_mb = 0; for(string::const_iterator i=str.begin(); i!=str.end(); ++i) { - unsigned char c=*i; + unsigned char c = *i; if(c&0x80) { - is_ascii=false; + is_ascii = false; if((c&0xC0)==0x80) { if((c&0xE0)==0x80) - is_latin1=false; + is_latin1 = false; if(utf8_mb) --utf8_mb; else - is_utf8=false; + is_utf8 = false; } else if((c&0xC0)==0xC0) { if(utf8_mb) { - is_utf8=false; - utf8_mb=0; + is_utf8 = false; + utf8_mb = 0; } else { @@ -158,8 +158,8 @@ Codec *detect_codec(const string &str) } else if(utf8_mb) { - is_utf8=false; - utf8_mb=0; + is_utf8 = false; + utf8_mb = 0; } } diff --git a/source/codec.h b/source/codec.h index e8b172e..e04e909 100644 --- a/source/codec.h +++ b/source/codec.h @@ -56,62 +56,41 @@ public: */ class Encoder { + protected: + ErrorMode err_mode; + + Encoder(ErrorMode em): err_mode(em) { } public: virtual ~Encoder() { } - /** - Encodes a single unicode character. If the character can't be - represented in this encoding, behavior depends on the error mode - specified for the encoder: - - For THROW_ON_ERROR, an exception is thrown. - - For IGNORE_ERRORS, nothing is done. - - For TRANSLITERATE, the encoder attempts to select a character or a string - or characters that closely approximates the non-representable character. - */ - virtual void encode_char(UnicodeChar ch, std::string &buf) =0; + /** Encodes a single unicode character. If the character can't be + represented in this encoding, error() should be called. */ + virtual void encode_char(UnicodeChar ch, std::string &buf) = 0; - /** - Encodes a unicode string. This is equivalent to callind encode_char for - each character in the string with the same buffer. - */ + /** Encodes a unicode string. This is equivalent to calling encode_char + for each character in the string with the same buffer. */ virtual void encode(const ustring &str, std::string &buf); std::string encode(const ustring &); - /** - Procuces a sequence of bytes that will bring the encoder back to the - initial state. - */ + /** Procuces a sequence of bytes that will bring the encoder back to the + initial state. */ virtual void sync(std::string &buf) { (void)buf; } - /** - Resets the encoder to the initial state without producing output. - */ + /** Resets the encoder to the initial state without producing output. */ virtual void reset() { } - protected: - ErrorMode err_mode; - - Encoder(ErrorMode em): err_mode(em) { } - - /** - Handles an error depending on the error mode. - - For THROW_ON_ERROR, throws CodecError(msg). - For IGNORE_ERROR, does nothing. + protected: + /** Handles an error depending on the error mode. - For TRANSLITERATE, calls transliterate(ch, buf). - */ + THROW_ON_ERROR: throws CodecError(msg) + IGNORE_ERRORS: does nothing + TRANSLITERATE: calls transliterate(ch, buf) */ void error(UnicodeChar ch, std::string &buf, const std::string &msg); - /** - Attempts to produce an alternative encoding for a unicode character. - Typically this includes dropping accent marks or romanizing letters. - */ - virtual void transliterate(UnicodeChar ch, std::string &buf) =0; + /** Attempts to produce an alternative encoding for a unicode character. + Typically this includes dropping accent marks or romanizing letters. */ + virtual void transliterate(UnicodeChar ch, std::string &buf) = 0; }; /** @@ -121,81 +100,63 @@ public: */ class Decoder { + protected: + ErrorMode err_mode; + + Decoder(ErrorMode em): err_mode(em) { } public: virtual ~Decoder() { } - /** - Decodes a single character from a string. The iterator is advanced to - the next character. For stateful codecs, -1 may be returned if a state - change sequence was decoded but no character followed it. In case a - decoding error occurs, behavior depends on the error mode specified for - the decoder: - - For THROW_ON_ERROR, an exception is thrown and the iterator is left at - the erroneous character. + /** Decodes a single character from a string. The iterator is advanced + to the next character. For stateful codecs, -1 may be returned if a + state change sequence was decoded but no character followed it. If + invalid input is encountered, the error() function should be called and + the iterator advanced only if it doesn't throw. */ + virtual UnicodeChar decode_char(const std::string &str, std::string::const_iterator &i) = 0; - For IGNORE_ERRORS, -1 is returned and the iterator is advanced. - - For TRANSLITERATE, 0xFFFE is returned and the iterator is advanced. - */ - virtual UnicodeChar decode_char(const std::string &str, std::string::const_iterator &i) =0; - - /** - Decodes a string. - */ + /** Decodes a string. */ virtual void decode(const std::string &str, ustring &buf); ustring decode(const std::string &); - /** - Resets the decoder to the initial state. - */ + /** Resets the decoder to the initial state. */ virtual void reset() { } - protected: - ErrorMode err_mode; - - Decoder(ErrorMode em): err_mode(em) { } - /** - Handles an error depending on the error mode. - */ - UnicodeChar error(const std::string &); + protected: + /** Handles an error depending on the error mode. The return value is + suitable for returning from decode_char. + + THROW_ON_ERROR: throws CodecError(msg) + IGNORE_ERRORS: returns -1 + TRANSLITERATE: return 0xFFFE */ + UnicodeChar error(const std::string &msg); }; +protected: + Codec() { } +public: virtual ~Codec() { } - /** - Returns the name of the encoding handled by this codec. - */ - virtual const char *get_name() const =0; + /** Returns the name of the encoding handled by this codec. */ + virtual const char *get_name() const = 0; - /** - Creates an encoder for this codec. - */ - virtual Encoder *create_encoder(ErrorMode err_mode=THROW_ON_ERROR) const =0; + /** Creates an encoder for this codec. */ + virtual Encoder *create_encoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0; - /** - Creates a decoder for this codec. - */ - virtual Decoder *create_decoder(ErrorMode err_mode=THROW_ON_ERROR) const =0; + /** Creates a decoder for this codec. */ + virtual Decoder *create_decoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0; - /** - Determines whether the given string can be successfully decoded with this - codec. Note that this function returning true does not guarantee that the - string was actually encoded with this codec. In particular, many 8-bit - encodings are indistinguishable. - */ + /** Determines whether the given string can be successfully decoded with + this codec. Note that this function returning true does not guarantee that + the string was actually encoded with this codec. In particular, many 8-bit + encodings are indistinguishable. */ virtual bool detect(const std::string &) const; -protected: - Codec() { } }; typedef Codec::Encoder Encoder; typedef Codec::Decoder Decoder; -/** -Convenience function that decodes a string. -*/ +/** Convenience function that decodes a string. */ template ustring decode(const std::string &s) { @@ -205,9 +166,7 @@ ustring decode(const std::string &s) return result; } -/** -Convenience function that encodes a string. -*/ +/** Convenience function that encodes a string. */ template std::string encode(const ustring &s) { @@ -218,9 +177,7 @@ std::string encode(const ustring &s) return result; } -/** -Convenience function that transcodes a string from one codec to another. -*/ +/** Convenience function that transcodes a string from one codec to another. */ template std::string transcode(const std::string &s) { @@ -234,16 +191,12 @@ std::string transcode(const std::string &s) return result; } -/** -Creates a codec for an encoding by name. The caller is responsible for -deleting the codec when it's no longer needed. -*/ +/** Creates a codec for an encoding by name. The caller is responsible for +deleting the codec when it's no longer needed. */ Codec *create_codec(const std::string &); -/** -Automatically detects the encoding of a string and creates a codec for it. -The codec must be deleted when it's no longer needed. -*/ +/** Automatically detects the encoding of a string and creates a codec for it. +The codec must be deleted when it's no longer needed. */ Codec *detect_codec(const std::string &); } // namespace Codecs diff --git a/source/fmt.cpp b/source/fmt.cpp index 0c1531d..e06f71a 100644 --- a/source/fmt.cpp +++ b/source/fmt.cpp @@ -12,47 +12,6 @@ using namespace std; namespace Msp { -/** -Resets the format to the default. Mainly used by constructors. -*/ -Fmt &Fmt::reset() -{ - wd=0; - prec=6; - spos=false; - fillc=' '; - base=DEC; - sbase=false; - fmode=AUTOFLT; - spoint=false; - align=RIGHT; - ucase=false; - type=STR; - - return *this; -} - -/** -Applies the format to the given ostream. All existing formatting information -is overwritten. -*/ -void Fmt::apply(ostream &out) const -{ - out.flags(((base==HEX) ? ios_base::hex : (base==OCT) ? ios_base::oct : ios_base::dec) - | ((fmode==SCI) ? ios_base::scientific : (fmode==FIXED) ? ios_base::fixed : ios_base::fmtflags(0)) - | (fillc=='0' ? ios_base::internal : (align==LEFT) ? ios_base::left : ios_base::right) - | (sbase ? ios_base::showbase : ios_base::fmtflags(0)) - | (spoint ? ios_base::showpoint : ios_base::fmtflags(0)) - | (spos ? ios_base::showpos : ios_base::fmtflags(0)) - | (ucase ? ios_base::uppercase : ios_base::fmtflags(0))); - out.fill(fillc); - out.width(wd); - out.precision(prec); -} - -/** -Parses a printf-style conversion specification. Called from constructors. -*/ void Fmt::parse(const char *f) { if(*f=='%') ++f; @@ -61,24 +20,24 @@ void Fmt::parse(const char *f) { if(*f=='#') { - sbase=true; - spoint=true; + sbase = true; + spoint = true; } else if(*f=='0') - fillc='0'; + fillc = '0'; else if(*f=='-') - align=LEFT; + align = LEFT; else if(*f=='+') - spos=true; + spos = true; else break; } - wd=0; + wd = 0; for(; *f; ++f) { if(*f>='0' && *f<='9') - wd=wd*10+(*f-'0'); + wd = wd*10+(*f-'0'); else break; } @@ -86,47 +45,47 @@ void Fmt::parse(const char *f) if(*f=='.') { ++f; - prec=0; + prec = 0; for(; *f; ++f) { if(*f>='0' && *f<='9') - prec=prec*10+(*f-'0'); + prec = prec*10+(*f-'0'); else break; } } - type=NUM; + type = NUM; if(*f=='d' || *f=='u') - base=DEC; + base = DEC; else if(*f=='x' || *f=='X') - base=HEX; + base = HEX; else if(*f=='o') - base=OCT; + base = OCT; else if(*f=='b') - base=BIN; + base = BIN; else if(*f=='e' || *f=='E') - fmode=SCI; + fmode = SCI; else if(*f=='f' || *f=='F') - fmode=FIXED; + fmode = FIXED; else if(*f=='g' || *f=='G') - fmode=AUTOFLT; + fmode = AUTOFLT; else if(*f=='p' || *f=='P') { - base=HEX; - sbase=true; + base = HEX; + sbase = true; } else if(*f=='c') - type=CHAR; + type = CHAR; else if(*f=='s') - type=STR; + type = STR; else if(*f=='i') - base=AUTOBASE; + base = AUTOBASE; else throw InvalidParameterValue("Invalid conversion specifier"); if(*f=='E' || *f=='F' || *f=='G' || *f=='X' || *f=='P') - ucase=true; + ucase = true; ++f; @@ -134,4 +93,35 @@ void Fmt::parse(const char *f) throw InvalidParameterValue("Extra characters in conversion specification"); } +Fmt &Fmt::reset() +{ + wd = 0; + prec = 6; + spos = false; + fillc = ' '; + base = DEC; + sbase = false; + fmode = AUTOFLT; + spoint = false; + align = RIGHT; + ucase = false; + type = STR; + + return *this; +} + +void Fmt::apply(ostream &out) const +{ + out.flags(((base==HEX) ? ios_base::hex : (base==OCT) ? ios_base::oct : ios_base::dec) + | ((fmode==SCI) ? ios_base::scientific : (fmode==FIXED) ? ios_base::fixed : ios_base::fmtflags(0)) + | (fillc=='0' ? ios_base::internal : (align==LEFT) ? ios_base::left : ios_base::right) + | (sbase ? ios_base::showbase : ios_base::fmtflags(0)) + | (spoint ? ios_base::showpoint : ios_base::fmtflags(0)) + | (spos ? ios_base::showpos : ios_base::fmtflags(0)) + | (ucase ? ios_base::uppercase : ios_base::fmtflags(0))); + out.fill(fillc); + out.width(wd); + out.precision(prec); +} + } // namespace Msp diff --git a/source/fmt.h b/source/fmt.h index 7dc605f..ba75958 100644 --- a/source/fmt.h +++ b/source/fmt.h @@ -66,58 +66,60 @@ public: }; private: - unsigned wd; - unsigned prec; - bool spos; - wchar_t fillc; - Base base; - bool sbase; + unsigned wd; + unsigned prec; + bool spos; + wchar_t fillc; + Base base; + bool sbase; FloatMode fmode; - bool spoint; - Align align; - bool ucase; - Type type; + bool spoint; + Align align; + bool ucase; + Type type; public: - Fmt() { reset(); } - Fmt(const char *f) { reset(); parse(f); } + Fmt() { reset(); } + Fmt(const char *f) { reset(); parse(f); } Fmt(const std::string &f) { reset(); parse(f.c_str()); } - Fmt &width(unsigned w) { wd=w; return *this; } - Fmt &precision(unsigned p) { prec=p; return *this; } - Fmt &showpos(bool s=true) { spos=s; return *this; } - Fmt &fill(wchar_t f) { fillc=f; return *this; } - Fmt &fixed() { fmode=FIXED; return *this; } - Fmt &scientific() { fmode=SCI; return *this; } - Fmt &showpoint(bool s=true) { spoint=s; return *this; } - Fmt &showbase(bool s=true) { sbase=s; return *this; } - Fmt &left() { align=LEFT; return *this; } - Fmt &right() { align=RIGHT; return *this; } - Fmt &dec() { base=DEC; return *this; } - Fmt &hex() { base=HEX; return *this; } - Fmt &oct() { base=OCT; return *this; } - Fmt &bin() { base=BIN; return *this; } - Fmt &uppercase(bool u=true) { ucase=u; return *this; } - Fmt &numeric() { type=NUM; return *this; } - Fmt &character() { type=CHAR; return *this; } - Fmt &string() { type=STR; return *this; } +private: + void parse(const char *); + +public: + Fmt &width(unsigned w) { wd = w; return *this; } + Fmt &precision(unsigned p) { prec = p; return *this; } + Fmt &showpos(bool s=true) { spos = s; return *this; } + Fmt &fill(wchar_t f) { fillc = f; return *this; } + Fmt &fixed() { fmode = FIXED; return *this; } + Fmt &scientific() { fmode = SCI; return *this; } + Fmt &showpoint(bool s=true) { spoint = s; return *this; } + Fmt &showbase(bool s=true) { sbase = s; return *this; } + Fmt &left() { align = LEFT; return *this; } + Fmt &right() { align = RIGHT; return *this; } + Fmt &dec() { base = DEC; return *this; } + Fmt &hex() { base = HEX; return *this; } + Fmt &oct() { base = OCT; return *this; } + Fmt &bin() { base = BIN; return *this; } + Fmt &uppercase(bool u=true) { ucase = u; return *this; } + Fmt &numeric() { type = NUM; return *this; } + Fmt &character() { type = CHAR; return *this; } + Fmt &string() { type = STR; return *this; } Fmt &reset(); - unsigned get_width() const { return wd; } - unsigned get_precision() const { return prec; } - bool get_showpos() const { return spos; } - wchar_t get_fill() const { return fillc; } - Base get_base() const { return base; } - bool get_showbase() const { return sbase; } + unsigned get_width() const { return wd; } + unsigned get_precision() const { return prec; } + bool get_showpos() const { return spos; } + wchar_t get_fill() const { return fillc; } + Base get_base() const { return base; } + bool get_showbase() const { return sbase; } FloatMode get_floatmode() const { return fmode; } - bool get_showpoint() const { return spoint; } - Align get_align() const { return align; } - bool get_uppercase() const { return ucase; } - Type get_type() const { return type; } + bool get_showpoint() const { return spoint; } + Align get_align() const { return align; } + bool get_uppercase() const { return ucase; } + Type get_type() const { return type; } void apply(std::ostream &) const; -private: - void parse(const char *); }; inline std::ostream &operator<<(std::ostream &o, const Fmt &f) diff --git a/source/formatter.cpp b/source/formatter.cpp index c7b24a5..d9d311d 100644 --- a/source/formatter.cpp +++ b/source/formatter.cpp @@ -48,7 +48,7 @@ void Formatter::advance() break; } - result+=*pos; + result += *pos; } } @@ -61,7 +61,7 @@ Fmt Formatter::get_conversion() if(pos==fmt.end()) throw Exception("Too many arguments for format"); - string::iterator i=pos; + string::iterator i = pos; for(; i!=fmt.end(); ++i) if(isalpha(*i)) break; @@ -71,7 +71,7 @@ Fmt Formatter::get_conversion() ++i; string c(pos, i); - pos=i; + pos = i; return Fmt(c); } diff --git a/source/formatter.h b/source/formatter.h index 521dd55..cde508b 100644 --- a/source/formatter.h +++ b/source/formatter.h @@ -18,29 +18,28 @@ Printf-like string formatter class. */ class Formatter { +private: + std::string fmt; + std::string::iterator pos; + std::string result; + public: Formatter(const std::string &); - /** - Extracts the next conversion from the format string and formats the given - value with it. Will throw if no more conversions are found. - */ + /** Extracts the next conversion from the format string and formats the + given value with it. Will throw if no more conversions are found. */ template Formatter &operator()(const T &a) { - result+=lexical_cast(a, get_conversion()); + result += lexical_cast(a, get_conversion()); advance(); return *this; } const std::string &str() const; private: - std::string fmt; - std::string::iterator pos; - std::string result; - void advance(); - Fmt get_conversion(); + Fmt get_conversion(); }; inline Formatter format(const std::string &f) diff --git a/source/glob.cpp b/source/glob.cpp index 359fdfa..0af6b9b 100644 --- a/source/glob.cpp +++ b/source/glob.cpp @@ -4,6 +4,7 @@ This file is part of libmspstrings Copyright © 2007 Mikko Rasa Distributed under the LGPL */ + #include "glob.h" using namespace std; diff --git a/source/glob.h b/source/glob.h index b487372..7942073 100644 --- a/source/glob.h +++ b/source/glob.h @@ -4,6 +4,7 @@ This file is part of libmspstrings Copyright © 2007 Mikko Rasa Distributed under the LGPL */ + #ifndef MSP_STRINGS_GLOB_H_ #define MSP_STRINGS_GLOB_H_ diff --git a/source/iso2022jp.cpp b/source/iso2022jp.cpp index 4d3a1df..d9c7cea 100644 --- a/source/iso2022jp.cpp +++ b/source/iso2022jp.cpp @@ -20,33 +20,33 @@ void Iso2022Jp::Encoder::encode_char(UnicodeChar ch, string &buf) { if(mode!=ASCII && mode!=JISX0201) switch_mode(ASCII, buf); - buf+=ch; + buf += ch; } else if(ch==0x5C || ch==0x7E) { if(mode!=ASCII) switch_mode(ASCII, buf); - buf+=ch; + buf += ch; } else if(ch==0xA5 || ch==0x203E) { if(mode!=JISX0201) switch_mode(JISX0201, buf); if(ch==0xA5) - buf+=0x5C; + buf += 0x5C; else if(ch==0x203E) - buf+=0x7E; + buf += 0x7E; } else { - Kuten jis=ucs_to_jisx0208(ch); + Kuten jis = ucs_to_jisx0208(ch); if(!jis) return error(ch, buf, "Can't express character in ISO-2022-JP"); if(mode!=JISX0208) switch_mode(JISX0208, buf); - char jbuf[2]={jis.ku+0x20, jis.ten+0x20}; + char jbuf[2] = {jis.ku+0x20, jis.ten+0x20}; buf.append(jbuf, 2); } } @@ -59,12 +59,12 @@ void Iso2022Jp::Encoder::sync(string &buf) void Iso2022Jp::Encoder::reset() { - mode=ASCII; + mode = ASCII; } void Iso2022Jp::Encoder::switch_mode(Mode m, string &buf) { - mode=m; + mode = m; switch(mode) { case ASCII: buf.append("\033(B", 3); break; @@ -78,7 +78,7 @@ void Iso2022Jp::Encoder::transliterate(UnicodeChar, string &buf) { if(mode!=ASCII) switch_mode(ASCII, buf); - buf+='?'; + buf += '?'; } @@ -95,33 +95,33 @@ UnicodeChar Iso2022Jp::Decoder::decode_char(const string &str, string::const_ite while(i!=str.end()) { - string::const_iterator j=i; + string::const_iterator j = i; - UnicodeChar result=-1; + UnicodeChar result = -1; if(*j==033) { - unsigned escape=0; + unsigned escape = 0; for(++j; j!=str.end(); ++j) { - escape=escape<<8 | static_cast(*j); + escape = escape<<8 | static_cast(*j); if(*j>='@' && *j<='Z') break; } - bool ok=true; + bool ok = true; switch(escape) { case 0x2842: switch_mode(ASCII); break; // ESC ( B case 0x284A: switch_mode(JISX0201); break; // ESC ( J case 0x2440: // ESC $ @ case 0x2442: switch_mode(JISX0208); break; // ESC $ B - default: ok=false; + default: ok = false; } if(ok) - i=j; + i = j; else - result=*i; + result = *i; ++i; } else if(dec) @@ -139,20 +139,20 @@ UnicodeChar Iso2022Jp::Decoder::decode_char(const string &str, string::const_ite void Iso2022Jp::Decoder::reset() { delete dec; - mode=ASCII; - dec=new Ascii::Decoder; + mode = ASCII; + dec = new Ascii::Decoder; } void Iso2022Jp::Decoder::switch_mode(Mode m) { delete dec; - mode=m; + mode = m; switch(mode) { - case ASCII: dec=new Ascii::Decoder; break; - case JISX0201: dec=new JisX0201::Decoder; break; - case JISX0208: dec=new JisX0208::Decoder; break; + case ASCII: dec = new Ascii::Decoder; break; + case JISX0201: dec = new JisX0201::Decoder; break; + case JISX0208: dec = new JisX0208::Decoder; break; } } diff --git a/source/iso2022jp.h b/source/iso2022jp.h index 4a452d4..8a95f9d 100644 --- a/source/iso2022jp.h +++ b/source/iso2022jp.h @@ -25,35 +25,39 @@ public: class Encoder: public Codec::Encoder { + private: + Mode mode; + public: - Encoder(ErrorMode em=THROW_ON_ERROR): Codec::Encoder(em), mode(ASCII) { } + Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em), mode(ASCII) { } + virtual void encode_char(UnicodeChar, std::string &); virtual void sync(std::string &); virtual void reset(); private: - Mode mode; - void switch_mode(Mode, std::string &); virtual void transliterate(UnicodeChar, std::string &); }; class Decoder: public Codec::Decoder { + private: + Mode mode; + Codec::Decoder *dec; + public: Decoder(ErrorMode =THROW_ON_ERROR); + virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &); virtual void reset(); private: - Mode mode; - Codec::Decoder *dec; - - virtual void switch_mode(Mode); + void switch_mode(Mode); }; virtual const char *get_name() const { return "ISO-2022-JP"; } - virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); } - virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); } + virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); } + virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); } }; } // namespace Codecs diff --git a/source/iso646fi.cpp b/source/iso646fi.cpp index 779eadf..100ce13 100644 --- a/source/iso646fi.cpp +++ b/source/iso646fi.cpp @@ -12,7 +12,7 @@ using namespace std; namespace { -const unsigned map_size=9; +const unsigned map_size = 9; const int mapping[map_size*2]= { @@ -27,23 +27,24 @@ const int mapping[map_size*2]= 0xFC, 0x7E }; -} // namespace +} + namespace Msp { namespace Codecs { void Iso646Fi::Encoder::encode_char(UnicodeChar ch, string &buf) { - int tch=transform_mapping_or_direct(mapping, map_size, ch, false); + int tch = transform_mapping_or_direct(mapping, map_size, ch, false); if(tch<0 || tch>0x7F) error(ch, buf, "Can't express character in ISO-646-FI"); else - buf+=tch; + buf += tch; } void Iso646Fi::Encoder::transliterate(UnicodeChar, string &buf) { - buf+='?'; + buf += '?'; } @@ -52,14 +53,14 @@ UnicodeChar Iso646Fi::Decoder::decode_char(const string &str, string::const_iter if(i==str.end()) return error("No input"); - unsigned char ch=*i; - int tch=(ch<=0x7F ? transform_mapping_or_direct(mapping, map_size, ch, true) : -1); + unsigned char ch = *i; + int tch = (ch<=0x7F ? transform_mapping_or_direct(mapping, map_size, ch, true) : -1); UnicodeChar result; if(tch==-1) - result=error("Undefined ISO-646-FI character"); + result = error("Undefined ISO-646-FI character"); else - result=tch; + result = tch; ++i; return result; diff --git a/source/iso646fi.h b/source/iso646fi.h index 3047edf..084e57f 100644 --- a/source/iso646fi.h +++ b/source/iso646fi.h @@ -19,7 +19,8 @@ public: class Encoder: public Codec::Encoder { public: - Encoder(ErrorMode em=THROW_ON_ERROR): Codec::Encoder(em) { } + Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { } + virtual void encode_char(UnicodeChar, std::string &); private: virtual void transliterate(UnicodeChar, std::string &); @@ -28,14 +29,15 @@ public: class Decoder: public Codec::Decoder { public: - Decoder(ErrorMode em=THROW_ON_ERROR): Codec::Decoder(em) { } + Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { } + virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &); }; virtual const char *get_name() const { return "ISO-646-FI"; } - virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); } - virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); } + virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); } + virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); } }; } // namespace Codecs diff --git a/source/iso88591.cpp b/source/iso88591.cpp index 17f84b1..064d4a7 100644 --- a/source/iso88591.cpp +++ b/source/iso88591.cpp @@ -17,12 +17,12 @@ void Iso88591::Encoder::encode_char(UnicodeChar ch, string &buf) if(ch<0 || ch>0xFF) return error(ch, buf, "Can't express character in ISO-8859-1"); - buf+=ch; + buf += ch; } void Iso88591::Encoder::transliterate(UnicodeChar, string &buf) { - buf+='?'; + buf += '?'; } diff --git a/source/iso88591.h b/source/iso88591.h index 80c706b..4365c0d 100644 --- a/source/iso88591.h +++ b/source/iso88591.h @@ -19,7 +19,8 @@ public: class Encoder: public Codec::Encoder { public: - Encoder(ErrorMode em=THROW_ON_ERROR): Codec::Encoder(em) { } + Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { } + virtual void encode_char(UnicodeChar, std::string &); private: virtual void transliterate(UnicodeChar, std::string &); @@ -28,14 +29,15 @@ public: class Decoder: public Codec::Decoder { public: - Decoder(ErrorMode em=THROW_ON_ERROR): Codec::Decoder(em) { } + Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { } + virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &); }; virtual const char *get_name() const { return "ISO-8859-1"; } - virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); } - virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); } + virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); } + virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); } }; } // namespace Codecs diff --git a/source/iso885915.cpp b/source/iso885915.cpp index b0fc2ef..3eccbcf 100644 --- a/source/iso885915.cpp +++ b/source/iso885915.cpp @@ -12,7 +12,7 @@ using namespace std; namespace { -const unsigned map_size=8; +const unsigned map_size = 8; const int mapping[map_size*2]= { @@ -28,22 +28,23 @@ const int mapping[map_size*2]= } + namespace Msp { namespace Codecs { void Iso885915::Encoder::encode_char(UnicodeChar ch, string &buf) { - int tch=transform_mapping_or_direct(mapping, map_size, ch, false); + int tch = transform_mapping_or_direct(mapping, map_size, ch, false); if(tch<0 || tch>0xFF) error(ch, buf, "Can't express character in ISO-8859-15"); else - buf+=tch; + buf += tch; } void Iso885915::Encoder::transliterate(UnicodeChar, string &buf) { - buf+='?'; + buf += '?'; } @@ -52,14 +53,14 @@ UnicodeChar Iso885915::Decoder::decode_char(const string &str, string::const_ite if(i==str.end()) return error("No input"); - unsigned char ch=*i; - int tch=transform_mapping_or_direct(mapping, map_size, ch, true); + unsigned char ch = *i; + int tch = transform_mapping_or_direct(mapping, map_size, ch, true); UnicodeChar result; if(tch==-1) - result=error("Undefined ISO-8859-15 character"); + result = error("Undefined ISO-8859-15 character"); else - result=tch; + result = tch; ++i; return result; diff --git a/source/iso885915.h b/source/iso885915.h index c42ad19..3237c84 100644 --- a/source/iso885915.h +++ b/source/iso885915.h @@ -19,7 +19,8 @@ public: class Encoder: public Codec::Encoder { public: - Encoder(ErrorMode em=THROW_ON_ERROR): Codec::Encoder(em) { } + Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { } + virtual void encode_char(UnicodeChar, std::string &); private: virtual void transliterate(UnicodeChar, std::string &); @@ -28,14 +29,15 @@ public: class Decoder: public Codec::Decoder { public: - Decoder(ErrorMode em=THROW_ON_ERROR): Codec::Decoder(em) { } + Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { } + virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &); }; virtual const char *get_name() const { return "ISO-8859-15"; } - virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); } - virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); } + virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); } + virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); } }; } // namespace Codecs diff --git a/source/jisx0201.cpp b/source/jisx0201.cpp index 9c1e929..d3fe635 100644 --- a/source/jisx0201.cpp +++ b/source/jisx0201.cpp @@ -15,20 +15,20 @@ namespace Codecs { void JisX0201::Encoder::encode_char(UnicodeChar ch, string &buf) { if(ch>=0 && ch<=0x7F && ch!=0x5C && ch!=0x7E) - buf+=ch; + buf += ch; else if(ch==0xA5) - buf+=0x5C; + buf += 0x5C; else if(ch==0x203E) - buf+=0x7E; + buf += 0x7E; else if(ch>=0xFF61 && ch<=0xFF9F) - buf+=ch-0xFEC0; + buf += ch-0xFEC0; else error(ch, buf, "Can't express character in JIS X 0201"); } void JisX0201::Encoder::transliterate(UnicodeChar, string &buf) { - buf+='?'; + buf += '?'; } @@ -37,18 +37,18 @@ UnicodeChar JisX0201::Decoder::decode_char(const string &str, string::const_iter if(i==str.end()) return error("No input"); - unsigned char ch=*i; + unsigned char ch = *i; UnicodeChar result; if(ch==0x5C) - result=0xA5; + result = 0xA5; else if(ch==0x7E) - result=0x203E; + result = 0x203E; else if(ch<=0x7F) - result=ch; + result = ch; else if(ch>=0xA1 && ch<=0xDF) - result=ch+0xFEC0; + result = ch+0xFEC0; else - result=error("Undefined JIS X 0201 character"); + result = error("Undefined JIS X 0201 character"); ++i; return result; diff --git a/source/jisx0201.h b/source/jisx0201.h index 180f710..41332de 100644 --- a/source/jisx0201.h +++ b/source/jisx0201.h @@ -19,7 +19,8 @@ public: class Encoder: public Codec::Encoder { public: - Encoder(ErrorMode em=THROW_ON_ERROR): Codec::Encoder(em) { } + Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { } + virtual void encode_char(UnicodeChar, std::string &); private: virtual void transliterate(UnicodeChar, std::string &); @@ -28,14 +29,15 @@ public: class Decoder: public Codec::Decoder { public: - Decoder(ErrorMode em=THROW_ON_ERROR): Codec::Decoder(em) { } + Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { } + virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &); }; virtual const char *get_name() const { return "JIS X 0201"; } - virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); } - virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); } + virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); } + virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); } }; } // namespace Codecs diff --git a/source/jisx0208.cpp b/source/jisx0208.cpp index 0dd0d21..6b46b1b 100644 --- a/source/jisx0208.cpp +++ b/source/jisx0208.cpp @@ -16,10 +16,10 @@ namespace Codecs { void JisX0208::Encoder::encode_char(UnicodeChar ucs, string &buf) { - unsigned short jis=ucs_to_jisx0208(ucs); + unsigned short jis = ucs_to_jisx0208(ucs); if(jis) { - char jbuf[2]={jis>>8, jis}; + char jbuf[2] = {jis>>8, jis}; buf.append(jbuf, 2); } else @@ -37,22 +37,22 @@ UnicodeChar JisX0208::Decoder::decode_char(const string &str, string::const_iter if(i==str.end()) return error("No input"); - string::const_iterator j=i; + string::const_iterator j = i; Kuten jis; - jis.ku=*j++-0x20; + jis.ku = *j++-0x20; UnicodeChar result; if(j==str.end()) - result=error("Incomplete JIS X 0208 character"); + result = error("Incomplete JIS X 0208 character"); else { - jis.ten=*j++-0x20; - result=jisx0208_to_ucs(jis); + jis.ten = *j++-0x20; + result = jisx0208_to_ucs(jis); if(result==0) - result=error("Undefined JIS X 0208 character"); + result = error("Undefined JIS X 0208 character"); } - i=j; + i = j; return result; } @@ -70,20 +70,20 @@ Kuten ucs_to_jisx0208(UnicodeChar c) if(c<0 || c>0xFFFF) return Kuten(); - unsigned i=0; + unsigned i = 0; for(unsigned bit=0x1000; bit; bit>>=1) { if(i+bit>=ucs_to_jisx0208_table_size) continue; if(ucs_to_jisx0208_table[i+bit].ucs<=static_cast(c)) - i+=bit; + i += bit; } Kuten result; if(ucs_to_jisx0208_table[i].ucs==static_cast(c)) { - result.ku=(ucs_to_jisx0208_table[i].jis>>8)+1; - result.ten=ucs_to_jisx0208_table[i].jis+1; + result.ku = (ucs_to_jisx0208_table[i].jis>>8)+1; + result.ten = ucs_to_jisx0208_table[i].jis+1; } return result; diff --git a/source/jisx0208.h b/source/jisx0208.h index 27609e6..0c4c4a6 100644 --- a/source/jisx0208.h +++ b/source/jisx0208.h @@ -24,7 +24,8 @@ public: class Encoder: public Codec::Encoder { public: - Encoder(ErrorMode em=THROW_ON_ERROR): Codec::Encoder(em) { } + Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { } + virtual void encode_char(UnicodeChar, std::string &); private: virtual void transliterate(UnicodeChar, std::string &); @@ -33,22 +34,25 @@ public: class Decoder: public Codec::Decoder { public: - Decoder(ErrorMode em=THROW_ON_ERROR): Codec::Decoder(em) { } + Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { } + virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &); }; virtual const char *get_name() const { return "JIS X 0208"; } - virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); } - virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); } + virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); } + virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); } }; + struct Kuten { unsigned short ku; unsigned short ten; Kuten(): ku(0), ten(0) { } + operator bool() { return ku!=0 && ten!=0; } }; diff --git a/source/lexicalcast.cpp b/source/lexicalcast.cpp index a1270d6..5903bb1 100644 --- a/source/lexicalcast.cpp +++ b/source/lexicalcast.cpp @@ -17,9 +17,9 @@ using namespace Msp; template struct IsSigned -{ enum { result=!(static_cast(-1)>0) }; }; +{ enum { result = !(static_cast(-1)>0) }; }; -templatesizeof(unsigned long))> +templatesizeof(unsigned long))> struct Temporary { typedef unsigned long Type; }; @@ -34,7 +34,7 @@ struct Temporary }; /* Helper to avoid warnings about an unsigned type never being < 0 */ -template::result> +template::result> struct IsNegative { static bool eval(T v) { return v<0; } }; @@ -44,7 +44,7 @@ struct IsNegative /* Helper to avoid errors about ambiguous function calls since there are no overloads of abs for unsigned types */ -template::result> +template::result> struct Absolute { static T eval(T v) { return v<0 ? -v : v; } }; @@ -55,63 +55,63 @@ struct Absolute /*** Integer conversions ***/ -const char udigits[]="0123456789ABCDEF"; -const char ldigits[]="0123456789abcdef"; +const char udigits[] = "0123456789ABCDEF"; +const char ldigits[] = "0123456789abcdef"; template char *int_to_str(T v, const Fmt &f, char *end) { if(f.get_type()==Fmt::CHAR) { - *--end=v; + *--end = v; return end; } - char *ptr=end; + char *ptr = end; // Find out the base to use - unsigned base=f.get_base(); + unsigned base = f.get_base(); if(!base) - base=10; + base = 10; // Format the number, starting from the least significant digit - const char *digits=(f.get_uppercase() ? udigits : ldigits); + const char *digits = (f.get_uppercase() ? udigits : ldigits); if(v) { - typename Temporary::Type w=Absolute::eval(v); + typename Temporary::Type w = Absolute::eval(v); while(w) { - *--ptr=digits[w%base]; - w/=base; + *--ptr = digits[w%base]; + w /= base; } } else - *--ptr=digits[0]; + *--ptr = digits[0]; - char sign=(IsNegative::eval(v) ? '-' : f.get_showpos() ? '+' : 0); + char sign = (IsNegative::eval(v) ? '-' : f.get_showpos() ? '+' : 0); if(f.get_fill()=='0') { /* Zero-fill, taking base/sign size into account. The expression is a bit ugly, but saves having to write code for creating the prefix both ways. */ - unsigned pfxsize=((f.get_showbase() && base!=10) ? base==8 ? 1 : 2 : 0) + (sign!=0); + unsigned pfxsize = ((f.get_showbase() && base!=10) ? base==8 ? 1 : 2 : 0) + (sign!=0); for(unsigned i=(end-ptr)+pfxsize; i string int_to_str(T v, const Fmt &f) { - unsigned size=max(f.get_width(), max(f.get_precision(), sizeof(T)*8+3)); - char *buf=new char[size]; + unsigned size = max(f.get_width(), max(f.get_precision(), sizeof(T)*8+3)); + char *buf = new char[size]; string result(int_to_str(v, f, buf+size), buf+size); delete[] buf; return result; @@ -132,15 +132,15 @@ T str_to_int(const std::string &s, const Fmt &f) if(s.empty()) throw LexicalError("Empty input in integer conversion"); - std::string::const_iterator i=s.begin(); + std::string::const_iterator i = s.begin(); // See if the input starts with a sign - bool neg=false; + bool neg = false; if(*i=='-') { if(!IsSigned::result) throw LexicalError("Negative sign in unsigned integer conversion"); - neg=true; + neg = true; ++i; } else if(*i=='+') @@ -150,7 +150,7 @@ T str_to_int(const std::string &s, const Fmt &f) if(i==s.end()) throw LexicalError("Missing digits in integer conversion"); - T base=f.get_base(); + T base = f.get_base(); if(!base && i!=s.end()) { // Automatic base detection requested, figure it out @@ -158,42 +158,42 @@ T str_to_int(const std::string &s, const Fmt &f) { if(*i=='x' || *i=='X') { - base=16; + base = 16; ++i; } else if(*i=='b' || *i=='B') { - base=2; + base = 2; ++i; } else - base=8; + base = 8; } else - base=10; + base = 10; } // Parse the digits - T result=0; + T result = 0; for(; i!=s.end(); ++i) { - T digit=base; + T digit = base; if(*i>='0' && *i<='9') - digit=*i-'0'; + digit = *i-'0'; else if(*i>='A' && *i<='F') - digit=*i-'A'+10; + digit = *i-'A'+10; else if(*i>='a' && *i<='f') - digit=*i-'a'+10; + digit = *i-'a'+10; if(digit>=base) throw LexicalError("Invalid digit in integer conversion"); - T next=result*base+digit; + T next = result*base+digit; if(next/base!=result) throw LexicalError("Overflow in integer conversion"); - result=next; + result = next; } if(neg) - result=-result; + result = -result; return result; } @@ -231,89 +231,89 @@ string flt_to_str(T v, const Fmt &f) if(f.get_type()==Fmt::CHAR) throw LexicalError("Character format in floating-point conversion"); - Fmt::FloatMode mode=f.get_floatmode(); - long double w=abs(v); - char sign=(v<0 ? '-' : f.get_showpos() ? '+' : 0); + Fmt::FloatMode mode = f.get_floatmode(); + long double w = abs(v); + char sign = (v<0 ? '-' : f.get_showpos() ? '+' : 0); // Handle infinity and not-a-number as special cases if(!(w+w>w) && w!=0) { string result; if(sign) - result+=sign; + result += sign; if(!(w>=0)) - result+=(f.get_uppercase() ? "NAN" : "nan"); + result += (f.get_uppercase() ? "NAN" : "nan"); else - result+=(f.get_uppercase() ? "INF" : "inf"); + result += (f.get_uppercase() ? "INF" : "inf"); if(result.size()=10) { - long double div=1; + long double div = 1; while(div*10=static_cast(digits)) { - point=1; - showexp=true; + point = 1; + showexp = true; } else { - point=max(exp, 0)+1; + point = max(exp, 0)+1; if(exp<0) - digits+=-exp; + digits += -exp; } } // Apply rounding - w+=5.0l/pow(10.0l, static_cast(digits)); + w += 5.0l/pow(10.0l, static_cast(digits)); if(w>10) { // Rounding bumped us to the next exponent, deal with it - w/=10; + w /= 10; if(mode==Fmt::AUTOFLT && exp+1==static_cast(digits)) { - point=1; - showexp=true; + point = 1; + showexp = true; } if(!showexp) { @@ -325,41 +325,41 @@ string flt_to_str(T v, const Fmt &f) } // Create a buffer and start from the end - unsigned size=max(f.get_width(), digits+8); - char *buf=new char[size]; - char *end=buf+size; - char *ptr=end; + unsigned size = max(f.get_width(), digits+8); + char *buf = new char[size]; + char *end = buf+size; + char *ptr = end; // Format exponent if(showexp) { - ptr=int_to_str(exp, Fmt().showpos().fill('0').width(3), ptr); - *--ptr=(f.get_uppercase() ? 'E' : 'e'); + ptr = int_to_str(exp, Fmt().showpos().fill('0').width(3), ptr); + *--ptr = (f.get_uppercase() ? 'E' : 'e'); } // Format mantissa left-to-right - char *eptr=ptr; - ptr-=digits+(point(i)>=-exp) { - int digit=static_cast(w); - *mptr++='0'+digit; - w=(w-digit)*10; + int digit = static_cast(w); + *mptr++ = '0'+digit; + w = (w-digit)*10; } else - *mptr++='0'; + *mptr++ = '0'; } if(f.get_showpoint()) { // Radix point requested but not displayed yet, add it if(digits<=point) - *mptr++='.'; + *mptr++ = '.'; } else if(mode==Fmt::AUTOFLT && digits>point) { @@ -371,20 +371,20 @@ string flt_to_str(T v, const Fmt &f) if(mptr!=eptr) { while(mptr!=ptr) - *--eptr=*--mptr; - ptr=eptr; + *--eptr = *--mptr; + ptr = eptr; } } // Add filling and sign if(f.get_fill()=='0') { - unsigned pfxlen=(sign!=0); + unsigned pfxlen = (sign!=0); while(end-ptr+pfxlen='0' && *i<='9') { - v=v*10+(*i-'0'); + v = v*10+(*i-'0'); if(point_seen) --exp; } @@ -437,7 +437,7 @@ T str_to_flt(const string &s, const Fmt &) // We have an exponent ++i; - exp+=str_to_int(string(i, s.end()), Fmt()); + exp += str_to_int(string(i, s.end()), Fmt()); // str_to_int has eaten the rest of the input or thrown break; } @@ -448,17 +448,17 @@ T str_to_flt(const string &s, const Fmt &) // Scale and negate the result as needed while(exp>0) { - v*=10; + v *= 10; --exp; } while(exp<0) { - v/=10; + v /= 10; ++exp; } if(neg) - v=-v; + v = -v; return v; } @@ -482,12 +482,12 @@ void LexicalConverter::result(const string &s) if(s.size()>(const LexicalConverter &c, char &v) { if(c.get_fmt().get_type()==Fmt::NUM) - v=str_to_int(c.get(), c.get_fmt()); + v = str_to_int(c.get(), c.get_fmt()); else { - const std::string &s=c.get(); + const std::string &s = c.get(); if(s.empty()) throw LexicalError("Empty input in character conversion"); if(s.size()>1) throw LexicalError("Extra input in character conversion"); - v=s[0]; + v = s[0]; } } void operator>>(const LexicalConverter &c, signed char &v) -{ v=str_to_int(c.get(), c.get_fmt()); } +{ v = str_to_int(c.get(), c.get_fmt()); } void operator>>(const LexicalConverter &c, short &v) -{ v=str_to_int(c.get(), c.get_fmt()); } +{ v = str_to_int(c.get(), c.get_fmt()); } void operator>>(const LexicalConverter &c, int &v) -{ v=str_to_int(c.get(), c.get_fmt()); } +{ v = str_to_int(c.get(), c.get_fmt()); } void operator>>(const LexicalConverter &c, long &v) -{ v=str_to_int(c.get(), c.get_fmt()); } +{ v = str_to_int(c.get(), c.get_fmt()); } void operator>>(const LexicalConverter &c, unsigned char &v) -{ v=str_to_int(c.get(), c.get_fmt()); } +{ v = str_to_int(c.get(), c.get_fmt()); } void operator>>(const LexicalConverter &c, unsigned short &v) -{ v=str_to_int(c.get(), c.get_fmt()); } +{ v = str_to_int(c.get(), c.get_fmt()); } void operator>>(const LexicalConverter &c, unsigned int &v) -{ v=str_to_int(c.get(), c.get_fmt()); } +{ v = str_to_int(c.get(), c.get_fmt()); } void operator>>(const LexicalConverter &c, unsigned long &v) -{ v=str_to_int(c.get(), c.get_fmt()); } +{ v = str_to_int(c.get(), c.get_fmt()); } #ifdef __GNUC__ void operator>>(const LexicalConverter &c, long long &v) -{ v=str_to_int(c.get(), c.get_fmt()); } +{ v = str_to_int(c.get(), c.get_fmt()); } void operator>>(const LexicalConverter &c, unsigned long long &v) -{ v=str_to_int(c.get(), c.get_fmt()); } +{ v = str_to_int(c.get(), c.get_fmt()); } #endif void operator>>(const LexicalConverter &c, bool &v) -{ v=str_to_bool(c.get()); } +{ v = str_to_bool(c.get()); } void operator>>(const LexicalConverter &c, float &v) -{ v=str_to_flt(c.get(), c.get_fmt()); } +{ v = str_to_flt(c.get(), c.get_fmt()); } void operator>>(const LexicalConverter &c, double &v) -{ v=str_to_flt(c.get(), c.get_fmt()); } +{ v = str_to_flt(c.get(), c.get_fmt()); } void operator>>(const LexicalConverter &c, long double &v) -{ v=str_to_flt(c.get(), c.get_fmt()); } +{ v = str_to_flt(c.get(), c.get_fmt()); } void operator>>(const LexicalConverter &c, string &s) -{ s=str_to_str(c.get(), c.get_fmt()); } +{ s = str_to_str(c.get(), c.get_fmt()); } } // namespace Msp diff --git a/source/lexicalcast.h b/source/lexicalcast.h index 42393fe..3bfc342 100644 --- a/source/lexicalcast.h +++ b/source/lexicalcast.h @@ -24,6 +24,7 @@ public: LexicalError(const std::string &w_): Exception(w_) { } }; + /** Helper class for lexical_cast to facilitate operator overloading. */ @@ -42,6 +43,7 @@ public: void result(const std::string &); }; + void operator<<(LexicalConverter &, char); void operator<<(LexicalConverter &, signed char); void operator<<(LexicalConverter &, short); @@ -105,7 +107,7 @@ void operator>>(const LexicalConverter &c, T &v) // The main interface to the lexical conversion machinery template -inline T lexical_cast(const std::string &s, const Fmt &f=Fmt()) +inline T lexical_cast(const std::string &s, const Fmt &f = Fmt()) { LexicalConverter conv(s, f); T result; @@ -114,7 +116,7 @@ inline T lexical_cast(const std::string &s, const Fmt &f=Fmt()) } template -inline std::string lexical_cast(const T &v, const Fmt &f=Fmt()) +inline std::string lexical_cast(const T &v, const Fmt &f = Fmt()) { LexicalConverter conv(f); conv< void write_int(T n, Msp::Regex::Code &code) { for(unsigned i=0; i>i*8)&0xFF; + code += (n>>i*8)&0xFF; } -/** -Reads an integer from a Regex code stream, in little-endian order. -*/ +/** Reads an integer from a Regex code stream, in little-endian order. */ template T read_int(Msp::Regex::Code::const_iterator &c) { - T result=0; + T result = 0; for(unsigned i=0; i(*c++)<(*c++)<(*j)&0xFF); - ss<9) - ss<<"\n"<0) @@ -132,45 +97,45 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns Code result; - unsigned this_group=group; + unsigned this_group = group; if(!branch) { - result+=GROUP_BEGIN; + result += GROUP_BEGIN; write_int(this_group, result); } - const unsigned jump_size=1+sizeof(Offset); + const unsigned jump_size = 1+sizeof(Offset); if(!has_branches) { for(string::const_iterator i=iter; i!=end;) { - Code atom=parse_atom(expr, i, group); + Code atom = parse_atom(expr, i, group); - Count repeat_min=1; - Count repeat_max=1; + Count repeat_min = 1; + Count repeat_max = 1; parse_repeat(i, repeat_min, repeat_max); for(unsigned j=0; j::max()) { if(repeat_min==0) { - result+=ND_JUMP; + result += ND_JUMP; write_int(atom.size()+jump_size, result); - result+=atom; + result += atom; } - result+=ND_JUMP; + result += ND_JUMP; write_int(-(atom.size()+jump_size), result); } else if(repeat_max>repeat_min) { for(unsigned j=repeat_min; j((repeat_max-j)*(atom.size()+jump_size)-jump_size, result); - result+=atom; + result += atom; } } } @@ -186,24 +151,24 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns ++i; } - unsigned n_branches=branches.size(); + unsigned n_branches = branches.size(); - Offset offset=(n_branches-1)*jump_size+branches.front().size(); + Offset offset = (n_branches-1)*jump_size+branches.front().size(); for(list::iterator i=++branches.begin(); i!=branches.end(); ++i) { - result+=ND_JUMP; + result += ND_JUMP; write_int(offset, result); - offset+=i->size(); + offset += i->size(); } for(list::iterator i=branches.begin(); i!=branches.end();) { - result+=*i; - offset-=i->size()+jump_size; + result += *i; + offset -= i->size()+jump_size; ++i; if(i!=branches.end()) { - result+=JUMP; + result += JUMP; write_int(offset, result); } } @@ -211,11 +176,11 @@ Regex::Code Regex::compile(const string &expr, string::const_iterator &iter, uns if(!branch) { - result+=GROUP_END; + result += GROUP_END; write_int(this_group, result); } - iter=end; + iter = end; return result; } @@ -227,12 +192,12 @@ Regex::Code Regex::parse_atom(const string &expr, string::const_iterator &i, uns if(i==expr.end()) return result; - bool flag=false; + bool flag = false; if(*i=='\\') { if(++i==expr.end()) throw InvalidParameterValue("Stray backslash"); - flag=true; + flag = true; } if(!flag) @@ -242,24 +207,24 @@ Regex::Code Regex::parse_atom(const string &expr, string::const_iterator &i, uns else if(*i=='[') return parse_brackets(expr, i); else if(*i=='.') - result+=MATCH_ANY; + result += MATCH_ANY; else if(*i=='^') - result+=MATCH_BEGIN; + result += MATCH_BEGIN; else if(*i=='$') - result+=MATCH_END; + result += MATCH_END; else if(*i=='(') { ++group; - result=compile(expr, ++i, group, false); + result = compile(expr, ++i, group, false); } else - flag=true; + flag = true; } if(flag) { - result+=MATCH_CHAR; - result+=*i; + result += MATCH_CHAR; + result += *i; } ++i; @@ -273,31 +238,31 @@ bool Regex::parse_repeat(string::const_iterator &i, Count &rmin, Count &rmax) return false; if(*i=='*' || *i=='+') - rmax=numeric_limits::max(); + rmax = numeric_limits::max(); if(*i=='*' || *i=='?') - rmin=0; + rmin = 0; if(*i=='{') { - rmin=0; + rmin = 0; for(++i; isdigit(*i); ++i) - rmin=rmin*10+(*i-'0'); + rmin = rmin*10+(*i-'0'); if(*i==',') { ++i; if(*i!='}') { - rmax=0; + rmax = 0; for(; isdigit(*i); ++i) - rmax=rmax*10+(*i-'0'); + rmax = rmax*10+(*i-'0'); if(rmax::max(); + rmax = numeric_limits::max(); } else - rmax=rmin; + rmax = rmin; if(*i!='}') throw InvalidParameterValue("Invalid bound"); } @@ -312,181 +277,192 @@ Regex::Code Regex::parse_brackets(const string &str, string::const_iterator &ite Code result; ++iter; - bool neg=false; + bool neg = false; if(*iter=='^') { - neg=true; + neg = true; ++iter; } - string::const_iterator end=iter; + string::const_iterator end = iter; for(; (end!=str.end() && (end==iter || *end!=']')); ++end) ; if(end==str.end()) throw InvalidParameterValue("Unmatched '['"); - unsigned char mask[32]={0}; - unsigned type=0; - bool range=false; - unsigned char first=0, last=0; + unsigned char mask[32] = {0}; + unsigned type = 0; + bool range = false; + unsigned char first=0, last = 0; for(string::const_iterator i=iter; i!=end; ++i) { - unsigned char c=*i; + unsigned char c = *i; if(range) { - last=c; + last = c; for(unsigned j=first; j<=c; ++j) - mask[j>>3]|=1<<(j&7); - range=false; + mask[j>>3] |= 1<<(j&7); + range = false; if(type<2) - type=2; + type = 2; } else if(c=='-' && i!=iter && end-i>1) - range=true; + range = true; else { - first=c; - mask[c>>3]|=1<<(c&7); + first = c; + mask[c>>3] |= 1<<(c&7); if(type==0) - type=1; + type = 1; else - type=3; + type = 3; } } if(neg) - result+=NEGATE; + result += NEGATE; if(type==1) { - result+=MATCH_CHAR; - result+=first; + result += MATCH_CHAR; + result += first; } else if(type==2) { - result+=MATCH_RANGE; - result+=first; - result+=last; + result += MATCH_RANGE; + result += first; + result += last; } else { - result+=MATCH_MASK; + result += MATCH_MASK; result.append(reinterpret_cast(mask), 32); } - iter=end; + iter = end; ++iter; return result; } +RegMatch Regex::match(const string &str) const +{ + RegMatch::GroupArray groups(n_groups); + + for(string::const_iterator i=str.begin(); i!=str.end(); ++i) + if(run(str, i, groups)) + return RegMatch(str, groups); + + return RegMatch(); +} + bool Regex::run(const string &str, const string::const_iterator &begin, RegMatch::GroupArray &groups) const { - bool result=false; + bool result = false; list ctx; ctx.push_back(RunContext()); - ctx.front().citer=code.begin(); + ctx.front().citer = code.begin(); ctx.front().groups.resize(groups.size()); for(string::const_iterator i=begin;;) { int c; if(i!=str.end()) - c=static_cast(*i); + c = static_cast(*i); else - c=-1; + c = -1; for(list::iterator j=ctx.begin(); j!=ctx.end();) { - bool terminate=false; - bool negate_match=false; + bool terminate = false; + bool negate_match = false; for(; j->citer!=code.end();) { - Instruction instr=static_cast(*j->citer++); + Instruction instr = static_cast(*j->citer++); if(instr==NEGATE) - negate_match=true; + negate_match = true; else if(instr==JUMP) { - Offset offset=read_int(j->citer); - j->citer+=offset; + Offset offset = read_int(j->citer); + j->citer += offset; } else if(instr==ND_JUMP) { - Offset offset=read_int(j->citer); + Offset offset = read_int(j->citer); ctx.push_back(*j); - ctx.back().citer+=offset; + ctx.back().citer += offset; } else if(instr==GROUP_BEGIN) { - Index n=read_int(j->citer); + Index n = read_int(j->citer); if(!j->groups[n].match) - j->groups[n].begin=i-str.begin(); + j->groups[n].begin = i-str.begin(); } else if(instr==GROUP_END) { - Index n=read_int(j->citer); + Index n = read_int(j->citer); if(!j->groups[n].match) { - j->groups[n].match=true; - j->groups[n].end=i-str.begin(); - j->groups[n].length=j->groups[n].end-j->groups[n].begin; + j->groups[n].match = true; + j->groups[n].end = i-str.begin(); + j->groups[n].length = j->groups[n].end-j->groups[n].begin; } if(n==0) { - result=true; - bool better=false; + result = true; + bool better = false; for(unsigned k=0; (kgroups[k], groups[k]); + better = group_compare(j->groups[k], groups[k]); if(group_compare(groups[k], j->groups[k])) break; } if(better) - groups=j->groups; + groups = j->groups; } } else { - bool match_result=false; - bool input_consumed=false; + bool match_result = false; + bool input_consumed = false; if(instr==MATCH_BEGIN) - match_result=(i==str.begin()); + match_result = (i==str.begin()); else if(instr==MATCH_END) - match_result=(i==str.end()); + match_result = (i==str.end()); else if(instr==MATCH_CHAR) { - match_result=(c==*j->citer++); - input_consumed=true; + match_result = (c==*j->citer++); + input_consumed = true; } else if(instr==MATCH_RANGE) { - unsigned char first=*j->citer++; - unsigned char last=*j->citer++; - match_result=(c>=first && c<=last); - input_consumed=true; + unsigned char first = *j->citer++; + unsigned char last = *j->citer++; + match_result = (c>=first && c<=last); + input_consumed = true; } else if(instr==MATCH_MASK) { if(c>=0 && c<=0xFF) { - unsigned char m=*(j->citer+(c>>3)); - match_result=m&(1<<(c&7)); + unsigned char m = *(j->citer+(c>>3)); + match_result = m&(1<<(c&7)); } - input_consumed=true; - j->citer+=32; + input_consumed = true; + j->citer += 32; } else if(instr==MATCH_ANY) { - match_result=true; - input_consumed=true; + match_result = true; + input_consumed = true; } else throw Exception("Invalid instruction"); if(match_result==negate_match) - terminate=true; - negate_match=false; + terminate = true; + negate_match = false; if(input_consumed || terminate) break; @@ -494,7 +470,7 @@ bool Regex::run(const string &str, const string::const_iterator &begin, RegMatch } if(terminate || j->citer==code.end()) - j=ctx.erase(j); + j = ctx.erase(j); else ++j; } @@ -526,22 +502,43 @@ bool Regex::group_compare(const RegMatch::Group &g1, const RegMatch::Group &g2) return g1.end>g2.end; } +string Regex::disassemble() const +{ + ostringstream ss; + + for(Code::const_iterator i=code.begin(); i!=code.end();) + { + Code::const_iterator j = i; + Offset offset = i-code.begin(); + string decompiled = disassemble_instruction(i); + string bytes; + for(; j!=i; ++j) + bytes += format(" %02X", static_cast(*j)&0xFF); + ss<9) + ss<<"\n"<(*i++); + Instruction instr = static_cast(*i++); ostringstream result; switch(instr) { case JUMP: { - Offset offset=read_int(i); + Offset offset = read_int(i); result<<"JUMP "<(i); + Offset offset = read_int(i); result<<"ND_JUMP "<=0x20 && c<=0x7E) result<<'\''< #include "regmatch.h" @@ -17,8 +18,8 @@ RegMatch::RegMatch(const string &str, const GroupArray &g): for(GroupArray::iterator i=groups.begin(); i!=groups.end(); ++i) if(i->match) { - i->length=i->end-i->begin; - i->str=str.substr(i->begin, i->length); + i->length = i->end-i->begin; + i->str = str.substr(i->begin, i->length); } } diff --git a/source/regmatch.h b/source/regmatch.h index f1f2448..19cac11 100644 --- a/source/regmatch.h +++ b/source/regmatch.h @@ -4,6 +4,7 @@ This file is part of libmspstrings Copyright © 2007 Mikko Rasa Distributed under the LGPL */ + #ifndef MSP_STRINGS_REGMATCH_H_ #define MSP_STRINGS_REGMATCH_H_ @@ -14,12 +15,11 @@ namespace Msp { /** This class stores the result of a Regex being matched against a string. If the -match was successful, the RegMatch object evaluates to true, allowing it to be -used in constructs like \code if(RegMatch match=regex.match("foo")) \endcode. +match was successful, the RegMatch object evaluates to true. A RegMatch representing a successful match has one or more groups, indicating matching parts of the string. The first group (with index 0) indicates the -part matched by the whol regex. Further groups, if present, indicate parts +part matched by the whole regex. Further groups, if present, indicate parts matched by subregexes. These are ordered from left to right, by the opening parenthesis of the subregex. */ @@ -40,53 +40,40 @@ public: Group(): match(false) { } operator bool() const { return match; } }; + typedef std::vector GroupArray; - /** - Constructs a RegMatch representig a non-match. Used by Regex. - */ +private: + GroupArray groups; + +public: + /** Constructs a RegMatch representing a non-match. */ RegMatch() { } - /** - Constructs a new RegMatch from a string and groups. The length and str members - of each group are computed and need not be set. Used by Regex. - */ + /** Constructs a new RegMatch from a string and groups. The length and str + members of each group are computed and need not be set. Intended to be used + by the Regex class. */ RegMatch(const std::string &, const std::vector &); - /** - Returns a reference to a single group in the match. An exception is thrown - if the requested group does not exist. - */ + /** Returns a reference to a single group in the match. */ const Group &group(unsigned) const; - /** - Returns true if the RegMatch object represents a non-match. - */ + /** Returns true if the RegMatch object represents a non-match. */ bool empty() const { return groups.empty(); } - /** - Returns the number of groups in this match. - */ + /** Returns the number of groups in this match. */ unsigned size() const { return groups.size(); } - /** - Returns the begin offset of the whole match. - */ - unsigned begin() const { return groups.empty()?0:groups[0].begin; } + /** Returns the begin offset of the whole match. */ + unsigned begin() const { return groups.empty() ? 0 : groups[0].begin; } - /** - Returns the end offset of the whole match. - */ - unsigned end() const { return groups.empty()?0:groups[0].end; } + /** Returns the end offset of the whole match. */ + unsigned end() const { return groups.empty() ? 0 : groups[0].end; } - /** - Shortcut for the group() function. - */ + /** Shorthand for the group() function. */ const Group &operator[](unsigned i) const { return group(i); } operator bool() const { return !empty(); } -private: - std::vector groups; }; } // namespace Msp diff --git a/source/utf8.cpp b/source/utf8.cpp index dd01150..c7e1705 100644 --- a/source/utf8.cpp +++ b/source/utf8.cpp @@ -17,25 +17,25 @@ void Utf8::Encoder::encode_char(UnicodeChar ch, string &buf) if(ch<0 || ch>0x10FFFF) return error(ch, buf, "Can't express character in UTF-8"); - unsigned bytes=1; + unsigned bytes = 1; if(ch>0xFFFF) - bytes=4; + bytes = 4; else if(ch>0x7FF) - bytes=3; + bytes = 3; else if(ch>0x7F) - bytes=2; + bytes = 2; if(bytes==1) - buf+=ch; + buf += ch; else { char utf[4]; - utf[0]=0xFF<<(8-bytes) | ch>>(bytes*6-6); + utf[0] = 0xFF<<(8-bytes) | ch>>(bytes*6-6); for(unsigned j=bytes-1; j>0; --j) { - utf[j]=0x80 | (ch&0x3F); - ch>>=6; + utf[j] = 0x80 | (ch&0x3F); + ch >>= 6; } buf.append(utf, bytes); @@ -55,33 +55,33 @@ UnicodeChar Utf8::Decoder::decode_char(const string &str, string::const_iterator if((*i&0xC0)==0x80) { - UnicodeChar result=error("UTF-8 tail byte found when expecting head"); + UnicodeChar result = error("UTF-8 tail byte found when expecting head"); ++i; return result; } else if(*i&0x80) { - unsigned bytes=2; - unsigned mask=0x20; + unsigned bytes = 2; + unsigned mask = 0x20; for(; *i&mask; mask>>=1) ++bytes; - string::const_iterator j=i; + string::const_iterator j = i; - UnicodeChar result=(*j++)&(mask-1); + UnicodeChar result = (*j++)&(mask-1); unsigned k; for(k=1; (k>(bytes*5-4)) || !(result>>7)) - result=error("Denormalized UTF-8 multibyte sequence"); + result = error("Denormalized UTF-8 multibyte sequence"); else if(result>0x10FFFF || (result>=0xD800 && result<=0xDFFF)) - result=error("Invalid Unicode code point"); + result = error("Invalid Unicode code point"); - i=j; + i = j; return result; } else diff --git a/source/utf8.h b/source/utf8.h index 0e5e068..0281b31 100644 --- a/source/utf8.h +++ b/source/utf8.h @@ -19,7 +19,8 @@ public: class Encoder: public Codec::Encoder { public: - Encoder(ErrorMode em=THROW_ON_ERROR): Codec::Encoder(em) { } + Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { } + virtual void encode_char(UnicodeChar, std::string &); private: virtual void transliterate(UnicodeChar, std::string &); @@ -28,14 +29,15 @@ public: class Decoder: public Codec::Decoder { public: - Decoder(ErrorMode em=THROW_ON_ERROR): Codec::Decoder(em) { } + Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { } + virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &); }; virtual const char *get_name() const { return "UTF-8"; } - virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); } - virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); } + virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); } + virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); } }; } // namespace Codecs diff --git a/source/utils.cpp b/source/utils.cpp index 5a6bccd..5728332 100644 --- a/source/utils.cpp +++ b/source/utils.cpp @@ -19,10 +19,10 @@ vector do_split(const string &str, const string &sep, int max_split) { vector result; - unsigned start=0; + unsigned start = 0; while(start=0 && result.size()==static_cast(max_split)) @@ -37,7 +37,7 @@ vector do_split(const string &str, const string &sep, int max_split) if(end>str.size()) break; - start=end+(long_sep ? sep.size() : 1); + start = end+(long_sep ? sep.size() : 1); if(allow_empty && start==str.size()) result.push_back(string()); @@ -56,16 +56,17 @@ bool check_str(const std::string &str, int (*pred)(int)) } + namespace Msp { int strcasecmp(const string &s1, const string &s2) { - string::const_iterator i1=s1.begin(); - string::const_iterator i2=s2.begin(); + string::const_iterator i1 = s1.begin(); + string::const_iterator i2 = s2.begin(); for(; (i1!=s1.end() && i2!=s2.end()); ++i1, ++i2) { - const char c1=::tolower(*i1); - const char c2=::tolower(*i2); + const char c1 = ::tolower(*i1); + const char c2 = ::tolower(*i2); if(c1!=c2) return c1-c2; } if(i1!=s1.end()) return *i1; @@ -129,7 +130,7 @@ vector split_fields(const string &str, char sep, int max_split) string strip(const string &s) { - string result=s; + string result = s; if(!result.erase(0, result.find_first_not_of(" \t\r\n")).empty()) result.erase(result.find_last_not_of(" \t\r\n")+1); return result; @@ -137,92 +138,92 @@ string strip(const string &s) string c_unescape(const std::string &str) { - bool escape=false; - unsigned numeric_type=0; - unsigned numeric_pos=0; - unsigned numeric_value=0; + bool escape = false; + unsigned numeric_type = 0; + unsigned numeric_pos = 0; + unsigned numeric_value = 0; string result; for(string::const_iterator i=str.begin(); i!=str.end(); ++i) { if(numeric_type==16) { - unsigned digit=0; + unsigned digit = 0; if(*i>='0' && *i<='9') - digit=*i-'0'; + digit = *i-'0'; else if(*i>='a' && *i<='f') - digit=*i-'a'+10; + digit = *i-'a'+10; else if(*i>='A' && *i<='F') - digit=*i-'A'+10; + digit = *i-'A'+10; else throw InvalidParameterValue("Invalid hexadecimal digit"); - numeric_value=(numeric_value<<4 | digit); + numeric_value = (numeric_value<<4 | digit); ++numeric_pos; if(numeric_pos==2) { - result+=numeric_value; - numeric_type=0; + result += numeric_value; + numeric_type = 0; } } else if(numeric_type==8) { - unsigned digit=0; + unsigned digit = 0; if(*i>='0' && *i<='7') - digit=*i-'0'; + digit = *i-'0'; else throw InvalidParameterValue("Invalid octal digit"); - numeric_value=(numeric_value<<3 | digit); + numeric_value = (numeric_value<<3 | digit); ++numeric_pos; if(numeric_pos==3) { - result+=numeric_value; - numeric_type=0; + result += numeric_value; + numeric_type = 0; } } else if(escape) { if(*i=='x') { - numeric_type=16; - numeric_pos=0; - numeric_value=0; + numeric_type = 16; + numeric_pos = 0; + numeric_value = 0; } else if(*i>='0' && *i<='3') { - numeric_type=8; - numeric_pos=1; - numeric_value=*i-'0'; + numeric_type = 8; + numeric_pos = 1; + numeric_value = *i-'0'; } else if(*i=='n') - result+='\n'; + result += '\n'; else if(*i=='t') - result+='\t'; + result += '\t'; else if(*i=='r') - result+='\r'; + result += '\r'; else if(*i=='b') - result+='\b'; + result += '\b'; else if(*i=='v') - result+='\v'; + result += '\v'; else if(*i=='a') - result+='\a'; + result += '\a'; else if(*i=='f') - result+='\f'; + result += '\f'; else if(*i=='\"') - result+='\"'; + result += '\"'; else if(*i=='\'') - result+='\''; + result += '\''; else if(*i=='\\') - result+='\\'; + result += '\\'; else throw InvalidParameterValue("Invalid escape sequence"); - escape=false; + escape = false; } else if(*i=='\\') - escape=true; + escape = true; else - result+=*i; + result += *i; } if(escape) @@ -238,32 +239,32 @@ string c_escape(const string &str, bool escape_8bit) for(string::const_iterator i=str.begin(); i!=str.end(); ++i) { if(*i=='\n') - result+="\\n"; + result += "\\n"; else if(*i=='\t') - result+="\\t"; + result += "\\t"; else if(*i=='\r') - result+="\\r"; + result += "\\r"; else if(*i=='\b') - result+="\\b"; + result += "\\b"; else if(*i=='\v') - result+="\\v"; + result += "\\v"; else if(*i=='\a') - result+="\\a"; + result += "\\a"; else if(*i=='\f') - result+="\\f"; + result += "\\f"; else if(*i=='\"') - result+="\\\""; + result += "\\\""; else if(*i=='\'') - result+="\\\'"; + result += "\\\'"; else if(*i=='\\') - result+="\\\\"; + result += "\\\\"; else if(static_cast(*i)<' ' || (escape_8bit && (*i&0x80))) { - char buf[4]={'\\', '0'+((*i>>6)&3), '0'+((*i>>3)&7), '0'+(*i&7)}; + char buf[4] = {'\\', '0'+((*i>>6)&3), '0'+((*i>>3)&7), '0'+(*i&7)}; result.append(buf, 4); } else - result+=*i; + result += *i; } return result; diff --git a/source/utils.h b/source/utils.h index 44c2edb..6e14591 100644 --- a/source/utils.h +++ b/source/utils.h @@ -13,39 +13,25 @@ Distributed under the LGPL namespace Msp { -/** -Compares two strings, ignoring upper/lower case. - -@param s1 First string -@param s2 Second string - -@return -1 if s1s2 -*/ +/** Compares two strings, ignoring upper/lower case. Returns an integer less +than, equal to or greater than zero depending on whether the first string +lexicographically precedes, is equal to or follows the second one, +respectively. */ int strcasecmp(const std::string &s1, const std::string &s2); -/** -Converts a string to lower case. -*/ +/** Converts a string to lower case. */ std::string tolower(const std::string &); -/** -Converts a string to upper case. -*/ +/** Converts a string to upper case. */ std::string toupper(const std::string &); -/** -Checks whether a string consists of digits only. -*/ +/** Checks whether a string consists of digits only. */ bool isnumrc(const std::string &); -/** -Checks whether a string consists of alphabetic characters only. -*/ +/** Checks whether a string consists of alphabetic characters only. */ bool isalpha(const std::string &); -/** -Checks whether a string consists of alphanumeric characters only. -*/ +/** Checks whether a string consists of alphanumeric characters only. */ bool isalnum(const std::string &); /* These are required to make the standard version work from inside the Msp @@ -55,81 +41,52 @@ using std::toupper; using std::isalpha; using std::isalnum; -/** -Splits a string at occurrences of any of the characters in sep. If max_split -is non-negative, at most that many split will be performed, i.e. the resulting -vector will contain at most max_split+1 elements. Two or more consecutive -separator characters will be treated as a single separator. - -@param str A string -@param sep Separator characters -@param max_split Maximum number of splits to perform -*/ -std::vector split(const std::string &str, const std::string &sep=" \t\r\n", int max_split=-1); +/** Splits a string at occurrences of any of the characters in sep. Default +is to split at whitespace. Two or more consecutive separator characters will +be treated as a single separator. -/** -Splits a string on occurrences of a single character. -*/ -std::vector split(const std::string &str, char sep, int max_split=-1); +If max_split is non-negative, at most that many split will be performed, i.e. +the resulting vector will contain at most max_split+1 elements. */ +std::vector split(const std::string &str, const std::string &sep = " \t\r\n", int max_split = -1); -/** -Splits a string on occurrences of another string. -*/ -std::vector split_long(const std::string &str, const std::string &sep, int max_split=-1); +/** Splits a string on occurrences of a single character. */ +std::vector split(const std::string &str, char sep, int max_split = -1); -/** -Splits a string on occurrences of another string. Two consecutive separators -will cause an empty string to be placed in the result. -*/ -std::vector split_fields(const std::string &str, const std::string &sep, int max_split=-1); +/** Splits a string on occurrences of another string. */ +std::vector split_long(const std::string &str, const std::string &sep, int max_split = -1); -/** -Splits a string on occurrences of a single character. Two consecutive -separators will cause an empty string to be placed in the result. -*/ -std::vector split_fields(const std::string &str, char sep, int max_split=-1); +/** Splits a string on occurrences of another string. Two consecutive +separators will cause an empty string to be placed in the result. */ +std::vector split_fields(const std::string &str, const std::string &sep, int max_split = -1); -/** -Concatenates strings from an iterator range. +/** Splits a string on occurrences of a single character. Two consecutive +separators will cause an empty string to be placed in the result. */ +std::vector split_fields(const std::string &str, char sep, int max_split = -1); -@param begin First iterator -@param end Last iterator -@param sep Separator to be inserted between strings -*/ +/** Concatenates strings from an iterator range. */ template -std::string join(Iter begin, Iter end, const std::string &sep=" ") +std::string join(Iter begin, Iter end, const std::string &sep = " ") { std::string result; for(Iter i=begin; i!=end; ++i) { if(i!=begin) - result+=sep; - result+=*i; + result += sep; + result += *i; } return result; } -/** -Strips leading and trailing whitespace from a string. -*/ +/** Strips leading and trailing whitespace from a string. */ std::string strip(const std::string &); -/** -Unescapes a string with C escape sequences. -*/ +/** Unescapes a string with C escape sequences. */ std::string c_unescape(const std::string &str); -/** -Escapes any non-printable characters in a string with C escape sequences. - -@param str A string -@param escape_8bit If true, consider characters with high bit set as - non-printable - -@return An escaped version of the string -*/ -std::string c_escape(const std::string &str, bool escape_8bit=true); +/** Escapes any non-printable characters in a string with C escape sequences. +Optionally, any characters with the high bit set can be escaped as well. */ +std::string c_escape(const std::string &str, bool escape_8bit = true); } // namespace Msp diff --git a/source/windows1252.cpp b/source/windows1252.cpp index d809c3f..bbd6753 100644 --- a/source/windows1252.cpp +++ b/source/windows1252.cpp @@ -21,19 +21,20 @@ unsigned short table[32]= } + namespace Msp { namespace Codecs { void Windows1252::Encoder::encode_char(UnicodeChar ch, string &buf) { if((ch>=0 && ch<=0x7F) || (ch>=0xA0 && ch<=0xFF)) - buf+=ch; + buf += ch; else { for(unsigned i=0; i<32; ++i) if(table[i]==ch) { - buf+=ch; + buf += ch; return; } @@ -43,7 +44,7 @@ void Windows1252::Encoder::encode_char(UnicodeChar ch, string &buf) void Windows1252::Encoder::transliterate(UnicodeChar, string &buf) { - buf+='?'; + buf += '?'; } @@ -52,16 +53,16 @@ UnicodeChar Windows1252::Decoder::decode_char(const string &str, string::const_i if(i==str.end()) return error("No input"); - int ch=static_cast(*i); + int ch = static_cast(*i); UnicodeChar result; if(ch>=0x80 && ch<=0x9F) { - result=table[ch-0x80]; + result = table[ch-0x80]; if(result==0) - result=error("Undefined Windows-1252 character"); + result = error("Undefined Windows-1252 character"); } else - result=ch; + result = ch; ++i; return result; diff --git a/source/windows1252.h b/source/windows1252.h index f817a0d..47b2cf0 100644 --- a/source/windows1252.h +++ b/source/windows1252.h @@ -19,7 +19,8 @@ public: class Encoder: public Codec::Encoder { public: - Encoder(ErrorMode em=THROW_ON_ERROR): Codec::Encoder(em) { } + Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { } + virtual void encode_char(UnicodeChar, std::string &); private: virtual void transliterate(UnicodeChar, std::string &); @@ -28,14 +29,15 @@ public: class Decoder: public Codec::Decoder { public: - Decoder(ErrorMode em=THROW_ON_ERROR): Codec::Decoder(em) { } + Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { } + virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &); }; virtual const char *get_name() const { return "Windows-1252"; } - virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); } - virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); } + virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); } + virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); } }; } // namespace Codecs diff --git a/transcode.cpp b/transcode.cpp index bba64ab..cfa24a9 100644 --- a/transcode.cpp +++ b/transcode.cpp @@ -14,16 +14,16 @@ int main(int argc, char **argv) return 1; } - Codecs::Codec *from=Codecs::create_codec(argv[1]); - Codecs::Codec *to=Codecs::create_codec(argv[2]); + Codecs::Codec *from = Codecs::create_codec(argv[1]); + Codecs::Codec *to = Codecs::create_codec(argv[2]); - Codecs::Decoder *from_dec=from->create_decoder(Codecs::TRANSLITERATE); - Codecs::Encoder *to_enc=to->create_encoder(Codecs::TRANSLITERATE); + Codecs::Decoder *from_dec = from->create_decoder(Codecs::TRANSLITERATE); + Codecs::Encoder *to_enc = to->create_encoder(Codecs::TRANSLITERATE); string line; while(getline(cin, line)) { - line+='\n'; + line += '\n'; Codecs::ustring ustr; from_dec->decode(line, ustr); string result;