X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fcodec.cpp;h=0014847a67cebd30a12585980f466cdf7e5ea98c;hp=865176edf2a8cb5223e721cdb6040a0511b4acd3;hb=5b1368cb791cab043f0435628cacbaff36e39b7b;hpb=c88a239ff3b218ad0226e8c53356350ccc1f7014 diff --git a/source/codec.cpp b/source/codec.cpp index 865176e..0014847 100644 --- a/source/codec.cpp +++ b/source/codec.cpp @@ -23,8 +23,8 @@ namespace Codecs { bool Codec::detect(const string &str) const { - Decoder *dec=create_decoder(); - bool result=true; + Decoder *dec = create_decoder(); + bool result = true; try { for(string::const_iterator i=str.begin(); i!=str.end(); ) @@ -32,7 +32,7 @@ bool Codec::detect(const string &str) const } catch(const CodecError &) { - result=false; + result = false; } delete dec; @@ -46,6 +46,14 @@ void Codec::Encoder::encode(const ustring &str, string &buf) encode_char(*i, buf); } +string Codec::Encoder::encode(const ustring &str) +{ + string buf; + encode(str, buf); + sync(buf); + return buf; +} + void Codec::Encoder::error(UnicodeChar ch, string &buf, const string &msg) { switch(err_mode) @@ -64,12 +72,19 @@ void Codec::Decoder::decode(const string &str, ustring &buf) { for(string::const_iterator i=str.begin(); i!=str.end();) { - UnicodeChar c=decode_char(str, i); + UnicodeChar c = decode_char(str, i); if(c!=-1) - buf+=c; + buf += c; } } +ustring Codec::Decoder::decode(const string &str) +{ + ustring buf; + decode(str, buf); + return buf; +} + UnicodeChar Codec::Decoder::error(const string &msg) { switch(err_mode) @@ -89,9 +104,9 @@ Codec *create_codec(const string &n) for(string::const_iterator i=n.begin(); i!=n.end(); ++i) { if(isupper(*i)) - name+=tolower(*i); + name += tolower(*i); else if(islower(*i) || isdigit(*i)) - name+=*i; + name += *i; } if(name=="ascii") return new Ascii; @@ -102,9 +117,61 @@ Codec *create_codec(const string &n) if(name=="jisx0201") return new JisX0201; if(name=="jisx0208") return new JisX0208; if(name=="utf8") return new Utf8; - if(name=="windows1252") return new Windows1252; + if(name=="windows1252" || name=="cp1252") return new Windows1252; throw InvalidParameterValue("Unknown string codec"); } +Codec *detect_codec(const string &str) +{ + bool is_utf8 = true; + bool is_ascii = true; + bool is_latin1 = true; + unsigned utf8_mb = 0; + + for(string::const_iterator i=str.begin(); i!=str.end(); ++i) + { + unsigned char c = *i; + if(c&0x80) + { + is_ascii = false; + if((c&0xC0)==0x80) + { + if((c&0xE0)==0x80) + is_latin1 = false; + if(utf8_mb) + --utf8_mb; + else + is_utf8 = false; + } + else if((c&0xC0)==0xC0) + { + if(utf8_mb) + { + is_utf8 = false; + utf8_mb = 0; + } + else + { + for(utf8_mb=1; (c>>(6-utf8_mb))&1; ++utf8_mb) ; + } + } + } + else if(utf8_mb) + { + is_utf8 = false; + utf8_mb = 0; + } + } + + if(is_ascii) + return new Ascii; + else if(is_utf8) + return new Utf8; + else if(is_latin1) + return new Iso88591; + else + return new Windows1252; +} + } // namespace Codecs } // namespace Msp