X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fstringcodec%2Fcodec.cpp;h=abec134b1a0fd00a25f936952368beba22c62742;hp=0014847a67cebd30a12585980f466cdf7e5ea98c;hb=HEAD;hpb=b56eb5ec1da675da0c66abc53c1e4f6c4e4cccbd diff --git a/source/stringcodec/codec.cpp b/source/stringcodec/codec.cpp index 0014847..ff2bd63 100644 --- a/source/stringcodec/codec.cpp +++ b/source/stringcodec/codec.cpp @@ -1,10 +1,3 @@ -/* $Id$ - -This file is part of libmspstrings -Copyright © 2006-2007 Mikko Rasa -Distributed under the LGPL -*/ - #include "ascii.h" #include "codec.h" #include "iso2022jp.h" @@ -13,27 +6,22 @@ Distributed under the LGPL #include "iso885915.h" #include "jisx0201.h" #include "jisx0208.h" +#include "utf16.h" #include "utf8.h" #include "windows1252.h" using namespace std; namespace Msp { -namespace Codecs { +namespace StringCodec { bool Codec::detect(const string &str) const { - Decoder *dec = create_decoder(); + Decoder *dec = create_decoder(IGNORE_ERRORS); + bool result = true; - try - { - for(string::const_iterator i=str.begin(); i!=str.end(); ) - dec->decode_char(str, i); - } - catch(const CodecError &) - { - result = false; - } + for(auto i=str.begin(); (result && i!=str.end()); ) + result = (dec->decode_char(str, i)!=-1); delete dec; @@ -42,8 +30,8 @@ bool Codec::detect(const string &str) const void Codec::Encoder::encode(const ustring &str, string &buf) { - for(ustring::const_iterator i=str.begin(); i!=str.end(); ++i) - encode_char(*i, buf); + for(unichar c: str) + encode_char(c, buf); } string Codec::Encoder::encode(const ustring &str) @@ -54,25 +42,13 @@ string Codec::Encoder::encode(const ustring &str) return buf; } -void Codec::Encoder::error(UnicodeChar ch, string &buf, const string &msg) -{ - switch(err_mode) - { - case TRANSLITERATE: - transliterate(ch, buf); - case IGNORE_ERRORS: - break; - default: - throw CodecError(msg); - } -} void Codec::Decoder::decode(const string &str, ustring &buf) { - for(string::const_iterator i=str.begin(); i!=str.end();) + for(auto i=str.begin(); i!=str.end();) { - UnicodeChar c = decode_char(str, i); + unichar c = decode_char(str, i); if(c!=-1) buf += c; } @@ -85,40 +61,47 @@ ustring Codec::Decoder::decode(const string &str) return buf; } -UnicodeChar Codec::Decoder::error(const string &msg) -{ - switch(err_mode) - { - case TRANSLITERATE: - return 0xFFFE; - case IGNORE_ERRORS: - return -1; - default: - throw CodecError(msg); - } -} - Codec *create_codec(const string &n) { string name; - for(string::const_iterator i=n.begin(); i!=n.end(); ++i) + string::const_iterator i; + for(i=n.begin(); i!=n.end(); ++i) { - if(isupper(*i)) + if(*i==':') + break; + else if(isupper(*i)) name += tolower(*i); else if(islower(*i) || isdigit(*i)) name += *i; } - if(name=="ascii") return new Ascii; - if(name=="iso2022jp") return new Iso2022Jp; - if(name=="iso646fi") return new Iso646Fi; - if(name=="iso88591" || name=="latin1") return new Iso88591; - if(name=="iso885915" || name=="latin9") return new Iso885915; - if(name=="jisx0201") return new JisX0201; - if(name=="jisx0208") return new JisX0208; - if(name=="utf8") return new Utf8; - if(name=="windows1252" || name=="cp1252") return new Windows1252; - throw InvalidParameterValue("Unknown string codec"); + ErrorMode em = DEFAULT; + if(i!=n.end() && *i==':') + { + string em_str(i+1, n.end()); + if(em_str=="throw") + em = THROW_ON_ERROR; + else if(em_str=="ignore") + em = IGNORE_ERRORS; + else if(em_str=="trans" || em_str=="transliterate") + em = TRANSLITERATE; + else + throw invalid_argument("StringCodec::create_codec"); + } + + if(name=="ascii") return new Ascii(em); + if(name=="iso2022jp") return new Iso2022Jp(em); + if(name=="iso646fi") return new Iso646Fi(em); + if(name=="iso88591" || name=="latin1") return new Iso88591(em); + if(name=="iso885915" || name=="latin9") return new Iso885915(em); + if(name=="jisx0201") return new JisX0201(em); + if(name=="jisx0208") return new JisX0208(em); + if(name=="utf8") return new Utf8(em); + if(name=="utf16") return new Utf16(em, Utf16::AUTO); + if(name=="utf16be") return new Utf16(em, Utf16::BIG); + if(name=="utf16le") return new Utf16(em, Utf16::LITTLE); + if(name=="windows1252" || name=="cp1252") return new Windows1252(em); + throw invalid_argument("StringCodec::create_codec"); } Codec *detect_codec(const string &str) @@ -128,9 +111,8 @@ Codec *detect_codec(const string &str) bool is_latin1 = true; unsigned utf8_mb = 0; - for(string::const_iterator i=str.begin(); i!=str.end(); ++i) + for(char c: str) { - unsigned char c = *i; if(c&0x80) { is_ascii = false; @@ -173,5 +155,5 @@ Codec *detect_codec(const string &str) return new Windows1252; } -} // namespace Codecs +} // namespace StringCodec } // namespace Msp