git.tdb.fi Git - libs/core.git/blobdiff - source/stringcodec/codec.cpp
Add move semantics to Variant
[libs/core.git] / source / stringcodec / codec.cpp
index b7c600d74d6fbf0c70ccfd5fed9b766bd96a1c56..ff2bd63d5e657b8547392163d12aa50b94f40e13 100644 (file)
@@ -6,6 +6,7 @@
 #include "iso885915.h"
 #include "jisx0201.h"
 #include "jisx0208.h"
+#include "utf16.h"
 #include "utf8.h"
 #include "windows1252.h"
 
@@ -16,17 +17,11 @@ namespace StringCodec {
 
 bool Codec::detect(const string &str) const
 {
-       Decoder *dec = create_decoder();
+       Decoder *dec = create_decoder(IGNORE_ERRORS);
+
        bool result = true;
-       try
-       {
-               for(string::const_iterator i=str.begin(); i!=str.end(); )
-                       dec->decode_char(str, i);
-       }
-       catch(const CodecError &)
-       {
-               result = false;
-       }
+       for(auto i=str.begin(); (result && i!=str.end()); )
+               result = (dec->decode_char(str, i)!=-1);
 
        delete dec;
 
@@ -35,8 +30,8 @@ bool Codec::detect(const string &str) const
 
 void Codec::Encoder::encode(const ustring &str, string &buf)
 {
-       for(ustring::const_iterator i=str.begin(); i!=str.end(); ++i)
-               encode_char(*i, buf);
+       for(unichar c: str)
+               encode_char(c, buf);
 }
 
 string Codec::Encoder::encode(const ustring &str)
@@ -47,23 +42,11 @@ string Codec::Encoder::encode(const ustring &str)
        return buf;
 }
 
-void Codec::Encoder::error(unichar ch, string &buf, const string &msg)
-{
-       switch(err_mode)
-       {
-       case TRANSLITERATE:
-               transliterate(ch, buf);
-       case IGNORE_ERRORS:
-               break;
-       default:
-               throw CodecError(msg);
-       }
-}
 
 
 void Codec::Decoder::decode(const string &str, ustring &buf)
 {
-       for(string::const_iterator i=str.begin(); i!=str.end();)
+       for(auto i=str.begin(); i!=str.end();)
        {
                unichar c = decode_char(str, i);
                if(c!=-1)
@@ -78,40 +61,47 @@ ustring Codec::Decoder::decode(const string &str)
        return buf;
 }
 
-unichar Codec::Decoder::error(const string &msg)
-{
-       switch(err_mode)
-       {
-       case TRANSLITERATE:
-               return 0xFFFE;
-       case IGNORE_ERRORS:
-               return -1;
-       default:
-               throw CodecError(msg);
-       }
-}
-
 Codec *create_codec(const string &n)
 {
        string name;
-       for(string::const_iterator i=n.begin(); i!=n.end(); ++i)
+       string::const_iterator i;
+       for(i=n.begin(); i!=n.end(); ++i)
        {
-               if(isupper(*i))
+               if(*i==':')
+                       break;
+               else if(isupper(*i))
                        name += tolower(*i);
                else if(islower(*i) || isdigit(*i))
                        name += *i;
        }
 
-       if(name=="ascii") return new Ascii;
-       if(name=="iso2022jp") return new Iso2022Jp;
-       if(name=="iso646fi") return new Iso646Fi;
-       if(name=="iso88591" || name=="latin1") return new Iso88591;
-       if(name=="iso885915" || name=="latin9") return new Iso885915;
-       if(name=="jisx0201") return new JisX0201;
-       if(name=="jisx0208") return new JisX0208;
-       if(name=="utf8") return new Utf8;
-       if(name=="windows1252" || name=="cp1252") return new Windows1252;
-       throw InvalidParameterValue("Unknown string codec");
+       ErrorMode em = DEFAULT;
+       if(i!=n.end() && *i==':')
+       {
+               string em_str(i+1, n.end());
+               if(em_str=="throw")
+                       em = THROW_ON_ERROR;
+               else if(em_str=="ignore")
+                       em = IGNORE_ERRORS;
+               else if(em_str=="trans" || em_str=="transliterate")
+                       em = TRANSLITERATE;
+               else
+                       throw invalid_argument("StringCodec::create_codec");
+       }
+
+       if(name=="ascii") return new Ascii(em);
+       if(name=="iso2022jp") return new Iso2022Jp(em);
+       if(name=="iso646fi") return new Iso646Fi(em);
+       if(name=="iso88591" || name=="latin1") return new Iso88591(em);
+       if(name=="iso885915" || name=="latin9") return new Iso885915(em);
+       if(name=="jisx0201") return new JisX0201(em);
+       if(name=="jisx0208") return new JisX0208(em);
+       if(name=="utf8") return new Utf8(em);
+       if(name=="utf16") return new Utf16(em, Utf16::AUTO);
+       if(name=="utf16be") return new Utf16(em, Utf16::BIG);
+       if(name=="utf16le") return new Utf16(em, Utf16::LITTLE);
+       if(name=="windows1252" || name=="cp1252") return new Windows1252(em);
+       throw invalid_argument("StringCodec::create_codec");
 }
 
 Codec *detect_codec(const string &str)
@@ -121,9 +111,8 @@ Codec *detect_codec(const string &str)
        bool is_latin1 = true;
        unsigned utf8_mb = 0;
 
-       for(string::const_iterator i=str.begin(); i!=str.end(); ++i)
+       for(char c: str)
        {
-               unsigned char c = *i;
                if(c&0x80)
                {
                        is_ascii = false;