git.tdb.fi Git - libs/core.git/blobdiff - source/codec.cpp
Further style and comment adjustments
[libs/core.git] / source / codec.cpp
index 865176edf2a8cb5223e721cdb6040a0511b4acd3..0014847a67cebd30a12585980f466cdf7e5ea98c 100644 (file)
@@ -23,8 +23,8 @@ namespace Codecs {
 
 bool Codec::detect(const string &str) const
 {
-       Decoder *dec=create_decoder();
-       bool result=true;
+       Decoder *dec = create_decoder();
+       bool result = true;
        try
        {
                for(string::const_iterator i=str.begin(); i!=str.end(); )
@@ -32,7 +32,7 @@ bool Codec::detect(const string &str) const
        }
        catch(const CodecError &)
        {
-               result=false;
+               result = false;
        }
 
        delete dec;
@@ -46,6 +46,14 @@ void Codec::Encoder::encode(const ustring &str, string &buf)
                encode_char(*i, buf);
 }
 
+/**
+Convenience overload that encodes a whole Unicode string and hands the result
+back as a freshly created byte string.  After all characters have been
+encoded, sync() is invoked on the output before it is returned.
+*/
+string Codec::Encoder::encode(const ustring &str)
+{
+       string encoded;
+       encode(str, encoded);
+       // NOTE(review): sync() presumably finalizes any pending encoder state
+       // into the output - confirm against the Encoder interface.
+       sync(encoded);
+       return encoded;
+}
+
 void Codec::Encoder::error(UnicodeChar ch, string &buf, const string &msg)
 {
        switch(err_mode)
@@ -64,12 +72,19 @@ void Codec::Decoder::decode(const string &str, ustring &buf)
 {
        for(string::const_iterator i=str.begin(); i!=str.end();)
        {
-               UnicodeChar c=decode_char(str, i);
+               UnicodeChar c = decode_char(str, i);
                if(c!=-1)
-                       buf+=c;
+                       buf += c;
        }
 }
 
+/**
+Convenience overload that decodes a whole byte string and returns the decoded
+characters as a new ustring, delegating the actual work to the two-argument
+decode().
+*/
+ustring Codec::Decoder::decode(const string &str)
+{
+       ustring decoded;
+       decode(str, decoded);
+       return decoded;
+}
+
 UnicodeChar Codec::Decoder::error(const string &msg)
 {
        switch(err_mode)
@@ -89,9 +104,9 @@ Codec *create_codec(const string &n)
        for(string::const_iterator i=n.begin(); i!=n.end(); ++i)
        {
                if(isupper(*i))
-                       name+=tolower(*i);
+                       name += tolower(*i);
                else if(islower(*i) || isdigit(*i))
-                       name+=*i;
+                       name += *i;
        }
 
        if(name=="ascii") return new Ascii;
@@ -102,9 +117,61 @@ Codec *create_codec(const string &n)
        if(name=="jisx0201") return new JisX0201;
        if(name=="jisx0208") return new JisX0208;
        if(name=="utf8") return new Utf8;
-       if(name=="windows1252") return new Windows1252;
+       if(name=="windows1252" || name=="cp1252") return new Windows1252;
        throw InvalidParameterValue("Unknown string codec");
 }
 
+/**
+Guesses which codec a byte string is encoded with and returns a newly
+allocated codec object for it; the caller owns the returned pointer.
+
+The string is scanned once and classified, in order of preference, as:
+- Ascii, if no byte has its high bit set
+- Utf8, if all high-bit bytes form well-structured multi-byte sequences
+- Iso88591, if no byte falls in the range 0x80..0x9F
+- Windows1252 otherwise
+
+Only the structure of UTF-8 sequences (lead byte followed by the right number
+of continuation bytes) is checked; overlong forms and code point ranges are
+not validated.
+*/
+Codec *detect_codec(const string &str)
+{
+       bool is_utf8 = true;
+       bool is_ascii = true;
+       bool is_latin1 = true;
+       // Number of continuation bytes still expected for the current sequence
+       unsigned utf8_mb = 0;
+
+       for(string::const_iterator i=str.begin(); i!=str.end(); ++i)
+       {
+               unsigned char c = *i;
+               if(c&0x80)
+               {
+                       is_ascii = false;
+                       if((c&0xC0)==0x80)
+                       {
+                               // 10xxxxxx: a UTF-8 continuation byte.  Bytes 0x80..0x9F also
+                               // rule out ISO-8859-1 for the purposes of this detection.
+                               if((c&0xE0)==0x80)
+                                       is_latin1 = false;
+                               if(utf8_mb)
+                                       --utf8_mb;
+                               else
+                                       is_utf8 = false;
+                       }
+                       else if((c&0xC0)==0xC0)
+                       {
+                               // 11xxxxxx: a UTF-8 lead byte
+                               if(utf8_mb)
+                               {
+                                       // Previous sequence was cut short by a new lead byte
+                                       is_utf8 = false;
+                                       utf8_mb = 0;
+                               }
+                               else if(c>=0xFE)
+                               {
+                                       // 0xFE and 0xFF never occur in UTF-8.  Rejecting them here
+                                       // also keeps the bit scan below from running past bit 0,
+                                       // which would shift by an out-of-range amount.
+                                       is_utf8 = false;
+                               }
+                               else
+                               {
+                                       // Count the 1 bits following the two topmost ones; the
+                                       // result is the number of continuation bytes to expect.
+                                       for(utf8_mb=1; (c>>(6-utf8_mb))&1; ++utf8_mb) ;
+                               }
+                       }
+               }
+               else if(utf8_mb)
+               {
+                       // Plain ASCII byte in the middle of a multi-byte sequence
+                       is_utf8 = false;
+                       utf8_mb = 0;
+               }
+       }
+
+       // A sequence left unfinished at the end of the string is not valid UTF-8
+       if(utf8_mb)
+               is_utf8 = false;
+
+       if(is_ascii)
+               return new Ascii;
+       else if(is_utf8)
+               return new Utf8;
+       else if(is_latin1)
+               return new Iso88591;
+       else
+               return new Windows1252;
+}
+
 } // namespace Codecs
 } // namespace Msp