git.tdb.fi Git - libs/core.git/blobdiff - source/codec.cpp
Make codecs able to tell their name
[libs/core.git] / source / codec.cpp
index 865176edf2a8cb5223e721cdb6040a0511b4acd3..521dfea2094c3eb2c8a0cffe96eca98c54fb4c70 100644 (file)
@@ -102,9 +102,53 @@ Codec *create_codec(const string &n)
        if(name=="jisx0201") return new JisX0201;
        if(name=="jisx0208") return new JisX0208;
        if(name=="utf8") return new Utf8;
-       if(name=="windows1252") return new Windows1252;
+       if(name=="windows1252" || name=="cp1252") return new Windows1252;
        throw InvalidParameterValue("Unknown string codec");
 }
 
+Codec *detect_codec(const string &str)
+{
+       bool is_utf8=true;
+       bool is_ascii=true;
+       bool is_latin1=true;
+       unsigned utf8_mb=0;
+
+       for(string::const_iterator i=str.begin(); i!=str.end(); ++i)
+       {
+               unsigned char c=*i;
+               if(c&0x80)
+               {
+                       is_ascii=false;
+                       if((c&0xC0)==0x80)
+                       {
+                               if((c&0xE0)==0x80)
+                                       is_latin1=false;
+                               if(utf8_mb)
+                                       --utf8_mb;
+                               else
+                                       is_utf8=false;
+                       }
+                       else if((c&0xC0)==0xC0)
+                       {
+                               if(utf8_mb)
+                                       is_utf8=false;
+                               else
+                               {
+                                       for(utf8_mb=1; (c>>(6-utf8_mb))&1; ++utf8_mb) ;
+                               }
+                       }
+               }
+       }
+
+       if(is_ascii)
+               return new Ascii;
+       else if(is_utf8)
+               return new Utf8;
+       else if(is_latin1)
+               return new Iso88591;
+       else
+               return new Windows1252;
+}
+
 } // namespace Codecs
 } // namespace Msp