X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fcodec.cpp;h=521dfea2094c3eb2c8a0cffe96eca98c54fb4c70;hp=c38d82889502c8804bcd56d0b1af0b8a481bde30;hb=271ffa9434c8d9397bb5170cf1ee670c5265ec60;hpb=f47bc86e6ce900c5323e593db003c93110538268 diff --git a/source/codec.cpp b/source/codec.cpp index c38d828..521dfea 100644 --- a/source/codec.cpp +++ b/source/codec.cpp @@ -10,6 +10,7 @@ Distributed under the LGPL #include "iso2022jp.h" #include "iso646fi.h" #include "iso88591.h" +#include "iso885915.h" #include "jisx0201.h" #include "jisx0208.h" #include "utf8.h" @@ -97,12 +98,57 @@ Codec *create_codec(const string &n) if(name=="iso2022jp") return new Iso2022Jp; if(name=="iso646fi") return new Iso646Fi; if(name=="iso88591" || name=="latin1") return new Iso88591; + if(name=="iso885915" || name=="latin9") return new Iso885915; if(name=="jisx0201") return new JisX0201; if(name=="jisx0208") return new JisX0208; if(name=="utf8") return new Utf8; - if(name=="windows1252") return new Windows1252; + if(name=="windows1252" || name=="cp1252") return new Windows1252; throw InvalidParameterValue("Unknown string codec"); } +Codec *detect_codec(const string &str) +{ + bool is_utf8=true; + bool is_ascii=true; + bool is_latin1=true; + unsigned utf8_mb=0; + + for(string::const_iterator i=str.begin(); i!=str.end(); ++i) + { + unsigned char c=*i; + if(c&0x80) + { + is_ascii=false; + if((c&0xC0)==0x80) + { + if((c&0xE0)==0x80) + is_latin1=false; + if(utf8_mb) + --utf8_mb; + else + is_utf8=false; + } + else if((c&0xC0)==0xC0) + { + if(utf8_mb) + is_utf8=false; + else + { + for(utf8_mb=1; (c>>(6-utf8_mb))&1; ++utf8_mb) ; + } + } + } + } + + if(is_ascii) + return new Ascii; + else if(is_utf8) + return new Utf8; + else if(is_latin1) + return new Iso88591; + else + return new Windows1252; +} + } // namespace Codecs } // namespace Msp