X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fcodec.cpp;fp=source%2Fcodec.cpp;h=0000000000000000000000000000000000000000;hp=0014847a67cebd30a12585980f466cdf7e5ea98c;hb=b42ed73a1b241c0e93ee03c43c4584b41c549bac;hpb=5b1368cb791cab043f0435628cacbaff36e39b7b diff --git a/source/codec.cpp b/source/codec.cpp deleted file mode 100644 index 0014847..0000000 --- a/source/codec.cpp +++ /dev/null @@ -1,177 +0,0 @@ -/* $Id$ - -This file is part of libmspstrings -Copyright © 2006-2007 Mikko Rasa -Distributed under the LGPL -*/ - -#include "ascii.h" -#include "codec.h" -#include "iso2022jp.h" -#include "iso646fi.h" -#include "iso88591.h" -#include "iso885915.h" -#include "jisx0201.h" -#include "jisx0208.h" -#include "utf8.h" -#include "windows1252.h" - -using namespace std; - -namespace Msp { -namespace Codecs { - -bool Codec::detect(const string &str) const -{ - Decoder *dec = create_decoder(); - bool result = true; - try - { - for(string::const_iterator i=str.begin(); i!=str.end(); ) - dec->decode_char(str, i); - } - catch(const CodecError &) - { - result = false; - } - - delete dec; - - return result; -} - -void Codec::Encoder::encode(const ustring &str, string &buf) -{ - for(ustring::const_iterator i=str.begin(); i!=str.end(); ++i) - encode_char(*i, buf); -} - -string Codec::Encoder::encode(const ustring &str) -{ - string buf; - encode(str, buf); - sync(buf); - return buf; -} - -void Codec::Encoder::error(UnicodeChar ch, string &buf, const string &msg) -{ - switch(err_mode) - { - case TRANSLITERATE: - transliterate(ch, buf); - case IGNORE_ERRORS: - break; - default: - throw CodecError(msg); - } -} - - -void Codec::Decoder::decode(const string &str, ustring &buf) -{ - for(string::const_iterator i=str.begin(); i!=str.end();) - { - UnicodeChar c = decode_char(str, i); - if(c!=-1) - buf += c; - } -} - -ustring Codec::Decoder::decode(const string &str) -{ - ustring buf; - decode(str, buf); - return buf; -} - -UnicodeChar Codec::Decoder::error(const string &msg) -{ - switch(err_mode) - { - case TRANSLITERATE: - return 0xFFFE; - case IGNORE_ERRORS: - return -1; - default: - throw CodecError(msg); - } -} - -Codec *create_codec(const string &n) -{ - string name; - for(string::const_iterator i=n.begin(); i!=n.end(); ++i) - { - if(isupper(*i)) - name += tolower(*i); - else if(islower(*i) || isdigit(*i)) - name += *i; - } - - if(name=="ascii") return new Ascii; - if(name=="iso2022jp") return new Iso2022Jp; - if(name=="iso646fi") return new Iso646Fi; - if(name=="iso88591" || name=="latin1") return new Iso88591; - if(name=="iso885915" || name=="latin9") return new Iso885915; - if(name=="jisx0201") return new JisX0201; - if(name=="jisx0208") return new JisX0208; - if(name=="utf8") return new Utf8; - if(name=="windows1252" || name=="cp1252") return new Windows1252; - throw InvalidParameterValue("Unknown string codec"); -} - -Codec *detect_codec(const string &str) -{ - bool is_utf8 = true; - bool is_ascii = true; - bool is_latin1 = true; - unsigned utf8_mb = 0; - - for(string::const_iterator i=str.begin(); i!=str.end(); ++i) - { - unsigned char c = *i; - if(c&0x80) - { - is_ascii = false; - if((c&0xC0)==0x80) - { - if((c&0xE0)==0x80) - is_latin1 = false; - if(utf8_mb) - --utf8_mb; - else - is_utf8 = false; - } - else if((c&0xC0)==0xC0) - { - if(utf8_mb) - { - is_utf8 = false; - utf8_mb = 0; - } - else - { - for(utf8_mb=1; (c>>(6-utf8_mb))&1; ++utf8_mb) ; - } - } - } - else if(utf8_mb) - { - is_utf8 = false; - utf8_mb = 0; - } - } - - if(is_ascii) - return new Ascii; - else if(is_utf8) - return new Utf8; - else if(is_latin1) - return new Iso88591; - else - return new Windows1252; -} - -} // namespace Codecs -} // namespace Msp