]> git.tdb.fi Git - libs/core.git/blob - source/stringcodec/codec.cpp
Drop copyright and license notices from source files
[libs/core.git] / source / stringcodec / codec.cpp
1 #include "ascii.h"
2 #include "codec.h"
3 #include "iso2022jp.h"
4 #include "iso646fi.h"
5 #include "iso88591.h"
6 #include "iso885915.h"
7 #include "jisx0201.h"
8 #include "jisx0208.h"
9 #include "utf8.h"
10 #include "windows1252.h"
11
12 using namespace std;
13
14 namespace Msp {
15 namespace Codecs {
16
17 bool Codec::detect(const string &str) const
18 {
19         Decoder *dec = create_decoder();
20         bool result = true;
21         try
22         {
23                 for(string::const_iterator i=str.begin(); i!=str.end(); )
24                         dec->decode_char(str, i);
25         }
26         catch(const CodecError &)
27         {
28                 result = false;
29         }
30
31         delete dec;
32
33         return result;
34 }
35
36 void Codec::Encoder::encode(const ustring &str, string &buf)
37 {
38         for(ustring::const_iterator i=str.begin(); i!=str.end(); ++i)
39                 encode_char(*i, buf);
40 }
41
42 string Codec::Encoder::encode(const ustring &str)
43 {
44         string buf;
45         encode(str, buf);
46         sync(buf);
47         return buf;
48 }
49
50 void Codec::Encoder::error(UnicodeChar ch, string &buf, const string &msg)
51 {
52         switch(err_mode)
53         {
54         case TRANSLITERATE:
55                 transliterate(ch, buf);
56         case IGNORE_ERRORS:
57                 break;
58         default:
59                 throw CodecError(msg);
60         }
61 }
62
63
64 void Codec::Decoder::decode(const string &str, ustring &buf)
65 {
66         for(string::const_iterator i=str.begin(); i!=str.end();)
67         {
68                 UnicodeChar c = decode_char(str, i);
69                 if(c!=-1)
70                         buf += c;
71         }
72 }
73
74 ustring Codec::Decoder::decode(const string &str)
75 {
76         ustring buf;
77         decode(str, buf);
78         return buf;
79 }
80
81 UnicodeChar Codec::Decoder::error(const string &msg)
82 {
83         switch(err_mode)
84         {
85         case TRANSLITERATE:
86                 return 0xFFFE;
87         case IGNORE_ERRORS:
88                 return -1;
89         default:
90                 throw CodecError(msg);
91         }
92 }
93
94 Codec *create_codec(const string &n)
95 {
96         string name;
97         for(string::const_iterator i=n.begin(); i!=n.end(); ++i)
98         {
99                 if(isupper(*i))
100                         name += tolower(*i);
101                 else if(islower(*i) || isdigit(*i))
102                         name += *i;
103         }
104
105         if(name=="ascii") return new Ascii;
106         if(name=="iso2022jp") return new Iso2022Jp;
107         if(name=="iso646fi") return new Iso646Fi;
108         if(name=="iso88591" || name=="latin1") return new Iso88591;
109         if(name=="iso885915" || name=="latin9") return new Iso885915;
110         if(name=="jisx0201") return new JisX0201;
111         if(name=="jisx0208") return new JisX0208;
112         if(name=="utf8") return new Utf8;
113         if(name=="windows1252" || name=="cp1252") return new Windows1252;
114         throw InvalidParameterValue("Unknown string codec");
115 }
116
117 Codec *detect_codec(const string &str)
118 {
119         bool is_utf8 = true;
120         bool is_ascii = true;
121         bool is_latin1 = true;
122         unsigned utf8_mb = 0;
123
124         for(string::const_iterator i=str.begin(); i!=str.end(); ++i)
125         {
126                 unsigned char c = *i;
127                 if(c&0x80)
128                 {
129                         is_ascii = false;
130                         if((c&0xC0)==0x80)
131                         {
132                                 if((c&0xE0)==0x80)
133                                         is_latin1 = false;
134                                 if(utf8_mb)
135                                         --utf8_mb;
136                                 else
137                                         is_utf8 = false;
138                         }
139                         else if((c&0xC0)==0xC0)
140                         {
141                                 if(utf8_mb)
142                                 {
143                                         is_utf8 = false;
144                                         utf8_mb = 0;
145                                 }
146                                 else
147                                 {
148                                         for(utf8_mb=1; (c>>(6-utf8_mb))&1; ++utf8_mb) ;
149                                 }
150                         }
151                 }
152                 else if(utf8_mb)
153                 {
154                         is_utf8 = false;
155                         utf8_mb = 0;
156                 }
157         }
158
159         if(is_ascii)
160                 return new Ascii;
161         else if(is_utf8)
162                 return new Utf8;
163         else if(is_latin1)
164                 return new Iso88591;
165         else
166                 return new Windows1252;
167 }
168
169 } // namespace Codecs
170 } // namespace Msp