git.tdb.fi Git - libs/core.git/blob - source/stringcodec/codec.cpp
Avoid using an exception in a non-error situation
[libs/core.git] / source / stringcodec / codec.cpp
1 #include "ascii.h"
2 #include "codec.h"
3 #include "iso2022jp.h"
4 #include "iso646fi.h"
5 #include "iso88591.h"
6 #include "iso885915.h"
7 #include "jisx0201.h"
8 #include "jisx0208.h"
9 #include "utf8.h"
10 #include "windows1252.h"
11
12 using namespace std;
13
14 namespace Msp {
15 namespace StringCodec {
16
17 bool Codec::detect(const string &str) const
18 {
19         Decoder *dec = create_decoder(IGNORE_ERRORS);
20
21         bool result = true;
22         for(string::const_iterator i=str.begin(); (result && i!=str.end()); )
23                 result = (dec->decode_char(str, i)!=-1);
24
25         delete dec;
26
27         return result;
28 }
29
30 void Codec::Encoder::encode(const ustring &str, string &buf)
31 {
32         for(ustring::const_iterator i=str.begin(); i!=str.end(); ++i)
33                 encode_char(*i, buf);
34 }
35
36 string Codec::Encoder::encode(const ustring &str)
37 {
38         string buf;
39         encode(str, buf);
40         sync(buf);
41         return buf;
42 }
43
44 void Codec::Encoder::error(unichar ch, string &buf, const string &msg)
45 {
46         switch(err_mode)
47         {
48         case TRANSLITERATE:
49                 transliterate(ch, buf);
50         case IGNORE_ERRORS:
51                 break;
52         default:
53                 throw CodecError(msg);
54         }
55 }
56
57
58 void Codec::Decoder::decode(const string &str, ustring &buf)
59 {
60         for(string::const_iterator i=str.begin(); i!=str.end();)
61         {
62                 unichar c = decode_char(str, i);
63                 if(c!=-1)
64                         buf += c;
65         }
66 }
67
68 ustring Codec::Decoder::decode(const string &str)
69 {
70         ustring buf;
71         decode(str, buf);
72         return buf;
73 }
74
75 unichar Codec::Decoder::error(const string &msg)
76 {
77         switch(err_mode)
78         {
79         case TRANSLITERATE:
80                 return 0xFFFD;
81         case IGNORE_ERRORS:
82                 return -1;
83         default:
84                 throw CodecError(msg);
85         }
86 }
87
88 Codec *create_codec(const string &n)
89 {
90         string name;
91         for(string::const_iterator i=n.begin(); i!=n.end(); ++i)
92         {
93                 if(isupper(*i))
94                         name += tolower(*i);
95                 else if(islower(*i) || isdigit(*i))
96                         name += *i;
97         }
98
99         if(name=="ascii") return new Ascii;
100         if(name=="iso2022jp") return new Iso2022Jp;
101         if(name=="iso646fi") return new Iso646Fi;
102         if(name=="iso88591" || name=="latin1") return new Iso88591;
103         if(name=="iso885915" || name=="latin9") return new Iso885915;
104         if(name=="jisx0201") return new JisX0201;
105         if(name=="jisx0208") return new JisX0208;
106         if(name=="utf8") return new Utf8;
107         if(name=="windows1252" || name=="cp1252") return new Windows1252;
108         throw InvalidParameterValue("Unknown string codec");
109 }
110
111 Codec *detect_codec(const string &str)
112 {
113         bool is_utf8 = true;
114         bool is_ascii = true;
115         bool is_latin1 = true;
116         unsigned utf8_mb = 0;
117
118         for(string::const_iterator i=str.begin(); i!=str.end(); ++i)
119         {
120                 unsigned char c = *i;
121                 if(c&0x80)
122                 {
123                         is_ascii = false;
124                         if((c&0xC0)==0x80)
125                         {
126                                 if((c&0xE0)==0x80)
127                                         is_latin1 = false;
128                                 if(utf8_mb)
129                                         --utf8_mb;
130                                 else
131                                         is_utf8 = false;
132                         }
133                         else if((c&0xC0)==0xC0)
134                         {
135                                 if(utf8_mb)
136                                 {
137                                         is_utf8 = false;
138                                         utf8_mb = 0;
139                                 }
140                                 else
141                                 {
142                                         for(utf8_mb=1; (c>>(6-utf8_mb))&1; ++utf8_mb) ;
143                                 }
144                         }
145                 }
146                 else if(utf8_mb)
147                 {
148                         is_utf8 = false;
149                         utf8_mb = 0;
150                 }
151         }
152
153         if(is_ascii)
154                 return new Ascii;
155         else if(is_utf8)
156                 return new Utf8;
157         else if(is_latin1)
158                 return new Iso88591;
159         else
160                 return new Windows1252;
161 }
162
163 } // namespace StringCodec
164 } // namespace Msp