git.tdb.fi Git - libs/core.git/blob - source/stringcodec/codec.cpp
Use C++11 features with containers
[libs/core.git] / source / stringcodec / codec.cpp
#include <memory>
#include "ascii.h"
#include "codec.h"
#include "iso2022jp.h"
#include "iso646fi.h"
#include "iso88591.h"
#include "iso885915.h"
#include "jisx0201.h"
#include "jisx0208.h"
#include "utf16.h"
#include "utf8.h"
#include "windows1252.h"
12
13 using namespace std;
14
15 namespace Msp {
16 namespace StringCodec {
17
18 bool Codec::detect(const string &str) const
19 {
20         Decoder *dec = create_decoder(IGNORE_ERRORS);
21
22         bool result = true;
23         for(auto i=str.begin(); (result && i!=str.end()); )
24                 result = (dec->decode_char(str, i)!=-1);
25
26         delete dec;
27
28         return result;
29 }
30
31 void Codec::Encoder::encode(const ustring &str, string &buf)
32 {
33         for(unichar c: str)
34                 encode_char(c, buf);
35 }
36
37 string Codec::Encoder::encode(const ustring &str)
38 {
39         string buf;
40         encode(str, buf);
41         sync(buf);
42         return buf;
43 }
44
45
46
47 void Codec::Decoder::decode(const string &str, ustring &buf)
48 {
49         for(auto i=str.begin(); i!=str.end();)
50         {
51                 unichar c = decode_char(str, i);
52                 if(c!=-1)
53                         buf += c;
54         }
55 }
56
57 ustring Codec::Decoder::decode(const string &str)
58 {
59         ustring buf;
60         decode(str, buf);
61         return buf;
62 }
63
64 Codec *create_codec(const string &n)
65 {
66         string name;
67         string::const_iterator i;
68         for(i=n.begin(); i!=n.end(); ++i)
69         {
70                 if(*i==':')
71                         break;
72                 else if(isupper(*i))
73                         name += tolower(*i);
74                 else if(islower(*i) || isdigit(*i))
75                         name += *i;
76         }
77
78         ErrorMode em = DEFAULT;
79         if(i!=n.end() && *i==':')
80         {
81                 string em_str(i+1, n.end());
82                 if(em_str=="throw")
83                         em = THROW_ON_ERROR;
84                 else if(em_str=="ignore")
85                         em = IGNORE_ERRORS;
86                 else if(em_str=="trans" || em_str=="transliterate")
87                         em = TRANSLITERATE;
88                 else
89                         throw invalid_argument("invalid error mode");
90         }
91
92         if(name=="ascii") return new Ascii(em);
93         if(name=="iso2022jp") return new Iso2022Jp(em);
94         if(name=="iso646fi") return new Iso646Fi(em);
95         if(name=="iso88591" || name=="latin1") return new Iso88591(em);
96         if(name=="iso885915" || name=="latin9") return new Iso885915(em);
97         if(name=="jisx0201") return new JisX0201(em);
98         if(name=="jisx0208") return new JisX0208(em);
99         if(name=="utf8") return new Utf8(em);
100         if(name=="utf16") return new Utf16(em, Utf16::AUTO);
101         if(name=="utf16be") return new Utf16(em, Utf16::BIG);
102         if(name=="utf16le") return new Utf16(em, Utf16::LITTLE);
103         if(name=="windows1252" || name=="cp1252") return new Windows1252(em);
104         throw invalid_argument("unknown string codec");
105 }
106
107 Codec *detect_codec(const string &str)
108 {
109         bool is_utf8 = true;
110         bool is_ascii = true;
111         bool is_latin1 = true;
112         unsigned utf8_mb = 0;
113
114         for(char c: str)
115         {
116                 if(c&0x80)
117                 {
118                         is_ascii = false;
119                         if((c&0xC0)==0x80)
120                         {
121                                 if((c&0xE0)==0x80)
122                                         is_latin1 = false;
123                                 if(utf8_mb)
124                                         --utf8_mb;
125                                 else
126                                         is_utf8 = false;
127                         }
128                         else if((c&0xC0)==0xC0)
129                         {
130                                 if(utf8_mb)
131                                 {
132                                         is_utf8 = false;
133                                         utf8_mb = 0;
134                                 }
135                                 else
136                                 {
137                                         for(utf8_mb=1; (c>>(6-utf8_mb))&1; ++utf8_mb) ;
138                                 }
139                         }
140                 }
141                 else if(utf8_mb)
142                 {
143                         is_utf8 = false;
144                         utf8_mb = 0;
145                 }
146         }
147
148         if(is_ascii)
149                 return new Ascii;
150         else if(is_utf8)
151                 return new Utf8;
152         else if(is_latin1)
153                 return new Iso88591;
154         else
155                 return new Windows1252;
156 }
157
158 } // namespace StringCodec
159 } // namespace Msp