git.tdb.fi Git - libs/core.git/blob - source/stringcodec/codec.cpp
Exception changes
[libs/core.git] / source / stringcodec / codec.cpp
1 #include "ascii.h"
2 #include "codec.h"
3 #include "iso2022jp.h"
4 #include "iso646fi.h"
5 #include "iso88591.h"
6 #include "iso885915.h"
7 #include "jisx0201.h"
8 #include "jisx0208.h"
9 #include "utf8.h"
10 #include "windows1252.h"
11
12 using namespace std;
13
14 namespace Msp {
15 namespace StringCodec {
16
17 bool Codec::detect(const string &str) const
18 {
19         Decoder *dec = create_decoder(IGNORE_ERRORS);
20
21         bool result = true;
22         for(string::const_iterator i=str.begin(); (result && i!=str.end()); )
23                 result = (dec->decode_char(str, i)!=-1);
24
25         delete dec;
26
27         return result;
28 }
29
30 void Codec::Encoder::encode(const ustring &str, string &buf)
31 {
32         for(ustring::const_iterator i=str.begin(); i!=str.end(); ++i)
33                 encode_char(*i, buf);
34 }
35
36 string Codec::Encoder::encode(const ustring &str)
37 {
38         string buf;
39         encode(str, buf);
40         sync(buf);
41         return buf;
42 }
43
44
45
46 void Codec::Decoder::decode(const string &str, ustring &buf)
47 {
48         for(string::const_iterator i=str.begin(); i!=str.end();)
49         {
50                 unichar c = decode_char(str, i);
51                 if(c!=-1)
52                         buf += c;
53         }
54 }
55
56 ustring Codec::Decoder::decode(const string &str)
57 {
58         ustring buf;
59         decode(str, buf);
60         return buf;
61 }
62
63 Codec *create_codec(const string &n)
64 {
65         string name;
66         for(string::const_iterator i=n.begin(); i!=n.end(); ++i)
67         {
68                 if(isupper(*i))
69                         name += tolower(*i);
70                 else if(islower(*i) || isdigit(*i))
71                         name += *i;
72         }
73
74         if(name=="ascii") return new Ascii;
75         if(name=="iso2022jp") return new Iso2022Jp;
76         if(name=="iso646fi") return new Iso646Fi;
77         if(name=="iso88591" || name=="latin1") return new Iso88591;
78         if(name=="iso885915" || name=="latin9") return new Iso885915;
79         if(name=="jisx0201") return new JisX0201;
80         if(name=="jisx0208") return new JisX0208;
81         if(name=="utf8") return new Utf8;
82         if(name=="windows1252" || name=="cp1252") return new Windows1252;
83         throw invalid_argument("unknown string codec");
84 }
85
86 Codec *detect_codec(const string &str)
87 {
88         bool is_utf8 = true;
89         bool is_ascii = true;
90         bool is_latin1 = true;
91         unsigned utf8_mb = 0;
92
93         for(string::const_iterator i=str.begin(); i!=str.end(); ++i)
94         {
95                 unsigned char c = *i;
96                 if(c&0x80)
97                 {
98                         is_ascii = false;
99                         if((c&0xC0)==0x80)
100                         {
101                                 if((c&0xE0)==0x80)
102                                         is_latin1 = false;
103                                 if(utf8_mb)
104                                         --utf8_mb;
105                                 else
106                                         is_utf8 = false;
107                         }
108                         else if((c&0xC0)==0xC0)
109                         {
110                                 if(utf8_mb)
111                                 {
112                                         is_utf8 = false;
113                                         utf8_mb = 0;
114                                 }
115                                 else
116                                 {
117                                         for(utf8_mb=1; (c>>(6-utf8_mb))&1; ++utf8_mb) ;
118                                 }
119                         }
120                 }
121                 else if(utf8_mb)
122                 {
123                         is_utf8 = false;
124                         utf8_mb = 0;
125                 }
126         }
127
128         if(is_ascii)
129                 return new Ascii;
130         else if(is_utf8)
131                 return new Utf8;
132         else if(is_latin1)
133                 return new Iso88591;
134         else
135                 return new Windows1252;
136 }
137
138 } // namespace StringCodec
139 } // namespace Msp