]> git.tdb.fi Git - libs/core.git/blob - source/codec.cpp
Further style and comment adjustments
[libs/core.git] / source / codec.cpp
1 /* $Id$
2
3 This file is part of libmspstrings
4 Copyright © 2006-2007 Mikko Rasa
5 Distributed under the LGPL
6 */
7
8 #include "ascii.h"
9 #include "codec.h"
10 #include "iso2022jp.h"
11 #include "iso646fi.h"
12 #include "iso88591.h"
13 #include "iso885915.h"
14 #include "jisx0201.h"
15 #include "jisx0208.h"
16 #include "utf8.h"
17 #include "windows1252.h"
18
19 using namespace std;
20
21 namespace Msp {
22 namespace Codecs {
23
24 bool Codec::detect(const string &str) const
25 {
26         Decoder *dec = create_decoder();
27         bool result = true;
28         try
29         {
30                 for(string::const_iterator i=str.begin(); i!=str.end(); )
31                         dec->decode_char(str, i);
32         }
33         catch(const CodecError &)
34         {
35                 result = false;
36         }
37
38         delete dec;
39
40         return result;
41 }
42
43 void Codec::Encoder::encode(const ustring &str, string &buf)
44 {
45         for(ustring::const_iterator i=str.begin(); i!=str.end(); ++i)
46                 encode_char(*i, buf);
47 }
48
49 string Codec::Encoder::encode(const ustring &str)
50 {
51         string buf;
52         encode(str, buf);
53         sync(buf);
54         return buf;
55 }
56
57 void Codec::Encoder::error(UnicodeChar ch, string &buf, const string &msg)
58 {
59         switch(err_mode)
60         {
61         case TRANSLITERATE:
62                 transliterate(ch, buf);
63         case IGNORE_ERRORS:
64                 break;
65         default:
66                 throw CodecError(msg);
67         }
68 }
69
70
71 void Codec::Decoder::decode(const string &str, ustring &buf)
72 {
73         for(string::const_iterator i=str.begin(); i!=str.end();)
74         {
75                 UnicodeChar c = decode_char(str, i);
76                 if(c!=-1)
77                         buf += c;
78         }
79 }
80
81 ustring Codec::Decoder::decode(const string &str)
82 {
83         ustring buf;
84         decode(str, buf);
85         return buf;
86 }
87
88 UnicodeChar Codec::Decoder::error(const string &msg)
89 {
90         switch(err_mode)
91         {
92         case TRANSLITERATE:
93                 return 0xFFFE;
94         case IGNORE_ERRORS:
95                 return -1;
96         default:
97                 throw CodecError(msg);
98         }
99 }
100
101 Codec *create_codec(const string &n)
102 {
103         string name;
104         for(string::const_iterator i=n.begin(); i!=n.end(); ++i)
105         {
106                 if(isupper(*i))
107                         name += tolower(*i);
108                 else if(islower(*i) || isdigit(*i))
109                         name += *i;
110         }
111
112         if(name=="ascii") return new Ascii;
113         if(name=="iso2022jp") return new Iso2022Jp;
114         if(name=="iso646fi") return new Iso646Fi;
115         if(name=="iso88591" || name=="latin1") return new Iso88591;
116         if(name=="iso885915" || name=="latin9") return new Iso885915;
117         if(name=="jisx0201") return new JisX0201;
118         if(name=="jisx0208") return new JisX0208;
119         if(name=="utf8") return new Utf8;
120         if(name=="windows1252" || name=="cp1252") return new Windows1252;
121         throw InvalidParameterValue("Unknown string codec");
122 }
123
124 Codec *detect_codec(const string &str)
125 {
126         bool is_utf8 = true;
127         bool is_ascii = true;
128         bool is_latin1 = true;
129         unsigned utf8_mb = 0;
130
131         for(string::const_iterator i=str.begin(); i!=str.end(); ++i)
132         {
133                 unsigned char c = *i;
134                 if(c&0x80)
135                 {
136                         is_ascii = false;
137                         if((c&0xC0)==0x80)
138                         {
139                                 if((c&0xE0)==0x80)
140                                         is_latin1 = false;
141                                 if(utf8_mb)
142                                         --utf8_mb;
143                                 else
144                                         is_utf8 = false;
145                         }
146                         else if((c&0xC0)==0xC0)
147                         {
148                                 if(utf8_mb)
149                                 {
150                                         is_utf8 = false;
151                                         utf8_mb = 0;
152                                 }
153                                 else
154                                 {
155                                         for(utf8_mb=1; (c>>(6-utf8_mb))&1; ++utf8_mb) ;
156                                 }
157                         }
158                 }
159                 else if(utf8_mb)
160                 {
161                         is_utf8 = false;
162                         utf8_mb = 0;
163                 }
164         }
165
166         if(is_ascii)
167                 return new Ascii;
168         else if(is_utf8)
169                 return new Utf8;
170         else if(is_latin1)
171                 return new Iso88591;
172         else
173                 return new Windows1252;
174 }
175
176 } // namespace Codecs
177 } // namespace Msp