6 namespace StringCodec {
// Encoder constructor (excerpt: only part of the member-initializer list is
// visible here).
//   em - error mode; its use is not visible in this excerpt (presumably
//        forwarded to a base class -- confirm against the full file).
//   en - requested byte order; AUTO is resolved to BIG, any other value is
//        stored unchanged in `endian`.
8 Utf16::Encoder::Encoder(ErrorMode em, Endian en):
10 endian(en==AUTO ? BIG : en),
// Encodes the code point `ch` as UTF-16 and appends the resulting bytes to
// `buf`.
// NOTE(review): this excerpt omits several lines of the body (braces,
// conditions, the byte stores); the comments below describe only what the
// visible lines establish.
14 void Utf16::Encoder::encode_char(unichar ch, string &buf)
// Values rejected by is_valid_unichar() are handed to error() together with
// an invalid_character() describing them, and encoding stops here.
16 if(!is_valid_unichar(ch))
17 return error(ch, buf, invalid_character(ch, "UTF-16"));
// Byte order mark written as raw bytes: FF FE is the little-endian form,
// FE FF the big-endian form. The conditions choosing between them (and
// deciding whether a BOM is written at all) are not visible here.
22 buf.append("\xFF\xFE");
24 buf.append("\xFE\xFF");
// e is true when producing little-endian output. Presumably used to select
// byte positions when storing each 16-bit unit -- confirm in the full file.
28 bool e = (endian==LITTLE);
// Lead surrogate: 0xD800 plus bits 10..19 of ch.
// NOTE(review): this yields a valid pair only if 0x10000 has already been
// subtracted from ch; that step is not visible in this excerpt -- confirm.
40 unichar sur = 0xD800+((ch>>10)&0x3FF);
// Trail surrogate: 0xDC00 plus the low 10 bits of ch (reuses `sur`).
43 sur = 0xDC00+(ch&0x3FF);
// Appends a fixed two-byte substitute to `buf`; the unichar argument is
// unnamed and therefore not used.
// The bytes FD FF and FF FD are U+FFFD (REPLACEMENT CHARACTER) in
// little-endian and big-endian order respectively. The condition selecting
// between the two appends is not visible in this excerpt (presumably a test
// of `endian` -- confirm).
50 void Utf16::Encoder::transliterate(unichar, std::string &buf)
53 buf.append("\xFD\xFF", 2);
55 buf.append("\xFF\xFD", 2);
// Decoder constructor taking an error mode and a byte order.
// NOTE(review): the initializer list and body are not visible in this
// excerpt, so how `em` and `en` are stored cannot be stated from here.
59 Utf16::Decoder::Decoder(ErrorMode em, Endian en):
// Decodes one code point from `str`, starting at iterator `i`.
// NOTE(review): this excerpt omits parts of the body (braces, the endian
// detection code, the handling of `i` on exit, the return statement); the
// comments below describe only what the visible lines establish.
64 unichar Utf16::Decoder::decode_char(const string &str, string::const_iterator &i)
// j is the working read position; it starts at i and is advanced by
// decode_unit(), while i is passed along as the start of the sequence.
69 string::const_iterator j = i;
71 unichar unit = decode_unit(str, i, j);
76 /* Set endian based on the first decoded unit. If the unit was a BOM,
// A unit value of -1 with input remaining causes the next unit to be decoded
// in its place. Presumably -1 marks a consumed BOM -- confirm against the
// elided lines.
91 if(unit==-1 && j!=str.end())
92 unit = decode_unit(str, i, j);
// 0xD800..0xDBFF is the lead (high) surrogate range: a second unit is
// needed to form a code point.
98 if(unit>=0xD800 && unit<=0xDBFF)
// k is a separate read position for the second unit, starting at j.
100 string::const_iterator k = j;
104 unit2 = decode_unit(str, i, k);
// 0xDC00..0xDFFF is the trail (low) surrogate range.
106 if(unit2>=0xDC00 && unit2<=0xDFFF)
// Combine the pair: low 10 bits of the lead unit become bits 10..19, low 10
// bits of the trail unit become bits 0..9, offset by 0x10000.
109 result = 0x10000 + ((unit&0x3FF)<<10) + (unit2&0x3FF);
// Reported via error() with the range i..j, i.e. up to the end of the lead
// unit. Presumably this is the branch where the second unit is not a valid
// trail surrogate -- the else structure is elided here.
112 result = error(invalid_sequence(i, j, "incomplete UTF-16 surrogate pair"));
// A trail surrogate with no preceding lead surrogate is reported via error().
114 else if(unit>=0xDC00 && unit<=0xDFFF)
115 result = error(invalid_sequence(i, j, "stray UTF-16 trail surrogate"));
// Reads the two bytes of one UTF-16 code unit from `str` at position `j`,
// advancing `j` past each byte read. `i` is only used as the start of the
// range passed to invalid_sequence() when reporting an error.
// NOTE(review): the condition guarding the error return and the code that
// combines b1/b2 into the result are not visible in this excerpt.
124 unichar Utf16::Decoder::decode_unit(const string &str, const string::const_iterator &i, string::const_iterator &j)
// NOTE(review): j is dereferenced with no end-of-string check visible before
// it; presumably callers guarantee j != str.end() on entry -- confirm.
126 unsigned char b1 = *j++;
// Presumably reached when the input ends after the first byte -- confirm.
128 return error(invalid_sequence(i, j, "incomplete UTF-16 character"));
129 unsigned char b2 = *j++;
137 } // namespace StringCodec