6 namespace StringCodec {
8 Utf16::Encoder::Encoder(ErrorMode em, Endian en):
10 endian(en==AUTO ? BIG : en)
13 void Utf16::Encoder::encode_char(unichar ch, string &buf)
15 if(!is_valid_unichar(ch))
16 return error(ch, buf, invalid_character(ch, "UTF-16"));
21 buf.append("\xFF\xFE");
23 buf.append("\xFE\xFF");
27 bool e = (endian==LITTLE);
39 unichar sur = 0xD800+((ch>>10)&0x3FF);
42 sur = 0xDC00+(ch&0x3FF);
49 void Utf16::Encoder::transliterate(unichar, string &buf)
52 buf.append("\xFD\xFF", 2);
54 buf.append("\xFF\xFD", 2);
58 Utf16::Decoder::Decoder(ErrorMode em, Endian en):
63 unichar Utf16::Decoder::decode_char(const string &str, string::const_iterator &i)
70 unichar unit = decode_unit(str, i, j);
75 /* Set endian based on the first decoded unit. If the unit was a BOM,
90 if(unit==-1 && j!=str.end())
91 unit = decode_unit(str, i, j);
97 if(unit>=0xD800 && unit<=0xDBFF)
103 unit2 = decode_unit(str, i, k);
105 if(unit2>=0xDC00 && unit2<=0xDFFF)
108 result = 0x10000 + ((unit&0x3FF)<<10) + (unit2&0x3FF);
111 result = error(invalid_sequence(i, j, "incomplete UTF-16 surrogate pair"));
113 else if(unit>=0xDC00 && unit<=0xDFFF)
114 result = error(invalid_sequence(i, j, "stray UTF-16 trail surrogate"));
123 unichar Utf16::Decoder::decode_unit(const string &str, const string::const_iterator &i, string::const_iterator &j)
125 unsigned char b1 = *j++;
127 return error(invalid_sequence(i, j, "incomplete UTF-16 character"));
128 unsigned char b2 = *j++;
136 } // namespace StringCodec