6 namespace StringCodec {
// Encodes the single code point `ch` as UTF-8 and appends the resulting bytes to `buf`.
// NOTE(review): this listing is an excerpt; the lines that declare `utf` and
// compute `bytes` (and any single-byte fast path) are not visible here.
8 void Utf8::Encoder::encode_char(unichar ch, string &buf)
// Code points rejected by is_valid_unichar() are handed to error() together
// with an invalid_character(ch, "UTF-8") object instead of being encoded.
10 if(!is_valid_unichar(ch))
11 return error(ch, buf, invalid_character(ch, "UTF-8"));
// Lead byte: 0xFF<<(8-bytes) sets the top `bytes` bits of the low octet, and
// ch>>(bytes*6-6) contributes the bits of ch above its lowest (bytes-1)*6 bits.
// NOTE(review): presumably `utf` is a char array, so bits above the low octet
// are discarded on assignment -- confirm against the declaration.
27 utf[0] = 0xFF<<(8-bytes) | ch>>(bytes*6-6);
// Trailing bytes are written from the last index down to index 1.
28 for(unsigned j=bytes-1; j>0; --j)
// Each trailing byte is the 0x80 marker plus the low six bits of ch.
// NOTE(review): presumably ch is shifted right by 6 on a loop-body line that
// is not visible here -- confirm.
30 utf[j] = 0x80 | (ch&0x3F);
// Append the first `bytes` bytes of `utf` to the output.
34 buf.append(utf, bytes);
// Appends a fixed substitute to `buf`; the unichar argument is unnamed and unused.
38 void Utf8::Encoder::transliterate(unichar, string &buf)
// Octal 357 277 275 is the byte sequence EF BF BD, the UTF-8 encoding of U+FFFD.
40 buf.append("\357\277\275", 3); // � U+FFFD Replacement Character
// Decodes one UTF-8 encoded code point from `str`, reading at iterator `i`.
// Malformed input is reported by passing an invalid_sequence object to error(),
// whose return value becomes the result.
// NOTE(review): this listing is an excerpt; the declarations of `mask`,
// `bytes`, `j` and `k`, several branch conditions, the update of `i` and the
// return statements are not visible here.
44 unichar Utf8::Decoder::decode_char(const string &str, string::const_iterator &i)
// Reports the single byte at `i` (range [i, i+1)) as a bad sequence.
// NOTE(review): the condition guarding this branch is not visible -- confirm
// which byte pattern it matches.
51 unichar result = error(invalid_sequence(i, i+1, "stray UTF-8 head byte"));
// Moves `mask` down one bit at a time while the byte at `i` has that bit set.
// NOTE(review): presumably the (unseen) loop body counts these bits to derive
// `bytes` -- confirm.
59 for(; *i&mask; mask>>=1)
// Seed the result with the bits of the first byte that lie below the mask bit,
// then step `j` past that byte.
64 unichar result = (*j++)&(mask-1);
// Consume up to bytes-1 following bytes, stopping early at the end of `str` or
// at a byte whose top two bits are not 10; each one adds six low bits.
67 for(k=1; (k<bytes && j!=str.end() && (*j&0xC0)==0x80); ++k)
68 result = (result<<6) | ((*j++)&0x3F);
// NOTE(review): the condition for this branch is not visible; presumably it
// fires when the loop above stopped before reading `bytes` bytes -- confirm.
71 result = error(invalid_sequence(i, j, "incomplete UTF-8 character"));
// Reject values with no bits set at or above position bytes*5-4, or that fit
// in seven bits: such a value did not need this many bytes.
72 else if(!(result>>(bytes*5-4)) || !(result>>7))
73 result = error(invalid_sequence(i, j, "denormalized UTF-8 sequence"));
// Reject decoded values that is_valid_unichar() does not accept.
74 else if(!is_valid_unichar(result))
75 result = error(invalid_sequence(i, j, "undefined UTF-8 character"));
84 } // namespace StringCodec