3 This file is part of libmspstrings
4 Copyright © 2006-2007 Mikko Rasa
5 Distributed under the LGPL
// Encodes the code point ch as UTF-8 and appends the resulting bytes to buf.
// Code points outside 0..0x10FFFF are passed to error() instead.
15 void Utf8::Encoder::encode_char(UnicodeChar ch, string &buf)
// Negative values and anything above U+10FFFF are rejected.
17 if(ch<0 || ch>0x10FFFF)
18 return error(ch, buf, "Can't express character in UTF-8");
// Head byte: marker bits from 0xFF<<(8-bytes) OR'ed with the bits of ch
// above its low (bytes-1)*6 bits.
// NOTE(review): assumes utf is a byte array, so only the low eight bits of
// this expression are stored - confirm against its declaration.
34 utf[0] = 0xFF<<(8-bytes) | ch>>(bytes*6-6);
// Tail bytes are filled from the last one back to index 1.
35 for(unsigned j=bytes-1; j>0; --j)
// Each tail byte is the 10xxxxxx marker plus the current low six bits of ch.
// NOTE(review): ch presumably gets shifted right by six per iteration so
// that every tail byte receives its own group - verify in the loop body.
37 utf[j] = 0x80 | (ch&0x3F);
// Append exactly `bytes` bytes of the encoded sequence to the output.
41 buf.append(utf, bytes);
// Appends a fixed three-byte substitute sequence to buf. The character
// argument is unnamed and therefore not used.
45 void Utf8::Encoder::transliterate(UnicodeChar, string &buf)
47 buf.append("\357\277\275", 3); // � U+FFFD Replacement Character (bytes EF BF BD)
// Decodes one UTF-8 encoded code point from str, reading at iterator i.
// Problems are reported through error(), whose return value is used as the
// result.
51 UnicodeChar Utf8::Decoder::decode_char(const string &str, string::const_iterator &i)
// NOTE(review): presumably reached when i is already at str.end() - confirm
// against the guarding condition.
54 return error("No input");
// NOTE(review): presumably taken when the first byte has the 10xxxxxx tail
// pattern - confirm against the guarding condition.
58 UnicodeChar result = error("UTF-8 tail byte found when expecting head");
// Walk mask down one bit at a time while the head byte has that bit set;
// the loop stops with mask on the first clear bit.
// NOTE(review): the loop body presumably counts the sequence length into
// `bytes` - verify.
66 for(; *i&mask; mask>>=1)
// Work on a copy of the iterator while reading the sequence.
69 string::const_iterator j = i;
// Payload of the head byte: the bits below the first clear bit.
71 UnicodeChar result = (*j++)&(mask-1);
// Read up to bytes-1 tail bytes, stopping early at the end of the string or
// at a byte that does not match 10xxxxxx. Each tail byte adds six bits.
74 for(k=1; (k<bytes && j!=str.end() && (*j&0xC0)==0x80); ++k)
75 result = (result<<6) | ((*j++)&0x3F);
// NOTE(review): presumably selected when the loop above stopped before
// reading all tail bytes (k<bytes) - confirm.
78 result = error("Incomplete UTF-8 character");
// Overlong check: result>>(bytes*5-4) is zero when the value lies below
// 0x800 for three bytes or 0x10000 for four bytes; result>>7 is zero for
// values below 0x80, which covers the two-byte case.
79 else if(!(result>>(bytes*5-4)) || !(result>>7))
80 result = error("Denormalized UTF-8 multibyte sequence");
// Values above U+10FFFF and the surrogate range U+D800..U+DFFF are rejected.
81 else if(result>0x10FFFF || (result>=0xD800 && result<=0xDFFF))
82 result = error("Invalid Unicode code point");