using namespace std;
namespace Msp {
-namespace Codecs {
+namespace StringCodec {
-void Utf8::Encoder::encode_char(UnicodeChar ch, string &buf)
+void Utf8::Encoder::encode_char(unichar ch, string &buf)
{
- if(ch<0 || ch>0x10FFFF)
- return error(ch, buf, "Can't express character in UTF-8");
+ if(!is_valid_unichar(ch)) // the validity test is delegated to is_valid_unichar() instead of an inline range check
+ return error(ch, buf, invalid_character(ch, "UTF-8")); // rejected characters go to error() together with buf
unsigned bytes = 1;
if(ch>0xFFFF)
}
}
-void Utf8::Encoder::transliterate(UnicodeChar, string &buf)
+void Utf8::Encoder::transliterate(unichar, string &buf) // the character argument is unnamed and unused
{
- buf.append("\357\277\275", 3); // � U+FFFE Replacement Character
+ buf.append("\357\277\275", 3); // � U+FFFD REPLACEMENT CHARACTER; the octal escapes are its UTF-8 bytes EF BF BD
}
-UnicodeChar Utf8::Decoder::decode_char(const string &str, string::const_iterator &i)
+// Decodes one character of str starting at i and advances i past the bytes
+// that were examined.  Returns -1 if i is already at the end of str.
+// Malformed input is reported through error(), whose result is returned.
+unichar Utf8::Decoder::decode_char(const string &str, string::const_iterator &i)
{
if(i==str.end())
- return error("No input");
+ return -1; // end of input is signalled with -1 rather than through error()
if((*i&0xC0)==0x80)
{
- UnicodeChar result = error("UTF-8 tail byte found when expecting head");
+ unichar result = error(invalid_sequence(i, i+1, "stray UTF-8 tail byte")); // 10xxxxxx is a tail (continuation) byte and cannot begin a character
++i;
return result;
}
for(; *i&mask; mask>>=1)
++bytes;
- string::const_iterator j = i;
+ auto j = i; // j scans ahead; i keeps marking the start of the sequence for error reports
- UnicodeChar result = (*j++)&(mask-1);
+ unichar result = (*j++)&(mask-1); // payload bits of the head byte
unsigned k;
for(k=1; (k<bytes && j!=str.end() && (*j&0xC0)==0x80); ++k)
result = (result<<6) | ((*j++)&0x3F);
if(k<bytes)
- result = error("Incomplete UTF-8 character");
+ result = error(invalid_sequence(i, j, "incomplete UTF-8 character")); // input ended or a non-tail byte appeared before all bytes were read
else if(!(result>>(bytes*5-4)) || !(result>>7))
- result = error("Denormalized UTF-8 multibyte sequence");
- else if(result>0x10FFFF || (result>=0xD800 && result<=0xDFFF))
- result = error("Invalid Unicode code point");
+ result = error(invalid_sequence(i, j, "denormalized UTF-8 sequence")); // value is small enough that fewer bytes would have encoded it
+ else if(!is_valid_unichar(result))
+ result = error(invalid_sequence(i, j, "undefined UTF-8 character")); // well-formed bytes, but is_valid_unichar() rejects the value
i = j;
return result;
return *i++;
}
-} // namespace Codecs
+} // namespace StringCodec
} // namespace Msp