X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fstringcodec%2Futf8.cpp;h=b75b39780edfa6269ad09fa780ca9a565f574872;hp=f56ba546064ee0c234f8d47af5a2d4fc25d0fb04;hb=8245b8036c8bdc51625616ca6248b0f2b0271dc0;hpb=02794ef3620d0d9cc3b8f1c0d8f2995c825fdf4f diff --git a/source/stringcodec/utf8.cpp b/source/stringcodec/utf8.cpp index f56ba54..b75b397 100644 --- a/source/stringcodec/utf8.cpp +++ b/source/stringcodec/utf8.cpp @@ -7,8 +7,8 @@ namespace StringCodec { void Utf8::Encoder::encode_char(unichar ch, string &buf) { - if(ch<0 || ch>0x10FFFF) - return error(ch, buf, "Can't express character in UTF-8"); + if(!is_valid_unichar(ch)) + return error(ch, buf, invalid_character(ch, "UTF-8")); unsigned bytes = 1; if(ch>0xFFFF) @@ -37,18 +37,18 @@ void Utf8::Encoder::encode_char(unichar ch, string &buf) void Utf8::Encoder::transliterate(unichar, string &buf) { - buf.append("\357\277\275", 3); // � U+FFFE Replacement Character + buf.append("\357\277\275", 3); // � U+FFFD Replacement Character } unichar Utf8::Decoder::decode_char(const string &str, string::const_iterator &i) { if(i==str.end()) - return error("No input"); + return -1; if((*i&0xC0)==0x80) { - unichar result = error("UTF-8 tail byte found when expecting head"); + unichar result = error(invalid_sequence(i, i+1, "stray UTF-8 head byte")); ++i; return result; } @@ -68,11 +68,11 @@ unichar Utf8::Decoder::decode_char(const string &str, string::const_iterator &i) result = (result<<6) | ((*j++)&0x3F); if(k>(bytes*5-4)) || !(result>>7)) - result = error("Denormalized UTF-8 multibyte sequence"); - else if(result>0x10FFFF || (result>=0xD800 && result<=0xDFFF)) - result = error("Invalid Unicode code point"); + result = error(invalid_sequence(i, j, "denormalized UTF-8 sequence")); + else if(!is_valid_unichar(result)) + result = error(invalid_sequence(i, j, "undefined UTF-8 character")); i = j; return result;