X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fstringcodec%2Futf8.cpp;h=b75b39780edfa6269ad09fa780ca9a565f574872;hp=19fe488282561dafbe1d4b788ec3bee0b8cb1b18;hb=8245b8036c8bdc51625616ca6248b0f2b0271dc0;hpb=c5cb2162baeeb7c750595e07ba1cbfcb03702f77 diff --git a/source/stringcodec/utf8.cpp b/source/stringcodec/utf8.cpp index 19fe488..b75b397 100644 --- a/source/stringcodec/utf8.cpp +++ b/source/stringcodec/utf8.cpp @@ -7,8 +7,8 @@ namespace StringCodec { void Utf8::Encoder::encode_char(unichar ch, string &buf) { - if(ch<0 || ch>0x10FFFF) - return error(ch, buf, "Can't express character in UTF-8"); + if(!is_valid_unichar(ch)) + return error(ch, buf, invalid_character(ch, "UTF-8")); unsigned bytes = 1; if(ch>0xFFFF) @@ -48,7 +48,7 @@ unichar Utf8::Decoder::decode_char(const string &str, string::const_iterator &i) if((*i&0xC0)==0x80) { - unichar result = error("UTF-8 tail byte found when expecting head"); + unichar result = error(invalid_sequence(i, i+1, "stray UTF-8 head byte")); ++i; return result; } @@ -68,11 +68,11 @@ unichar Utf8::Decoder::decode_char(const string &str, string::const_iterator &i) result = (result<<6) | ((*j++)&0x3F); if(k>(bytes*5-4)) || !(result>>7)) - result = error("Denormalized UTF-8 multibyte sequence"); - else if(result>0x10FFFF || (result>=0xD800 && result<=0xDFFF)) - result = error("Invalid Unicode code point"); + result = error(invalid_sequence(i, j, "denormalized UTF-8 sequence")); + else if(!is_valid_unichar(result)) + result = error(invalid_sequence(i, j, "undefined UTF-8 character")); i = j; return result;