From: Mikko Rasa
Date: Mon, 23 Mar 2009 11:49:11 +0000 (+0000)
Subject: Fix the UTF-8 decoder
X-Git-Tag: strings-1.1~3
X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=commitdiff_plain;h=a0a5c796a6ec11a13c49912672a82bf1857bbc71

Fix the UTF-8 decoder
---

diff --git a/source/utf8.cpp b/source/utf8.cpp
index 08dd69a..dd01150 100644
--- a/source/utf8.cpp
+++ b/source/utf8.cpp
@@ -71,22 +71,14 @@ UnicodeChar Utf8::Decoder::decode_char(const string &str, string::const_iterator
 	UnicodeChar result=(*j++)&(mask-1);
 	unsigned k;
-	for(k=1; (k[…]>(bytes*6-6))
+		result=error("Incomplete UTF-8 character");
+	else if(!(result>>(bytes*5-4)) || !(result>>7))
 		result=error("Denormalized UTF-8 multibyte sequence");
-	else if(result>0x10FFFF)
+	else if(result>0x10FFFF || (result>=0xD800 && result<=0xDFFF))
 		result=error("Invalid Unicode code point");
 	i=j;