git.tdb.fi Git - libs/core.git/commitdiff
Fix the UTF-8 decoder
author Mikko Rasa <tdb@tdb.fi>
Mon, 23 Mar 2009 11:49:11 +0000 (11:49 +0000)
committer Mikko Rasa <tdb@tdb.fi>
Mon, 23 Mar 2009 11:49:11 +0000 (11:49 +0000)
source/utf8.cpp

index 08dd69a664e97dace581bd3324c049217d184a24..dd01150803186dc68a4687668026a94e23b3dee4 100644 (file)
@@ -71,22 +71,14 @@ UnicodeChar Utf8::Decoder::decode_char(const string &str, string::const_iterator
                UnicodeChar result=(*j++)&(mask-1);
 
                unsigned k;
                UnicodeChar result=(*j++)&(mask-1);
 
                unsigned k;
-               for(k=1; (k<bytes && j!=str.end()); ++k)
-               {
-                       if((*j&0xC0)!=0x80)
-                       {
-                               result=error("Incomplete UTF-8 character");
-                               i=j;
-                               return result;
-                       }
+               for(k=1; (k<bytes && j!=str.end() && (*j&0xC0)==0x80); ++k)
                        result=(result<<6) | ((*j++)&0x3F);
                        result=(result<<6) | ((*j++)&0x3F);
-               }
 
                if(k<bytes)
 
                if(k<bytes)
-                       result=error("Incomplete UTF-8 character at end of input");
-               else if(!result>>(bytes*6-6))
+                       result=error("Incomplete UTF-8 character");
+               else if(!(result>>(bytes*5-4)) || !(result>>7))
                        result=error("Denormalized UTF-8 multibyte sequence");
                        result=error("Denormalized UTF-8 multibyte sequence");
-               else if(result>0x10FFFF)
+               else if(result>0x10FFFF || (result>=0xD800 && result<=0xDFFF))
                        result=error("Invalid Unicode code point");
 
                i=j;
                        result=error("Invalid Unicode code point");
 
                i=j;