git.tdb.fi Git - libs/core.git/blobdiff - source/utf8.cpp
More sophisticated error handling
[libs/core.git] / source / utf8.cpp
index f95ad9c27a9ea1b76b86d273432f3c74b21b7fea..030406d3abcfc84dc32fd3d73a8812746d36d41d 100644 (file)
@@ -8,7 +8,10 @@ void Utf8::Encoder::encode_char(wchar_t c)
 {
        unsigned code=c;
        if(code>0x10FFFF)
-               throw CodecError("Can't express character in UTF-8");
+       {
+               error("Can't express character in UTF-8");
+               return;
+       }
 
        unsigned bytes=1;
        if(code>0xFFFF)
@@ -43,8 +46,11 @@ void Utf8::Decoder::decode_char(const string &str, string::const_iterator &i)
                if(bytes==0)
                {
                        if((*i&0xC0)==0x80)
-                               throw CodecError("Invalid UTF-8 string (tail byte when expecting head)");
-
+                       {
+                               error("Invalid UTF-8 string (tail byte when expecting head)");
+                               ++i;
+                               break;
+                       }
                        else if(*i&0x80)
                        {
                                unsigned mask=0x40;
@@ -52,11 +58,20 @@ void Utf8::Decoder::decode_char(const string &str, string::const_iterator &i)
                                        ++bytes;
 
                                if(bytes>3)
-                                       throw CodecError("Invalid UTF-8 string (overlong multibyte sequence)");
-
-                               code=(*i++)&(mask-1);
-                               if(!code)
-                                       throw CodecError("Invalid UTF-8 string (denormalized multibyte sequence)");
+                               {
+                                       error("Invalid UTF-8 string (overlong multibyte sequence)");
+                                       ++i;
+                                       break;
+                               }
+                               else
+                               {
+                                       code=(*i++)&(mask-1);
+                                       if(!code)
+                                       {
+                                               error("Invalid UTF-8 string (denormalized multibyte sequence)");
+                                               break;
+                                       }
+                               }
                        }
                        else
                        {
@@ -67,7 +82,11 @@ void Utf8::Decoder::decode_char(const string &str, string::const_iterator &i)
                else
                {
                        if((*i&0xC0)!=0x80)
-                               throw CodecError("Invalid UTF-8 string (head byte when expecting tail)");
+                       {
+                               error("Invalid UTF-8 string (head byte when expecting tail)");
+                               ++i;
+                               break;
+                       }
 
                        code=code<<6 | (*i++)&0x3F;
                        --bytes;
@@ -75,8 +94,9 @@ void Utf8::Decoder::decode_char(const string &str, string::const_iterator &i)
                        if(!bytes)
                        {
                                if(code>0x10FFFF)
-                                       throw CodecError("Invalid UTF-8 string (character out of range)");
-                               append(code);
+                                       error("Invalid UTF-8 string (character out of range)");
+                               else
+                                       append(code);
                                break;
                        }
                }
@@ -86,7 +106,10 @@ void Utf8::Decoder::decode_char(const string &str, string::const_iterator &i)
 void Utf8::Decoder::sync()
 {
        if(bytes)
-               throw CodecError("Sync in the middle of multibyte UTF-8 sequence");
+       {
+               error("Sync in the middle of multibyte UTF-8 sequence");
+               bytes=0;
+       }
 }
 
 void Utf8::Decoder::reset()