utf[0]=0xFF<<(8-bytes) | ch>>(bytes*6-6);
for(unsigned j=bytes-1; j>0; --j)
{
- utf[j]=0x80 | ch&0x3F;
+ utf[j]=0x80 | (ch&0x3F);
ch>>=6;
}
UnicodeChar result=(*j++)&(mask-1);
unsigned k;
- for(k=1; (k<bytes && j!=str.end()); ++k)
- {
- if((*j&0xC0)!=0x80)
- {
- result=error("Incomplete UTF-8 character");
- i=j;
- return result;
- }
- result=result<<6 | (*j++)&0x3F;
- }
+ for(k=1; (k<bytes && j!=str.end() && (*j&0xC0)==0x80); ++k)
+ result=(result<<6) | ((*j++)&0x3F);
if(k<bytes)
- result=error("Incomplete UTF-8 character at end of input");
- else if(!result>>(bytes*6-6))
+ result=error("Incomplete UTF-8 character");
+ else if(!(result>>(bytes*5-4)) || !(result>>7))
result=error("Denormalized UTF-8 multibyte sequence");
- else if(result>0x10FFFF)
+ else if(result>0x10FFFF || (result>=0xD800 && result<=0xDFFF))
result=error("Invalid Unicode code point");
i=j;