X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Futf8.cpp;fp=source%2Futf8.cpp;h=0000000000000000000000000000000000000000;hp=c7e1705f814e2d099d3b0180730c57e3aa210d85;hb=b42ed73a1b241c0e93ee03c43c4584b41c549bac;hpb=5b1368cb791cab043f0435628cacbaff36e39b7b diff --git a/source/utf8.cpp b/source/utf8.cpp deleted file mode 100644 index c7e1705..0000000 --- a/source/utf8.cpp +++ /dev/null @@ -1,92 +0,0 @@ -/* $Id$ - -This file is part of libmspstrings -Copyright © 2006-2007 Mikko Rasa -Distributed under the LGPL -*/ - -#include "utf8.h" - -using namespace std; - -namespace Msp { -namespace Codecs { - -void Utf8::Encoder::encode_char(UnicodeChar ch, string &buf) -{ - if(ch<0 || ch>0x10FFFF) - return error(ch, buf, "Can't express character in UTF-8"); - - unsigned bytes = 1; - if(ch>0xFFFF) - bytes = 4; - else if(ch>0x7FF) - bytes = 3; - else if(ch>0x7F) - bytes = 2; - - if(bytes==1) - buf += ch; - else - { - char utf[4]; - - utf[0] = 0xFF<<(8-bytes) | ch>>(bytes*6-6); - for(unsigned j=bytes-1; j>0; --j) - { - utf[j] = 0x80 | (ch&0x3F); - ch >>= 6; - } - - buf.append(utf, bytes); - } -} - -void Utf8::Encoder::transliterate(UnicodeChar, string &buf) -{ - buf.append("\357\277\275", 3); // � U+FFFE Replacement Character -} - - -UnicodeChar Utf8::Decoder::decode_char(const string &str, string::const_iterator &i) -{ - if(i==str.end()) - return error("No input"); - - if((*i&0xC0)==0x80) - { - UnicodeChar result = error("UTF-8 tail byte found when expecting head"); - ++i; - return result; - } - else if(*i&0x80) - { - unsigned bytes = 2; - unsigned mask = 0x20; - for(; *i&mask; mask>>=1) - ++bytes; - - string::const_iterator j = i; - - UnicodeChar result = (*j++)&(mask-1); - - unsigned k; - for(k=1; (k>(bytes*5-4)) || !(result>>7)) - result = error("Denormalized UTF-8 multibyte sequence"); - else if(result>0x10FFFF || (result>=0xD800 && result<=0xDFFF)) - result = error("Invalid Unicode code point"); - - i = j; - return result; - } - else - return *i++; -} - -} // namespace Codecs -} // namespace Msp