X-Git-Url: http://git.tdb.fi/?a=blobdiff_plain;f=source%2Fstringcodec%2Futf8.cpp;h=4c75d8b073e277c2adc2f38db5f3db806da7b0e5;hb=9a79b9ca04d934e8e5b865a1d2b6719f76ec9d15;hp=c7e1705f814e2d099d3b0180730c57e3aa210d85;hpb=b56eb5ec1da675da0c66abc53c1e4f6c4e4cccbd;p=libs%2Fcore.git diff --git a/source/stringcodec/utf8.cpp b/source/stringcodec/utf8.cpp index c7e1705..4c75d8b 100644 --- a/source/stringcodec/utf8.cpp +++ b/source/stringcodec/utf8.cpp @@ -1,18 +1,11 @@ -/* $Id$ - -This file is part of libmspstrings -Copyright © 2006-2007 Mikko Rasa -Distributed under the LGPL -*/ - #include "utf8.h" using namespace std; namespace Msp { -namespace Codecs { +namespace StringCodec { -void Utf8::Encoder::encode_char(UnicodeChar ch, string &buf) +void Utf8::Encoder::encode_char(unichar ch, string &buf) { if(ch<0 || ch>0x10FFFF) return error(ch, buf, "Can't express character in UTF-8"); @@ -42,20 +35,20 @@ void Utf8::Encoder::encode_char(UnicodeChar ch, string &buf) } } -void Utf8::Encoder::transliterate(UnicodeChar, string &buf) +void Utf8::Encoder::transliterate(unichar, string &buf) { - buf.append("\357\277\275", 3); // � U+FFFE Replacement Character + buf.append("\357\277\275", 3); // � U+FFFD Replacement Character } -UnicodeChar Utf8::Decoder::decode_char(const string &str, string::const_iterator &i) +unichar Utf8::Decoder::decode_char(const string &str, string::const_iterator &i) { if(i==str.end()) - return error("No input"); + return -1; if((*i&0xC0)==0x80) { - UnicodeChar result = error("UTF-8 tail byte found when expecting head"); + unichar result = error("UTF-8 tail byte found when expecting head"); ++i; return result; } @@ -68,7 +61,7 @@ UnicodeChar Utf8::Decoder::decode_char(const string &str, string::const_iterator string::const_iterator j = i; - UnicodeChar result = (*j++)&(mask-1); + unichar result = (*j++)&(mask-1); unsigned k; for(k=1; (k>(bytes*5-4)) || !(result>>7)) result = error("Denormalized UTF-8 multibyte sequence"); - else if(result>0x10FFFF || (result>=0xD800 && result<=0xDFFF)) + else if(!is_valid_unichar(result)) result = error("Invalid Unicode code point"); i = j; @@ -88,5 +81,5 @@ UnicodeChar Utf8::Decoder::decode_char(const string &str, string::const_iterator return *i++; } -} // namespace Codecs +} // namespace StringCodec } // namespace Msp