From: Mikko Rasa Date: Sun, 5 Jun 2011 19:58:18 +0000 (+0300) Subject: Put unichar and ustring in their own file X-Git-Url: http://git.tdb.fi/?a=commitdiff_plain;h=9a79b9ca04d934e8e5b865a1d2b6719f76ec9d15;p=libs%2Fcore.git Put unichar and ustring in their own file --- diff --git a/source/stringcodec/codec.h b/source/stringcodec/codec.h index cb80066..5acbe7b 100644 --- a/source/stringcodec/codec.h +++ b/source/stringcodec/codec.h @@ -3,14 +3,11 @@ #include #include +#include "ustring.h" namespace Msp { namespace StringCodec { -typedef int unichar; - -typedef std::basic_string<unichar> ustring; - enum ErrorMode { THROW_ON_ERROR, diff --git a/source/stringcodec/ustring.h b/source/stringcodec/ustring.h new file mode 100644 index 0000000..1a0a64e --- /dev/null +++ b/source/stringcodec/ustring.h @@ -0,0 +1,19 @@ +#ifndef MSP_STRINGCODEC_USTRING_H_ +#define MSP_STRINGCODEC_USTRING_H_ + +#include <string> + +namespace Msp { +namespace StringCodec { + +typedef int unichar; + +typedef std::basic_string<unichar> ustring; + +inline bool is_valid_unichar(unichar ch) +{ return ch>=0 && ch<=0x10FFFF && (ch<0xD800 || ch>0xDFFF) && (ch&0xFFFE)!=0xFFFE; } + +} // namespace StringCodec +} // namespace Msp + +#endif diff --git a/source/stringcodec/utf8.cpp b/source/stringcodec/utf8.cpp index 19fe488..4c75d8b 100644 --- a/source/stringcodec/utf8.cpp +++ b/source/stringcodec/utf8.cpp @@ -71,7 +71,7 @@ unichar Utf8::Decoder::decode_char(const string &str, string::const_iterator &i) result = error("Incomplete UTF-8 character"); else if(!(result>>(bytes*5-4)) || !(result>>7)) result = error("Denormalized UTF-8 multibyte sequence"); - else if(result>0x10FFFF || (result>=0xD800 && result<=0xDFFF)) + else if(!is_valid_unichar(result)) result = error("Invalid Unicode code point"); i = j;