From 9a79b9ca04d934e8e5b865a1d2b6719f76ec9d15 Mon Sep 17 00:00:00 2001
From: Mikko Rasa
Date: Sun, 5 Jun 2011 22:58:18 +0300
Subject: [PATCH] Put unichar and ustring in their own file

---
NOTE(review): this patch was recovered from a copy that had been collapsed
onto one line with every <...> token stripped.  In ustring.h the include
target and the basic_string template argument have been restored, since
they are unambiguous.  The two bare "#include" context lines in the codec.h
hunk and the author's e-mail address could not be recovered from the
mangled copy; restore them from the repository before applying.  Leading
indentation was also lost and is reconstructed here with tabs -- confirm
against the tree.

 source/stringcodec/codec.h   |  5 +----
 source/stringcodec/ustring.h | 19 +++++++++++++++++++
 source/stringcodec/utf8.cpp  |  2 +-
 3 files changed, 21 insertions(+), 5 deletions(-)
 create mode 100644 source/stringcodec/ustring.h

diff --git a/source/stringcodec/codec.h b/source/stringcodec/codec.h
index cb80066..5acbe7b 100644
--- a/source/stringcodec/codec.h
+++ b/source/stringcodec/codec.h
@@ -3,14 +3,11 @@
 
 #include
 #include
+#include "ustring.h"
 
 namespace Msp {
 namespace StringCodec {
 
-typedef int unichar;
-
-typedef std::basic_string ustring;
-
 enum ErrorMode
 {
 	THROW_ON_ERROR,
diff --git a/source/stringcodec/ustring.h b/source/stringcodec/ustring.h
new file mode 100644
index 0000000..1a0a64e
--- /dev/null
+++ b/source/stringcodec/ustring.h
@@ -0,0 +1,19 @@
+#ifndef MSP_STRINGCODEC_USTRING_H_
+#define MSP_STRINGCODEC_USTRING_H_
+
+#include <string>
+
+namespace Msp {
+namespace StringCodec {
+
+typedef int unichar;
+
+typedef std::basic_string<unichar> ustring;
+
+inline bool is_valid_unichar(unichar ch)
+{ return ch>=0 && ch<=0x10FFFF && (ch<0xD800 || ch>0xDFFF) && (ch&0xFFFE)!=0xFFFE; }
+
+} // namespace StringCodec
+} // namespace Msp
+
+#endif
diff --git a/source/stringcodec/utf8.cpp b/source/stringcodec/utf8.cpp
index 19fe488..4c75d8b 100644
--- a/source/stringcodec/utf8.cpp
+++ b/source/stringcodec/utf8.cpp
@@ -71,7 +71,7 @@ unichar Utf8::Decoder::decode_char(const string &str, string::const_iterator &i)
 			result = error("Incomplete UTF-8 character");
 		else if(!(result>>(bytes*5-4)) || !(result>>7))
 			result = error("Denormalized UTF-8 multibyte sequence");
-		else if(result>0x10FFFF || (result>=0xD800 && result<=0xDFFF))
+		else if(!is_valid_unichar(result))
 			result = error("Invalid Unicode code point");
 
 		i = j;
-- 
2.43.0