X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fstringcodec%2Futf16.cpp;fp=source%2Fstringcodec%2Futf16.cpp;h=fed994ef9e76d03dd6deaae0ee531911bd9da4e8;hp=0000000000000000000000000000000000000000;hb=fdb0d473cc3e10dc40b8dcc98ebfff1acb91951d;hpb=8245b8036c8bdc51625616ca6248b0f2b0271dc0

diff --git a/source/stringcodec/utf16.cpp b/source/stringcodec/utf16.cpp
new file mode 100644
index 0000000..fed994e
--- /dev/null
+++ b/source/stringcodec/utf16.cpp
@@ -0,0 +1,138 @@
+#include "utf16.h"
+
+using namespace std;
+
+namespace Msp {
+namespace StringCodec {
+
+// AUTO resolves to big endian; the byte order mark is initially pending.
+Utf16::Encoder::Encoder(ErrorMode em, Endian en):
+	Codec::Encoder(em),
+	endian(en!=AUTO ? en : BIG),
+	emit_bom(true) { }
+
+/* Appends the UTF-16 form of ch to buf: a single 16-bit unit below 0x10000,
+a surrogate pair otherwise.  The first call also writes the byte order mark. */
+void Utf16::Encoder::encode_char(unichar ch, string &buf)
+{
+	// Write the BOM first so anything error() may append to buf follows it.
+	if(emit_bom)
+	{
+		buf.append(endian==LITTLE ? "\xFF\xFE" : "\xFE\xFF", 2);
+		emit_bom = false;
+	}
+
+	if(!is_valid_unichar(ch))
+		return error(ch, buf, invalid_character(ch, "UTF-16"));
+
+	bool e = (endian==LITTLE);  // offset of the high byte within a unit
+	if(ch<0x10000)
+	{
+		char utf[2];
+		utf[e] = ch>>8;
+		utf[1-e] = ch;
+		buf.append(utf, 2);
+	}
+	else
+	{
+		char utf[4];
+		ch -= 0x10000;
+		unichar sur = 0xD800+((ch>>10)&0x3FF);
+		utf[e] = sur>>8;
+		utf[1-e] = sur;
+		sur = 0xDC00+(ch&0x3FF);
+		utf[2+e] = sur>>8;
+		utf[3-e] = sur;
+		buf.append(utf, 4);
+	}
+}
+
+/* Appends U+FFFD REPLACEMENT CHARACTER to buf in the encoder's byte order.
+The character being replaced is not looked at. */
+void Utf16::Encoder::transliterate(unichar, std::string &buf)
+{
+	const char *replacement = (endian==LITTLE ? "\xFD\xFF" : "\xFF\xFD");
+	buf.append(replacement, 2);
+}
+
+
+/* The requested endianness is stored as given.  AUTO gets resolved by
+decode_char from the first unit it decodes. */
+Utf16::Decoder::Decoder(ErrorMode em, Endian en):
+	Codec::Decoder(em), endian(en) { }
+
+/* Decodes one character from str starting at i and moves i past the bytes
+that were consumed.  Returns -1 if i is already at the end or if no character
+came out of the input.  Invalid surrogate usage is reported through error()
+and its return value becomes the result. */
+unichar Utf16::Decoder::decode_char(const string &str, string::const_iterator &i)
+{
+	if(i==str.end())
+		return -1;
+
+	// Advance a copy so that i still marks the sequence start in error reports.
+	string::const_iterator pos = i;
+	unichar lead = decode_unit(str, i, pos);
+
+	if(lead!=-1 && endian==AUTO)
+	{
+		/* The first decoded unit settles the byte order.  It was read as big
+		endian, so a little endian BOM shows up byte-swapped as 0xFFFE.  With
+		no BOM present, big endian is assumed. */
+		const bool swapped_bom = (lead==0xFFFE);
+		const bool is_bom = (swapped_bom || lead==0xFEFF);
+		endian = (swapped_bom ? LITTLE : BIG);
+
+		// The BOM is not returned; continue with the unit after it, if any.
+		if(is_bom)
+		{
+			lead = -1;
+			if(pos!=str.end())
+				lead = decode_unit(str, i, pos);
+		}
+	}
+
+	unichar result = -1;
+	if(lead>=0xDC00 && lead<=0xDFFF)
+		result = error(invalid_sequence(i, pos, "stray UTF-16 trail surrogate"));
+	else if(lead>=0xD800 && lead<=0xDBFF)
+	{
+		/* A lead surrogate needs a trail surrogate right after it.  -2 stands
+		for running out of input, which gets reported below; a -1 coming back
+		from decode_unit is let through without another error. */
+		string::const_iterator after = pos;
+		unichar trail = -2;
+		if(after!=str.end())
+			trail = decode_unit(str, i, after);
+
+		if(trail>=0xDC00 && trail<=0xDFFF)
+		{
+			// Only a complete pair consumes the second unit.
+			pos = after;
+			result = 0x10000 + ((lead&0x3FF)<<10) + (trail&0x3FF);
+		}
+		else if(trail!=-1)
+			result = error(invalid_sequence(i, pos, "incomplete UTF-16 surrogate pair"));
+	}
+	else if(lead!=-1)
+		result = lead;
+
+	i = pos;
+	return result;
+}
+
+/* Reads one 16-bit code unit at j and advances j past it.  j must not be at
+the end of str.  If only one byte is left, the result of error() is returned
+with j at the end.  i marks the sequence start for error reporting. */
+unichar Utf16::Decoder::decode_unit(const string &str, const string::const_iterator &i, string::const_iterator &j)
+{
+	unsigned char first = *j++;
+	if(j==str.end())
+		return error(invalid_sequence(i, j, "incomplete UTF-16 character"));
+	unsigned char second = *j++;
+	// Anything but LITTLE (including an unresolved AUTO) is read as big endian.
+	return (endian==LITTLE ? (second<<8) | first : (first<<8) | second);
+}
+
+} // namespace StringCodec
+} // namespace Msp