--- /dev/null
+/* $Id$
+
+This file is part of libmspstrings
+Copyright © 2006-2007 Mikko Rasa
+Distributed under the LGPL
+*/
+
+#include "utf8.h"
+
+using namespace std;
+
+namespace Msp {
+namespace Codecs {
+
+/**
+Appends the UTF-8 encoding of ch to buf.
+
+Values that UTF-8 cannot represent - negative values, values above 0x10FFFF
+and the surrogate range 0xD800..0xDFFF - are handed to error() together with
+the buffer, and nothing else is appended here.  Surrogates are rejected so
+that the encoder never produces a sequence which Utf8::Decoder::decode_char
+reports as an invalid code point.
+*/
+void Utf8::Encoder::encode_char(UnicodeChar ch, string &buf)
+{
+	if(ch<0 || ch>0x10FFFF || (ch>=0xD800 && ch<=0xDFFF))
+		return error(ch, buf, "Can't express character in UTF-8");
+
+	// Sequence length is determined by how many payload bits the value needs
+	unsigned bytes = 1;
+	if(ch>0xFFFF)
+		bytes = 4;
+	else if(ch>0x7FF)
+		bytes = 3;
+	else if(ch>0x7F)
+		bytes = 2;
+
+	if(bytes==1)
+		buf += static_cast<char>(ch);
+	else
+	{
+		char utf[4];
+
+		// Head byte: one leading 1 bit per byte of the sequence, followed by
+		// the topmost payload bits.  Bits shifted above the low eight are
+		// dropped by the conversion to char.
+		utf[0] = static_cast<char>(0xFF<<(8-bytes) | ch>>(bytes*6-6));
+
+		// Tail bytes are filled from the last one backwards, six bits each
+		for(unsigned j=bytes-1; j>0; --j)
+		{
+			utf[j] = static_cast<char>(0x80 | (ch&0x3F));
+			ch >>= 6;
+		}
+
+		buf.append(utf, bytes);
+	}
+}
+
+/**
+Appends the UTF-8 form of U+FFFD REPLACEMENT CHARACTER to buf.  The character
+argument is not examined.
+*/
+void Utf8::Encoder::transliterate(UnicodeChar, string &buf)
+{
+	// Bytes EF BF BD encode U+FFFD; sizeof includes the terminating NUL,
+	// which must not be appended.
+	static const char replacement[] = "\357\277\275";
+	buf.append(replacement, sizeof(replacement)-1);
+}
+
+
+/**
+Decodes a single character from str, starting at i.
+
+On return i has been advanced past the bytes that were consumed, except when
+i was already at the end of the string.  Every problem is reported through
+error(), whose return value is passed back to the caller:
+ - no input left
+ - a tail byte (10xxxxxx) where a head byte was expected; one byte is skipped
+ - a head byte of 0xFE or 0xFF; one byte is skipped
+ - a sequence that ends, or meets a non-tail byte, before it is complete
+ - an overlong ("denormalized") encoding
+ - a value above 0x10FFFF or within the surrogate range 0xD800..0xDFFF
+
+NOTE(review): error() is declared elsewhere; if it throws, i is left where it
+was at the point of the call - confirm that this suits the callers.
+*/
+UnicodeChar Utf8::Decoder::decode_char(const string &str, string::const_iterator &i)
+{
+	if(i==str.end())
+		return error("No input");
+
+	if((*i&0xC0)==0x80)
+	{
+		UnicodeChar result = error("UTF-8 tail byte found when expecting head");
+		++i;
+		return result;
+	}
+	else if(*i&0x80)
+	{
+		// Every leading 1 bit after the first two adds one byte to the
+		// sequence; mask ends up on the first 0 bit of the head byte.
+		unsigned bytes = 2;
+		unsigned mask = 0x20;
+		for(; *i&mask; mask>>=1)
+			++bytes;
+
+		if(bytes>6)
+		{
+			// 0xFE and 0xFF do not start a sequence in any revision of UTF-8.
+			// Accumulating seven or eight bytes would overflow the result and
+			// make the overlong check below shift by 31 or 36 bits, so they
+			// are rejected here and only the offending byte is consumed.
+			UnicodeChar result = error("Invalid UTF-8 head byte");
+			++i;
+			return result;
+		}
+
+		string::const_iterator j = i;
+
+		// Payload bits of the head byte are the ones below the first 0 bit
+		UnicodeChar result = (*j++)&(mask-1);
+
+		// Collect tail bytes, stopping early at end of input or at a byte
+		// that is not of the form 10xxxxxx
+		unsigned k;
+		for(k=1; (k<bytes && j!=str.end() && (*j&0xC0)==0x80); ++k)
+			result = (result<<6) | ((*j++)&0x3F);
+
+		if(k<bytes)
+			result = error("Incomplete UTF-8 character");
+		// An n-byte sequence must carry a value of at least 1<<(n*5-4)
+		// (0x800 for three bytes, 0x10000 for four); the two-byte minimum is
+		// 0x80, which the second test covers.
+		else if(!(result>>(bytes*5-4)) || !(result>>7))
+			result = error("Denormalized UTF-8 multibyte sequence");
+		else if(result>0x10FFFF || (result>=0xD800 && result<=0xDFFF))
+			result = error("Invalid Unicode code point");
+
+		// Consume everything that was examined, also in the error cases
+		i = j;
+		return result;
+	}
+	else
+		return *i++;
+}
+
+} // namespace Codecs
+} // namespace Msp