namespace Msp {
namespace StringCodec {
/** Encodes one character as ASCII.  A value in the range 0..0x7F is appended
to buf as a single byte; any other value is handed to error() together with
the buffer and a diagnostic message, and nothing is appended here. */
-void Ascii::Encoder::encode_char(UnicodeChar ch, string &buf)
+void Ascii::Encoder::encode_char(unichar ch, string &buf)
{
// Negative values and values above 0x7F have no ASCII representation.
if(ch<0 || ch>0x7F)
return error(ch, buf, "Can't express character in ASCII");
buf += ch;
}
-void Ascii::Encoder::transliterate(UnicodeChar ch, string &buf)
+void Ascii::Encoder::transliterate(unichar ch, string &buf)
{
if(ch>=0xC0 && ch<=0xC5)
buf += 'A';
}
-UnicodeChar Ascii::Decoder::decode_char(const string &str, string::const_iterator &i)
+unichar Ascii::Decoder::decode_char(const string &str, string::const_iterator &i)
{
if(i==str.end())
return error("No input");
else if(*i&0x80)
{
- UnicodeChar result = error("Undefined ASCII character");
+ unichar result = error("Undefined ASCII character");
++i;
return result;
}
public:
Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { }
- virtual void encode_char(UnicodeChar, std::string &);
+ virtual void encode_char(unichar, std::string &);
private:
- virtual void transliterate(UnicodeChar, std::string &);
+ virtual void transliterate(unichar, std::string &);
};
class Decoder: public Codec::Decoder
public:
Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { }
- virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
+ virtual unichar decode_char(const std::string &, std::string::const_iterator &);
};
virtual const char *get_name() const { return "ASCII"; }
return buf;
}
-void Codec::Encoder::error(UnicodeChar ch, string &buf, const string &msg)
+void Codec::Encoder::error(unichar ch, string &buf, const string &msg)
{
switch(err_mode)
{
{
for(string::const_iterator i=str.begin(); i!=str.end();)
{
- UnicodeChar c = decode_char(str, i);
+ unichar c = decode_char(str, i);
if(c!=-1)
buf += c;
}
return buf;
}
-UnicodeChar Codec::Decoder::error(const string &msg)
+unichar Codec::Decoder::error(const string &msg)
{
switch(err_mode)
{
namespace Msp {
namespace StringCodec {
/** Type used for a single unicode character throughout the codecs; a plain
int. */
-typedef int UnicodeChar;
+typedef int unichar;
/** String whose elements are of the character type above. */
-typedef std::basic_string<UnicodeChar> ustring;
+typedef std::basic_string<unichar> ustring;
enum ErrorMode
{
/** Encodes a single unicode character. If the character can't be
represented in this encoding, error() should be called. */
- virtual void encode_char(UnicodeChar ch, std::string &buf) = 0;
+ virtual void encode_char(unichar ch, std::string &buf) = 0;
/** Encodes a unicode string. This is equivalent to calling encode_char
for each character in the string with the same buffer. */
THROW_ON_ERROR: throws CodecError(msg)
IGNORE_ERRORS: does nothing
TRANSLITERATE: calls transliterate(ch, buf) */
- void error(UnicodeChar ch, std::string &buf, const std::string &msg);
+ void error(unichar ch, std::string &buf, const std::string &msg);
/** Attempts to produce an alternative encoding for a unicode character.
Typically this includes dropping accent marks or romanizing letters. */
- virtual void transliterate(UnicodeChar ch, std::string &buf) = 0;
+ virtual void transliterate(unichar ch, std::string &buf) = 0;
};
/**
state change sequence was decoded but no character followed it. If
invalid input is encountered, the error() function should be called and
the iterator advanced only if it doesn't throw. */
- virtual UnicodeChar decode_char(const std::string &str, std::string::const_iterator &i) = 0;
+ virtual unichar decode_char(const std::string &str, std::string::const_iterator &i) = 0;
/** Decodes a string. */
virtual void decode(const std::string &str, ustring &buf);
THROW_ON_ERROR: throws CodecError(msg)
IGNORE_ERRORS: returns -1
TRANSLITERATE: returns 0xFFFE */
- UnicodeChar error(const std::string &msg);
+ unichar error(const std::string &msg);
};
protected:
namespace Msp {
namespace StringCodec {
-void Iso2022Jp::Encoder::encode_char(UnicodeChar ch, string &buf)
+void Iso2022Jp::Encoder::encode_char(unichar ch, string &buf)
{
if(ch>=0 && ch<=0x7F && ch!=0x5C && ch!=0x7E)
{
}
}
-void Iso2022Jp::Encoder::transliterate(UnicodeChar, string &buf)
+void Iso2022Jp::Encoder::transliterate(unichar, string &buf)
{
if(mode!=ASCII)
switch_mode(ASCII, buf);
dec(new Ascii::Decoder)
{ }
-UnicodeChar Iso2022Jp::Decoder::decode_char(const string &str, string::const_iterator &i)
+unichar Iso2022Jp::Decoder::decode_char(const string &str, string::const_iterator &i)
{
if(i==str.end())
return error("No input");
{
string::const_iterator j = i;
- UnicodeChar result = -1;
+ unichar result = -1;
if(*j==033)
{
unsigned escape = 0;
public:
Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em), mode(ASCII) { }
- virtual void encode_char(UnicodeChar, std::string &);
+ virtual void encode_char(unichar, std::string &);
virtual void sync(std::string &);
virtual void reset();
private:
void switch_mode(Mode, std::string &);
- virtual void transliterate(UnicodeChar, std::string &);
+ virtual void transliterate(unichar, std::string &);
};
class Decoder: public Codec::Decoder
public:
Decoder(ErrorMode =THROW_ON_ERROR);
- virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
+ virtual unichar decode_char(const std::string &, std::string::const_iterator &);
virtual void reset();
private:
void switch_mode(Mode);
namespace Msp {
namespace StringCodec {
-void Iso646Fi::Encoder::encode_char(UnicodeChar ch, string &buf)
+void Iso646Fi::Encoder::encode_char(unichar ch, string &buf)
{
int tch = transform_mapping_or_direct(mapping, map_size, ch, false);
if(tch<0 || tch>0x7F)
buf += tch;
}
/** Transliteration fallback for ISO-646-FI: appends a question mark to buf.
The character argument is unnamed and not examined. */
-void Iso646Fi::Encoder::transliterate(UnicodeChar, string &buf)
+void Iso646Fi::Encoder::transliterate(unichar, string &buf)
{
buf += '?';
}
-UnicodeChar Iso646Fi::Decoder::decode_char(const string &str, string::const_iterator &i)
+unichar Iso646Fi::Decoder::decode_char(const string &str, string::const_iterator &i)
{
if(i==str.end())
return error("No input");
unsigned char ch = *i;
int tch = (ch<=0x7F ? transform_mapping_or_direct(mapping, map_size, ch, true) : -1);
- UnicodeChar result;
+ unichar result;
if(tch==-1)
result = error("Undefined ISO-646-FI character");
else
public:
Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { }
- virtual void encode_char(UnicodeChar, std::string &);
+ virtual void encode_char(unichar, std::string &);
private:
- virtual void transliterate(UnicodeChar, std::string &);
+ virtual void transliterate(unichar, std::string &);
};
class Decoder: public Codec::Decoder
public:
Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { }
- virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
+ virtual unichar decode_char(const std::string &, std::string::const_iterator &);
};
virtual const char *get_name() const { return "ISO-646-FI"; }
namespace Msp {
namespace StringCodec {
/** Encodes one character as ISO-8859-1.  A value in the range 0..0xFF is
appended to buf as a single byte; any other value is handed to error()
together with the buffer and a diagnostic message, and nothing is appended
here. */
-void Iso88591::Encoder::encode_char(UnicodeChar ch, string &buf)
+void Iso88591::Encoder::encode_char(unichar ch, string &buf)
{
// Negative values and values above 0xFF do not fit in one ISO-8859-1 byte.
if(ch<0 || ch>0xFF)
return error(ch, buf, "Can't express character in ISO-8859-1");
buf += ch;
}
/** Transliteration fallback for ISO-8859-1: appends a question mark to buf.
The character argument is unnamed and not examined. */
-void Iso88591::Encoder::transliterate(UnicodeChar, string &buf)
+void Iso88591::Encoder::transliterate(unichar, string &buf)
{
buf += '?';
}
-UnicodeChar Iso88591::Decoder::decode_char(const string &str, string::const_iterator &i)
+unichar Iso88591::Decoder::decode_char(const string &str, string::const_iterator &i)
{
if(i==str.end())
return error("No input");
public:
Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { }
- virtual void encode_char(UnicodeChar, std::string &);
+ virtual void encode_char(unichar, std::string &);
private:
- virtual void transliterate(UnicodeChar, std::string &);
+ virtual void transliterate(unichar, std::string &);
};
class Decoder: public Codec::Decoder
public:
Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { }
- virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
+ virtual unichar decode_char(const std::string &, std::string::const_iterator &);
};
virtual const char *get_name() const { return "ISO-8859-1"; }
namespace Msp {
namespace StringCodec {
-void Iso885915::Encoder::encode_char(UnicodeChar ch, string &buf)
+void Iso885915::Encoder::encode_char(unichar ch, string &buf)
{
int tch = transform_mapping_or_direct(mapping, map_size, ch, false);
if(tch<0 || tch>0xFF)
}
/** Transliteration fallback for ISO-8859-15: appends a question mark to buf.
The character argument is unnamed and not examined. */
-void Iso885915::Encoder::transliterate(UnicodeChar, string &buf)
+void Iso885915::Encoder::transliterate(unichar, string &buf)
{
buf += '?';
}
-UnicodeChar Iso885915::Decoder::decode_char(const string &str, string::const_iterator &i)
+unichar Iso885915::Decoder::decode_char(const string &str, string::const_iterator &i)
{
if(i==str.end())
return error("No input");
unsigned char ch = *i;
int tch = transform_mapping_or_direct(mapping, map_size, ch, true);
- UnicodeChar result;
+ unichar result;
if(tch==-1)
result = error("Undefined ISO-8859-15 character");
else
public:
Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { }
- virtual void encode_char(UnicodeChar, std::string &);
+ virtual void encode_char(unichar, std::string &);
private:
- virtual void transliterate(UnicodeChar, std::string &);
+ virtual void transliterate(unichar, std::string &);
};
class Decoder: public Codec::Decoder
public:
Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { }
- virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
+ virtual unichar decode_char(const std::string &, std::string::const_iterator &);
};
virtual const char *get_name() const { return "ISO-8859-15"; }
namespace Msp {
namespace StringCodec {
-void JisX0201::Encoder::encode_char(UnicodeChar ch, string &buf)
+void JisX0201::Encoder::encode_char(unichar ch, string &buf)
{
if(ch>=0 && ch<=0x7F && ch!=0x5C && ch!=0x7E)
buf += ch;
error(ch, buf, "Can't express character in JIS X 0201");
}
/** Transliteration fallback for JIS X 0201: appends a question mark to buf.
The character argument is unnamed and not examined. */
-void JisX0201::Encoder::transliterate(UnicodeChar, string &buf)
+void JisX0201::Encoder::transliterate(unichar, string &buf)
{
buf += '?';
}
-UnicodeChar JisX0201::Decoder::decode_char(const string &str, string::const_iterator &i)
+unichar JisX0201::Decoder::decode_char(const string &str, string::const_iterator &i)
{
if(i==str.end())
return error("No input");
unsigned char ch = *i;
- UnicodeChar result;
+ unichar result;
if(ch==0x5C)
result = 0xA5;
else if(ch==0x7E)
public:
Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { }
- virtual void encode_char(UnicodeChar, std::string &);
+ virtual void encode_char(unichar, std::string &);
private:
- virtual void transliterate(UnicodeChar, std::string &);
+ virtual void transliterate(unichar, std::string &);
};
class Decoder: public Codec::Decoder
public:
Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { }
- virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
+ virtual unichar decode_char(const std::string &, std::string::const_iterator &);
};
virtual const char *get_name() const { return "JIS X 0201"; }
namespace Msp {
namespace StringCodec {
-void JisX0208::Encoder::encode_char(UnicodeChar ucs, string &buf)
+void JisX0208::Encoder::encode_char(unichar ucs, string &buf)
{
unsigned short jis = ucs_to_jisx0208(ucs);
if(jis)
error(ucs, buf, "Can't express character in JIS X 0208");
}
/** Transliteration fallback for JIS X 0208: appends a fixed two-byte sequence
to buf.  The character argument is unnamed and not examined. */
-void JisX0208::Encoder::transliterate(UnicodeChar, string &buf)
+void JisX0208::Encoder::transliterate(unichar, string &buf)
{
// Bytes 0x21 0x29.  NOTE(review): presumably the JIS X 0208 code of the
// full-width question mark -- confirm against the code table.
buf.append("!)", 2);
}
-UnicodeChar JisX0208::Decoder::decode_char(const string &str, string::const_iterator &i)
+unichar JisX0208::Decoder::decode_char(const string &str, string::const_iterator &i)
{
if(i==str.end())
return error("No input");
Kuten jis;
jis.ku = *j++-0x20;
- UnicodeChar result;
+ unichar result;
if(j==str.end())
result = error("Incomplete JIS X 0208 character");
else
}
/** Looks up the unicode character for a JIS X 0208 ku/ten pair in
jisx0208_to_ucs_table.  Returns 0 if either ku or ten is 0 or greater than
0x5E. */
-UnicodeChar jisx0208_to_ucs(Kuten jis)
+unichar jisx0208_to_ucs(Kuten jis)
{
// Both components must lie in 1..0x5E (1..94).
if(jis.ku==0 || jis.ku>0x5E || jis.ten==0 || jis.ten>0x5E)
return 0;
// 94 entries per ku; subtracting 95 makes (ku=1, ten=1) land on index 0.
return jisx0208_to_ucs_table[jis.ku*94 + jis.ten - 95];
}
-Kuten ucs_to_jisx0208(UnicodeChar c)
+Kuten ucs_to_jisx0208(unichar c)
{
if(c<0 || c>0xFFFF)
return Kuten();
public:
Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { }
- virtual void encode_char(UnicodeChar, std::string &);
+ virtual void encode_char(unichar, std::string &);
private:
- virtual void transliterate(UnicodeChar, std::string &);
+ virtual void transliterate(unichar, std::string &);
};
class Decoder: public Codec::Decoder
public:
Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { }
- virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
+ virtual unichar decode_char(const std::string &, std::string::const_iterator &);
};
virtual const char *get_name() const { return "JIS X 0208"; }
operator bool() { return ku!=0 && ten!=0; }
};
-extern UnicodeChar jisx0208_to_ucs(Kuten);
-extern Kuten ucs_to_jisx0208(UnicodeChar);
+extern unichar jisx0208_to_ucs(Kuten);
+extern Kuten ucs_to_jisx0208(unichar);
} // namespace StringCodec
} // namespace Msp
namespace Msp {
namespace StringCodec {
-void Utf8::Encoder::encode_char(UnicodeChar ch, string &buf)
+void Utf8::Encoder::encode_char(unichar ch, string &buf)
{
if(ch<0 || ch>0x10FFFF)
return error(ch, buf, "Can't express character in UTF-8");
}
}
/** Transliteration fallback for UTF-8: appends the three bytes EF BF BD, the
UTF-8 encoding of U+FFFD, to buf.  The character argument is unnamed and not
examined. */
-void Utf8::Encoder::transliterate(UnicodeChar, string &buf)
+void Utf8::Encoder::transliterate(unichar, string &buf)
{
buf.append("\357\277\275", 3); // � U+FFFD Replacement Character (octal 357 277 275 = EF BF BD)
}
-UnicodeChar Utf8::Decoder::decode_char(const string &str, string::const_iterator &i)
+unichar Utf8::Decoder::decode_char(const string &str, string::const_iterator &i)
{
if(i==str.end())
return error("No input");
if((*i&0xC0)==0x80)
{
- UnicodeChar result = error("UTF-8 tail byte found when expecting head");
+ unichar result = error("UTF-8 tail byte found when expecting head");
++i;
return result;
}
string::const_iterator j = i;
- UnicodeChar result = (*j++)&(mask-1);
+ unichar result = (*j++)&(mask-1);
unsigned k;
for(k=1; (k<bytes && j!=str.end() && (*j&0xC0)==0x80); ++k)
public:
Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { }
- virtual void encode_char(UnicodeChar, std::string &);
+ virtual void encode_char(unichar, std::string &);
private:
- virtual void transliterate(UnicodeChar, std::string &);
+ virtual void transliterate(unichar, std::string &);
};
class Decoder: public Codec::Decoder
public:
Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { }
- virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
+ virtual unichar decode_char(const std::string &, std::string::const_iterator &);
};
virtual const char *get_name() const { return "UTF-8"; }
namespace Msp {
namespace StringCodec {
-void Windows1252::Encoder::encode_char(UnicodeChar ch, string &buf)
+void Windows1252::Encoder::encode_char(unichar ch, string &buf)
{
if((ch>=0 && ch<=0x7F) || (ch>=0xA0 && ch<=0xFF))
buf += ch;
}
}
/** Transliteration fallback for Windows-1252: appends a question mark to buf.
The character argument is unnamed and not examined. */
-void Windows1252::Encoder::transliterate(UnicodeChar, string &buf)
+void Windows1252::Encoder::transliterate(unichar, string &buf)
{
buf += '?';
}
-UnicodeChar Windows1252::Decoder::decode_char(const string &str, string::const_iterator &i)
+unichar Windows1252::Decoder::decode_char(const string &str, string::const_iterator &i)
{
if(i==str.end())
return error("No input");
int ch = static_cast<unsigned char>(*i);
- UnicodeChar result;
+ unichar result;
if(ch>=0x80 && ch<=0x9F)
{
result = table[ch-0x80];
public:
Encoder(ErrorMode em = THROW_ON_ERROR): Codec::Encoder(em) { }
- virtual void encode_char(UnicodeChar, std::string &);
+ virtual void encode_char(unichar, std::string &);
private:
- virtual void transliterate(UnicodeChar, std::string &);
+ virtual void transliterate(unichar, std::string &);
};
class Decoder: public Codec::Decoder
public:
Decoder(ErrorMode em = THROW_ON_ERROR): Codec::Decoder(em) { }
- virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
+ virtual unichar decode_char(const std::string &, std::string::const_iterator &);
};
virtual const char *get_name() const { return "Windows-1252"; }