X-Git-Url: http://git.tdb.fi/?a=blobdiff_plain;f=source%2Fstringcodec%2Fcodec.h;h=00f03b0cc4b4c4f2f4fcd133f2d3fa67e9fac5fb;hb=94ee3a1040f67d9de2e92fc34049642b08d65b3e;hp=53775206ea1915026b79e5ed75d02b227d74dca0;hpb=967785734be5c3fc6f75da122c2d93ebbb338271;p=libs%2Fcore.git diff --git a/source/stringcodec/codec.h b/source/stringcodec/codec.h index 5377520..00f03b0 100644 --- a/source/stringcodec/codec.h +++ b/source/stringcodec/codec.h @@ -1,15 +1,12 @@ -#ifndef MSP_STRINGS_CODEC_H_ -#define MSP_STRINGS_CODEC_H_ +#ifndef MSP_STRINGCODEC_CODEC_H_ +#define MSP_STRINGCODEC_CODEC_H_ #include -#include +#include "except.h" +#include "ustring.h" namespace Msp { -namespace Codecs { - -typedef int UnicodeChar; - -typedef std::basic_string ustring; +namespace StringCodec { enum ErrorMode { @@ -18,15 +15,6 @@ enum ErrorMode TRANSLITERATE }; -/** -An exception thrown for all kinds of problems encountered while encoding or -decoding strings. -*/ -class CodecError: public Exception -{ -public: - CodecError(const std::string &w_): Exception(w_) { } -}; /** Base class for string codecs. Use one of the derived classes or the function @@ -58,7 +46,7 @@ public: /** Encodes a single unicode character. If the character can't be represented in this encoding, error() should be called. */ - virtual void encode_char(UnicodeChar ch, std::string &buf) = 0; + virtual void encode_char(unichar ch, std::string &buf) = 0; /** Encodes a unicode string. This is equivalent to calling encode_char for each character in the string with the same buffer. */ @@ -76,14 +64,21 @@ public: protected: /** Handles an error depending on the error mode. - THROW_ON_ERROR: throws CodecError(msg) + THROW_ON_ERROR: throws err IGNORE_ERRORS: does nothing TRANSLITERATE: calls transliterate(ch, buf) */ - void error(UnicodeChar ch, std::string &buf, const std::string &msg); + template + void error(unichar ch, std::string &buf, const E &err) + { + if(err_mode==TRANSLITERATE) + transliterate(ch, buf); + else if(err_mode!=IGNORE_ERRORS) + throw err; + } /** Attempts to produce an alternative encoding for a unicode character. Typically this includes dropping accent marks or romanizing letters. */ - virtual void transliterate(UnicodeChar ch, std::string &buf) = 0; + virtual void transliterate(unichar ch, std::string &buf) = 0; }; /** @@ -105,7 +100,7 @@ public: state change sequence was decoded but no character followed it. If invalid input is encountered, the error() function should be called and the iterator advanced only if it doesn't throw. */ - virtual UnicodeChar decode_char(const std::string &str, std::string::const_iterator &i) = 0; + virtual unichar decode_char(const std::string &str, std::string::const_iterator &i) = 0; /** Decodes a string. */ virtual void decode(const std::string &str, ustring &buf); @@ -119,10 +114,19 @@ public: /** Handles an error depending on the error mode. The return value is suitable for returning from decode_char. - THROW_ON_ERROR: throws CodecError(msg) + THROW_ON_ERROR: throws err IGNORE_ERRORS: returns -1 - TRANSLITERATE: return 0xFFFE */ - UnicodeChar error(const std::string &msg); + TRANSLITERATE: return 0xFFFD */ + template + unichar error(const E &err) + { + if(err_mode==TRANSLITERATE) + return 0xFFFD; + else if(err_mode==IGNORE_ERRORS) + return -1; + else + throw err; + } }; protected: @@ -154,9 +158,7 @@ template ustring decode(const std::string &s) { typename C::Decoder dec; - ustring result; - dec.decode(s, result); - return result; + return dec.decode(s); } /** Convenience function that encodes a string. */ @@ -164,24 +166,14 @@ template std::string encode(const ustring &s) { typename C::Encoder enc; - std::string result; - enc.encode(s, result); - enc.sync(result); - return result; + return enc.encode(s); } /** Convenience function that transcodes a string from one codec to another. */ template std::string transcode(const std::string &s) { - typename F::Decoder from; - typename T::Encoder to; - ustring temp; - from.decode(s, temp); - std::string result; - to.encode(temp, result); - to.sync(result); - return result; + return encode(decode(s)); } /** Creates a codec for an encoding by name. The caller is responsible for @@ -192,7 +184,7 @@ Codec *create_codec(const std::string &); The codec must be deleted when it's no longer needed. */ Codec *detect_codec(const std::string &); -} // namespace Codecs +} // namespace StringCodec } // namespace Msp #endif