X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fstringcodec%2Fcodec.h;h=d0871ff119ab563ea2ff000ece006c55577ddf48;hp=a307e2ec8acc3a784d1f57ce9d7ca4516dfff4fc;hb=017feade2799ddbecad62b9a7911bf4d3e229dad;hpb=b011b17393069d86790b2291a759280e15c75a0f diff --git a/source/stringcodec/codec.h b/source/stringcodec/codec.h index a307e2e..d0871ff 100644 --- a/source/stringcodec/codec.h +++ b/source/stringcodec/codec.h @@ -2,31 +2,20 @@ #define MSP_STRINGCODEC_CODEC_H_ #include -#include +#include "except.h" +#include "ustring.h" namespace Msp { namespace StringCodec { -typedef int UnicodeChar; - -typedef std::basic_string ustring; - enum ErrorMode { + DEFAULT, THROW_ON_ERROR, IGNORE_ERRORS, TRANSLITERATE }; -/** -An exception thrown for all kinds of problems encountered while encoding or -decoding strings. -*/ -class CodecError: public Exception -{ -public: - CodecError(const std::string &w_): Exception(w_) { } -}; /** Base class for string codecs. Use one of the derived classes or the function @@ -52,13 +41,13 @@ public: protected: ErrorMode err_mode; - Encoder(ErrorMode em): err_mode(em) { } + Encoder(ErrorMode em): err_mode(em==DEFAULT ? THROW_ON_ERROR : em) { } public: virtual ~Encoder() { } /** Encodes a single unicode character. If the character can't be represented in this encoding, error() should be called. */ - virtual void encode_char(UnicodeChar ch, std::string &buf) = 0; + virtual void encode_char(unichar ch, std::string &buf) = 0; /** Encodes a unicode string. This is equivalent to calling encode_char for each character in the string with the same buffer. */ @@ -76,14 +65,21 @@ public: protected: /** Handles an error depending on the error mode. - THROW_ON_ERROR: throws CodecError(msg) + THROW_ON_ERROR: throws err IGNORE_ERRORS: does nothing TRANSLITERATE: calls transliterate(ch, buf) */ - void error(UnicodeChar ch, std::string &buf, const std::string &msg); + template + void error(unichar ch, std::string &buf, const E &err) + { + if(err_mode==TRANSLITERATE) + transliterate(ch, buf); + else if(err_mode!=IGNORE_ERRORS) + throw err; + } /** Attempts to produce an alternative encoding for a unicode character. Typically this includes dropping accent marks or romanizing letters. */ - virtual void transliterate(UnicodeChar ch, std::string &buf) = 0; + virtual void transliterate(unichar ch, std::string &buf) = 0; }; /** @@ -96,7 +92,7 @@ public: protected: ErrorMode err_mode; - Decoder(ErrorMode em): err_mode(em) { } + Decoder(ErrorMode em): err_mode(em==DEFAULT ? THROW_ON_ERROR : em) { } public: virtual ~Decoder() { } @@ -105,7 +101,7 @@ public: state change sequence was decoded but no character followed it. If invalid input is encountered, the error() function should be called and the iterator advanced only if it doesn't throw. */ - virtual UnicodeChar decode_char(const std::string &str, std::string::const_iterator &i) = 0; + virtual unichar decode_char(const std::string &str, std::string::const_iterator &i) = 0; /** Decodes a string. */ virtual void decode(const std::string &str, ustring &buf); @@ -119,10 +115,19 @@ public: /** Handles an error depending on the error mode. The return value is suitable for returning from decode_char. - THROW_ON_ERROR: throws CodecError(msg) + THROW_ON_ERROR: throws err IGNORE_ERRORS: returns -1 - TRANSLITERATE: return 0xFFFE */ - UnicodeChar error(const std::string &msg); + TRANSLITERATE: return 0xFFFD */ + template + unichar error(const E &err) + { + if(err_mode==TRANSLITERATE) + return 0xFFFD; + else if(err_mode==IGNORE_ERRORS) + return -1; + else + throw err; + } }; protected: @@ -134,10 +139,10 @@ public: virtual const char *get_name() const = 0; /** Creates an encoder for this codec. */ - virtual Encoder *create_encoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0; + virtual Encoder *create_encoder(ErrorMode err_mode = DEFAULT) const = 0; /** Creates a decoder for this codec. */ - virtual Decoder *create_decoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0; + virtual Decoder *create_decoder(ErrorMode err_mode = DEFAULT) const = 0; /** Determines whether the given string can be successfully decoded with this codec. Note that this function returning true does not guarantee that @@ -149,8 +154,30 @@ public: typedef Codec::Encoder Encoder; typedef Codec::Decoder Decoder; + +/** +A helper class to provide some common functionality. +*/ +template +class StandardCodec: public Codec +{ +private: + ErrorMode err_mode; + +protected: + StandardCodec(ErrorMode em): err_mode(em==DEFAULT ? THROW_ON_ERROR : em) { } + +public: + virtual Encoder *create_encoder(ErrorMode em = DEFAULT) const + { return new typename C::Encoder(em==DEFAULT ? err_mode : em); } + + virtual Decoder *create_decoder(ErrorMode em = DEFAULT) const + { return new typename C::Decoder(em==DEFAULT ? err_mode : em); } +}; + + /** Convenience function that decodes a string. */ -template +template ustring decode(const std::string &s) { typename C::Decoder dec; @@ -158,7 +185,7 @@ ustring decode(const std::string &s) } /** Convenience function that encodes a string. */ -template +template std::string encode(const ustring &s) { typename C::Encoder enc; @@ -166,7 +193,7 @@ std::string encode(const ustring &s) } /** Convenience function that transcodes a string from one codec to another. */ -template +template std::string transcode(const std::string &s) { return encode(decode(s));