X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fstringcodec%2Fcodec.h;h=d0871ff119ab563ea2ff000ece006c55577ddf48;hp=53775206ea1915026b79e5ed75d02b227d74dca0;hb=017feade2799ddbecad62b9a7911bf4d3e229dad;hpb=967785734be5c3fc6f75da122c2d93ebbb338271 diff --git a/source/stringcodec/codec.h b/source/stringcodec/codec.h index 5377520..d0871ff 100644 --- a/source/stringcodec/codec.h +++ b/source/stringcodec/codec.h @@ -1,32 +1,21 @@ -#ifndef MSP_STRINGS_CODEC_H_ -#define MSP_STRINGS_CODEC_H_ +#ifndef MSP_STRINGCODEC_CODEC_H_ +#define MSP_STRINGCODEC_CODEC_H_ #include -#include +#include "except.h" +#include "ustring.h" namespace Msp { -namespace Codecs { - -typedef int UnicodeChar; - -typedef std::basic_string ustring; +namespace StringCodec { enum ErrorMode { + DEFAULT, THROW_ON_ERROR, IGNORE_ERRORS, TRANSLITERATE }; -/** -An exception thrown for all kinds of problems encountered while encoding or -decoding strings. -*/ -class CodecError: public Exception -{ -public: - CodecError(const std::string &w_): Exception(w_) { } -}; /** Base class for string codecs. Use one of the derived classes or the function @@ -52,13 +41,13 @@ public: protected: ErrorMode err_mode; - Encoder(ErrorMode em): err_mode(em) { } + Encoder(ErrorMode em): err_mode(em==DEFAULT ? THROW_ON_ERROR : em) { } public: virtual ~Encoder() { } /** Encodes a single unicode character. If the character can't be represented in this encoding, error() should be called. */ - virtual void encode_char(UnicodeChar ch, std::string &buf) = 0; + virtual void encode_char(unichar ch, std::string &buf) = 0; /** Encodes a unicode string. This is equivalent to calling encode_char for each character in the string with the same buffer. */ @@ -76,14 +65,21 @@ public: protected: /** Handles an error depending on the error mode. - THROW_ON_ERROR: throws CodecError(msg) + THROW_ON_ERROR: throws err IGNORE_ERRORS: does nothing TRANSLITERATE: calls transliterate(ch, buf) */ - void error(UnicodeChar ch, std::string &buf, const std::string &msg); + template + void error(unichar ch, std::string &buf, const E &err) + { + if(err_mode==TRANSLITERATE) + transliterate(ch, buf); + else if(err_mode!=IGNORE_ERRORS) + throw err; + } /** Attempts to produce an alternative encoding for a unicode character. Typically this includes dropping accent marks or romanizing letters. */ - virtual void transliterate(UnicodeChar ch, std::string &buf) = 0; + virtual void transliterate(unichar ch, std::string &buf) = 0; }; /** @@ -96,7 +92,7 @@ public: protected: ErrorMode err_mode; - Decoder(ErrorMode em): err_mode(em) { } + Decoder(ErrorMode em): err_mode(em==DEFAULT ? THROW_ON_ERROR : em) { } public: virtual ~Decoder() { } @@ -105,7 +101,7 @@ public: state change sequence was decoded but no character followed it. If invalid input is encountered, the error() function should be called and the iterator advanced only if it doesn't throw. */ - virtual UnicodeChar decode_char(const std::string &str, std::string::const_iterator &i) = 0; + virtual unichar decode_char(const std::string &str, std::string::const_iterator &i) = 0; /** Decodes a string. */ virtual void decode(const std::string &str, ustring &buf); @@ -119,10 +115,19 @@ public: /** Handles an error depending on the error mode. The return value is suitable for returning from decode_char. - THROW_ON_ERROR: throws CodecError(msg) + THROW_ON_ERROR: throws err IGNORE_ERRORS: returns -1 - TRANSLITERATE: return 0xFFFE */ - UnicodeChar error(const std::string &msg); + TRANSLITERATE: return 0xFFFD */ + template + unichar error(const E &err) + { + if(err_mode==TRANSLITERATE) + return 0xFFFD; + else if(err_mode==IGNORE_ERRORS) + return -1; + else + throw err; + } }; protected: @@ -134,10 +139,10 @@ public: virtual const char *get_name() const = 0; /** Creates an encoder for this codec. */ - virtual Encoder *create_encoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0; + virtual Encoder *create_encoder(ErrorMode err_mode = DEFAULT) const = 0; /** Creates a decoder for this codec. */ - virtual Decoder *create_decoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0; + virtual Decoder *create_decoder(ErrorMode err_mode = DEFAULT) const = 0; /** Determines whether the given string can be successfully decoded with this codec. Note that this function returning true does not guarantee that @@ -149,39 +154,49 @@ public: typedef Codec::Encoder Encoder; typedef Codec::Decoder Decoder; + +/** +A helper class to provide some common functionality. +*/ +template +class StandardCodec: public Codec +{ +private: + ErrorMode err_mode; + +protected: + StandardCodec(ErrorMode em): err_mode(em==DEFAULT ? THROW_ON_ERROR : em) { } + +public: + virtual Encoder *create_encoder(ErrorMode em = DEFAULT) const + { return new typename C::Encoder(em==DEFAULT ? err_mode : em); } + + virtual Decoder *create_decoder(ErrorMode em = DEFAULT) const + { return new typename C::Decoder(em==DEFAULT ? err_mode : em); } +}; + + /** Convenience function that decodes a string. */ -template +template ustring decode(const std::string &s) { typename C::Decoder dec; - ustring result; - dec.decode(s, result); - return result; + return dec.decode(s); } /** Convenience function that encodes a string. */ -template +template std::string encode(const ustring &s) { typename C::Encoder enc; - std::string result; - enc.encode(s, result); - enc.sync(result); - return result; + return enc.encode(s); } /** Convenience function that transcodes a string from one codec to another. */ -template +template std::string transcode(const std::string &s) { - typename F::Decoder from; - typename T::Encoder to; - ustring temp; - from.decode(s, temp); - std::string result; - to.encode(temp, result); - to.sync(result); - return result; + return encode(decode(s)); } /** Creates a codec for an encoding by name. The caller is responsible for @@ -192,7 +207,7 @@ Codec *create_codec(const std::string &); The codec must be deleted when it's no longer needed. */ Codec *detect_codec(const std::string &); -} // namespace Codecs +} // namespace StringCodec } // namespace Msp #endif