X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fstringcodec%2Fcodec.h;h=5e67d46049078f3555994eeca393ce9691bf75f8;hp=cb8006696e52c17042e304cc66efc1ef5d644815;hb=HEAD;hpb=c5cb2162baeeb7c750595e07ba1cbfcb03702f77 diff --git a/source/stringcodec/codec.h b/source/stringcodec/codec.h index cb80066..f53bae5 100644 --- a/source/stringcodec/codec.h +++ b/source/stringcodec/codec.h @@ -2,31 +2,21 @@ #define MSP_STRINGCODEC_CODEC_H_ #include -#include +#include +#include "except.h" +#include "ustring.h" namespace Msp { namespace StringCodec { -typedef int unichar; - -typedef std::basic_string ustring; - enum ErrorMode { + DEFAULT, THROW_ON_ERROR, IGNORE_ERRORS, TRANSLITERATE }; -/** -An exception thrown for all kinds of problems encountered while encoding or -decoding strings. -*/ -class CodecError: public Exception -{ -public: - CodecError(const std::string &w_): Exception(w_) { } -}; /** Base class for string codecs. Use one of the derived classes or the function @@ -36,7 +26,7 @@ Unicode strings are represented as ustrings. An std::string is considered to be an encoded sequence of bytes. A codec is able to determine if an encoded string could be decoded with it. */ -class Codec +class MSPCORE_API Codec { public: /** @@ -47,14 +37,14 @@ public: may find it useful or necessary to implement some other functions too (particularly sync and reset for stateful codecs). */ - class Encoder + class MSPCORE_API Encoder { protected: - ErrorMode err_mode; + ErrorMode err_mode = THROW_ON_ERROR; - Encoder(ErrorMode em): err_mode(em) { } + Encoder(ErrorMode em): err_mode(em==DEFAULT ? THROW_ON_ERROR : em) { } public: - virtual ~Encoder() { } + virtual ~Encoder() = default; /** Encodes a single unicode character. If the character can't be represented in this encoding, error() should be called. */ @@ -76,10 +66,17 @@ public: protected: /** Handles an error depending on the error mode. - THROW_ON_ERROR: throws CodecError(msg) + THROW_ON_ERROR: throws err IGNORE_ERRORS: does nothing TRANSLITERATE: calls transliterate(ch, buf) */ - void error(unichar ch, std::string &buf, const std::string &msg); + template + void error(unichar ch, std::string &buf, const E &err) + { + if(err_mode==TRANSLITERATE) + transliterate(ch, buf); + else if(err_mode!=IGNORE_ERRORS) + throw err; + } /** Attempts to produce an alternative encoding for a unicode character. Typically this includes dropping accent marks or romanizing letters. */ @@ -91,14 +88,14 @@ public: Each codec class should contain an Decoder class derived from this. */ - class Decoder + class MSPCORE_API Decoder { protected: - ErrorMode err_mode; + ErrorMode err_mode = THROW_ON_ERROR; - Decoder(ErrorMode em): err_mode(em) { } + Decoder(ErrorMode em): err_mode(em==DEFAULT ? THROW_ON_ERROR : em) { } public: - virtual ~Decoder() { } + virtual ~Decoder() = default; /** Decodes a single character from a string. The iterator is advanced to the next character. For stateful codecs, -1 may be returned if a @@ -119,25 +116,34 @@ public: /** Handles an error depending on the error mode. The return value is suitable for returning from decode_char. - THROW_ON_ERROR: throws CodecError(msg) + THROW_ON_ERROR: throws err IGNORE_ERRORS: returns -1 TRANSLITERATE: return 0xFFFD */ - unichar error(const std::string &msg); + template + unichar error(const E &err) + { + if(err_mode==TRANSLITERATE) + return 0xFFFD; + else if(err_mode==IGNORE_ERRORS) + return -1; + else + throw err; + } }; protected: - Codec() { } + Codec() = default; public: - virtual ~Codec() { } + virtual ~Codec() = default; /** Returns the name of the encoding handled by this codec. */ virtual const char *get_name() const = 0; /** Creates an encoder for this codec. */ - virtual Encoder *create_encoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0; + virtual Encoder *create_encoder(ErrorMode err_mode = DEFAULT) const = 0; /** Creates a decoder for this codec. */ - virtual Decoder *create_decoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0; + virtual Decoder *create_decoder(ErrorMode err_mode = DEFAULT) const = 0; /** Determines whether the given string can be successfully decoded with this codec. Note that this function returning true does not guarantee that @@ -149,8 +155,33 @@ public: typedef Codec::Encoder Encoder; typedef Codec::Decoder Decoder; + +/** +A helper class to provide some common functionality. +*/ +template +class StandardCodec: public Codec +{ +private: + ErrorMode err_mode = THROW_ON_ERROR; + +protected: + StandardCodec(ErrorMode em): err_mode(em==DEFAULT ? THROW_ON_ERROR : em) { } + + ErrorMode get_error_mode(ErrorMode em = DEFAULT) const + { return (em==DEFAULT ? err_mode : em); } + +public: + Encoder *create_encoder(ErrorMode em = DEFAULT) const override + { return new typename C::Encoder(get_error_mode(em)); } + + Decoder *create_decoder(ErrorMode em = DEFAULT) const override + { return new typename C::Decoder(get_error_mode(em)); } +}; + + /** Convenience function that decodes a string. */ -template +template ustring decode(const std::string &s) { typename C::Decoder dec; @@ -158,7 +189,7 @@ ustring decode(const std::string &s) } /** Convenience function that encodes a string. */ -template +template std::string encode(const ustring &s) { typename C::Encoder enc; @@ -166,7 +197,7 @@ std::string encode(const ustring &s) } /** Convenience function that transcodes a string from one codec to another. */ -template +template std::string transcode(const std::string &s) { return encode(decode(s)); @@ -174,11 +205,11 @@ std::string transcode(const std::string &s) /** Creates a codec for an encoding by name. The caller is responsible for deleting the codec when it's no longer needed. */ -Codec *create_codec(const std::string &); +MSPCORE_API Codec *create_codec(const std::string &); /** Automatically detects the encoding of a string and creates a codec for it. The codec must be deleted when it's no longer needed. */ -Codec *detect_codec(const std::string &); +MSPCORE_API Codec *detect_codec(const std::string &); } // namespace StringCodec } // namespace Msp