X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fstringcodec%2Fcodec.h;h=5e67d46049078f3555994eeca393ce9691bf75f8;hp=ab2a3ea7c8491edc0297ae81ffddd1ae4e20c23e;hb=HEAD;hpb=056dc68dfc606a2c14126a70321045d6d9f12e0e diff --git a/source/stringcodec/codec.h b/source/stringcodec/codec.h index ab2a3ea..f53bae5 100644 --- a/source/stringcodec/codec.h +++ b/source/stringcodec/codec.h @@ -2,31 +2,21 @@ #define MSP_STRINGCODEC_CODEC_H_ #include -#include +#include +#include "except.h" +#include "ustring.h" namespace Msp { namespace StringCodec { -typedef int UnicodeChar; - -typedef std::basic_string ustring; - enum ErrorMode { + DEFAULT, THROW_ON_ERROR, IGNORE_ERRORS, TRANSLITERATE }; -/** -An exception thrown for all kinds of problems encountered while encoding or -decoding strings. -*/ -class CodecError: public Exception -{ -public: - CodecError(const std::string &w_): Exception(w_) { } -}; /** Base class for string codecs. Use one of the derived classes or the function @@ -36,7 +26,7 @@ Unicode strings are represented as ustrings. An std::string is considered to be an encoded sequence of bytes. A codec is able to determine if an encoded string could be decoded with it. */ -class Codec +class MSPCORE_API Codec { public: /** @@ -47,18 +37,18 @@ public: may find it useful or necessary to implement some other functions too (particularly sync and reset for stateful codecs). */ - class Encoder + class MSPCORE_API Encoder { protected: - ErrorMode err_mode; + ErrorMode err_mode = THROW_ON_ERROR; - Encoder(ErrorMode em): err_mode(em) { } + Encoder(ErrorMode em): err_mode(em==DEFAULT ? THROW_ON_ERROR : em) { } public: - virtual ~Encoder() { } + virtual ~Encoder() = default; /** Encodes a single unicode character. If the character can't be represented in this encoding, error() should be called. */ - virtual void encode_char(UnicodeChar ch, std::string &buf) = 0; + virtual void encode_char(unichar ch, std::string &buf) = 0; /** Encodes a unicode string. This is equivalent to calling encode_char for each character in the string with the same buffer. */ @@ -76,14 +66,21 @@ public: protected: /** Handles an error depending on the error mode. - THROW_ON_ERROR: throws CodecError(msg) + THROW_ON_ERROR: throws err IGNORE_ERRORS: does nothing TRANSLITERATE: calls transliterate(ch, buf) */ - void error(UnicodeChar ch, std::string &buf, const std::string &msg); + template + void error(unichar ch, std::string &buf, const E &err) + { + if(err_mode==TRANSLITERATE) + transliterate(ch, buf); + else if(err_mode!=IGNORE_ERRORS) + throw err; + } /** Attempts to produce an alternative encoding for a unicode character. Typically this includes dropping accent marks or romanizing letters. */ - virtual void transliterate(UnicodeChar ch, std::string &buf) = 0; + virtual void transliterate(unichar ch, std::string &buf) = 0; }; /** @@ -91,21 +88,21 @@ public: Each codec class should contain an Decoder class derived from this. */ - class Decoder + class MSPCORE_API Decoder { protected: - ErrorMode err_mode; + ErrorMode err_mode = THROW_ON_ERROR; - Decoder(ErrorMode em): err_mode(em) { } + Decoder(ErrorMode em): err_mode(em==DEFAULT ? THROW_ON_ERROR : em) { } public: - virtual ~Decoder() { } + virtual ~Decoder() = default; /** Decodes a single character from a string. The iterator is advanced to the next character. For stateful codecs, -1 may be returned if a state change sequence was decoded but no character followed it. If invalid input is encountered, the error() function should be called and the iterator advanced only if it doesn't throw. */ - virtual UnicodeChar decode_char(const std::string &str, std::string::const_iterator &i) = 0; + virtual unichar decode_char(const std::string &str, std::string::const_iterator &i) = 0; /** Decodes a string. */ virtual void decode(const std::string &str, ustring &buf); @@ -119,25 +116,34 @@ public: /** Handles an error depending on the error mode. The return value is suitable for returning from decode_char. - THROW_ON_ERROR: throws CodecError(msg) + THROW_ON_ERROR: throws err IGNORE_ERRORS: returns -1 - TRANSLITERATE: return 0xFFFE */ - UnicodeChar error(const std::string &msg); + TRANSLITERATE: return 0xFFFD */ + template + unichar error(const E &err) + { + if(err_mode==TRANSLITERATE) + return 0xFFFD; + else if(err_mode==IGNORE_ERRORS) + return -1; + else + throw err; + } }; protected: - Codec() { } + Codec() = default; public: - virtual ~Codec() { } + virtual ~Codec() = default; /** Returns the name of the encoding handled by this codec. */ virtual const char *get_name() const = 0; /** Creates an encoder for this codec. */ - virtual Encoder *create_encoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0; + virtual Encoder *create_encoder(ErrorMode err_mode = DEFAULT) const = 0; /** Creates a decoder for this codec. */ - virtual Decoder *create_decoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0; + virtual Decoder *create_decoder(ErrorMode err_mode = DEFAULT) const = 0; /** Determines whether the given string can be successfully decoded with this codec. Note that this function returning true does not guarantee that @@ -149,48 +155,61 @@ public: typedef Codec::Encoder Encoder; typedef Codec::Decoder Decoder; + +/** +A helper class to provide some common functionality. +*/ +template +class StandardCodec: public Codec +{ +private: + ErrorMode err_mode = THROW_ON_ERROR; + +protected: + StandardCodec(ErrorMode em): err_mode(em==DEFAULT ? THROW_ON_ERROR : em) { } + + ErrorMode get_error_mode(ErrorMode em = DEFAULT) const + { return (em==DEFAULT ? err_mode : em); } + +public: + Encoder *create_encoder(ErrorMode em = DEFAULT) const override + { return new typename C::Encoder(get_error_mode(em)); } + + Decoder *create_decoder(ErrorMode em = DEFAULT) const override + { return new typename C::Decoder(get_error_mode(em)); } +}; + + /** Convenience function that decodes a string. */ -template +template ustring decode(const std::string &s) { typename C::Decoder dec; - ustring result; - dec.decode(s, result); - return result; + return dec.decode(s); } /** Convenience function that encodes a string. */ -template +template std::string encode(const ustring &s) { typename C::Encoder enc; - std::string result; - enc.encode(s, result); - enc.sync(result); - return result; + return enc.encode(s); } /** Convenience function that transcodes a string from one codec to another. */ -template +template std::string transcode(const std::string &s) { - typename F::Decoder from; - typename T::Encoder to; - ustring temp; - from.decode(s, temp); - std::string result; - to.encode(temp, result); - to.sync(result); - return result; + return encode(decode(s)); } /** Creates a codec for an encoding by name. The caller is responsible for deleting the codec when it's no longer needed. */ -Codec *create_codec(const std::string &); +MSPCORE_API Codec *create_codec(const std::string &); /** Automatically detects the encoding of a string and creates a codec for it. The codec must be deleted when it's no longer needed. */ -Codec *detect_codec(const std::string &); +MSPCORE_API Codec *detect_codec(const std::string &); } // namespace StringCodec } // namespace Msp