X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fcodec.h;h=e04e909fb7a1fc5c4750db7e13f33f2d44a96f92;hp=e8b172eb5af068004d6ad051642ced663a2b9608;hb=5b1368cb791cab043f0435628cacbaff36e39b7b;hpb=36f9e78ae75f5e14b132f37d249340ad3480b8ce diff --git a/source/codec.h b/source/codec.h index e8b172e..e04e909 100644 --- a/source/codec.h +++ b/source/codec.h @@ -56,62 +56,41 @@ public: */ class Encoder { + protected: + ErrorMode err_mode; + + Encoder(ErrorMode em): err_mode(em) { } public: virtual ~Encoder() { } - /** - Encodes a single unicode character. If the character can't be - represented in this encoding, behavior depends on the error mode - specified for the encoder: - - For THROW_ON_ERROR, an exception is thrown. - - For IGNORE_ERRORS, nothing is done. - - For TRANSLITERATE, the encoder attempts to select a character or a string - or characters that closely approximates the non-representable character. - */ - virtual void encode_char(UnicodeChar ch, std::string &buf) =0; + /** Encodes a single unicode character. If the character can't be + represented in this encoding, error() should be called. */ + virtual void encode_char(UnicodeChar ch, std::string &buf) = 0; - /** - Encodes a unicode string. This is equivalent to callind encode_char for - each character in the string with the same buffer. - */ + /** Encodes a unicode string. This is equivalent to calling encode_char + for each character in the string with the same buffer. */ virtual void encode(const ustring &str, std::string &buf); std::string encode(const ustring &); - /** - Procuces a sequence of bytes that will bring the encoder back to the - initial state. - */ + /** Produces a sequence of bytes that will bring the encoder back to the + initial state. */ virtual void sync(std::string &buf) { (void)buf; } - /** - Resets the encoder to the initial state without producing output. 
*/ virtual void reset() { } - protected: - ErrorMode err_mode; - - Encoder(ErrorMode em): err_mode(em) { } - - /** - Handles an error depending on the error mode. - - For THROW_ON_ERROR, throws CodecError(msg). - For IGNORE_ERROR, does nothing. + protected: + /** Handles an error depending on the error mode. - For TRANSLITERATE, calls transliterate(ch, buf). - */ + THROW_ON_ERROR: throws CodecError(msg) + IGNORE_ERRORS: does nothing + TRANSLITERATE: calls transliterate(ch, buf) */ void error(UnicodeChar ch, std::string &buf, const std::string &msg); - /** - Attempts to produce an alternative encoding for a unicode character. - Typically this includes dropping accent marks or romanizing letters. - */ - virtual void transliterate(UnicodeChar ch, std::string &buf) =0; + /** Attempts to produce an alternative encoding for a unicode character. + Typically this includes dropping accent marks or romanizing letters. */ + virtual void transliterate(UnicodeChar ch, std::string &buf) = 0; }; /** @@ -121,81 +100,63 @@ public: */ class Decoder { + protected: + ErrorMode err_mode; + + Decoder(ErrorMode em): err_mode(em) { } public: virtual ~Decoder() { } - /** - Decodes a single character from a string. The iterator is advanced to - the next character. For stateful codecs, -1 may be returned if a state - change sequence was decoded but no character followed it. In case a - decoding error occurs, behavior depends on the error mode specified for - the decoder: - - For THROW_ON_ERROR, an exception is thrown and the iterator is left at - the erroneous character. + /** Decodes a single character from a string. The iterator is advanced + to the next character. For stateful codecs, -1 may be returned if a + state change sequence was decoded but no character followed it. If + invalid input is encountered, the error() function should be called and + the iterator advanced only if it doesn't throw. 
*/ + virtual UnicodeChar decode_char(const std::string &str, std::string::const_iterator &i) = 0; - For IGNORE_ERRORS, -1 is returned and the iterator is advanced. - - For TRANSLITERATE, 0xFFFE is returned and the iterator is advanced. - */ - virtual UnicodeChar decode_char(const std::string &str, std::string::const_iterator &i) =0; - - /** - Decodes a string. - */ + /** Decodes a string. */ virtual void decode(const std::string &str, ustring &buf); ustring decode(const std::string &); - /** - Resets the decoder to the initial state. - */ + /** Resets the decoder to the initial state. */ virtual void reset() { } - protected: - ErrorMode err_mode; - - Decoder(ErrorMode em): err_mode(em) { } - /** - Handles an error depending on the error mode. - */ - UnicodeChar error(const std::string &); + protected: + /** Handles an error depending on the error mode. The return value is + suitable for returning from decode_char. + + THROW_ON_ERROR: throws CodecError(msg) + IGNORE_ERRORS: returns -1 + TRANSLITERATE: returns 0xFFFE */ + UnicodeChar error(const std::string &msg); }; +protected: + Codec() { } +public: virtual ~Codec() { } - /** - Returns the name of the encoding handled by this codec. - */ - virtual const char *get_name() const =0; + /** Returns the name of the encoding handled by this codec. */ + virtual const char *get_name() const = 0; - /** - Creates an encoder for this codec. - */ - virtual Encoder *create_encoder(ErrorMode err_mode=THROW_ON_ERROR) const =0; + /** Creates an encoder for this codec. */ + virtual Encoder *create_encoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0; - /** - Creates a decoder for this codec. - */ - virtual Decoder *create_decoder(ErrorMode err_mode=THROW_ON_ERROR) const =0; + /** Creates a decoder for this codec. */ + virtual Decoder *create_decoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0; - /** - Determines whether the given string can be successfully decoded with this - codec. 
Note that this function returning true does not guarantee that the - string was actually encoded with this codec. In particular, many 8-bit - encodings are indistinguishable. - */ + /** Determines whether the given string can be successfully decoded with + this codec. Note that this function returning true does not guarantee that + the string was actually encoded with this codec. In particular, many 8-bit + encodings are indistinguishable. */ virtual bool detect(const std::string &) const; -protected: - Codec() { } }; typedef Codec::Encoder Encoder; typedef Codec::Decoder Decoder; -/** -Convenience function that decodes a string. -*/ +/** Convenience function that decodes a string. */ template ustring decode(const std::string &s) { @@ -205,9 +166,7 @@ ustring decode(const std::string &s) return result; } -/** -Convenience function that encodes a string. -*/ +/** Convenience function that encodes a string. */ template std::string encode(const ustring &s) { @@ -218,9 +177,7 @@ std::string encode(const ustring &s) return result; } -/** -Convenience function that transcodes a string from one codec to another. -*/ +/** Convenience function that transcodes a string from one codec to another. */ template std::string transcode(const std::string &s) { @@ -234,16 +191,12 @@ std::string transcode(const std::string &s) return result; } -/** -Creates a codec for an encoding by name. The caller is responsible for -deleting the codec when it's no longer needed. -*/ +/** Creates a codec for an encoding by name. The caller is responsible for +deleting the codec when it's no longer needed. */ Codec *create_codec(const std::string &); -/** -Automatically detects the encoding of a string and creates a codec for it. -The codec must be deleted when it's no longer needed. -*/ +/** Automatically detects the encoding of a string and creates a codec for it. +The codec must be deleted when it's no longer needed. */ Codec *detect_codec(const std::string &); } // namespace Codecs