X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fcodec.h;h=3470ccc68e6c65ad4890f499a54b41c88fa91256;hp=bed3d93cb2c3e1f245492c4eec43e8db4ef65c58;hb=c88a239ff3b218ad0226e8c53356350ccc1f7014;hpb=f47bc86e6ce900c5323e593db003c93110538268 diff --git a/source/codec.h b/source/codec.h index bed3d93..3470ccc 100644 --- a/source/codec.h +++ b/source/codec.h @@ -39,38 +39,83 @@ public: Base class for string codecs. Use one of the derived classes or the function create_codec to create a specific codec. -For the purposes of this class, an ustring is considered to contain -Unicode characters and an std::string is considered to be an encoded sequence -of bytes. A codec is able to determine if an encoded string could be decoded -with it. +Unicode strings are represented as ustrings. An std::string is considered to +be an encoded sequence of bytes. A codec is able to determine if an encoded +string could be decoded with it. */ class Codec { public: /** - Base class for string encoder. Each codec class should contain an Encoder - class derived from this. + Base class for string encoder. + + Each codec class should contain an Encoder class derived from this. The + encode_char and transliterate functions must be overridden. Some encoders + may find it useful or necessary to implement some other functions too + (particularly sync and reset for stateful codecs). */ class Encoder { public: virtual ~Encoder() { } + /** + Encodes a single unicode character. If the character can't be + represented in this encoding, behavior depends on the error mode + specified for the encoder: + + For THROW_ON_ERROR, an exception is thrown. + + For IGNORE_ERRORS, nothing is done. + + For TRANSLITERATE, the encoder attempts to select a character or a string + of characters that closely approximates the non-representable character. + */ virtual void encode_char(UnicodeChar ch, std::string &buf) =0; + + /** + Encodes a unicode string. 
This is equivalent to calling encode_char for + each character in the string with the same buffer. + */ virtual void encode(const ustring &str, std::string &buf); + + /** + Produces a sequence of bytes that will bring the encoder back to the + initial state. + */ virtual void sync(std::string &buf) { (void)buf; } + + /** + Resets the encoder to the initial state without producing output. + */ virtual void reset() { } protected: ErrorMode err_mode; Encoder(ErrorMode em): err_mode(em) { } - void error(UnicodeChar, std::string &, const std::string &); - virtual void transliterate(UnicodeChar, std::string &) { } + + /** + Handles an error depending on the error mode. + + For THROW_ON_ERROR, throws CodecError(msg). + + For IGNORE_ERRORS, does nothing. + + For TRANSLITERATE, calls transliterate(ch, buf). + */ + void error(UnicodeChar ch, std::string &buf, const std::string &msg); + + /** + Attempts to produce an alternative encoding for a unicode character. + Typically this includes dropping accent marks or romanizing letters. + */ + virtual void transliterate(UnicodeChar ch, std::string &buf) =0; }; /** - Base class for string decoder. Each codec class should contain an Decoder - class derived from this. + Base class for string decoder. + + Each codec class should contain a Decoder class derived from this. */ class Decoder { @@ -93,12 +138,23 @@ public: */ virtual UnicodeChar decode_char(const std::string &str, std::string::const_iterator &i) =0; + /** + Decodes a string. + */ virtual void decode(const std::string &str, ustring &buf); + + /** + Resets the decoder to the initial state. + */ virtual void reset() { } protected: ErrorMode err_mode; Decoder(ErrorMode em): err_mode(em) { } + + /** + Handles an error depending on the error mode. + */ UnicodeChar error(const std::string &); }; @@ -129,7 +185,7 @@ typedef Codec::Encoder Encoder; typedef Codec::Decoder Decoder; /** -Convenience function that decodes a string using the given codec. 
+Convenience function that decodes a string. */ template ustring decode(const std::string &s) @@ -140,6 +196,9 @@ ustring decode(const std::string &s) return result; } +/** +Convenience function that encodes a string. +*/ template std::string encode(const ustring &s) { @@ -150,6 +209,9 @@ std::string encode(const ustring &s) return result; } +/** +Convenience function that transcodes a string from one codec to another. +*/ template std::string transcode(const std::string &s) {