X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fcodec.h;fp=source%2Fcodec.h;h=0000000000000000000000000000000000000000;hp=e04e909fb7a1fc5c4750db7e13f33f2d44a96f92;hb=b42ed73a1b241c0e93ee03c43c4584b41c549bac;hpb=5b1368cb791cab043f0435628cacbaff36e39b7b diff --git a/source/codec.h b/source/codec.h deleted file mode 100644 index e04e909..0000000 --- a/source/codec.h +++ /dev/null @@ -1,205 +0,0 @@ -/* $Id$ - -This file is part of libmspstrings -Copyright © 2006-2007 Mikko Rasa -Distributed under the LGPL -*/ - -#ifndef MSP_STRINGS_CODEC_H_ -#define MSP_STRINGS_CODEC_H_ - -#include -#include - -namespace Msp { -namespace Codecs { - -typedef int UnicodeChar; - -typedef std::basic_string ustring; - -enum ErrorMode -{ - THROW_ON_ERROR, - IGNORE_ERRORS, - TRANSLITERATE -}; - -/** -An exception thrown for all kinds of problems encountered while encoding or -decoding strings. -*/ -class CodecError: public Exception -{ -public: - CodecError(const std::string &w_): Exception(w_) { } -}; - -/** -Base class for string codecs. Use one of the derived classes or the function -create_codec to create a specific codec. - -Unicode strings are represented as ustrings. An std::string is considered to -be an encoded sequence of bytes. A codec is able to determine if an encoded -string could be decoded with it. -*/ -class Codec -{ -public: - /** - Base class for string encoder. - - Each codec class should contain an Encoder class derived from this. The - encode_char and transliterate functions must be overloaded. Some encoders - may find it useful or necessary to implement some other functions too - (particularly sync and reset for stateful codecs). - */ - class Encoder - { - protected: - ErrorMode err_mode; - - Encoder(ErrorMode em): err_mode(em) { } - public: - virtual ~Encoder() { } - - /** Encodes a single unicode character. If the character can't be - represented in this encoding, error() should be called. */ - virtual void encode_char(UnicodeChar ch, std::string &buf) = 0; - - /** Encodes a unicode string. This is equivalent to calling encode_char - for each character in the string with the same buffer. */ - virtual void encode(const ustring &str, std::string &buf); - - std::string encode(const ustring &); - - /** Procuces a sequence of bytes that will bring the encoder back to the - initial state. */ - virtual void sync(std::string &buf) { (void)buf; } - - /** Resets the encoder to the initial state without producing output. */ - virtual void reset() { } - - protected: - /** Handles an error depending on the error mode. - - THROW_ON_ERROR: throws CodecError(msg) - IGNORE_ERRORS: does nothing - TRANSLITERATE: calls transliterate(ch, buf) */ - void error(UnicodeChar ch, std::string &buf, const std::string &msg); - - /** Attempts to produce an alternative encoding for a unicode character. - Typically this includes dropping accent marks or romanizing letters. */ - virtual void transliterate(UnicodeChar ch, std::string &buf) = 0; - }; - - /** - Base class for string decoder. - - Each codec class should contain an Decoder class derived from this. - */ - class Decoder - { - protected: - ErrorMode err_mode; - - Decoder(ErrorMode em): err_mode(em) { } - public: - virtual ~Decoder() { } - - /** Decodes a single character from a string. The iterator is advanced - to the next character. For stateful codecs, -1 may be returned if a - state change sequence was decoded but no character followed it. If - invalid input is encountered, the error() function should be called and - the iterator advanced only if it doesn't throw. */ - virtual UnicodeChar decode_char(const std::string &str, std::string::const_iterator &i) = 0; - - /** Decodes a string. */ - virtual void decode(const std::string &str, ustring &buf); - - ustring decode(const std::string &); - - /** Resets the decoder to the initial state. */ - virtual void reset() { } - - protected: - /** Handles an error depending on the error mode. The return value is - suitable for returning from decode_char. - - THROW_ON_ERROR: throws CodecError(msg) - IGNORE_ERRORS: returns -1 - TRANSLITERATE: return 0xFFFE */ - UnicodeChar error(const std::string &msg); - }; - -protected: - Codec() { } -public: - virtual ~Codec() { } - - /** Returns the name of the encoding handled by this codec. */ - virtual const char *get_name() const = 0; - - /** Creates an encoder for this codec. */ - virtual Encoder *create_encoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0; - - /** Creates a decoder for this codec. */ - virtual Decoder *create_decoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0; - - /** Determines whether the given string can be successfully decoded with - this codec. Note that this function returning true does not guarantee that - the string was actually encoded with this codec. In particular, many 8-bit - encodings are indistinguishable. */ - virtual bool detect(const std::string &) const; -}; - -typedef Codec::Encoder Encoder; -typedef Codec::Decoder Decoder; - -/** Convenience function that decodes a string. */ -template -ustring decode(const std::string &s) -{ - typename C::Decoder dec; - ustring result; - dec.decode(s, result); - return result; -} - -/** Convenience function that encodes a string. */ -template -std::string encode(const ustring &s) -{ - typename C::Encoder enc; - std::string result; - enc.encode(s, result); - enc.sync(result); - return result; -} - -/** Convenience function that transcodes a string from one codec to another. */ -template -std::string transcode(const std::string &s) -{ - typename F::Decoder from; - typename T::Encoder to; - ustring temp; - from.decode(s, temp); - std::string result; - to.encode(temp, result); - to.sync(result); - return result; -} - -/** Creates a codec for an encoding by name. The caller is responsible for -deleting the codec when it's no longer needed. */ -Codec *create_codec(const std::string &); - -/** Automatically detects the encoding of a string and creates a codec for it. -The codec must be deleted when it's no longer needed. */ -Codec *detect_codec(const std::string &); - -} // namespace Codecs -} // namespace Msp - -#endif