3 This file is part of libmspstrings
4 Copyright © 2006-2007 Mikko Rasa
5 Distributed under the LGPL
8 #ifndef MSP_STRINGS_CODEC_H_
9 #define MSP_STRINGS_CODEC_H_
12 #include <msp/core/error.h>
// Type used for a single Unicode character. It is a plain (signed) int; the
// decoder documentation in this file mentions -1 as a possible return value.
17 typedef int UnicodeChar;
// String of UnicodeChar values, i.e. decoded text, as opposed to the encoded
// bytes that this file keeps in std::string.
// NOTE(review): the C++ standard only specifies std::char_traits for the
// built-in character types, so basic_string<int> depends on the library
// providing a generic char_traits - confirm for the targeted toolchains.
19 typedef std::basic_string<UnicodeChar> ustring;
29 An exception thrown for all kinds of problems encountered while encoding or
// Exception type for codec problems. Derives from Exception, which is declared
// outside this view (presumably by msp/core/error.h - confirm).
32 class CodecError: public Exception
// Builds the error from the message w_, which is handed unchanged to the
// Exception base class.
35 CodecError(const std::string &w_): Exception(w_) { }
39 Base class for string codecs. Use one of the derived classes or the function
40 create_codec to create a specific codec.
42 For the purposes of this class, a ustring is considered to contain
43 Unicode characters and an std::string is considered to be an encoded sequence
44 of bytes. A codec is able to determine if an encoded string could be decoded
51 Base class for string encoders. Each codec class should contain an Encoder
52 class derived from this.
// Virtual destructor, so that a derived encoder can be destroyed through an
// Encoder pointer.
57 virtual ~Encoder() { }
// Pure virtual: every concrete encoder has to implement the encoding of a
// single character ch. Presumably the encoded bytes are appended to buf -
// confirm against the implementations.
59 virtual void encode_char(UnicodeChar ch, std::string &buf) =0;
// Encodes the string str into buf. Declared here and defined outside this
// view; presumably it calls encode_char for each character - TODO confirm.
60 virtual void encode(const ustring &str, std::string &buf);
// The default implementation does nothing; the (void) cast only marks buf as
// intentionally unused. NOTE(review): looks like a hook for encoders that
// keep pending state which must be written out - confirm.
61 virtual void sync(std::string &buf) { (void)buf; }
// The default implementation does nothing; derived encoders may override it.
62 virtual void reset() { }
// Stores the requested error mode in err_mode (the member itself is declared
// on a line outside this view).
66 Encoder(ErrorMode em): err_mode(em) { }
// Error helper, declared here and defined elsewhere. The parameters are
// unnamed, so their roles cannot be stated from this declaration alone.
67 void error(UnicodeChar, std::string &, const std::string &);
// The default implementation ignores both arguments and does nothing;
// derived encoders may override it.
68 virtual void transliterate(UnicodeChar, std::string &) { }
72 Base class for string decoders. Each codec class should contain a Decoder
73 class derived from this.
// Virtual destructor, so that a derived decoder can be destroyed through a
// Decoder pointer.
78 virtual ~Decoder() { }
81 Decodes a single character from a string. The iterator is advanced to
82 the next character. For stateful codecs, -1 may be returned if a state
83 change sequence was decoded but no character followed it. In case a
84 decoding error occurs, behavior depends on the error mode specified for
87 For THROW_ON_ERROR, an exception is thrown and the iterator is left at
88 the erroneous character.
90 For IGNORE_ERRORS, -1 is returned and the iterator is advanced.
92 For TRANSLITERATE, 0xFFFE is returned and the iterator is advanced.
// Pure virtual: every concrete decoder has to implement single-character
// decoding. The iterator i is taken by non-const reference, which lets the
// implementation move it forward within str.
94 virtual UnicodeChar decode_char(const std::string &str, std::string::const_iterator &i) =0;
// Decodes the encoded string str into buf. Declared here and defined outside
// this view; presumably it calls decode_char repeatedly - TODO confirm.
96 virtual void decode(const std::string &str, ustring &buf);
// The default implementation does nothing; derived decoders may override it.
97 virtual void reset() { }
// Stores the requested error mode in err_mode (the member itself is declared
// on a line outside this view).
101 Decoder(ErrorMode em): err_mode(em) { }
// Error helper, declared here and defined elsewhere. It returns a
// UnicodeChar; presumably the value depends on err_mode - confirm against
// the definition.
102 UnicodeChar error(const std::string &);
108 Creates an encoder for this codec.
// Pure virtual factory returning a raw Encoder pointer; err_mode defaults to
// THROW_ON_ERROR. NOTE(review): ownership of the returned object is not
// stated here - presumably the caller deletes it; confirm.
// NOTE(review): default arguments of virtual functions are selected by the
// static type of the call, so overrides should repeat the same default.
110 virtual Encoder *create_encoder(ErrorMode err_mode=THROW_ON_ERROR) const =0;
113 Creates a decoder for this codec.
// Pure virtual factory returning a raw Decoder pointer; err_mode defaults to
// THROW_ON_ERROR. NOTE(review): ownership of the returned object is not
// stated here - presumably the caller deletes it; confirm.
// NOTE(review): default arguments of virtual functions are selected by the
// static type of the call, so overrides should repeat the same default.
115 virtual Decoder *create_decoder(ErrorMode err_mode=THROW_ON_ERROR) const =0;
118 Determines whether the given string can be successfully decoded with this
119 codec. Note that this function returning true does not guarantee that the
120 string was actually encoded with this codec. In particular, many 8-bit
121 encodings are indistinguishable.
// Virtual but not pure: a base implementation is defined outside this view
// and derived codecs may override it. The unnamed parameter is the string
// to be tested.
123 virtual bool detect(const std::string &) const;
// Aliases that let the nested Codec::Encoder and Codec::Decoder types be
// named without the Codec:: qualifier (presumably at namespace scope -
// the enclosing braces are outside this view).
128 typedef Codec::Encoder Encoder;
129 typedef Codec::Decoder Decoder;
132 Convenience function that decodes a string using the given codec.
// Takes the encoded bytes in s and returns a ustring. C is the codec class
// whose nested Decoder type performs the work.
135 ustring decode(const std::string &s)
// The decoder is created without arguments, so C::Decoder must be
// constructible that way.
137 typename C::Decoder dec;
// The decoded characters are written to result (declared on a line outside
// this view).
139 dec.decode(s, result);
// Takes the Unicode string s and returns a std::string. C is the codec class
// whose nested Encoder type performs the work.
144 std::string encode(const ustring &s)
// The encoder is created without arguments, so C::Encoder must be
// constructible that way.
146 typename C::Encoder enc;
// The encoded bytes are written to result (declared on a line outside this
// view).
148 enc.encode(s, result);
// Converts a string from one encoding to another: F is the codec the input
// is decoded with, T is the codec the output is encoded with.
153 template<class F, class T>
154 std::string transcode(const std::string &s)
// Both helper objects are created without arguments, so F::Decoder and
// T::Encoder must be constructible that way.
156 typename F::Decoder from;
157 typename T::Encoder to;
// Step 1: decode the input into the intermediate string temp (declared on a
// line outside this view).
159 from.decode(s, temp);
// Step 2: encode the intermediate string into result (declared on a line
// outside this view).
161 to.encode(temp, result);
167 Creates a codec for an encoding by name. The caller is responsible for
168 deleting the codec when it's no longer needed.
// Declaration only; the definition lives outside this view. The unnamed
// parameter is the encoding name, and the returned raw pointer must be
// deleted by the caller, as the description above states.
170 Codec *create_codec(const std::string &);
172 } // namespace Codecs