3 This file is part of libmspstrings
4 Copyright © 2006-2007 Mikko Rasa
5 Distributed under the LGPL
8 #ifndef MSP_STRINGS_CODEC_H_
9 #define MSP_STRINGS_CODEC_H_
12 #include <msp/error.h>
17 An exception thrown for all kinds of problems encountered while encoding or decoding strings.
20 class CodecError: public Exception
23 CodecError(const std::string &w_): Exception(w_) { }
27 Base class for string codecs. Use one of the derived classes or the function
28 create_codec to create a specific codec.
30 For the purposes of this class, an std::wstring is considered to contain
31 Unicode characters and an std::string is considered to be an encoded sequence
32 of bytes. A codec is able to determine if an encoded string could be decoded with it.
46 Base class for string encoders. Each codec class should contain an Encoder
47 class derived from this.
53 Encodes a single character. Derived classes should use the append
54 function to put the result into the internal buffer.
56 virtual void encode_char(wchar_t) =0;
61 virtual void encode(const std::wstring &s)
62 { for(std::wstring::const_iterator i=s.begin(); i!=s.end(); ++i) encode_char(*i); }
65 Brings the encoder back to its initial state. This allows the encoded
66 sequence to be extracted or flushed without loss of integrity.
68 virtual void sync() { }
71 Returns a reference to the encoded sequence. Call sync() first to make
72 sure it's a valid encoded string by itself.
74 const std::string &get() const { return buffer_; }
77 Returns the number of bytes in the output buffer.
79 unsigned size() const { return buffer_.size(); }
82 Clears the encoded sequence. Encoder state is left intact.
84 void flush() { buffer_.clear(); }
86 virtual ~Encoder() { }
88 Encoder(ErrorMode em=THROW_ON_ERROR): err_mode_(em) { }
89 void append(char c) { buffer_+=c; }
90 void append(const char *s, unsigned l) { buffer_.append(s, l); }
91 void append(const std::string &s) { buffer_+=s; }
/** Reports an encoding problem described by the given message.  Defined out
of line.  NOTE(review): presumably acts according to err_mode_ - confirm
against the implementation. */
void error(const std::string &message);
93 virtual void append_replacement() { }
100 Base class for string decoders. Each codec class should contain a Decoder
101 class derived from this.
106 virtual void decode_char(const std::string &, std::string::const_iterator &) =0;
107 virtual void decode(const std::string &s)
108 { for(std::string::const_iterator i=s.begin(); i!=s.end(); ) decode_char(s, i); }
111 Ensures that all input has been processed. If this is not the case any
112 buffers are cleared and an error is triggered.
114 virtual void sync() { }
116 const std::wstring &get() const { return buffer_; }
117 unsigned size() const { return buffer_.size(); }
118 void flush() { buffer_.clear(); }
119 virtual ~Decoder() { }
121 Decoder(ErrorMode em): err_mode_(em) { }
122 void append(wchar_t c) { buffer_+=c; }
123 void append(const std::wstring &s) { buffer_+=s; }
/** Reports a decoding problem described by the given message.  Defined out
of line.  NOTE(review): presumably acts according to err_mode_ - confirm
against the implementation. */
void error(const std::string &message);
127 std::wstring buffer_;
130 virtual Encoder *create_encoder(ErrorMode =THROW_ON_ERROR) const =0;
131 virtual Decoder *create_decoder(ErrorMode =THROW_ON_ERROR) const =0;
132 virtual bool detect(const std::string &) const;
133 virtual ~StringCodec() { }
139 Convenience function that decodes a string using the given codec.
142 std::wstring decode(const std::string &s)
144 typename C::Decoder dec;
151 std::string encode(const std::wstring &s)
153 typename C::Encoder enc;
159 template<class F, class T>
160 std::string transcode(const std::string &s)
162 typename F::Decoder from;
163 typename T::Encoder to;
166 to.encode(from.get());
171 StringCodec *create_codec(const std::string &);