*/
class Encoder
{
+ protected:
+ ErrorMode err_mode;
+
+ Encoder(ErrorMode em): err_mode(em) { }
public:
virtual ~Encoder() { }
- /**
- Encodes a single unicode character. If the character can't be
- represented in this encoding, behavior depends on the error mode
- specified for the encoder:
-
- For THROW_ON_ERROR, an exception is thrown.
-
- For IGNORE_ERRORS, nothing is done.
-
- For TRANSLITERATE, the encoder attempts to select a character or a string
- or characters that closely approximates the non-representable character.
- */
- virtual void encode_char(UnicodeChar ch, std::string &buf) =0;
+ /** Encodes a single unicode character. If the character can't be
+ represented in this encoding, error() should be called. */
+ virtual void encode_char(UnicodeChar ch, std::string &buf) = 0;
- /**
- Encodes a unicode string. This is equivalent to callind encode_char for
- each character in the string with the same buffer.
- */
+ /** Encodes a unicode string. This is equivalent to calling encode_char
+ for each character in the string with the same buffer. */
virtual void encode(const ustring &str, std::string &buf);
std::string encode(const ustring &);
- /**
- Procuces a sequence of bytes that will bring the encoder back to the
- initial state.
- */
+ /** Produces a sequence of bytes that will bring the encoder back to the
+ initial state. */
virtual void sync(std::string &buf) { (void)buf; }
- /**
- Resets the encoder to the initial state without producing output.
- */
+ /** Resets the encoder to the initial state without producing output. */
virtual void reset() { }
- protected:
- ErrorMode err_mode;
-
- Encoder(ErrorMode em): err_mode(em) { }
-
- /**
- Handles an error depending on the error mode.
-
- For THROW_ON_ERROR, throws CodecError(msg).
- For IGNORE_ERROR, does nothing.
+ protected:
+ /** Handles an error depending on the error mode.
- For TRANSLITERATE, calls transliterate(ch, buf).
- */
+ THROW_ON_ERROR: throws CodecError(msg)
+ IGNORE_ERRORS: does nothing
+ TRANSLITERATE: calls transliterate(ch, buf) */
void error(UnicodeChar ch, std::string &buf, const std::string &msg);
- /**
- Attempts to produce an alternative encoding for a unicode character.
- Typically this includes dropping accent marks or romanizing letters.
- */
- virtual void transliterate(UnicodeChar ch, std::string &buf) =0;
+ /** Attempts to produce an alternative encoding for a unicode character.
+ Typically this includes dropping accent marks or romanizing letters. */
+ virtual void transliterate(UnicodeChar ch, std::string &buf) = 0;
};
/**
*/
class Decoder
{
+ protected:
+ ErrorMode err_mode;
+
+ Decoder(ErrorMode em): err_mode(em) { }
public:
virtual ~Decoder() { }
- /**
- Decodes a single character from a string. The iterator is advanced to
- the next character. For stateful codecs, -1 may be returned if a state
- change sequence was decoded but no character followed it. In case a
- decoding error occurs, behavior depends on the error mode specified for
- the decoder:
-
- For THROW_ON_ERROR, an exception is thrown and the iterator is left at
- the erroneous character.
+ /** Decodes a single character from a string. The iterator is advanced
+ to the next character. For stateful codecs, -1 may be returned if a
+ state change sequence was decoded but no character followed it. If
+ invalid input is encountered, the error() function should be called, and
+ the iterator should be advanced only if error() doesn't throw. */
+ virtual UnicodeChar decode_char(const std::string &str, std::string::const_iterator &i) = 0;
- For IGNORE_ERRORS, -1 is returned and the iterator is advanced.
-
- For TRANSLITERATE, 0xFFFE is returned and the iterator is advanced.
- */
- virtual UnicodeChar decode_char(const std::string &str, std::string::const_iterator &i) =0;
-
- /**
- Decodes a string.
- */
+ /** Decodes a string. */
virtual void decode(const std::string &str, ustring &buf);
ustring decode(const std::string &);
- /**
- Resets the decoder to the initial state.
- */
+ /** Resets the decoder to the initial state. */
virtual void reset() { }
- protected:
- ErrorMode err_mode;
-
- Decoder(ErrorMode em): err_mode(em) { }
- /**
- Handles an error depending on the error mode.
- */
- UnicodeChar error(const std::string &);
+ protected:
+ /** Handles an error depending on the error mode. The return value is
+ suitable for returning from decode_char.
+
+ THROW_ON_ERROR: throws CodecError(msg)
+ IGNORE_ERRORS: returns -1
+ TRANSLITERATE: returns 0xFFFE */
+ UnicodeChar error(const std::string &msg);
};
+protected:
+ Codec() { }
+public:
virtual ~Codec() { }
- /**
- Returns the name of the encoding handled by this codec.
- */
- virtual const char *get_name() const =0;
+ /** Returns the name of the encoding handled by this codec. */
+ virtual const char *get_name() const = 0;
- /**
- Creates an encoder for this codec.
- */
- virtual Encoder *create_encoder(ErrorMode err_mode=THROW_ON_ERROR) const =0;
+ /** Creates an encoder for this codec. */
+ virtual Encoder *create_encoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0;
- /**
- Creates a decoder for this codec.
- */
- virtual Decoder *create_decoder(ErrorMode err_mode=THROW_ON_ERROR) const =0;
+ /** Creates a decoder for this codec. */
+ virtual Decoder *create_decoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0;
- /**
- Determines whether the given string can be successfully decoded with this
- codec. Note that this function returning true does not guarantee that the
- string was actually encoded with this codec. In particular, many 8-bit
- encodings are indistinguishable.
- */
+ /** Determines whether the given string can be successfully decoded with
+ this codec. Note that this function returning true does not guarantee that
+ the string was actually encoded with this codec. In particular, many 8-bit
+ encodings are indistinguishable. */
virtual bool detect(const std::string &) const;
-protected:
- Codec() { }
};
typedef Codec::Encoder Encoder;
typedef Codec::Decoder Decoder;
-/**
-Convenience function that decodes a string.
-*/
+/** Convenience function that decodes a string. */
template<class C>
ustring decode(const std::string &s)
{
return result;
}
-/**
-Convenience function that encodes a string.
-*/
+/** Convenience function that encodes a string. */
template<class C>
std::string encode(const ustring &s)
{
return result;
}
-/**
-Convenience function that transcodes a string from one codec to another.
-*/
+/** Convenience function that transcodes a string from one codec to another. */
template<class F, class T>
std::string transcode(const std::string &s)
{
return result;
}
-/**
-Creates a codec for an encoding by name. The caller is responsible for
-deleting the codec when it's no longer needed.
-*/
+/** Creates a codec for an encoding by name. The caller is responsible for
+deleting the codec when it's no longer needed. */
Codec *create_codec(const std::string &);
-/**
-Automatically detects the encoding of a string and creates a codec for it.
-The codec must be deleted when it's no longer needed.
-*/
+/** Automatically detects the encoding of a string and creates a codec for it.
+The codec must be deleted when it's no longer needed. */
Codec *detect_codec(const std::string &);
} // namespace Codecs