-#ifndef MSP_STRINGS_CODEC_H_
-#define MSP_STRINGS_CODEC_H_
+#ifndef MSP_STRINGCODEC_CODEC_H_
+#define MSP_STRINGCODEC_CODEC_H_
#include <string>
-#include <msp/core/except.h>
+#include "except.h"
+#include "ustring.h"
namespace Msp {
-namespace Codecs {
-
-typedef int UnicodeChar;
-
-typedef std::basic_string<UnicodeChar> ustring;
+namespace StringCodec {
enum ErrorMode
{
+ DEFAULT, // no explicit choice; the Encoder and Decoder constructors replace it with THROW_ON_ERROR
THROW_ON_ERROR,
IGNORE_ERRORS,
TRANSLITERATE
};
-/**
-An exception thrown for all kinds of problems encountered while encoding or
-decoding strings.
-*/
-class CodecError: public Exception
-{
-public:
- CodecError(const std::string &w_): Exception(w_) { }
-};
/**
Base class for string codecs. Use one of the derived classes or the function
class Encoder
{
protected:
- ErrorMode err_mode;
+ ErrorMode err_mode = THROW_ON_ERROR; // in-class fallback; the visible constructor assigns it explicitly
- Encoder(ErrorMode em): err_mode(em) { }
+ Encoder(ErrorMode em): err_mode(em==DEFAULT ? THROW_ON_ERROR : em) { } // DEFAULT is resolved to THROW_ON_ERROR at construction
public:
- virtual ~Encoder() { }
+ virtual ~Encoder() = default;
/** Encodes a single unicode character. If the character can't be
represented in this encoding, error() should be called. */
- virtual void encode_char(UnicodeChar ch, std::string &buf) = 0;
+ virtual void encode_char(unichar ch, std::string &buf) = 0;
/** Encodes a unicode string. This is equivalent to calling encode_char
for each character in the string with the same buffer. */
protected:
/** Handles an error depending on the error mode.
- THROW_ON_ERROR: throws CodecError(msg)
+ THROW_ON_ERROR: throws err
IGNORE_ERRORS: does nothing
TRANSLITERATE: calls transliterate(ch, buf) */
- void error(UnicodeChar ch, std::string &buf, const std::string &msg);
+ template<typename E> // E: type of the exception object the caller wants thrown
+ void error(unichar ch, std::string &buf, const E &err)
+ {
+ if(err_mode==TRANSLITERATE)
+ transliterate(ch, buf);
+ else if(err_mode!=IGNORE_ERRORS) // in practice THROW_ON_ERROR; a stray DEFAULT would throw as well
+ throw err; // throws a copy of err with static type E
+ }
/** Attempts to produce an alternative encoding for a unicode character.
Typically this includes dropping accent marks or romanizing letters. */
- virtual void transliterate(UnicodeChar ch, std::string &buf) = 0;
+ virtual void transliterate(unichar ch, std::string &buf) = 0;
};
/**
class Decoder
{
protected:
- ErrorMode err_mode;
+ ErrorMode err_mode = THROW_ON_ERROR; // in-class fallback; the visible constructor assigns it explicitly
- Decoder(ErrorMode em): err_mode(em) { }
+ Decoder(ErrorMode em): err_mode(em==DEFAULT ? THROW_ON_ERROR : em) { } // DEFAULT is resolved to THROW_ON_ERROR at construction
public:
- virtual ~Decoder() { }
+ virtual ~Decoder() = default;
/** Decodes a single character from a string. The iterator is advanced
to the next character. For stateful codecs, -1 may be returned if a
state change sequence was decoded but no character followed it. If
invalid input is encountered, the error() function should be called and
the iterator advanced only if it doesn't throw. */
- virtual UnicodeChar decode_char(const std::string &str, std::string::const_iterator &i) = 0;
+ virtual unichar decode_char(const std::string &str, std::string::const_iterator &i) = 0;
/** Decodes a string. */
virtual void decode(const std::string &str, ustring &buf);
/** Handles an error depending on the error mode. The return value is
suitable for returning from decode_char.
- THROW_ON_ERROR: throws CodecError(msg)
+ THROW_ON_ERROR: throws err
IGNORE_ERRORS: returns -1
- TRANSLITERATE: return 0xFFFE */
- UnicodeChar error(const std::string &msg);
+ TRANSLITERATE: return 0xFFFD */
+ template<typename E> // E: type of the exception object the caller wants thrown
+ unichar error(const E &err)
+ {
+ if(err_mode==TRANSLITERATE)
+ return 0xFFFD; // U+FFFD REPLACEMENT CHARACTER stands in for the undecodable input
+ else if(err_mode==IGNORE_ERRORS)
+ return -1; // same "no character" value that decode_char's documentation describes
+ else
+ throw err; // THROW_ON_ERROR (or any other mode value): throws a copy of err with static type E
+ }
};
protected:
- Codec() { }
+ Codec() = default; // declared under protected:, so only derived classes can construct a Codec
public:
- virtual ~Codec() { }
+ virtual ~Codec() = default; // virtual, so deleting a derived codec through a Codec pointer runs the derived destructor
/** Returns the name of the encoding handled by this codec. */
virtual const char *get_name() const = 0;
/** Creates an encoder for this codec. */
- virtual Encoder *create_encoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0;
+ virtual Encoder *create_encoder(ErrorMode err_mode = DEFAULT) const = 0; // default arguments bind to the static type of the call, so overriders should repeat DEFAULT
/** Creates a decoder for this codec. */
- virtual Decoder *create_decoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0;
+ virtual Decoder *create_decoder(ErrorMode err_mode = DEFAULT) const = 0; // default arguments bind to the static type of the call, so overriders should repeat DEFAULT
/** Determines whether the given string can be successfully decoded with
this codec. Note that this function returning true does not guarantee that
typedef Codec::Encoder Encoder;
typedef Codec::Decoder Decoder;
+
+/**
+A helper class to provide some common functionality: it stores a codec-wide
+error mode and implements create_encoder and create_decoder on top of it.
+
+C is the concrete codec class.  It must declare nested types C::Encoder and
+C::Decoder, each constructible from an ErrorMode; those are the types the
+create functions instantiate.
+*/
+template<typename C>
+class StandardCodec: public Codec
+{
+private:
+ ErrorMode err_mode = THROW_ON_ERROR;
+
+protected:
+ /** Stores the codec-wide error mode.  DEFAULT is replaced with
+ THROW_ON_ERROR, so the stored mode is always a concrete one. */
+ StandardCodec(ErrorMode em): err_mode(em==DEFAULT ? THROW_ON_ERROR : em) { }
+
+ /** Resolves a per-call error mode: DEFAULT yields the codec-wide mode,
+ any other value is returned unchanged. */
+ ErrorMode get_error_mode(ErrorMode em = DEFAULT) const
+ { return (em==DEFAULT ? err_mode : em); }
+
+public:
+ /** Returns a C::Encoder allocated with new, constructed with the resolved
+ error mode.  Marked override so a signature mismatch with Codec is a
+ compile error instead of a silently unrelated function. */
+ Encoder *create_encoder(ErrorMode em = DEFAULT) const override
+ { return new typename C::Encoder(get_error_mode(em)); }
+
+ /** Returns a C::Decoder allocated with new, constructed with the resolved
+ error mode.  Marked override for the same reason as create_encoder. */
+ Decoder *create_decoder(ErrorMode em = DEFAULT) const override
+ { return new typename C::Decoder(get_error_mode(em)); }
+};
+
+
/** Convenience function that decodes a string. */
-template<class C>
+template<typename C> // C: codec class with a default-constructible nested Decoder type
ustring decode(const std::string &s)
{
typename C::Decoder dec;
- ustring result;
- dec.decode(s, result);
- return result;
+ return dec.decode(s); // NOTE(review): relies on a decode() overload returning ustring, which is not visible in this excerpt - confirm
}
/** Convenience function that encodes a string. */
-template<class C>
+template<typename C> // C: codec class with a default-constructible nested Encoder type
std::string encode(const ustring &s)
{
typename C::Encoder enc;
- std::string result;
- enc.encode(s, result);
- enc.sync(result);
- return result;
+ return enc.encode(s); // NOTE(review): the removed lines also called enc.sync(result); confirm the one-argument encode() does that itself
}
/** Convenience function that transcodes a string from one codec to another. */
-template<class F, class T>
+template<typename F, typename T> // F: codec to decode from, T: codec to encode to
std::string transcode(const std::string &s)
{
- typename F::Decoder from;
- typename T::Encoder to;
- ustring temp;
- from.decode(s, temp);
- std::string result;
- to.encode(temp, result);
- to.sync(result);
- return result;
+ return encode<T>(decode<F>(s)); // decode with F into a ustring, then encode that ustring with T
}
/** Creates a codec for an encoding by name. The caller is responsible for
The codec must be deleted when it's no longer needed. */
Codec *detect_codec(const std::string &);
-} // namespace Codecs
+} // namespace StringCodec
} // namespace Msp
#endif