#define MSP_STRINGCODEC_CODEC_H_
#include <string>
-#include <msp/core/except.h>
+#include "except.h"
+#include "ustring.h"
namespace Msp {
namespace StringCodec {
-typedef int UnicodeChar;
-
-typedef std::basic_string<UnicodeChar> ustring;
-
/**
Selects how the error() handlers of encoders and decoders react to a problem.
THROW_ON_ERROR makes them throw the exception supplied by the caller.
TRANSLITERATE makes an encoder call transliterate() and a decoder return 0xFFFD.
NOTE(review): error() also tests for IGNORE_ERRORS, which is not listed in this
view of the enum; confirm that it is declared here.
*/
enum ErrorMode
{
THROW_ON_ERROR,
TRANSLITERATE
};
-/**
-An exception thrown for all kinds of problems encountered while encoding or
-decoding strings.
-*/
-class CodecError: public Exception
-{
-public:
- CodecError(const std::string &w_): Exception(w_) { }
-};
/**
Base class for string codecs. Use one of the derived classes or the function
/** Encodes a single unicode character. If the character can't be
represented in this encoding, error() should be called. */
- virtual void encode_char(UnicodeChar ch, std::string &buf) = 0;
+ virtual void encode_char(unichar ch, std::string &buf) = 0;
/** Encodes a unicode string. This is equivalent to calling encode_char
for each character in the string with the same buffer. */
protected:
/** Handles an error depending on the error mode.
- THROW_ON_ERROR: throws CodecError(msg)
+ THROW_ON_ERROR: throws err
IGNORE_ERRORS: does nothing
TRANSLITERATE: calls transliterate(ch, buf) */
- void error(UnicodeChar ch, std::string &buf, const std::string &msg);
+ template<typename E>
+ void error(unichar ch, std::string &buf, const E &err)
+ {
+ 	// Nothing to do when errors are ignored.
+ 	if(err_mode==IGNORE_ERRORS)
+ 		return;
+ 	// Every mode other than TRANSLITERATE reports the problem by throwing
+ 	// the exception object supplied by the caller.
+ 	if(err_mode!=TRANSLITERATE)
+ 		throw err;
+ 	// Let the codec produce a substitute encoding for ch into buf.
+ 	transliterate(ch, buf);
+ }
/** Attempts to produce an alternative encoding for a unicode character.
Typically this includes dropping accent marks or romanizing letters. */
- virtual void transliterate(UnicodeChar ch, std::string &buf) = 0;
+ virtual void transliterate(unichar ch, std::string &buf) = 0;
};
/**
state change sequence was decoded but no character followed it. If
invalid input is encountered, the error() function should be called and
the iterator advanced only if it doesn't throw. */
- virtual UnicodeChar decode_char(const std::string &str, std::string::const_iterator &i) = 0;
+ virtual unichar decode_char(const std::string &str, std::string::const_iterator &i) = 0;
/** Decodes a string. */
virtual void decode(const std::string &str, ustring &buf);
/** Handles an error depending on the error mode. The return value is
suitable for returning from decode_char.
- THROW_ON_ERROR: throws CodecError(msg)
+ THROW_ON_ERROR: throws err
IGNORE_ERRORS: returns -1
- TRANSLITERATE: return 0xFFFE */
- UnicodeChar error(const std::string &msg);
+ TRANSLITERATE: returns 0xFFFD */
+ template<typename E>
+ unichar error(const E &err)
+ {
+ 	// Ignored errors are signalled to the caller with -1.
+ 	if(err_mode==IGNORE_ERRORS)
+ 		return -1;
+ 	// Every mode other than TRANSLITERATE reports the problem by throwing
+ 	// the exception object supplied by the caller.
+ 	if(err_mode!=TRANSLITERATE)
+ 		throw err;
+ 	// U+FFFD (REPLACEMENT CHARACTER) stands in for the undecodable input.
+ 	return 0xFFFD;
+ }
};
protected:
ustring decode(const std::string &s)
{
typename C::Decoder dec;
- ustring result;
- dec.decode(s, result);
- return result;
+ return dec.decode(s); // relies on a Decoder::decode overload that returns the decoded string
}
/** Convenience function that encodes a string. */
std::string encode(const ustring &s)
{
typename C::Encoder enc;
- std::string result;
- enc.encode(s, result);
- enc.sync(result);
- return result;
+ return enc.encode(s); // NOTE(review): the removed lines called enc.sync(result) after encoding; confirm this Encoder::encode overload does so as well
}
/** Convenience function that transcodes a string from one codec to another. */
template<class F, class T>
std::string transcode(const std::string &s)
{
- typename F::Decoder from;
- typename T::Encoder to;
- ustring temp;
- from.decode(s, temp);
- std::string result;
- to.encode(temp, result);
- to.sync(result);
- return result;
+ return encode<T>(decode<F>(s)); // decode with codec F, then encode the intermediate ustring with codec T
}
/** Creates a codec for an encoding by name. The caller is responsible for