namespace Msp {
namespace StringCodec {
-class Ascii: public Codec
+class Ascii: public StandardCodec<Ascii> // create_encoder()/create_decoder() are now inherited from StandardCodec<Ascii>
{
public:
class Encoder: public Codec::Encoder
virtual unichar decode_char(const std::string &, std::string::const_iterator &);
};
- virtual const char *get_name() const { return "ASCII"; }
+ Ascii(ErrorMode em = THROW_ON_ERROR): StandardCodec<Ascii>(em) { } // em is passed on to StandardCodec; NOTE(review): ctor is not explicit, so an ErrorMode converts implicitly to Ascii
- virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); }
- virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual const char *get_name() const { return "ASCII"; } // name string reported for this codec
};
} // namespace StringCodec
/**
Creates a codec from a textual specification of the form "name" or
"name:errormode".

The name part is normalized before matching: uppercase letters are lowercased,
lowercase letters and digits are kept, and every other character is dropped
(so "ISO-8859-1" is matched as "iso88591").  The first ':' ends the name;
whatever follows it must be exactly "throw", "ignore", "trans" or
"transliterate" - this part is compared verbatim, without normalization.  With
no ':' the codec is created with THROW_ON_ERROR.

Returns a codec object allocated with new.  Throws invalid_argument if the
error mode or the normalized name is not recognized.

NOTE(review): releasing the returned object is presumably the caller's job -
confirm against callers.
NOTE(review): isupper/islower/isdigit/tolower are handed a plain char here; if
these resolve to the <cctype> functions, a negative char value is undefined
behavior - consider converting to unsigned char first.
*/
Codec *create_codec(const string &n)
{
string name;
- for(string::const_iterator i=n.begin(); i!=n.end(); ++i)
+ string::const_iterator i; // declared outside the loop so the stop position is still available afterwards
+ for(i=n.begin(); i!=n.end(); ++i)
{
- if(isupper(*i))
+ if(*i==':') // a colon ends the codec name; the remainder is the error mode
+ break;
+ else if(isupper(*i))
name += tolower(*i);
else if(islower(*i) || isdigit(*i))
name += *i;
// any other character (punctuation, whitespace, ...) is silently dropped
}
- if(name=="ascii") return new Ascii;
- if(name=="iso2022jp") return new Iso2022Jp;
- if(name=="iso646fi") return new Iso646Fi;
- if(name=="iso88591" || name=="latin1") return new Iso88591;
- if(name=="iso885915" || name=="latin9") return new Iso885915;
- if(name=="jisx0201") return new JisX0201;
- if(name=="jisx0208") return new JisX0208;
- if(name=="utf8") return new Utf8;
- if(name=="windows1252" || name=="cp1252") return new Windows1252;
+ ErrorMode em = THROW_ON_ERROR; // mode used when the specification has no ":errormode" suffix
+ if(i!=n.end() && *i==':')
+ {
+ string em_str(i+1, n.end()); // everything after the colon, taken as-is (may be empty, which is rejected below)
+ if(em_str=="throw")
+ em = THROW_ON_ERROR;
+ else if(em_str=="ignore")
+ em = IGNORE_ERRORS;
+ else if(em_str=="trans" || em_str=="transliterate")
+ em = TRANSLITERATE;
+ else
+ throw invalid_argument("invalid error mode");
+ }
+
+ if(name=="ascii") return new Ascii(em); // each codec is constructed with the parsed error mode
+ if(name=="iso2022jp") return new Iso2022Jp(em);
+ if(name=="iso646fi") return new Iso646Fi(em);
+ if(name=="iso88591" || name=="latin1") return new Iso88591(em);
+ if(name=="iso885915" || name=="latin9") return new Iso885915(em);
+ if(name=="jisx0201") return new JisX0201(em);
+ if(name=="jisx0208") return new JisX0208(em);
+ if(name=="utf8") return new Utf8(em);
+ if(name=="windows1252" || name=="cp1252") return new Windows1252(em);
throw invalid_argument("unknown string codec");
}
enum ErrorMode
{
+ DEFAULT,
THROW_ON_ERROR,
IGNORE_ERRORS,
TRANSLITERATE
virtual const char *get_name() const = 0;
/** Creates an encoder for this codec. */
- virtual Encoder *create_encoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0;
+ virtual Encoder *create_encoder(ErrorMode err_mode = DEFAULT) const = 0;
/** Creates a decoder for this codec. */
- virtual Decoder *create_decoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0;
+ virtual Decoder *create_decoder(ErrorMode err_mode = DEFAULT) const = 0;
/** Determines whether the given string can be successfully decoded with
this codec. Note that this function returning true does not guarantee that
typedef Codec::Encoder Encoder;
typedef Codec::Decoder Decoder;
+
+/**
+A helper class to provide some common functionality.
+
+It stores an error mode chosen at construction time and implements
+create_encoder and create_decoder on top of it.  The template parameter C is
+the concrete codec class (for example, Ascii derives from
+StandardCodec<Ascii>); it must provide nested Encoder and Decoder types that
+can be constructed from an ErrorMode.
+
+The stored mode is never DEFAULT: a DEFAULT passed to the constructor is
+replaced with THROW_ON_ERROR.  When create_encoder or create_decoder is called
+with DEFAULT, the stored mode is used; any other value overrides it for that
+one call.  Both functions return an object allocated with new.
+*/
+template<typename C>
+class StandardCodec: public Codec
+{
+private:
+ ErrorMode err_mode; // mode handed to encoders/decoders created without an explicit mode
+
+protected:
+ StandardCodec(ErrorMode em): err_mode(em==DEFAULT ? THROW_ON_ERROR : em) { } // protected: only deriving codecs construct this; DEFAULT is resolved here
+
+public:
+ virtual Encoder *create_encoder(ErrorMode em = DEFAULT) const // default argument matches the one declared on Codec::create_encoder
+ { return new typename C::Encoder(em==DEFAULT ? err_mode : em); } // DEFAULT means "use the codec's own mode"
+
+ virtual Decoder *create_decoder(ErrorMode em = DEFAULT) const // default argument matches the one declared on Codec::create_decoder
+ { return new typename C::Decoder(em==DEFAULT ? err_mode : em); } // DEFAULT means "use the codec's own mode"
+};
+
+
/** Convenience function that decodes a string. */
template<class C>
ustring decode(const std::string &s)
namespace Msp {
namespace StringCodec {
-class Iso2022Jp: public Codec
+class Iso2022Jp: public StandardCodec<Iso2022Jp> // create_encoder()/create_decoder() are now inherited from StandardCodec<Iso2022Jp>
{
public:
enum Mode
void switch_mode(Mode);
};
- virtual const char *get_name() const { return "ISO-2022-JP"; }
+ Iso2022Jp(ErrorMode em = THROW_ON_ERROR): StandardCodec<Iso2022Jp>(em) { } // em is passed on to StandardCodec; NOTE(review): ctor is not explicit, so an ErrorMode converts implicitly to Iso2022Jp
- virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); }
- virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual const char *get_name() const { return "ISO-2022-JP"; } // name string reported for this codec
};
} // namespace StringCodec
namespace Msp {
namespace StringCodec {
-class Iso646Fi: public Codec
+class Iso646Fi: public StandardCodec<Iso646Fi> // create_encoder()/create_decoder() are now inherited from StandardCodec<Iso646Fi>
{
public:
class Encoder: public Codec::Encoder
virtual unichar decode_char(const std::string &, std::string::const_iterator &);
};
- virtual const char *get_name() const { return "ISO-646-FI"; }
+ Iso646Fi(ErrorMode em = THROW_ON_ERROR): StandardCodec<Iso646Fi>(em) { } // em is passed on to StandardCodec; NOTE(review): ctor is not explicit, so an ErrorMode converts implicitly to Iso646Fi
- virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); }
- virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual const char *get_name() const { return "ISO-646-FI"; } // name string reported for this codec
};
} // namespace StringCodec
namespace Msp {
namespace StringCodec {
-class Iso88591: public Codec
+class Iso88591: public StandardCodec<Iso88591> // create_encoder()/create_decoder() are now inherited from StandardCodec<Iso88591>
{
public:
class Encoder: public Codec::Encoder
virtual unichar decode_char(const std::string &, std::string::const_iterator &);
};
- virtual const char *get_name() const { return "ISO-8859-1"; }
+ Iso88591(ErrorMode em = THROW_ON_ERROR): StandardCodec<Iso88591>(em) { } // em is passed on to StandardCodec; NOTE(review): ctor is not explicit, so an ErrorMode converts implicitly to Iso88591
- virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); }
- virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual const char *get_name() const { return "ISO-8859-1"; } // name string reported for this codec
};
} // namespace StringCodec
namespace Msp {
namespace StringCodec {
-class Iso885915: public Codec
+class Iso885915: public StandardCodec<Iso885915> // create_encoder()/create_decoder() are now inherited from StandardCodec<Iso885915>
{
public:
class Encoder: public Codec::Encoder
virtual unichar decode_char(const std::string &, std::string::const_iterator &);
};
- virtual const char *get_name() const { return "ISO-8859-15"; }
+ Iso885915(ErrorMode em = THROW_ON_ERROR): StandardCodec<Iso885915>(em) { } // em is passed on to StandardCodec; NOTE(review): ctor is not explicit, so an ErrorMode converts implicitly to Iso885915
- virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); }
- virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual const char *get_name() const { return "ISO-8859-15"; } // name string reported for this codec
};
} // namespace StringCodec
namespace Msp {
namespace StringCodec {
-class JisX0201: public Codec
+class JisX0201: public StandardCodec<JisX0201> // create_encoder()/create_decoder() are now inherited from StandardCodec<JisX0201>
{
public:
class Encoder: public Codec::Encoder
virtual unichar decode_char(const std::string &, std::string::const_iterator &);
};
- virtual const char *get_name() const { return "JIS X 0201"; }
+ JisX0201(ErrorMode em = THROW_ON_ERROR): StandardCodec<JisX0201>(em) { } // em is passed on to StandardCodec; NOTE(review): ctor is not explicit, so an ErrorMode converts implicitly to JisX0201
- virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); }
- virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual const char *get_name() const { return "JIS X 0201"; } // name string reported for this codec
};
} // namespace StringCodec
stand-alone codec, due to lack of a linefeed character among other things,
but is included as part of some other encodings.
*/
-class JisX0208: public Codec
+class JisX0208: public StandardCodec<JisX0208> // create_encoder()/create_decoder() are now inherited from StandardCodec<JisX0208>
{
public:
class Encoder: public Codec::Encoder
virtual unichar decode_char(const std::string &, std::string::const_iterator &);
};
- virtual const char *get_name() const { return "JIS X 0208"; }
+ JisX0208(ErrorMode em = THROW_ON_ERROR): StandardCodec<JisX0208>(em) { } // em is passed on to StandardCodec; NOTE(review): ctor is not explicit, so an ErrorMode converts implicitly to JisX0208
- virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); }
- virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual const char *get_name() const { return "JIS X 0208"; } // name string reported for this codec
};
namespace Msp {
namespace StringCodec {
-class Utf8: public Codec
+class Utf8: public StandardCodec<Utf8> // create_encoder()/create_decoder() are now inherited from StandardCodec<Utf8>
{
public:
class Encoder: public Codec::Encoder
virtual unichar decode_char(const std::string &, std::string::const_iterator &);
};
- virtual const char *get_name() const { return "UTF-8"; }
+ Utf8(ErrorMode em = THROW_ON_ERROR): StandardCodec<Utf8>(em) { } // em is passed on to StandardCodec; NOTE(review): ctor is not explicit, so an ErrorMode converts implicitly to Utf8
- virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); }
- virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual const char *get_name() const { return "UTF-8"; } // name string reported for this codec
};
} // namespace StringCodec
namespace Msp {
namespace StringCodec {
-class Windows1252: public Codec
+class Windows1252: public StandardCodec<Windows1252> // create_encoder()/create_decoder() are now inherited from StandardCodec<Windows1252>
{
public:
class Encoder: public Codec::Encoder
virtual unichar decode_char(const std::string &, std::string::const_iterator &);
};
- virtual const char *get_name() const { return "Windows-1252"; }
+ Windows1252(ErrorMode em = THROW_ON_ERROR): StandardCodec<Windows1252>(em) { } // em is passed on to StandardCodec; NOTE(review): ctor is not explicit, so an ErrorMode converts implicitly to Windows1252
- virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); }
- virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual const char *get_name() const { return "Windows-1252"; } // name string reported for this codec
};
} // namespace StringCodec
StringCodec::Codec *from = StringCodec::create_codec(argv[1]);
StringCodec::Codec *to = StringCodec::create_codec(argv[2]);
- StringCodec::Decoder *from_dec = from->create_decoder(StringCodec::TRANSLITERATE);
- StringCodec::Encoder *to_enc = to->create_encoder(StringCodec::TRANSLITERATE);
+ StringCodec::Decoder *from_dec = from->create_decoder();
+ StringCodec::Encoder *to_enc = to->create_encoder();
string line;
while(getline(cin, line))