From d3fc0bf0f20f100f2831188c1ce21461d21c2c7a Mon Sep 17 00:00:00 2001 From: Mikko Rasa Date: Tue, 7 Jun 2011 13:16:11 +0300 Subject: [PATCH] Allow error mode to be specified when creating the codec object --- source/stringcodec/ascii.h | 7 +++--- source/stringcodec/codec.cpp | 39 +++++++++++++++++++++++--------- source/stringcodec/codec.h | 27 ++++++++++++++++++++-- source/stringcodec/iso2022jp.h | 7 +++--- source/stringcodec/iso646fi.h | 7 +++--- source/stringcodec/iso88591.h | 7 +++--- source/stringcodec/iso885915.h | 7 +++--- source/stringcodec/jisx0201.h | 7 +++--- source/stringcodec/jisx0208.h | 7 +++--- source/stringcodec/utf8.h | 7 +++--- source/stringcodec/windows1252.h | 7 +++--- transcode.cpp | 4 ++-- 12 files changed, 82 insertions(+), 51 deletions(-) diff --git a/source/stringcodec/ascii.h b/source/stringcodec/ascii.h index 163448b..b052727 100644 --- a/source/stringcodec/ascii.h +++ b/source/stringcodec/ascii.h @@ -6,7 +6,7 @@ namespace Msp { namespace StringCodec { -class Ascii: public Codec +class Ascii: public StandardCodec { public: class Encoder: public Codec::Encoder @@ -27,10 +27,9 @@ public: virtual unichar decode_char(const std::string &, std::string::const_iterator &); }; - virtual const char *get_name() const { return "ASCII"; } + Ascii(ErrorMode em = THROW_ON_ERROR): StandardCodec(em) { } - virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); } - virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); } + virtual const char *get_name() const { return "ASCII"; } }; } // namespace StringCodec diff --git a/source/stringcodec/codec.cpp b/source/stringcodec/codec.cpp index b1c05c5..d1fc1df 100644 --- a/source/stringcodec/codec.cpp +++ b/source/stringcodec/codec.cpp @@ -63,23 +63,40 @@ ustring Codec::Decoder::decode(const string &str) Codec *create_codec(const string &n) { string name; - for(string::const_iterator i=n.begin(); i!=n.end(); ++i) + string::const_iterator i; + for(i=n.begin(); i!=n.end(); ++i) { - if(isupper(*i)) + if(*i==':') + break; + else if(isupper(*i)) name += tolower(*i); else if(islower(*i) || isdigit(*i)) name += *i; } - if(name=="ascii") return new Ascii; - if(name=="iso2022jp") return new Iso2022Jp; - if(name=="iso646fi") return new Iso646Fi; - if(name=="iso88591" || name=="latin1") return new Iso88591; - if(name=="iso885915" || name=="latin9") return new Iso885915; - if(name=="jisx0201") return new JisX0201; - if(name=="jisx0208") return new JisX0208; - if(name=="utf8") return new Utf8; - if(name=="windows1252" || name=="cp1252") return new Windows1252; + ErrorMode em = THROW_ON_ERROR; + if(i!=n.end() && *i==':') + { + string em_str(i+1, n.end()); + if(em_str=="throw") + em = THROW_ON_ERROR; + else if(em_str=="ignore") + em = IGNORE_ERRORS; + else if(em_str=="trans" || em_str=="transliterate") + em = TRANSLITERATE; + else + throw invalid_argument("invalid error mode"); + } + + if(name=="ascii") return new Ascii(em); + if(name=="iso2022jp") return new Iso2022Jp(em); + if(name=="iso646fi") return new Iso646Fi(em); + if(name=="iso88591" || name=="latin1") return new Iso88591(em); + if(name=="iso885915" || name=="latin9") return new Iso885915(em); + if(name=="jisx0201") return new JisX0201(em); + if(name=="jisx0208") return new JisX0208(em); + if(name=="utf8") return new Utf8(em); + if(name=="windows1252" || name=="cp1252") return new Windows1252(em); throw invalid_argument("unknown string codec"); } diff --git a/source/stringcodec/codec.h b/source/stringcodec/codec.h index 00f03b0..c70e052 100644 --- a/source/stringcodec/codec.h +++ b/source/stringcodec/codec.h @@ -10,6 +10,7 @@ namespace StringCodec { enum ErrorMode { + DEFAULT, THROW_ON_ERROR, IGNORE_ERRORS, TRANSLITERATE @@ -138,10 +139,10 @@ public: virtual const char *get_name() const = 0; /** Creates an encoder for this codec. */ - virtual Encoder *create_encoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0; + virtual Encoder *create_encoder(ErrorMode err_mode = DEFAULT) const = 0; /** Creates a decoder for this codec. */ - virtual Decoder *create_decoder(ErrorMode err_mode = THROW_ON_ERROR) const = 0; + virtual Decoder *create_decoder(ErrorMode err_mode = DEFAULT) const = 0; /** Determines whether the given string can be successfully decoded with this codec. Note that this function returning true does not guarantee that @@ -153,6 +154,28 @@ public: typedef Codec::Encoder Encoder; typedef Codec::Decoder Decoder; + +/** +A helper class to provide some common functionality. +*/ +template +class StandardCodec: public Codec +{ +private: + ErrorMode err_mode; + +protected: + StandardCodec(ErrorMode em): err_mode(em==DEFAULT ? THROW_ON_ERROR : em) { } + +public: + virtual Encoder *create_encoder(ErrorMode em = DEFAULT) const + { return new typename C::Encoder(em==DEFAULT ? err_mode : em); } + + virtual Decoder *create_decoder(ErrorMode em = DEFAULT) const + { return new typename C::Decoder(em==DEFAULT ? err_mode : em); } +}; + + /** Convenience function that decodes a string. */ template ustring decode(const std::string &s) diff --git a/source/stringcodec/iso2022jp.h b/source/stringcodec/iso2022jp.h index 91f0166..a3f425a 100644 --- a/source/stringcodec/iso2022jp.h +++ b/source/stringcodec/iso2022jp.h @@ -6,7 +6,7 @@ namespace Msp { namespace StringCodec { -class Iso2022Jp: public Codec +class Iso2022Jp: public StandardCodec { public: enum Mode @@ -47,10 +47,9 @@ public: void switch_mode(Mode); }; - virtual const char *get_name() const { return "ISO-2022-JP"; } + Iso2022Jp(ErrorMode em = THROW_ON_ERROR): StandardCodec(em) { } - virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); } - virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); } + virtual const char *get_name() const { return "ISO-2022-JP"; } }; } // namespace StringCodec diff --git a/source/stringcodec/iso646fi.h b/source/stringcodec/iso646fi.h index f61a49f..7ded23c 100644 --- a/source/stringcodec/iso646fi.h +++ b/source/stringcodec/iso646fi.h @@ -6,7 +6,7 @@ namespace Msp { namespace StringCodec { -class Iso646Fi: public Codec +class Iso646Fi: public StandardCodec { public: class Encoder: public Codec::Encoder @@ -27,10 +27,9 @@ public: virtual unichar decode_char(const std::string &, std::string::const_iterator &); }; - virtual const char *get_name() const { return "ISO-646-FI"; } + Iso646Fi(ErrorMode em = THROW_ON_ERROR): StandardCodec(em) { } - virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); } - virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); } + virtual const char *get_name() const { return "ISO-646-FI"; } }; } // namespace StringCodec diff --git a/source/stringcodec/iso88591.h b/source/stringcodec/iso88591.h index 806bdb1..5c59fae 100644 --- a/source/stringcodec/iso88591.h +++ b/source/stringcodec/iso88591.h @@ -6,7 +6,7 @@ namespace Msp { namespace StringCodec { -class Iso88591: public Codec +class Iso88591: public StandardCodec { public: class Encoder: public Codec::Encoder @@ -27,10 +27,9 @@ public: virtual unichar decode_char(const std::string &, std::string::const_iterator &); }; - virtual const char *get_name() const { return "ISO-8859-1"; } + Iso88591(ErrorMode em = THROW_ON_ERROR): StandardCodec(em) { } - virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); } - virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); } + virtual const char *get_name() const { return "ISO-8859-1"; } }; } // namespace StringCodec diff --git a/source/stringcodec/iso885915.h b/source/stringcodec/iso885915.h index f52fdac..5d65730 100644 --- a/source/stringcodec/iso885915.h +++ b/source/stringcodec/iso885915.h @@ -6,7 +6,7 @@ namespace Msp { namespace StringCodec { -class Iso885915: public Codec +class Iso885915: public StandardCodec { public: class Encoder: public Codec::Encoder @@ -27,10 +27,9 @@ public: virtual unichar decode_char(const std::string &, std::string::const_iterator &); }; - virtual const char *get_name() const { return "ISO-8859-15"; } + Iso885915(ErrorMode em = THROW_ON_ERROR): StandardCodec(em) { } - virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); } - virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); } + virtual const char *get_name() const { return "ISO-8859-15"; } }; } // namespace StringCodec diff --git a/source/stringcodec/jisx0201.h b/source/stringcodec/jisx0201.h index 7a7223e..5521d01 100644 --- a/source/stringcodec/jisx0201.h +++ b/source/stringcodec/jisx0201.h @@ -6,7 +6,7 @@ namespace Msp { namespace StringCodec { -class JisX0201: public Codec +class JisX0201: public StandardCodec { public: class Encoder: public Codec::Encoder @@ -27,10 +27,9 @@ public: virtual unichar decode_char(const std::string &, std::string::const_iterator &); }; - virtual const char *get_name() const { return "JIS X 0201"; } + JisX0201(ErrorMode em = THROW_ON_ERROR): StandardCodec(em) { } - virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); } - virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); } + virtual const char *get_name() const { return "JIS X 0201"; } }; } // namespace StringCodec diff --git a/source/stringcodec/jisx0208.h b/source/stringcodec/jisx0208.h index 81b39f2..c59e44f 100644 --- a/source/stringcodec/jisx0208.h +++ b/source/stringcodec/jisx0208.h @@ -11,7 +11,7 @@ Codec for the JIS X 0208 encoding. This is not particularly useful as a stand-alone codec, due to lack of a linefeed character among other things, but is included as part of some other encodings. */ -class JisX0208: public Codec +class JisX0208: public StandardCodec { public: class Encoder: public Codec::Encoder @@ -32,10 +32,9 @@ public: virtual unichar decode_char(const std::string &, std::string::const_iterator &); }; - virtual const char *get_name() const { return "JIS X 0208"; } + JisX0208(ErrorMode em = THROW_ON_ERROR): StandardCodec(em) { } - virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); } - virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); } + virtual const char *get_name() const { return "JIS X 0208"; } }; diff --git a/source/stringcodec/utf8.h b/source/stringcodec/utf8.h index cb4924a..fcf23ef 100644 --- a/source/stringcodec/utf8.h +++ b/source/stringcodec/utf8.h @@ -6,7 +6,7 @@ namespace Msp { namespace StringCodec { -class Utf8: public Codec +class Utf8: public StandardCodec { public: class Encoder: public Codec::Encoder @@ -27,10 +27,9 @@ public: virtual unichar decode_char(const std::string &, std::string::const_iterator &); }; - virtual const char *get_name() const { return "UTF-8"; } + Utf8(ErrorMode em = THROW_ON_ERROR): StandardCodec(em) { } - virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); } - virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); } + virtual const char *get_name() const { return "UTF-8"; } }; } // namespace StringCodec diff --git a/source/stringcodec/windows1252.h b/source/stringcodec/windows1252.h index b10071c..44d97c9 100644 --- a/source/stringcodec/windows1252.h +++ b/source/stringcodec/windows1252.h @@ -6,7 +6,7 @@ namespace Msp { namespace StringCodec { -class Windows1252: public Codec +class Windows1252: public StandardCodec { public: class Encoder: public Codec::Encoder @@ -27,10 +27,9 @@ public: virtual unichar decode_char(const std::string &, std::string::const_iterator &); }; - virtual const char *get_name() const { return "Windows-1252"; } + Windows1252(ErrorMode em = THROW_ON_ERROR): StandardCodec(em) { } - virtual Encoder *create_encoder(ErrorMode em = THROW_ON_ERROR) const { return new Encoder(em); } - virtual Decoder *create_decoder(ErrorMode em = THROW_ON_ERROR) const { return new Decoder(em); } + virtual const char *get_name() const { return "Windows-1252"; } }; } // namespace StringCodec diff --git a/transcode.cpp b/transcode.cpp index e8ba3eb..47a2184 100644 --- a/transcode.cpp +++ b/transcode.cpp @@ -17,8 +17,8 @@ int main(int argc, char **argv) StringCodec::Codec *from = StringCodec::create_codec(argv[1]); StringCodec::Codec *to = StringCodec::create_codec(argv[2]); - StringCodec::Decoder *from_dec = from->create_decoder(StringCodec::TRANSLITERATE); - StringCodec::Encoder *to_enc = to->create_encoder(StringCodec::TRANSLITERATE); + StringCodec::Decoder *from_dec = from->create_decoder(); + StringCodec::Encoder *to_enc = to->create_encoder(); string line; while(getline(cin, line)) -- 2.45.2