From: Mikko Rasa Date: Mon, 6 Jun 2011 20:37:44 +0000 (+0300) Subject: Exception changes X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=commitdiff_plain;h=94ee3a1040f67d9de2e92fc34049642b08d65b3e Exception changes --- diff --git a/source/stringcodec/ascii.cpp b/source/stringcodec/ascii.cpp index fa5ce7f..1d27561 100644 --- a/source/stringcodec/ascii.cpp +++ b/source/stringcodec/ascii.cpp @@ -24,7 +24,7 @@ namespace StringCodec { void Ascii::Encoder::encode_char(unichar ch, string &buf) { if(ch<0 || ch>0x7F) - return error(ch, buf, "Can't express character in ASCII"); + return error(ch, buf, invalid_character(ch, "ASCII")); buf += ch; } @@ -94,7 +94,7 @@ unichar Ascii::Decoder::decode_char(const string &str, string::const_iterator &i return -1; else if(*i&0x80) { - unichar result = error("Undefined ASCII character"); + unichar result = error(invalid_sequence(i, i+1, "undefined ASCII character")); ++i; return result; } diff --git a/source/stringcodec/codec.cpp b/source/stringcodec/codec.cpp index 3dc4f9a..b1c05c5 100644 --- a/source/stringcodec/codec.cpp +++ b/source/stringcodec/codec.cpp @@ -41,18 +41,6 @@ string Codec::Encoder::encode(const ustring &str) return buf; } -void Codec::Encoder::error(unichar ch, string &buf, const string &msg) -{ - switch(err_mode) - { - case TRANSLITERATE: - transliterate(ch, buf); - case IGNORE_ERRORS: - break; - default: - throw CodecError(msg); - } -} void Codec::Decoder::decode(const string &str, ustring &buf) @@ -72,19 +60,6 @@ ustring Codec::Decoder::decode(const string &str) return buf; } -unichar Codec::Decoder::error(const string &msg) -{ - switch(err_mode) - { - case TRANSLITERATE: - return 0xFFFD; - case IGNORE_ERRORS: - return -1; - default: - throw CodecError(msg); - } -} - Codec *create_codec(const string &n) { string name; @@ -105,7 +80,7 @@ Codec *create_codec(const string &n) if(name=="jisx0208") return new JisX0208; if(name=="utf8") return new Utf8; if(name=="windows1252" || name=="cp1252") return new Windows1252; - throw InvalidParameterValue("Unknown string codec"); + throw invalid_argument("unknown string codec"); } Codec *detect_codec(const string &str) diff --git a/source/stringcodec/codec.h b/source/stringcodec/codec.h index 5acbe7b..00f03b0 100644 --- a/source/stringcodec/codec.h +++ b/source/stringcodec/codec.h @@ -2,7 +2,7 @@ #define MSP_STRINGCODEC_CODEC_H_ #include -#include +#include "except.h" #include "ustring.h" namespace Msp { @@ -15,15 +15,6 @@ enum ErrorMode TRANSLITERATE }; -/** -An exception thrown for all kinds of problems encountered while encoding or -decoding strings. -*/ -class CodecError: public Exception -{ -public: - CodecError(const std::string &w_): Exception(w_) { } -}; /** Base class for string codecs. Use one of the derived classes or the function @@ -73,10 +64,17 @@ public: protected: /** Handles an error depending on the error mode. - THROW_ON_ERROR: throws CodecError(msg) + THROW_ON_ERROR: throws err IGNORE_ERRORS: does nothing TRANSLITERATE: calls transliterate(ch, buf) */ - void error(unichar ch, std::string &buf, const std::string &msg); + template + void error(unichar ch, std::string &buf, const E &err) + { + if(err_mode==TRANSLITERATE) + transliterate(ch, buf); + else if(err_mode!=IGNORE_ERRORS) + throw err; + } /** Attempts to produce an alternative encoding for a unicode character. Typically this includes dropping accent marks or romanizing letters. */ @@ -116,10 +114,19 @@ public: /** Handles an error depending on the error mode. The return value is suitable for returning from decode_char. - THROW_ON_ERROR: throws CodecError(msg) + THROW_ON_ERROR: throws err IGNORE_ERRORS: returns -1 TRANSLITERATE: return 0xFFFD */ - unichar error(const std::string &msg); + template + unichar error(const E &err) + { + if(err_mode==TRANSLITERATE) + return 0xFFFD; + else if(err_mode==IGNORE_ERRORS) + return -1; + else + throw err; + } }; protected: diff --git a/source/stringcodec/except.cpp b/source/stringcodec/except.cpp new file mode 100644 index 0000000..045f44a --- /dev/null +++ b/source/stringcodec/except.cpp @@ -0,0 +1,31 @@ +#include +#include "except.h" + +using namespace std; + +namespace Msp { +namespace StringCodec { + +invalid_character::invalid_character(unichar ch, const string &detail): + codec_error(format("invalid character: U+%04X (%s)", ch, detail)) +{ } + + +invalid_sequence::invalid_sequence(const string::const_iterator &begin, const string::const_iterator &end, const string &detail): + codec_error(format("invalid sequence: %s (%s)", format_sequence(begin, end), detail)) +{ } + +string invalid_sequence::format_sequence(const string::const_iterator &begin, const string::const_iterator &end) +{ + string result; + for(string::const_iterator i=begin; i!=end; ++i) + { + if(!result.empty()) + result += ' '; + result += lexical_cast(static_cast(*i), Fmt().fill('0').width(2).hex().uppercase()); + } + return result; +} + +} // namespace StringCodec +} // namespace Msp diff --git a/source/stringcodec/except.h b/source/stringcodec/except.h new file mode 100644 index 0000000..6e25666 --- /dev/null +++ b/source/stringcodec/except.h @@ -0,0 +1,48 @@ +#ifndef MSP_STRINGCODEC_EXCEPT_H_ +#define MSP_STRINGCODEC_EXCEPT_H_ + +#include +#include "ustring.h" + +namespace Msp { +namespace StringCodec { + +/** +Base class for codec errors. +*/ +class codec_error: public std::runtime_error +{ +public: + codec_error(const std::string &w): std::runtime_error(w) { } + virtual ~codec_error() throw() { } +}; + + +/** +Thrown when a codec can't encode the requested character. +*/ +class invalid_character: public codec_error +{ +public: + invalid_character(unichar, const std::string &); + virtual ~invalid_character() throw() { } +}; + + +/** +Thrown when a codec encounters a byte sequence it can't decode. +*/ +class invalid_sequence: public codec_error +{ +public: + invalid_sequence(const std::string::const_iterator &, const std::string::const_iterator &, const std::string &); + virtual ~invalid_sequence() throw() { } + +private: + std::string format_sequence(const std::string::const_iterator &, const std::string::const_iterator &); +}; + +} // namespace StringCodec +} // namespace Msp + +#endif diff --git a/source/stringcodec/iso2022jp.cpp b/source/stringcodec/iso2022jp.cpp index 1e5bf90..14a4b99 100644 --- a/source/stringcodec/iso2022jp.cpp +++ b/source/stringcodec/iso2022jp.cpp @@ -35,12 +35,12 @@ void Iso2022Jp::Encoder::encode_char(unichar ch, string &buf) { Kuten jis = ucs_to_jisx0208(ch); if(!jis) - return error(ch, buf, "Can't express character in ISO-2022-JP"); + return error(ch, buf, invalid_character(ch, "ISO-2022-JP")); if(mode!=JISX0208) switch_mode(JISX0208, buf); - char jbuf[2] = {jis.ku+0x20, jis.ten+0x20}; + char jbuf[2] = { jis.ku+0x20, jis.ten+0x20 }; buf.append(jbuf, 2); } } @@ -64,7 +64,7 @@ void Iso2022Jp::Encoder::switch_mode(Mode m, string &buf) case ASCII: buf.append("\033(B", 3); break; case JISX0201: buf.append("\033(J", 3); break; case JISX0208: buf.append("\033$B", 3); break; - default: throw CodecError("WTF? Invalid mode in Iso2022Jp::Encoder::switch_mode"); + default: throw invalid_argument("Iso2022Jp::Encoder::switch_mode"); } } @@ -121,7 +121,7 @@ unichar Iso2022Jp::Decoder::decode_char(const string &str, string::const_iterato else if(dec) return dec->decode_char(str, i); else - throw CodecError("WTF? No sub-decoder for Iso2022Jp::Decoder"); + throw logic_error("no sub-decoder"); if(result>=0) return result; diff --git a/source/stringcodec/iso646fi.cpp b/source/stringcodec/iso646fi.cpp index 64e429d..0db1d00 100644 --- a/source/stringcodec/iso646fi.cpp +++ b/source/stringcodec/iso646fi.cpp @@ -30,7 +30,7 @@ void Iso646Fi::Encoder::encode_char(unichar ch, string &buf) { int tch = transform_mapping_or_direct(mapping, map_size, ch, false); if(tch<0 || tch>0x7F) - error(ch, buf, "Can't express character in ISO-646-FI"); + error(ch, buf, invalid_character(ch, "ISO-646-FI")); else buf += tch; } @@ -49,7 +49,7 @@ unichar Iso646Fi::Decoder::decode_char(const string &str, string::const_iterator unsigned char ch = *i; unichar result; if(ch>=0x80) - result = error("Undefined ISO-646-FI character"); + result = error(invalid_sequence(i, i+1, "undefined ISO-646-FI character")); else result = transform_mapping_or_direct(mapping, map_size, ch, true); diff --git a/source/stringcodec/iso88591.cpp b/source/stringcodec/iso88591.cpp index 9adc2d9..a21b6f1 100644 --- a/source/stringcodec/iso88591.cpp +++ b/source/stringcodec/iso88591.cpp @@ -8,7 +8,7 @@ namespace StringCodec { void Iso88591::Encoder::encode_char(unichar ch, string &buf) { if(ch<0 || ch>0xFF) - return error(ch, buf, "Can't express character in ISO-8859-1"); + return error(ch, buf, invalid_character(ch, "ISO-8859-1")); buf += ch; } diff --git a/source/stringcodec/iso885915.cpp b/source/stringcodec/iso885915.cpp index 8dfc3a1..eb26264 100644 --- a/source/stringcodec/iso885915.cpp +++ b/source/stringcodec/iso885915.cpp @@ -29,7 +29,7 @@ void Iso885915::Encoder::encode_char(unichar ch, string &buf) { int tch = transform_mapping_or_direct(mapping, map_size, ch, false); if(tch<0 || tch>0xFF) - error(ch, buf, "Can't express character in ISO-8859-15"); + error(ch, buf, invalid_character(ch, "ISO-8859-15")); else buf += tch; diff --git a/source/stringcodec/jisx0201.cpp b/source/stringcodec/jisx0201.cpp index 4d9d6bd..4c8cd48 100644 --- a/source/stringcodec/jisx0201.cpp +++ b/source/stringcodec/jisx0201.cpp @@ -16,7 +16,7 @@ void JisX0201::Encoder::encode_char(unichar ch, string &buf) else if(ch>=0xFF61 && ch<=0xFF9F) buf += ch-0xFEC0; else - error(ch, buf, "Can't express character in JIS X 0201"); + error(ch, buf, invalid_character(ch, "JIS X 0201")); } void JisX0201::Encoder::transliterate(unichar, string &buf) @@ -41,7 +41,7 @@ unichar JisX0201::Decoder::decode_char(const string &str, string::const_iterator else if(ch>=0xA1 && ch<=0xDF) result = ch+0xFEC0; else - result = error("Undefined JIS X 0201 character"); + result = error(invalid_sequence(i, i+1, "undefined JIS X 0201 character")); ++i; return result; diff --git a/source/stringcodec/jisx0208.cpp b/source/stringcodec/jisx0208.cpp index 411ae2b..d56ec33 100644 --- a/source/stringcodec/jisx0208.cpp +++ b/source/stringcodec/jisx0208.cpp @@ -16,7 +16,7 @@ void JisX0208::Encoder::encode_char(unichar ucs, string &buf) buf.append(jbuf, 2); } else - error(ucs, buf, "Can't express character in JIS X 0208"); + error(ucs, buf, invalid_character(ucs, "JIS X 0208")); } void JisX0208::Encoder::transliterate(unichar, string &buf) @@ -36,13 +36,15 @@ unichar JisX0208::Decoder::decode_char(const string &str, string::const_iterator unichar result; if(j==str.end()) - result = error("Incomplete JIS X 0208 character"); + result = error(invalid_sequence(i, j, "incomplete JIS X 0208 character")); else { jis.ten = *j++-0x20; result = jisx0208_to_ucs(jis); + if(result==-1) + result = error(invalid_sequence(i, j, "invalid JIS X 0208 ku-ten")); if(result==0) - result = error("Undefined JIS X 0208 character"); + result = error(invalid_sequence(i, j, "undefined JIS X 0208 character")); } i = j; @@ -53,7 +55,7 @@ unichar JisX0208::Decoder::decode_char(const string &str, string::const_iterator unichar jisx0208_to_ucs(Kuten jis) { if(jis.ku==0 || jis.ku>0x5E || jis.ten==0 || jis.ten>0x5E) - return 0; + return -1; return jisx0208_to_ucs_table[jis.ku*94 + jis.ten - 95]; } diff --git a/source/stringcodec/utf8.cpp b/source/stringcodec/utf8.cpp index 4c75d8b..c6d1990 100644 --- a/source/stringcodec/utf8.cpp +++ b/source/stringcodec/utf8.cpp @@ -8,7 +8,7 @@ namespace StringCodec { void Utf8::Encoder::encode_char(unichar ch, string &buf) { if(ch<0 || ch>0x10FFFF) - return error(ch, buf, "Can't express character in UTF-8"); + return error(ch, buf, invalid_character(ch, "UTF-8")); unsigned bytes = 1; if(ch>0xFFFF) @@ -48,7 +48,7 @@ unichar Utf8::Decoder::decode_char(const string &str, string::const_iterator &i) if((*i&0xC0)==0x80) { - unichar result = error("UTF-8 tail byte found when expecting head"); + unichar result = error(invalid_sequence(i, i+1, "stray UTF-8 head byte")); ++i; return result; } @@ -68,11 +68,11 @@ unichar Utf8::Decoder::decode_char(const string &str, string::const_iterator &i) result = (result<<6) | ((*j++)&0x3F); if(k>(bytes*5-4)) || !(result>>7)) - result = error("Denormalized UTF-8 multibyte sequence"); + result = error(invalid_sequence(i, j, "denormalized UTF-8 sequence")); else if(!is_valid_unichar(result)) - result = error("Invalid Unicode code point"); + result = error(invalid_sequence(i, j, "undefined UTF-8 character")); i = j; return result; diff --git a/source/stringcodec/windows1252.cpp b/source/stringcodec/windows1252.cpp index 72eb104..a6bde24 100644 --- a/source/stringcodec/windows1252.cpp +++ b/source/stringcodec/windows1252.cpp @@ -31,7 +31,7 @@ void Windows1252::Encoder::encode_char(unichar ch, string &buf) return; } - error(ch, buf, "Can't express character in Windows-1252"); + error(ch, buf, invalid_character(ch, "Windows-1252")); } } @@ -52,7 +52,7 @@ unichar Windows1252::Decoder::decode_char(const string &str, string::const_itera { result = table[ch-0x80]; if(result==0) - result = error("Undefined Windows-1252 character"); + result = error(invalid_sequence(i, i+1, "undefined Windows-1252 character")); } else result = ch;