]> git.tdb.fi Git - libs/core.git/commitdiff
Exception changes
author Mikko Rasa <tdb@tdb.fi>
Mon, 6 Jun 2011 20:37:44 +0000 (23:37 +0300)
committer Mikko Rasa <tdb@tdb.fi>
Mon, 6 Jun 2011 20:52:50 +0000 (23:52 +0300)
13 files changed:
source/stringcodec/ascii.cpp
source/stringcodec/codec.cpp
source/stringcodec/codec.h
source/stringcodec/except.cpp [new file with mode: 0644]
source/stringcodec/except.h [new file with mode: 0644]
source/stringcodec/iso2022jp.cpp
source/stringcodec/iso646fi.cpp
source/stringcodec/iso88591.cpp
source/stringcodec/iso885915.cpp
source/stringcodec/jisx0201.cpp
source/stringcodec/jisx0208.cpp
source/stringcodec/utf8.cpp
source/stringcodec/windows1252.cpp

index fa5ce7f1cdfc0cd4eeb8b931a0f1c42aa0dc26a4..1d27561498f1de132fee1466e34e84868c66df75 100644 (file)
@@ -24,7 +24,7 @@ namespace StringCodec {
 void Ascii::Encoder::encode_char(unichar ch, string &buf)
 {
        if(ch<0 || ch>0x7F)
-               return error(ch, buf, "Can't express character in ASCII");
+               return error(ch, buf, invalid_character(ch, "ASCII"));
 
        buf += ch;
 }
@@ -94,7 +94,7 @@ unichar Ascii::Decoder::decode_char(const string &str, string::const_iterator &i
                return -1;
        else if(*i&0x80)
        {
-               unichar result = error("Undefined ASCII character");
+               unichar result = error(invalid_sequence(i, i+1, "undefined ASCII character"));
                ++i;
                return result;
        }
index 3dc4f9a9d452efe7aef2478878d36ad3977af170..b1c05c50292295e573f307497a175ac78797a08a 100644 (file)
@@ -41,18 +41,6 @@ string Codec::Encoder::encode(const ustring &str)
        return buf;
 }
 
-void Codec::Encoder::error(unichar ch, string &buf, const string &msg)
-{
-       switch(err_mode)
-       {
-       case TRANSLITERATE:
-               transliterate(ch, buf);
-       case IGNORE_ERRORS:
-               break;
-       default:
-               throw CodecError(msg);
-       }
-}
 
 
 void Codec::Decoder::decode(const string &str, ustring &buf)
@@ -72,19 +60,6 @@ ustring Codec::Decoder::decode(const string &str)
        return buf;
 }
 
-unichar Codec::Decoder::error(const string &msg)
-{
-       switch(err_mode)
-       {
-       case TRANSLITERATE:
-               return 0xFFFD;
-       case IGNORE_ERRORS:
-               return -1;
-       default:
-               throw CodecError(msg);
-       }
-}
-
 Codec *create_codec(const string &n)
 {
        string name;
@@ -105,7 +80,7 @@ Codec *create_codec(const string &n)
        if(name=="jisx0208") return new JisX0208;
        if(name=="utf8") return new Utf8;
        if(name=="windows1252" || name=="cp1252") return new Windows1252;
-       throw InvalidParameterValue("Unknown string codec");
+       throw invalid_argument("unknown string codec");
 }
 
 Codec *detect_codec(const string &str)
index 5acbe7b0d1dc34b1d7285c6b96a63665b1ffde2f..00f03b0cc4b4c4f2f4fcd133f2d3fa67e9fac5fb 100644 (file)
@@ -2,7 +2,7 @@
 #define MSP_STRINGCODEC_CODEC_H_
 
 #include <string>
-#include <msp/core/except.h>
+#include "except.h"
 #include "ustring.h"
 
 namespace Msp {
@@ -15,15 +15,6 @@ enum ErrorMode
        TRANSLITERATE
 };
 
-/**
-An exception thrown for all kinds of problems encountered while encoding or
-decoding strings.
-*/
-class CodecError: public Exception
-{
-public:
-       CodecError(const std::string &w_): Exception(w_) { }
-};
 
 /**
 Base class for string codecs.  Use one of the derived classes or the function
@@ -73,10 +64,17 @@ public:
        protected:
                /** Handles an error depending on the error mode.
 
-               THROW_ON_ERROR: throws CodecError(msg)
+               THROW_ON_ERROR: throws err
                IGNORE_ERRORS: does nothing
                TRANSLITERATE: calls transliterate(ch, buf) */
-               void error(unichar ch, std::string &buf, const std::string &msg);
+               template<typename E>
+               void error(unichar ch, std::string &buf, const E &err)
+               {
+                       if(err_mode!=TRANSLITERATE && err_mode!=IGNORE_ERRORS)
+                               throw err;  // neither lenient mode: raise the caller-supplied exception
+                       if(err_mode==TRANSLITERATE)
+                               transliterate(ch, buf);  // IGNORE_ERRORS emits nothing
+               }
 
                /** Attempts to produce an alternative encoding for a unicode character.
                Typically this includes dropping accent marks or romanizing letters. */
@@ -116,10 +114,19 @@ public:
                /** Handles an error depending on the error mode.  The return value is
                suitable for returning from decode_char.
                
-               THROW_ON_ERROR: throws CodecError(msg)
+               THROW_ON_ERROR: throws err
                IGNORE_ERRORS: returns -1
                TRANSLITERATE: return 0xFFFD */
-               unichar error(const std::string &msg);
+               template<typename E>
+               unichar error(const E &err)
+               {
+                       if(err_mode==IGNORE_ERRORS)
+                               return -1;
+                       if(err_mode!=TRANSLITERATE)
+                               throw err;
+                       // TRANSLITERATE: substitute U+FFFD, the Unicode replacement character
+                       return 0xFFFD;
+               }
        };
 
 protected:
diff --git a/source/stringcodec/except.cpp b/source/stringcodec/except.cpp
new file mode 100644 (file)
index 0000000..045f44a
--- /dev/null
@@ -0,0 +1,31 @@
+#include <msp/strings/format.h>
+#include "except.h"
+
+using namespace std;
+
+namespace Msp {
+namespace StringCodec {
+
+invalid_character::invalid_character(unichar ch, const string &detail):
+       codec_error(format("invalid character: U+%04X (%s)", ch, detail))  // code point in hex, then the detail text in parentheses
+{ }
+
+
+invalid_sequence::invalid_sequence(const string::const_iterator &begin, const string::const_iterator &end, const string &detail):
+       codec_error(format("invalid sequence: %s (%s)", format_sequence(begin, end), detail))  // formatted bytes of [begin, end), then the detail text
+{ }
+
+string invalid_sequence::format_sequence(const string::const_iterator &begin, const string::const_iterator &end)
+{
+       string result;  // accumulates one formatted entry per byte of [begin, end)
+       for(string::const_iterator i=begin; i!=end; ++i)
+       {
+               if(!result.empty())
+                       result += ' ';  // separator goes between entries, never before the first
+               result += lexical_cast(static_cast<unsigned char>(*i), Fmt().fill('0').width(2).hex().uppercase());  // unsigned char keeps bytes >= 0x80 non-negative; presumably yields two uppercase hex digits -- confirm against Fmt
+       }
+       return result;
+}
+
+} // namespace StringCodec
+} // namespace Msp
diff --git a/source/stringcodec/except.h b/source/stringcodec/except.h
new file mode 100644 (file)
index 0000000..6e25666
--- /dev/null
@@ -0,0 +1,48 @@
+#ifndef MSP_STRINGCODEC_EXCEPT_H_
+#define MSP_STRINGCODEC_EXCEPT_H_
+
+#include <stdexcept>
+#include "ustring.h"
+
+namespace Msp {
+namespace StringCodec {
+
+/**
+Base class for codec errors.  The message is stored by std::runtime_error.
+*/
+class codec_error: public std::runtime_error
+{
+public:
+       codec_error(const std::string &w): std::runtime_error(w) { }
+       virtual ~codec_error() throw() { }
+};
+
+
+/**
+Thrown when a codec can't encode the requested character.
+*/
+class invalid_character: public codec_error
+{
+public:
+       invalid_character(unichar, const std::string &);  // the offending character, then a detail text for the message
+       virtual ~invalid_character() throw() { }
+};
+
+
+/**
+Thrown when a codec encounters a byte sequence it can't decode.  The message lists the offending bytes.
+*/
+class invalid_sequence: public codec_error
+{
+public:
+       invalid_sequence(const std::string::const_iterator &, const std::string::const_iterator &, const std::string &);
+       virtual ~invalid_sequence() throw() { }
+
+private:
+       static std::string format_sequence(const std::string::const_iterator &, const std::string::const_iterator &);  // static: called from the mem-initializer, before the base class is constructed
+};
+
+} // namespace StringCodec
+} // namespace Msp
+
+#endif
index 1e5bf90ee590fb068276a1287e3b0fe5c3a9f4ff..14a4b99f83196b835b44eba00afb51082760d37a 100644 (file)
@@ -35,12 +35,12 @@ void Iso2022Jp::Encoder::encode_char(unichar ch, string &buf)
        {
                Kuten jis = ucs_to_jisx0208(ch);
                if(!jis)
-                       return error(ch, buf, "Can't express character in ISO-2022-JP");
+                       return error(ch, buf, invalid_character(ch, "ISO-2022-JP"));
 
                if(mode!=JISX0208)
                        switch_mode(JISX0208, buf);
 
-               char jbuf[2] = {jis.ku+0x20, jis.ten+0x20};
+               char jbuf[2] = { jis.ku+0x20, jis.ten+0x20 };
                buf.append(jbuf, 2);
        }
 }
@@ -64,7 +64,7 @@ void Iso2022Jp::Encoder::switch_mode(Mode m, string &buf)
        case ASCII:    buf.append("\033(B", 3); break;
        case JISX0201: buf.append("\033(J", 3); break;
        case JISX0208: buf.append("\033$B", 3); break;
-       default: throw CodecError("WTF?  Invalid mode in Iso2022Jp::Encoder::switch_mode");
+       default: throw invalid_argument("Iso2022Jp::Encoder::switch_mode");
        }
 }
 
@@ -121,7 +121,7 @@ unichar Iso2022Jp::Decoder::decode_char(const string &str, string::const_iterato
                else if(dec)
                        return dec->decode_char(str, i);
                else
-                       throw CodecError("WTF?  No sub-decoder for Iso2022Jp::Decoder");
+                       throw logic_error("no sub-decoder");
 
                if(result>=0)
                        return result;
index 64e429d4197a648349edbda2f8bc88d5df070454..0db1d00485df6d553241e27db47fd3176c9fb031 100644 (file)
@@ -30,7 +30,7 @@ void Iso646Fi::Encoder::encode_char(unichar ch, string &buf)
 {
        int tch = transform_mapping_or_direct(mapping, map_size, ch, false);
        if(tch<0 || tch>0x7F)
-               error(ch, buf, "Can't express character in ISO-646-FI");
+               error(ch, buf, invalid_character(ch, "ISO-646-FI"));
        else
                buf += tch;
 }
@@ -49,7 +49,7 @@ unichar Iso646Fi::Decoder::decode_char(const string &str, string::const_iterator
        unsigned char ch = *i;
        unichar result;
        if(ch>=0x80)
-               result = error("Undefined ISO-646-FI character");
+               result = error(invalid_sequence(i, i+1, "undefined ISO-646-FI character"));
        else
                result = transform_mapping_or_direct(mapping, map_size, ch, true);
 
index 9adc2d9e09bf7423d650b78c66409a0e8c9bbbcf..a21b6f15d6601e04be4cbeb7100218a5308cc90b 100644 (file)
@@ -8,7 +8,7 @@ namespace StringCodec {
 void Iso88591::Encoder::encode_char(unichar ch, string &buf)
 {
        if(ch<0 || ch>0xFF)
-               return error(ch, buf, "Can't express character in ISO-8859-1");
+               return error(ch, buf, invalid_character(ch, "ISO-8859-1"));
 
        buf += ch;
 }
index 8dfc3a1e91a5c178e26461d686ea1bb340f76ce9..eb26264d8dd5bacd7d72bc481ffbad5632dc25c7 100644 (file)
@@ -29,7 +29,7 @@ void Iso885915::Encoder::encode_char(unichar ch, string &buf)
 {
        int tch = transform_mapping_or_direct(mapping, map_size, ch, false);
        if(tch<0 || tch>0xFF)
-               error(ch, buf, "Can't express character in ISO-8859-15");
+               error(ch, buf, invalid_character(ch, "ISO-8859-15"));
        else
                buf += tch;
 
index 4d9d6bdf5eaea9c257a13325643c574b1672761d..4c8cd4804901e0332a4b458f51bf2dbc79b27857 100644 (file)
@@ -16,7 +16,7 @@ void JisX0201::Encoder::encode_char(unichar ch, string &buf)
        else if(ch>=0xFF61 && ch<=0xFF9F)
                buf += ch-0xFEC0;
        else
-               error(ch, buf, "Can't express character in JIS X 0201");
+               error(ch, buf, invalid_character(ch, "JIS X 0201"));
 }
 
 void JisX0201::Encoder::transliterate(unichar, string &buf)
@@ -41,7 +41,7 @@ unichar JisX0201::Decoder::decode_char(const string &str, string::const_iterator
        else if(ch>=0xA1 && ch<=0xDF)
                result = ch+0xFEC0;
        else
-               result = error("Undefined JIS X 0201 character");
+               result = error(invalid_sequence(i, i+1, "undefined JIS X 0201 character"));
 
        ++i;
        return result;
index 411ae2b6477c945e5d5a95da14837e42863d2591..d56ec33d22a2da43bc6ff8b8bc1d6af30e549706 100644 (file)
@@ -16,7 +16,7 @@ void JisX0208::Encoder::encode_char(unichar ucs, string &buf)
                buf.append(jbuf, 2);
        }
        else
-               error(ucs, buf, "Can't express character in JIS X 0208");
+               error(ucs, buf, invalid_character(ucs, "JIS X 0208"));
 }
 
 void JisX0208::Encoder::transliterate(unichar, string &buf)
@@ -36,13 +36,15 @@ unichar JisX0208::Decoder::decode_char(const string &str, string::const_iterator
 
        unichar result;
        if(j==str.end())
-               result = error("Incomplete JIS X 0208 character");
+               result = error(invalid_sequence(i, j, "incomplete JIS X 0208 character"));
        else
        {
                jis.ten = *j++-0x20;
                result = jisx0208_to_ucs(jis);
+               if(result==-1)
+                       result = error(invalid_sequence(i, j, "invalid JIS X 0208 ku-ten"));
                if(result==0)
-                       result = error("Undefined JIS X 0208 character");
+                       result = error(invalid_sequence(i, j, "undefined JIS X 0208 character"));
        }
 
        i = j;
@@ -53,7 +55,7 @@ unichar JisX0208::Decoder::decode_char(const string &str, string::const_iterator
 unichar jisx0208_to_ucs(Kuten jis)
 {
        if(jis.ku==0 || jis.ku>0x5E || jis.ten==0 || jis.ten>0x5E)
-               return 0;
+               return -1;  // ku or ten outside 1..0x5E
 
        return jisx0208_to_ucs_table[jis.ku*94 + jis.ten - 95];  // index equals (ku-1)*94 + (ten-1)
 }
index 4c75d8b073e277c2adc2f38db5f3db806da7b0e5..c6d1990d0b00e6ae36870e8b0af7978f09cdb9fa 100644 (file)
@@ -8,7 +8,7 @@ namespace StringCodec {
 void Utf8::Encoder::encode_char(unichar ch, string &buf)
 {
        if(ch<0 || ch>0x10FFFF)
-               return error(ch, buf, "Can't express character in UTF-8");
+               return error(ch, buf, invalid_character(ch, "UTF-8"));
 
        unsigned bytes = 1;
        if(ch>0xFFFF)
@@ -48,7 +48,7 @@ unichar Utf8::Decoder::decode_char(const string &str, string::const_iterator &i)
 
        if((*i&0xC0)==0x80)
        {
-               unichar result = error("UTF-8 tail byte found when expecting head");
+               unichar result = error(invalid_sequence(i, i+1, "stray UTF-8 head byte"));
                ++i;
                return result;
        }
@@ -68,11 +68,11 @@ unichar Utf8::Decoder::decode_char(const string &str, string::const_iterator &i)
                        result = (result<<6) | ((*j++)&0x3F);
 
                if(k<bytes)
-                       result = error("Incomplete UTF-8 character");
+                       result = error(invalid_sequence(i, j, "incomplete UTF-8 character"));
                else if(!(result>>(bytes*5-4)) || !(result>>7))
-                       result = error("Denormalized UTF-8 multibyte sequence");
+                       result = error(invalid_sequence(i, j, "denormalized UTF-8 sequence"));
                else if(!is_valid_unichar(result))
-                       result = error("Invalid Unicode code point");
+                       result = error(invalid_sequence(i, j, "undefined UTF-8 character"));
 
                i = j;
                return result;
index 72eb104115db192cce202802d479a4d0e592beff..a6bde2407bad5cdcb5fa3a2cb458b911b0ebd9b5 100644 (file)
@@ -31,7 +31,7 @@ void Windows1252::Encoder::encode_char(unichar ch, string &buf)
                                return;
                        }
 
-               error(ch, buf, "Can't express character in Windows-1252");
+               error(ch, buf, invalid_character(ch, "Windows-1252"));
        }
 }
 
@@ -52,7 +52,7 @@ unichar Windows1252::Decoder::decode_char(const string &str, string::const_itera
        {
                result = table[ch-0x80];
                if(result==0)
-                       result = error("Undefined Windows-1252 character");
+                       result = error(invalid_sequence(i, i+1, "undefined Windows-1252 character"));
        }
        else
                result = ch;