#include "iso885915.h"
#include "jisx0201.h"
#include "jisx0208.h"
+#include "utf16.h"
#include "utf8.h"
#include "windows1252.h"
// Walks `str` with a decoder obtained from create_decoder and tracks in
// `result` whether every decode_char call succeeded.
// Patch view: `-` lines are the previous version, which set result to false
// when decode_char threw CodecError; `+` lines are the replacement, which
// requests IGNORE_ERRORS and treats a decode_char result of -1 as failure.
bool Codec::detect(const string &str) const
{
- Decoder *dec = create_decoder();
+ Decoder *dec = create_decoder(IGNORE_ERRORS);
+
bool result = true;
- try
- {
- for(string::const_iterator i=str.begin(); i!=str.end(); )
- dec->decode_char(str, i);
- }
- catch(const CodecError &)
- {
- result = false;
- }
// The loop has no increment expression, so decode_char is presumably
// responsible for advancing `i` -- confirm against its declaration.
// Iteration stops as soon as one call returns -1.
+ for(string::const_iterator i=str.begin(); (result && i!=str.end()); )
+ result = (dec->decode_char(str, i)!=-1);
delete dec;
// NOTE(review): `buf` is not declared anywhere in the visible body and the
// function is declared to return bool; this line and the closing brace look
// like context from a different hunk (one would expect `return result;`
// here) -- confirm against the full file.
return buf;
}
// Removed by this change (every line below is a `-` line).
// The old helper dispatched an encoding error on err_mode:
//   TRANSLITERATE  - called transliterate(ch, buf), then fell through to the
//                    IGNORE_ERRORS break (there is no break of its own);
//   IGNORE_ERRORS  - did nothing;
//   anything else  - threw CodecError(msg).
-void Codec::Encoder::error(unichar ch, string &buf, const string &msg)
-{
- switch(err_mode)
- {
- case TRANSLITERATE:
- transliterate(ch, buf);
- case IGNORE_ERRORS:
- break;
- default:
- throw CodecError(msg);
- }
-}
void Codec::Decoder::decode(const string &str, ustring &buf)
return buf;
}
// Removed by this change (every line below is a `-` line).
// The old helper turned a decoding error into a result based on err_mode:
//   TRANSLITERATE  - returned 0xFFFD (the code point of the Unicode
//                    replacement character);
//   IGNORE_ERRORS  - returned -1;
//   anything else  - threw CodecError(msg).
-unichar Codec::Decoder::error(const string &msg)
-{
- switch(err_mode)
- {
- case TRANSLITERATE:
- return 0xFFFD;
- case IGNORE_ERRORS:
- return -1;
- default:
- throw CodecError(msg);
- }
-}
-
// Creates a codec object from a textual name `n`.
//
// Name normalisation: uppercase letters are lowercased, lowercase letters and
// digits are kept, and every other character is dropped, so for example
// "ISO-8859-1" is matched as "iso88591".
//
// After this change the name may carry a ":<error mode>" suffix. Scanning of
// the name stops at the first ':' and the remainder selects the ErrorMode:
// "throw", "ignore", or "trans"/"transliterate". Any other suffix throws
// invalid_argument("invalid error mode"). Without a suffix the mode is DEFAULT.
//
// Returns a codec allocated with `new`; throws invalid_argument("unknown
// string codec") when the normalised name is not recognised (the previous
// version threw InvalidParameterValue instead).
// NOTE(review): ownership of the returned pointer presumably passes to the
// caller -- confirm.
Codec *create_codec(const string &n)
{
string name;
- for(string::const_iterator i=n.begin(); i!=n.end(); ++i)
// The iterator is now declared outside the loop so that the position of a
// ':' separator is still available once the loop has ended.
+ string::const_iterator i;
+ for(i=n.begin(); i!=n.end(); ++i)
{
- if(isupper(*i))
+ if(*i==':')
+ break;
// NOTE(review): *i is a plain char; passing a negative value to
// isupper/islower/isdigit/tolower is undefined behaviour, so a cast to
// unsigned char may be needed for non-ASCII input.
+ else if(isupper(*i))
name += tolower(*i);
else if(islower(*i) || isdigit(*i))
name += *i;
}
- if(name=="ascii") return new Ascii;
- if(name=="iso2022jp") return new Iso2022Jp;
- if(name=="iso646fi") return new Iso646Fi;
- if(name=="iso88591" || name=="latin1") return new Iso88591;
- if(name=="iso885915" || name=="latin9") return new Iso885915;
- if(name=="jisx0201") return new JisX0201;
- if(name=="jisx0208") return new JisX0208;
- if(name=="utf8") return new Utf8;
- if(name=="windows1252" || name=="cp1252") return new Windows1252;
- throw InvalidParameterValue("Unknown string codec");
// Parse the optional error-mode suffix that follows the ':' separator.
+ ErrorMode em = DEFAULT;
+ if(i!=n.end() && *i==':')
+ {
+ string em_str(i+1, n.end());
+ if(em_str=="throw")
+ em = THROW_ON_ERROR;
+ else if(em_str=="ignore")
+ em = IGNORE_ERRORS;
+ else if(em_str=="trans" || em_str=="transliterate")
+ em = TRANSLITERATE;
+ else
+ throw invalid_argument("invalid error mode");
+ }
+
// Every codec constructor now receives the selected error mode. The UTF-16
// entries are new: "utf16" uses Utf16::AUTO, "utf16be" Utf16::BIG and
// "utf16le" Utf16::LITTLE.
+ if(name=="ascii") return new Ascii(em);
+ if(name=="iso2022jp") return new Iso2022Jp(em);
+ if(name=="iso646fi") return new Iso646Fi(em);
+ if(name=="iso88591" || name=="latin1") return new Iso88591(em);
+ if(name=="iso885915" || name=="latin9") return new Iso885915(em);
+ if(name=="jisx0201") return new JisX0201(em);
+ if(name=="jisx0208") return new JisX0208(em);
+ if(name=="utf8") return new Utf8(em);
+ if(name=="utf16") return new Utf16(em, Utf16::AUTO);
+ if(name=="utf16be") return new Utf16(em, Utf16::BIG);
+ if(name=="utf16le") return new Utf16(em, Utf16::LITTLE);
+ if(name=="windows1252" || name=="cp1252") return new Windows1252(em);
+ throw invalid_argument("unknown string codec");
}
Codec *detect_codec(const string &str)