virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
};
- Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
- Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual const char *get_name() const { return "ASCII"; }
+
+ virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
+ virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
};
} // namespace Codecs
if(name=="jisx0201") return new JisX0201;
if(name=="jisx0208") return new JisX0208;
if(name=="utf8") return new Utf8;
- if(name=="windows1252") return new Windows1252;
+ if(name=="windows1252" || name=="cp1252") return new Windows1252;
throw InvalidParameterValue("Unknown string codec");
}
+/*
+Guesses the encoding of a byte string and returns a newly allocated codec for
+it.  The candidates are tried in order of strictness: pure ASCII, well-formed
+UTF-8, ISO-8859-1 and finally Windows-1252 as the catch-all.  The caller owns
+the returned object.
+
+Only the structure of UTF-8 sequences is checked (lead byte followed by the
+right number of continuation bytes); overlong forms and code point ranges are
+not validated.
+*/
+Codec *detect_codec(const string &str)
+{
+	bool is_utf8=true;
+	bool is_ascii=true;
+	bool is_latin1=true;
+	// Number of UTF-8 continuation bytes still expected
+	unsigned utf8_mb=0;
+
+	for(string::const_iterator i=str.begin(); i!=str.end(); ++i)
+	{
+		unsigned char c=*i;
+
+		if(!(c&0x80))
+		{
+			// A 7-bit byte may not interrupt a multibyte sequence
+			if(utf8_mb)
+			{
+				is_utf8=false;
+				utf8_mb=0;
+			}
+			continue;
+		}
+
+		is_ascii=false;
+
+		if((c&0xC0)==0x80)
+		{
+			// 0x80...0x9F are C1 controls in ISO-8859-1 but printable
+			// characters in Windows-1252
+			if((c&0xE0)==0x80)
+				is_latin1=false;
+
+			// 10xxxxxx is only valid as a continuation byte
+			if(utf8_mb)
+				--utf8_mb;
+			else
+				is_utf8=false;
+		}
+		else
+		{
+			// 11xxxxxx starts a new sequence, which is an error if the
+			// previous one has not been completed yet
+			if(utf8_mb)
+				is_utf8=false;
+
+			// Count the set bits following the two topmost ones; each of
+			// them adds one continuation byte.  The count is capped so that
+			// the shift amount can never go negative (0xFF has no zero bit
+			// to stop at).
+			unsigned n_cont=1;
+			while(n_cont<6 && ((c>>(6-n_cont))&1))
+				++n_cont;
+
+			if(n_cont>5)
+			{
+				// 0xFE and 0xFF never occur in UTF-8
+				is_utf8=false;
+				utf8_mb=0;
+			}
+			else
+				utf8_mb=n_cont;
+		}
+	}
+
+	// The string ended in the middle of a multibyte sequence
+	if(utf8_mb)
+		is_utf8=false;
+
+	if(is_ascii)
+		return new Ascii;
+	else if(is_utf8)
+		return new Utf8;
+	else if(is_latin1)
+		return new Iso88591;
+	else
+		return new Windows1252;
+}
+
} // namespace Codecs
} // namespace Msp
virtual ~Codec() { }
+ /**
+ Returns the name of the encoding handled by this codec.
+ */
+ virtual const char *get_name() const =0;
+
/**
Creates an encoder for this codec.
*/
*/
Codec *create_codec(const std::string &);
+/**
+Automatically detects the encoding of a string and creates a codec for it.
+The result is one of ASCII, UTF-8, ISO-8859-1 or Windows-1252, tried in that
+order; Windows-1252 is returned when none of the others match.
+
+The codec is allocated with new and is owned by the caller.
+The codec must be deleted when it's no longer needed.
+*/
+Codec *detect_codec(const std::string &);
+
} // namespace Codecs
} // namespace Msp
virtual void switch_mode(Mode);
};
- Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
- Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual const char *get_name() const { return "ISO-2022-JP"; }
+
+ virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
+ virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
};
} // namespace Codecs
virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
};
- Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
- Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual const char *get_name() const { return "ISO-646-FI"; }
+
+ virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
+ virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
};
} // namespace Codecs
virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
};
- Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
- Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual const char *get_name() const { return "ISO-8859-1"; }
+
+ virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
+ virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
};
} // namespace Codecs
virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
};
- Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
- Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual const char *get_name() const { return "ISO-8859-15"; }
+
+ virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
+ virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
};
} // namespace Codecs
virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
};
- Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
- Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual const char *get_name() const { return "JIS X 0201"; }
+
+ virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
+ virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
};
} // namespace Codecs
virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
};
- Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
- Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual const char *get_name() const { return "JIS X 0208"; }
+
+ virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
+ virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
};
struct Kuten
virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
};
- Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
- Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual const char *get_name() const { return "UTF-8"; }
+
+ virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
+ virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
};
} // namespace Codecs
virtual UnicodeChar decode_char(const std::string &, std::string::const_iterator &);
};
- Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
- Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
+ virtual const char *get_name() const { return "Windows-1252"; }
+
+ virtual Encoder *create_encoder(ErrorMode em=THROW_ON_ERROR) const { return new Encoder(em); }
+ virtual Decoder *create_decoder(ErrorMode em=THROW_ON_ERROR) const { return new Decoder(em); }
};
} // namespace Codecs