Copyright © 2006-2007 Mikko Rasa
Distributed under the LGPL
*/
-
#include "ascii.h"
#include "iso2022jp.h"
#include "jisx0201.h"
using namespace std;
namespace Msp {
+namespace Codecs {
// Diff fragment for Iso2022Jp::Encoder::encode_char.  Lines prefixed '-' are
// the old version (takes a wchar_t, emits through append()); lines prefixed
// '+' are the new version (takes a UnicodeChar and writes into the
// caller-supplied string buf).  Unprefixed lines are shared by both.
//
// The function encodes one character, first switching the encoder's output
// mode when the current mode cannot express it.
-void Iso2022Jp::Encoder::encode_char(wchar_t c_)
+void Iso2022Jp::Encoder::encode_char(UnicodeChar ch, string &buf)
{
- // Win32 has typedef unsigned short wchar_t
- int c=c_;
-
// Code points 0x00-0x7F other than 0x5C and 0x7E are emitted unchanged; a
// switch to ASCII is made only if the mode is neither ASCII nor JISX0201.
- if(c>=0 && c<=0x7F && c!=0x5C && c!=0x7E)
+ if(ch>=0 && ch<=0x7F && ch!=0x5C && ch!=0x7E)
{
if(mode!=ASCII && mode!=JISX0201)
- switch_mode(ASCII);
- append(c);
+ switch_mode(ASCII, buf);
+ buf+=ch;
}
// 0x5C and 0x7E are emitted unchanged, but only ever in ASCII mode.
- else if(c==0x5C || c==0x7E)
+ else if(ch==0x5C || ch==0x7E)
{
if(mode!=ASCII)
- switch_mode(ASCII);
- append(c);
+ switch_mode(ASCII, buf);
+ buf+=ch;
}
// U+00A5 and U+203E are emitted in JISX0201 mode, as the bytes 0x5C and
// 0x7E respectively.
- else if(c==0xA5 || c==0x203E)
+ else if(ch==0xA5 || ch==0x203E)
{
if(mode!=JISX0201)
- switch_mode(JISX0201);
- if(c==0xA5)
- append(0x5C);
- else if(c==0x203E)
- append(0x7E);
+ switch_mode(JISX0201, buf);
+ if(ch==0xA5)
+ buf+=0x5C;
+ else if(ch==0x203E)
+ buf+=0x7E;
}
else
{
// Everything else is looked up with ucs_to_jisx0208().  A result that tests
// false is reported through error(); the new version returns right there
// instead of using an else branch.
- unsigned short jis=ucs_to_jisx0208(c);
+ Kuten jis=ucs_to_jisx0208(ch);
if(!jis)
- error("Can't express character in ISO-2022-JP");
- else
- {
- if(mode!=JISX0208)
- switch_mode(JISX0208);
+ return error(ch, buf, "Can't express character in ISO-2022-JP");
// Old version: emitted the high and low byte of the 16-bit lookup result.
- char buf[2]={jis>>8, jis};
- append(buf, 2);
- }
+ if(mode!=JISX0208)
+ switch_mode(JISX0208, buf);
+
// New version: emits two bytes, jis.ku and jis.ten, each offset by 0x20.
+ char jbuf[2]={jis.ku+0x20, jis.ten+0x20};
+ buf.append(jbuf, 2);
}
}
// Diff fragment: Encoder::sync gains a string &buf parameter, and a new
// Encoder::reset is added after it ('-' = old line, '+' = new line).
//
// sync: if the encoder is not in ASCII mode, switches it back to ASCII.  In
// the new version the switch is written into buf.
-void Iso2022Jp::Encoder::sync()
+void Iso2022Jp::Encoder::sync(string &buf)
{
if(mode!=ASCII)
- switch_mode(ASCII);
+ switch_mode(ASCII, buf);
+}
+
// reset (added): sets mode back to ASCII directly; it takes no buffer and
// does not call switch_mode.
+void Iso2022Jp::Encoder::reset()
+{
+ mode=ASCII;
}
// Diff fragment for Encoder::switch_mode ('-' = old line, '+' = new line).
//
// Records m as the current mode and emits the matching three-byte escape
// sequence: ESC ( B for ASCII, ESC ( J for JISX0201, ESC $ B for JISX0208.
// The old version emitted through append(); the new version appends to the
// caller-supplied buf and adds a default case that throws CodecError for any
// other mode value.
-void Iso2022Jp::Encoder::switch_mode(Mode m)
+void Iso2022Jp::Encoder::switch_mode(Mode m, string &buf)
{
mode=m;
switch(mode)
{
- case ASCII: append("\033(B", 3); break;
- case JISX0201: append("\033(J", 3); break;
- case JISX0208: append("\033$B", 3); break;
+ case ASCII: buf.append("\033(B", 3); break;
+ case JISX0201: buf.append("\033(J", 3); break;
+ case JISX0208: buf.append("\033$B", 3); break;
+ default: throw CodecError("WTF? Invalid mode in Iso2022Jp::Encoder::switch_mode");
}
}
// Diff fragment: append_replacement() is replaced by transliterate()
// ('-' = old line, '+' = new line).
//
// Both versions first make sure the encoder is in ASCII mode and then emit a
// single substitute byte.  The old version appended octal 032; the new
// version appends '?' to buf.  The UnicodeChar argument of the new version is
// unnamed and unused.
-void Iso2022Jp::Encoder::append_replacement()
+void Iso2022Jp::Encoder::transliterate(UnicodeChar, string &buf)
{
if(mode!=ASCII)
- switch_mode(ASCII);
- append(032);
+ switch_mode(ASCII, buf);
+ buf+='?';
}
+
// Diff fragment for the Decoder constructor ('-' = old line, '+' = new line).
//
// Forwards the error mode em, starts in ASCII mode and allocates an
// Ascii::Decoder as the initial sub-decoder.  The new version forwards em to
// Codec::Decoder instead of StringCodec::Decoder and no longer initializes an
// escape member.
Iso2022Jp::Decoder::Decoder(ErrorMode em):
- StringCodec::Decoder(em),
+ Codec::Decoder(em),
mode(ASCII),
- dec(new Ascii::Decoder),
- escape(0)
+ dec(new Ascii::Decoder)
{ }
// Diff fragment for Decoder::decode_char ('-' = old line, '+' = new line).
//
// Old version: returned void and kept a partially read escape sequence in
// the member `escape` between calls.
// New version: returns the decoded UnicodeChar and parses a whole escape
// sequence within one call using a local accumulator.
//
// str is the input string; i is the read position and is advanced past
// whatever was consumed.
-void Iso2022Jp::Decoder::decode_char(const string &str, string::const_iterator &i)
+UnicodeChar Iso2022Jp::Decoder::decode_char(const string &str, string::const_iterator &i)
{
// New: being called at the end of input is reported through error().
+ if(i==str.end())
+ return error("No input");
+
while(i!=str.end())
{
- if(escape)
// New: j scans ahead of i, so i only moves past an escape sequence once the
// sequence has been recognized.
+ string::const_iterator j=i;
+
// result stays negative until there is a character to hand back.
+ UnicodeChar result=-1;
+ if(*j==033)
{
- escape=escape<<8 | static_cast<unsigned char>(*i);
- if(*i>='@' && *i<='Z')
// New: accumulate the bytes after ESC, 8 bits each, stopping after the first
// byte in the range '@'..'Z' or at the end of the input.
+ unsigned escape=0;
+ for(++j; j!=str.end(); ++j)
{
- switch(escape)
- {
- case 0x1B2842: switch_mode(ASCII); break; // ESC ( B
- case 0x1B284A: switch_mode(JISX0201); break; // ESC ( J
- case 0x1B2440: // ESC $ @
- case 0x1B2442: switch_mode(JISX0208); break; // ESC $ B
- default: error("Invalid ISO-2022-JP escape sequence");
- }
- escape=0;
+ escape=escape<<8 | static_cast<unsigned char>(*j);
+ if(*j>='@' && *j<='Z')
+ break;
}
+
// New: the case values hold only the bytes after ESC; the old values above
// also included the leading 0x1B.
+ bool ok=true;
+ switch(escape)
+ {
+ case 0x2842: switch_mode(ASCII); break; // ESC ( B
+ case 0x284A: switch_mode(JISX0201); break; // ESC ( J
+ case 0x2440: // ESC $ @
+ case 0x2442: switch_mode(JISX0208); break; // ESC $ B
+ default: ok=false;
+ }
+
// Recognized sequence: move i to its last byte, so the ++i below steps past
// it.  Unrecognized: hand back the byte at i (the ESC) as the result and
// advance by one byte only.
+ if(ok)
+ i=j;
+ else
+ result=*i;
++i;
}
- else if(*i==0x1B)
- {
- escape=0x1B;
- ++i;
- }
// New: any non-ESC byte is delegated to the current sub-decoder; a null
// sub-decoder is treated as an internal error.
+ else if(dec)
+ return dec->decode_char(str, i);
else
- {
- dec->decode_char(str, i);
- break;
- }
+ throw CodecError("WTF? No sub-decoder for Iso2022Jp::Decoder");
+
+ if(result>=0)
+ return result;
}
+
// New: reached when the input runs out right after recognized escape
// sequences, with no character decoded.
+ return -1;
}
// Diff fragment: Decoder::sync() is removed and Decoder::reset() takes its
// place ('-' = old line, '+' = new line).
//
// Old sync(): reported an error if called in the middle of an escape sequence
// or while not in ASCII mode (switching back to ASCII in the latter case);
// otherwise it appended the sub-decoder's pending output and flushed it.
//
// New reset(): deletes the current sub-decoder, sets the mode to ASCII and
// allocates a fresh Ascii::Decoder.  It reports no errors.
-void Iso2022Jp::Decoder::sync()
+void Iso2022Jp::Decoder::reset()
{
- if(escape)
- {
- error("Sync in middle of ISO-2022-JP escape sequence");
- escape=0;
- }
-
- if(mode!=ASCII)
- {
- error("Sync while not in ASCII mode");
- switch_mode(ASCII);
- }
- else
- {
- append(dec->get());
- dec->flush();
- }
+ delete dec;
+ mode=ASCII;
+ dec=new Ascii::Decoder;
}
void Iso2022Jp::Decoder::switch_mode(Mode m)
{
- append(dec->get());
delete dec;
mode=m;
}
}
+} // namespace Codecs
} // namespace Msp