+/* $Id$
+
+This file is part of libmspstrings
+Copyright © 2006-2007 Mikko Rasa
+Distributed under the LGPL
+*/
#include "ascii.h"
#include "iso2022jp.h"
#include "jisx0201.h"
using namespace std;
namespace Msp {
+namespace Codecs {
-void Iso2022Jp::Encoder::encode_char(wchar_t c)
+void Iso2022Jp::Encoder::encode_char(UnicodeChar ch, string &buf) // Appends the encoding of ch to buf, preceded by a mode-switch escape when the current mode cannot carry it
{
- if(c>=0 && c<=0x7F && c!=0x5C && c!=0x7E)
+ if(ch>=0 && ch<=0x7F && ch!=0x5C && ch!=0x7E) // Codes 0x00-0x7F other than 0x5C and 0x7E are written unchanged in either ASCII or JISX0201 mode
{
if(mode!=ASCII && mode!=JISX0201)
- switch_mode(ASCII);
- append(c);
+ switch_mode(ASCII, buf);
+ buf += ch;
}
- else if(c==0x5C || c==0x7E)
+ else if(ch==0x5C || ch==0x7E) // 0x5C and 0x7E themselves are only written in ASCII mode
{
if(mode!=ASCII)
- switch_mode(ASCII);
- append(c);
+ switch_mode(ASCII, buf);
+ buf += ch;
}
- else if(c==0xA5 || c==0x203E)
+ else if(ch==0xA5 || ch==0x203E) // U+00A5 and U+203E are written in JISX0201 mode as bytes 0x5C and 0x7E respectively
{
if(mode!=JISX0201)
- switch_mode(JISX0201);
- if(c==0xA5)
- append(0x5C);
- else if(c==0x203E)
- append(0x7E);
+ switch_mode(JISX0201, buf);
+ if(ch==0xA5)
+ buf += 0x5C;
+ else if(ch==0x203E)
+ buf += 0x7E;
}
else
{
- unsigned short jis=ucs_to_jisx0208(c);
+ Kuten jis = ucs_to_jisx0208(ch); // Every remaining character is looked up with ucs_to_jisx0208
if(!jis)
- throw CodecError("Can't express character in ISO-2022-JP");
+ return error(ch, buf, "Can't express character in ISO-2022-JP"); // A false Kuten is handed to error(); this function writes nothing further for ch
+
if(mode!=JISX0208)
- switch_mode(JISX0208);
+ switch_mode(JISX0208, buf);
- char buf[2]={jis>>8, jis};
- append(buf, 2);
+ char jbuf[2] = {jis.ku+0x20, jis.ten+0x20}; // Two bytes: ku and ten, each offset by 0x20. NOTE(review): if ku/ten are integral this brace-init narrows int to char, which C++11 and later reject - confirm target standard
+ buf.append(jbuf, 2);
}
}
-void Iso2022Jp::Encoder::sync()
+void Iso2022Jp::Encoder::sync(string &buf) // Appends the escape back to ASCII to buf when the encoder is in any other mode; otherwise writes nothing
{
if(mode!=ASCII)
- switch_mode(ASCII);
+ switch_mode(ASCII, buf);
}
-void Iso2022Jp::Encoder::switch_mode(Mode m)
+void Iso2022Jp::Encoder::reset() // Returns the encoder state to ASCII mode
{
- mode=m;
+ mode = ASCII; // Only the state changes; unlike sync(), no escape sequence is written anywhere
+}
+
+void Iso2022Jp::Encoder::switch_mode(Mode m, string &buf) // Makes m the current mode and appends its three-byte escape sequence to buf
+{
+ mode = m; // NOTE(review): assigned before validation, so when the default branch throws, mode already holds the invalid value
switch(mode)
{
- case ASCII: append("\033(B", 3); break;
- case JISX0201: append("\033(J", 3); break;
- case JISX0208: append("\033$B", 3); break;
+ case ASCII: buf.append("\033(B", 3); break; // ESC ( B
+ case JISX0201: buf.append("\033(J", 3); break; // ESC ( J
+ case JISX0208: buf.append("\033$B", 3); break; // ESC $ B
+ default: throw CodecError("WTF? Invalid mode in Iso2022Jp::Encoder::switch_mode");
}
}
-Iso2022Jp::Decoder::Decoder():
+void Iso2022Jp::Encoder::transliterate(UnicodeChar, string &buf) // Writes a '?' placeholder to buf; the character argument is unnamed and ignored
+{
+ if(mode!=ASCII)
+ switch_mode(ASCII, buf); // The placeholder is written in ASCII mode, so leave any other mode first
+ buf += '?';
+}
+
+
+Iso2022Jp::Decoder::Decoder(ErrorMode em): // Starts in ASCII mode with an Ascii::Decoder as the sub-decoder
+ Codec::Decoder(em), // em is handed to the base class
mode(ASCII),
- dec(new Ascii::Decoder),
- escape(0)
+ dec(new Ascii::Decoder) // NOTE(review): the sub-decoder is default-constructed, so em is not forwarded to it - confirm this is intended
{ }
-void Iso2022Jp::Decoder::decode_char(const string &str, string::const_iterator &i)
+UnicodeChar Iso2022Jp::Decoder::decode_char(const string &str, string::const_iterator &i) // Returns one character decoded at i, first consuming any mode-switch escapes in front of it; i is advanced past what was read
{
+ if(i==str.end())
+ return error("No input"); // Being called at end of input is reported through error()
+
while(i!=str.end())
{
- if(escape)
+ string::const_iterator j = i; // j scans ahead so that i stays on the ESC byte if the sequence turns out to be unrecognised
+
+ UnicodeChar result = -1; // -1 means this iteration produced no character
+ if(*j==033) // 033 is the ESC byte
{
- escape=escape<<8 | static_cast<unsigned char>(*i);
- if(*i>='@' && *i<='Z')
+ unsigned escape = 0;
+ for(++j; j!=str.end(); ++j)
{
- switch(escape)
- {
- case 0x1B2842: switch_mode(ASCII); break; // ESC ( B
- case 0x1B284A: switch_mode(JISX0201); break; // ESC ( J
- case 0x1B2440: // ESC $ @
- case 0x1B2442: switch_mode(JISX0208); break; // ESC $ B
- default: throw CodecError("Invalid ISO-2022-JP escape sequence");
- }
- escape=0;
+ escape = escape<<8 | static_cast<unsigned char>(*j); // Packs the bytes after ESC into one integer. NOTE(review): there is no length limit, so e.g. ESC NUL ( B also yields 0x2842
+ if(*j>='@' && *j<='Z') // A byte in '@'..'Z' ends the sequence
+ break;
}
+
+ bool ok = true;
+ switch(escape)
+ {
+ case 0x2842: switch_mode(ASCII); break; // ESC ( B
+ case 0x284A: switch_mode(JISX0201); break; // ESC ( J
+ case 0x2440: // ESC $ @
+ case 0x2442: switch_mode(JISX0208); break; // ESC $ B
+ default: ok = false; // Any other value is not treated as a mode switch
+ }
+
+ if(ok)
+ i = j; // Jump to the final byte of the escape; the ++i below steps past it
+ else
+ result = *i; // i still points at ESC, so the ESC byte itself is returned and only that byte is consumed
++i;
}
- else if(*i==0x1B)
- {
- escape=0x1B;
- ++i;
- }
+ else if(dec)
+ return dec->decode_char(str, i); // Anything that is not ESC is delegated to the current sub-decoder
else
- {
- dec->decode_char(str, i);
- break;
- }
+ throw CodecError("WTF? No sub-decoder for Iso2022Jp::Decoder");
+
+ if(result>=0)
+ return result;
}
+
+ return -1; // Reached when the input ran out after valid escapes without yielding a character
}
-void Iso2022Jp::Decoder::sync()
+void Iso2022Jp::Decoder::reset() // Discards the current sub-decoder and returns to ASCII mode with a fresh Ascii::Decoder
{
- if(escape)
- throw CodecError("Sync in middle of ISO-2022-JP escape sequence");
- if(mode!=ASCII)
- throw CodecError("Sync while not in ASCII mode");
- append(dec->get());
- dec->flush();
+ delete dec; // NOTE(review): if the new two lines below throws, dec is left pointing at the deleted object
+ mode = ASCII;
+ dec = new Ascii::Decoder;
}
void Iso2022Jp::Decoder::switch_mode(Mode m)
{
- append(dec->get());
delete dec;
- mode=m;
+ mode = m; // Records the new mode; the sub-decoder deleted above is replaced by one matching m in the switch below
switch(mode)
{
- case ASCII: dec=new Ascii::Decoder; break;
- case JISX0201: dec=new JisX0201::Decoder; break;
- case JISX0208: dec=new JisX0208::Decoder; break;
+ case ASCII: dec = new Ascii::Decoder; break;
+ case JISX0201: dec = new JisX0201::Decoder; break;
+ case JISX0208: dec = new JisX0208::Decoder; break; // NOTE(review): there is no default case - an unexpected m leaves dec dangling after the delete above, whereas Encoder::switch_mode throws CodecError for an invalid mode
}
}
+} // namespace Codecs
} // namespace Msp