X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fiso2022jp.cpp;h=d9c7ceac147b0e08ccd3e8c3c684dce6cb947722;hp=6a6bd1dc7970b28c367fbfad07d5719c505bc0ad;hb=5b1368cb791cab043f0435628cacbaff36e39b7b;hpb=5ea1720bc90416df7ac5e28b145e9ebf7f76b7a2 diff --git a/source/iso2022jp.cpp b/source/iso2022jp.cpp index 6a6bd1d..d9c7cea 100644 --- a/source/iso2022jp.cpp +++ b/source/iso2022jp.cpp @@ -1,3 +1,9 @@ +/* $Id$ + +This file is part of libmspstrings +Copyright © 2006-2007 Mikko Rasa +Distributed under the LGPL +*/ #include "ascii.h" #include "iso2022jp.h" #include "jisx0201.h" @@ -6,122 +12,149 @@ using namespace std; namespace Msp { +namespace Codecs { -void Iso2022Jp::Encoder::encode_char(wchar_t c) +void Iso2022Jp::Encoder::encode_char(UnicodeChar ch, string &buf) { - if(c>=0 && c<=0x7F && c!=0x5C && c!=0x7E) + if(ch>=0 && ch<=0x7F && ch!=0x5C && ch!=0x7E) { if(mode!=ASCII && mode!=JISX0201) - switch_mode(ASCII); - append(c); + switch_mode(ASCII, buf); + buf += ch; } - else if(c==0x5C || c==0x7E) + else if(ch==0x5C || ch==0x7E) { if(mode!=ASCII) - switch_mode(ASCII); - append(c); + switch_mode(ASCII, buf); + buf += ch; } - else if(c==0xA5 || c==0x203E) + else if(ch==0xA5 || ch==0x203E) { if(mode!=JISX0201) - switch_mode(JISX0201); - if(c==0xA5) - append(0x5C); - else if(c==0x203E) - append(0x7E); + switch_mode(JISX0201, buf); + if(ch==0xA5) + buf += 0x5C; + else if(ch==0x203E) + buf += 0x7E; } else { - unsigned short jis=ucs_to_jisx0208(c); + Kuten jis = ucs_to_jisx0208(ch); if(!jis) - throw CodecError("Can't express character in ISO-2022-JP"); + return error(ch, buf, "Can't express character in ISO-2022-JP"); + if(mode!=JISX0208) - switch_mode(JISX0208); + switch_mode(JISX0208, buf); - char buf[2]={jis>>8, jis}; - append(buf, 2); + char jbuf[2] = {jis.ku+0x20, jis.ten+0x20}; + buf.append(jbuf, 2); } } -void Iso2022Jp::Encoder::sync() +void Iso2022Jp::Encoder::sync(string &buf) { if(mode!=ASCII) - switch_mode(ASCII); + switch_mode(ASCII, buf); } -void Iso2022Jp::Encoder::switch_mode(Mode m) +void Iso2022Jp::Encoder::reset() { - mode=m; + mode = ASCII; +} + +void Iso2022Jp::Encoder::switch_mode(Mode m, string &buf) +{ + mode = m; switch(mode) { - case ASCII: append("\033(B", 3); break; - case JISX0201: append("\033(J", 3); break; - case JISX0208: append("\033$B", 3); break; + case ASCII: buf.append("\033(B", 3); break; + case JISX0201: buf.append("\033(J", 3); break; + case JISX0208: buf.append("\033$B", 3); break; + default: throw CodecError("WTF? Invalid mode in Iso2022Jp::Encoder::switch_mode"); } } -Iso2022Jp::Decoder::Decoder(): +void Iso2022Jp::Encoder::transliterate(UnicodeChar, string &buf) +{ + if(mode!=ASCII) + switch_mode(ASCII, buf); + buf += '?'; +} + + +Iso2022Jp::Decoder::Decoder(ErrorMode em): + Codec::Decoder(em), mode(ASCII), - dec(new Ascii::Decoder), - escape(0) + dec(new Ascii::Decoder) { } -void Iso2022Jp::Decoder::decode_char(const string &str, string::const_iterator &i) +UnicodeChar Iso2022Jp::Decoder::decode_char(const string &str, string::const_iterator &i) { + if(i==str.end()) + return error("No input"); + while(i!=str.end()) { - if(escape) + string::const_iterator j = i; + + UnicodeChar result = -1; + if(*j==033) { - escape=escape<<8 | (unsigned char)*i; - if(*i>='@' && *i<='Z') + unsigned escape = 0; + for(++j; j!=str.end(); ++j) { - switch(escape) - { - case 0x1B2842: switch_mode(ASCII); break; // ESC ( B - case 0x1B284A: switch_mode(JISX0201); break; // ESC ( J - case 0x1B2440: // ESC $ @ - case 0x1B2442: switch_mode(JISX0208); break; // ESC $ B - default: throw CodecError("Invalid ISO-2022-JP escape sequence"); - } - escape=0; + escape = escape<<8 | static_cast(*j); + if(*j>='@' && *j<='Z') + break; } + + bool ok = true; + switch(escape) + { + case 0x2842: switch_mode(ASCII); break; // ESC ( B + case 0x284A: switch_mode(JISX0201); break; // ESC ( J + case 0x2440: // ESC $ @ + case 0x2442: switch_mode(JISX0208); break; // ESC $ B + default: ok = false; + } + + if(ok) + i = j; + else + result = *i; ++i; } - else if(*i==0x1B) - { - escape=0x1B; - ++i; - } + else if(dec) + return dec->decode_char(str, i); else - { - dec->decode_char(str, i); - break; - } + throw CodecError("WTF? No sub-decoder for Iso2022Jp::Decoder"); + + if(result>=0) + return result; } + + return -1; } -void Iso2022Jp::Decoder::sync() +void Iso2022Jp::Decoder::reset() { - if(escape) - throw CodecError("Sync in middle of ISO-2022-JP escape sequence"); - if(mode!=ASCII) - throw CodecError("Sync while not in ASCII mode"); - append(dec->get()); - dec->flush(); + delete dec; + mode = ASCII; + dec = new Ascii::Decoder; } void Iso2022Jp::Decoder::switch_mode(Mode m) { - append(dec->get()); delete dec; - mode=m; + mode = m; switch(mode) { - case ASCII: dec=new Ascii::Decoder; break; - case JISX0201: dec=new JisX0201::Decoder; break; - case JISX0208: dec=new JisX0208::Decoder; break; + case ASCII: dec = new Ascii::Decoder; break; + case JISX0201: dec = new JisX0201::Decoder; break; + case JISX0208: dec = new JisX0208::Decoder; break; } } +} // namespace Codecs } // namespace Msp