X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fiso2022jp.cpp;h=4d3a1dffd97df07171b61e0dce312cb54037124b;hp=de56552ddac8b4515e9b184b37a48ff9a0643038;hb=f47bc86e6ce900c5323e593db003c93110538268;hpb=79d472ad3fde75de2eba2487579b047d35e56978 diff --git a/source/iso2022jp.cpp b/source/iso2022jp.cpp index de56552..4d3a1df 100644 --- a/source/iso2022jp.cpp +++ b/source/iso2022jp.cpp @@ -4,7 +4,6 @@ This file is part of libmspstrings Copyright © 2006-2007 Mikko Rasa Distributed under the LGPL */ - #include "ascii.h" #include "iso2022jp.h" #include "jisx0201.h" @@ -13,137 +12,139 @@ Distributed under the LGPL using namespace std; namespace Msp { +namespace Codecs { -void Iso2022Jp::Encoder::encode_char(wchar_t c_) +void Iso2022Jp::Encoder::encode_char(UnicodeChar ch, string &buf) { - // Win32 has typedef unsigned short wchar_t - int c=c_; - - if(c>=0 && c<=0x7F && c!=0x5C && c!=0x7E) + if(ch>=0 && ch<=0x7F && ch!=0x5C && ch!=0x7E) { if(mode!=ASCII && mode!=JISX0201) - switch_mode(ASCII); - append(c); + switch_mode(ASCII, buf); + buf+=ch; } - else if(c==0x5C || c==0x7E) + else if(ch==0x5C || ch==0x7E) { if(mode!=ASCII) - switch_mode(ASCII); - append(c); + switch_mode(ASCII, buf); + buf+=ch; } - else if(c==0xA5 || c==0x203E) + else if(ch==0xA5 || ch==0x203E) { if(mode!=JISX0201) - switch_mode(JISX0201); - if(c==0xA5) - append(0x5C); - else if(c==0x203E) - append(0x7E); + switch_mode(JISX0201, buf); + if(ch==0xA5) + buf+=0x5C; + else if(ch==0x203E) + buf+=0x7E; } else { - unsigned short jis=ucs_to_jisx0208(c); + Kuten jis=ucs_to_jisx0208(ch); if(!jis) - error("Can't express character in ISO-2022-JP"); - else - { - if(mode!=JISX0208) - switch_mode(JISX0208); + return error(ch, buf, "Can't express character in ISO-2022-JP"); - char buf[2]={jis>>8, jis}; - append(buf, 2); - } + if(mode!=JISX0208) + switch_mode(JISX0208, buf); + + char jbuf[2]={jis.ku+0x20, jis.ten+0x20}; + buf.append(jbuf, 2); } } -void Iso2022Jp::Encoder::sync() +void Iso2022Jp::Encoder::sync(string &buf) { if(mode!=ASCII) - switch_mode(ASCII); + switch_mode(ASCII, buf); +} + +void Iso2022Jp::Encoder::reset() +{ + mode=ASCII; } -void Iso2022Jp::Encoder::switch_mode(Mode m) +void Iso2022Jp::Encoder::switch_mode(Mode m, string &buf) { mode=m; switch(mode) { - case ASCII: append("\033(B", 3); break; - case JISX0201: append("\033(J", 3); break; - case JISX0208: append("\033$B", 3); break; + case ASCII: buf.append("\033(B", 3); break; + case JISX0201: buf.append("\033(J", 3); break; + case JISX0208: buf.append("\033$B", 3); break; + default: throw CodecError("WTF? Invalid mode in Iso2022Jp::Encoder::switch_mode"); } } -void Iso2022Jp::Encoder::append_replacement() +void Iso2022Jp::Encoder::transliterate(UnicodeChar, string &buf) { if(mode!=ASCII) - switch_mode(ASCII); - append(032); + switch_mode(ASCII, buf); + buf+='?'; } + Iso2022Jp::Decoder::Decoder(ErrorMode em): - StringCodec::Decoder(em), + Codec::Decoder(em), mode(ASCII), - dec(new Ascii::Decoder), - escape(0) + dec(new Ascii::Decoder) { } -void Iso2022Jp::Decoder::decode_char(const string &str, string::const_iterator &i) +UnicodeChar Iso2022Jp::Decoder::decode_char(const string &str, string::const_iterator &i) { + if(i==str.end()) + return error("No input"); + while(i!=str.end()) { - if(escape) + string::const_iterator j=i; + + UnicodeChar result=-1; + if(*j==033) { - escape=escape<<8 | static_cast(*i); - if(*i>='@' && *i<='Z') + unsigned escape=0; + for(++j; j!=str.end(); ++j) { - switch(escape) - { - case 0x1B2842: switch_mode(ASCII); break; // ESC ( B - case 0x1B284A: switch_mode(JISX0201); break; // ESC ( J - case 0x1B2440: // ESC $ @ - case 0x1B2442: switch_mode(JISX0208); break; // ESC $ B - default: error("Invalid ISO-2022-JP escape sequence"); - } - escape=0; + escape=escape<<8 | static_cast(*j); + if(*j>='@' && *j<='Z') + break; } + + bool ok=true; + switch(escape) + { + case 0x2842: switch_mode(ASCII); break; // ESC ( B + case 0x284A: switch_mode(JISX0201); break; // ESC ( J + case 0x2440: // ESC $ @ + case 0x2442: switch_mode(JISX0208); break; // ESC $ B + default: ok=false; + } + + if(ok) + i=j; + else + result=*i; ++i; } - else if(*i==0x1B) - { - escape=0x1B; - ++i; - } + else if(dec) + return dec->decode_char(str, i); else - { - dec->decode_char(str, i); - break; - } + throw CodecError("WTF? No sub-decoder for Iso2022Jp::Decoder"); + + if(result>=0) + return result; } + + return -1; } -void Iso2022Jp::Decoder::sync() +void Iso2022Jp::Decoder::reset() { - if(escape) - { - error("Sync in middle of ISO-2022-JP escape sequence"); - escape=0; - } - - if(mode!=ASCII) - { - error("Sync while not in ASCII mode"); - switch_mode(ASCII); - } - else - { - append(dec->get()); - dec->flush(); - } + delete dec; + mode=ASCII; + dec=new Ascii::Decoder; } void Iso2022Jp::Decoder::switch_mode(Mode m) { - append(dec->get()); delete dec; mode=m; @@ -155,4 +156,5 @@ void Iso2022Jp::Decoder::switch_mode(Mode m) } } +} // namespace Codecs } // namespace Msp