git.tdb.fi Git - libs/core.git/blobdiff - source/iso2022jp.cpp
Rework the codec API completely to remove the internal buffering
[libs/core.git] / source / iso2022jp.cpp
index de56552ddac8b4515e9b184b37a48ff9a0643038..4d3a1dffd97df07171b61e0dce312cb54037124b 100644 (file)
@@ -4,7 +4,6 @@ This file is part of libmspstrings
 Copyright © 2006-2007 Mikko Rasa
 Distributed under the LGPL
 */
-
 #include "ascii.h"
 #include "iso2022jp.h"
 #include "jisx0201.h"
@@ -13,137 +12,139 @@ Distributed under the LGPL
 using namespace std;
 
 namespace Msp {
+namespace Codecs {
 
-void Iso2022Jp::Encoder::encode_char(wchar_t c_)
+void Iso2022Jp::Encoder::encode_char(UnicodeChar ch, string &buf)
 {
-       // Win32 has typedef unsigned short wchar_t
-       int c=c_;
-
-       if(c>=0 && c<=0x7F && c!=0x5C && c!=0x7E)
+       if(ch>=0 && ch<=0x7F && ch!=0x5C && ch!=0x7E)
        {
                if(mode!=ASCII && mode!=JISX0201)
-                       switch_mode(ASCII);
-               append(c);
+                       switch_mode(ASCII, buf);
+               buf+=ch;
        }
-       else if(c==0x5C || c==0x7E)
+       else if(ch==0x5C || ch==0x7E)
        {
                if(mode!=ASCII)
-                       switch_mode(ASCII);
-               append(c);
+                       switch_mode(ASCII, buf);
+               buf+=ch;
        }
-       else if(c==0xA5 || c==0x203E)
+       else if(ch==0xA5 || ch==0x203E)
        {
                if(mode!=JISX0201)
-                       switch_mode(JISX0201);
-               if(c==0xA5)
-                       append(0x5C);
-               else if(c==0x203E)
-                       append(0x7E);
+                       switch_mode(JISX0201, buf);
+               if(ch==0xA5)
+                       buf+=0x5C;
+               else if(ch==0x203E)
+                       buf+=0x7E;
        }
        else
        {
-               unsigned short jis=ucs_to_jisx0208(c);
+               Kuten jis=ucs_to_jisx0208(ch);
                if(!jis)
-                       error("Can't express character in ISO-2022-JP");
-               else
-               {
-                       if(mode!=JISX0208)
-                               switch_mode(JISX0208);
+                       return error(ch, buf, "Can't express character in ISO-2022-JP");
 
-                       char buf[2]={jis>>8, jis};
-                       append(buf, 2);
-               }
+               if(mode!=JISX0208)
+                       switch_mode(JISX0208, buf);
+
+               char jbuf[2]={jis.ku+0x20, jis.ten+0x20};
+               buf.append(jbuf, 2);
        }
 }
 
-void Iso2022Jp::Encoder::sync()
+void Iso2022Jp::Encoder::sync(string &buf)
 {
        if(mode!=ASCII)
-               switch_mode(ASCII);
+               switch_mode(ASCII, buf);
+}
+
+void Iso2022Jp::Encoder::reset()
+{
+       mode=ASCII;
 }
 
-void Iso2022Jp::Encoder::switch_mode(Mode m)
+void Iso2022Jp::Encoder::switch_mode(Mode m, string &buf)
 {
        mode=m;
        switch(mode)
        {
-       case ASCII:    append("\033(B", 3); break;
-       case JISX0201: append("\033(J", 3); break;
-       case JISX0208: append("\033$B", 3); break;
+       case ASCII:    buf.append("\033(B", 3); break;
+       case JISX0201: buf.append("\033(J", 3); break;
+       case JISX0208: buf.append("\033$B", 3); break;
+       default: throw CodecError("WTF?  Invalid mode in Iso2022Jp::Encoder::switch_mode");
        }
 }
 
-void Iso2022Jp::Encoder::append_replacement()
+void Iso2022Jp::Encoder::transliterate(UnicodeChar, string &buf)
 {
        if(mode!=ASCII)
-               switch_mode(ASCII);
-       append(032);
+               switch_mode(ASCII, buf);
+       buf+='?';
 }
 
+
 Iso2022Jp::Decoder::Decoder(ErrorMode em):
-       StringCodec::Decoder(em),
+       Codec::Decoder(em),
        mode(ASCII),
-       dec(new Ascii::Decoder),
-       escape(0)
+       dec(new Ascii::Decoder)
 { }
 
-void Iso2022Jp::Decoder::decode_char(const string &str, string::const_iterator &i)
+UnicodeChar Iso2022Jp::Decoder::decode_char(const string &str, string::const_iterator &i)
 {
+       if(i==str.end())
+               return error("No input");
+
        while(i!=str.end())
        {
-               if(escape)
+               string::const_iterator j=i;
+
+               UnicodeChar result=-1;
+               if(*j==033)
                {
-                       escape=escape<<8 | static_cast<unsigned char>(*i);
-                       if(*i>='@' && *i<='Z')
+                       unsigned escape=0;
+                       for(++j; j!=str.end(); ++j)
                        {
-                               switch(escape)
-                               {
-                               case 0x1B2842: switch_mode(ASCII); break;    // ESC ( B
-                               case 0x1B284A: switch_mode(JISX0201); break; // ESC ( J
-                               case 0x1B2440:                               // ESC $ @
-                               case 0x1B2442: switch_mode(JISX0208); break; // ESC $ B
-                               default: error("Invalid ISO-2022-JP escape sequence");
-                               }
-                               escape=0;
+                               escape=escape<<8 | static_cast<unsigned char>(*j);
+                               if(*j>='@' && *j<='Z')
+                                       break;
                        }
+
+                       bool ok=true;
+                       switch(escape)
+                       {
+                       case 0x2842: switch_mode(ASCII); break;    // ESC ( B
+                       case 0x284A: switch_mode(JISX0201); break; // ESC ( J
+                       case 0x2440:                               // ESC $ @
+                       case 0x2442: switch_mode(JISX0208); break; // ESC $ B
+                       default: ok=false;
+                       }
+
+                       if(ok)
+                               i=j;
+                       else
+                               result=*i;
                        ++i;
                }
-               else if(*i==0x1B)
-               {
-                       escape=0x1B;
-                       ++i;
-               }
+               else if(dec)
+                       return dec->decode_char(str, i);
                else
-               {
-                       dec->decode_char(str, i);
-                       break;
-               }
+                       throw CodecError("WTF?  No sub-decoder for Iso2022Jp::Decoder");
+
+               if(result>=0)
+                       return result;
        }
+
+       return -1;
 }
 
-void Iso2022Jp::Decoder::sync()
+void Iso2022Jp::Decoder::reset()
 {
-       if(escape)
-       {
-               error("Sync in middle of ISO-2022-JP escape sequence");
-               escape=0;
-       }
-       
-       if(mode!=ASCII)
-       {
-               error("Sync while not in ASCII mode");
-               switch_mode(ASCII);
-       }
-       else
-       {
-               append(dec->get());
-               dec->flush();
-       }
+       delete dec;
+       mode=ASCII;
+       dec=new Ascii::Decoder;
 }
 
 void Iso2022Jp::Decoder::switch_mode(Mode m)
 {
-       append(dec->get());
        delete dec;
 
        mode=m;
@@ -155,4 +156,5 @@ void Iso2022Jp::Decoder::switch_mode(Mode m)
        }
 }
 
+} // namespace Codecs
 } // namespace Msp