]> git.tdb.fi Git - libs/core.git/blobdiff - source/jisx0208.cpp
Further style and comment adjustments
[libs/core.git] / source / jisx0208.cpp
index 1e5aec7db0bbafb63f06c55c961e5b2648acb69f..6b46b1b679b5a94636e807d350510174eb0eb86f 100644 (file)
@@ -12,73 +12,82 @@ Distributed under the LGPL
 using namespace std;
 
 namespace Msp {
+namespace Codecs {
 
-void JisX0208::Encoder::encode_char(wchar_t ucs)
+void JisX0208::Encoder::encode_char(UnicodeChar ucs, string &buf)
 {
-       unsigned short jis=ucs_to_jisx0208(ucs);
+       unsigned short jis = ucs_to_jisx0208(ucs);
        if(jis)
        {
-               char buf[2]={jis>>8, jis};
-               append(buf, 2);
+               char jbuf[2] = {jis>>8, jis};
+               buf.append(jbuf, 2);
        }
        else
-               error("Can't express character in JIS X 0208");
+               error(ucs, buf, "Can't express character in JIS X 0208");
 }
 
-
-void JisX0208::Decoder::decode_char(const string &str, string::const_iterator &i)
+void JisX0208::Encoder::transliterate(UnicodeChar, string &buf)
 {
-       if(i==str.end())
-               return;
+       buf.append("!)", 2);
+}
 
-       if(!high)
-               high=*i++;
 
+UnicodeChar JisX0208::Decoder::decode_char(const string &str, string::const_iterator &i)
+{
        if(i==str.end())
-               return;
+               return error("No input");
 
-       wchar_t ucs=jisx0208_to_ucs(high<<8 | *i++);
-       high=0;
+       string::const_iterator j = i;
+       Kuten jis;
+       jis.ku = *j++-0x20;
 
-       if(ucs)
-               append(ucs);
+       UnicodeChar result;
+       if(j==str.end())
+               result = error("Incomplete JIS X 0208 character");
        else
-               error("Invalid JIS X 0208 string (undefined character)");
-}
-
-void JisX0208::Decoder::sync()
-{
-       if(high)
        {
-               error("Sync in middle of JIS X 0208 character");
-               high=0;
+               jis.ten = *j++-0x20;
+               result = jisx0208_to_ucs(jis);
+               if(result==0)
+                       result = error("Undefined JIS X 0208 character");
        }
+
+       i = j;
+       return result;
 }
 
-wchar_t jisx0208_to_ucs(unsigned short jis)
+
+UnicodeChar jisx0208_to_ucs(Kuten jis)
 {
-       if((jis&0xFF)<0x21 || (jis&0xFF)>0x7E || (jis&0xFF00)<0x2100 || (jis&0xFF00)>0x7E00)
+       if(jis.ku==0 || jis.ku>0x5E || jis.ten==0 || jis.ten>0x5E)
                return 0;
 
-       return jisx0208_to_ucs_table[((jis>>8)&0xFF)*94 + (jis&0xFF)-0x21*95];
+       return jisx0208_to_ucs_table[jis.ku*94 + jis.ten - 95];
 }
 
-unsigned short ucs_to_jisx0208(wchar_t c)
+Kuten ucs_to_jisx0208(UnicodeChar c)
 {
-       if(c&0xFFFF0000) return 0;
+       if(c<0 || c>0xFFFF)
+               return Kuten();
 
-       unsigned i=0;
+       unsigned i = 0;
        for(unsigned bit=0x1000; bit; bit>>=1)
        {
                if(i+bit>=ucs_to_jisx0208_table_size)
                        continue;
                if(ucs_to_jisx0208_table[i+bit].ucs<=static_cast<unsigned short>(c))
-                       i+=bit;
+                       i += bit;
        }
 
+       Kuten result;
        if(ucs_to_jisx0208_table[i].ucs==static_cast<unsigned short>(c))
-               return ucs_to_jisx0208_table[i].jis;
-       return 0;
+       {
+               result.ku = (ucs_to_jisx0208_table[i].jis>>8)+1;
+               result.ten = ucs_to_jisx0208_table[i].jis+1;
+       }
+
+       return result;
 }
 
+} // namespace Codecs
 } // namespace Msp