X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fjisx0208.cpp;h=0dd0d211743c5f3680376e069760189e26c2313c;hp=0f0e73d7545afdba78e5ddc68f3ad92686bd28c0;hb=f47bc86e6ce900c5323e593db003c93110538268;hpb=58384e355b4a78730d69243f1092e47591f2f384 diff --git a/source/jisx0208.cpp b/source/jisx0208.cpp index 0f0e73d..0dd0d21 100644 --- a/source/jisx0208.cpp +++ b/source/jisx0208.cpp @@ -1,3 +1,10 @@ +/* $Id$ + +This file is part of libmspstrings +Copyright © 2006-2007 Mikko Rasa +Distributed under the LGPL +*/ + #include "jisx0208.h" #include "jisx0208.table" @@ -5,68 +12,82 @@ using namespace std; namespace Msp { +namespace Codecs { -void JisX0208::Encoder::encode_char(wchar_t ucs) +void JisX0208::Encoder::encode_char(UnicodeChar ucs, string &buf) { unsigned short jis=ucs_to_jisx0208(ucs); - if(!jis) throw CodecError("Can't express character in JIS X 0208"); - - char buf[2]={jis>>8, jis}; - append(buf, 2); + if(jis) + { + char jbuf[2]={jis>>8, jis}; + buf.append(jbuf, 2); + } + else + error(ucs, buf, "Can't express character in JIS X 0208"); } - -void JisX0208::Decoder::decode_char(const string &str, string::const_iterator &i) +void JisX0208::Encoder::transliterate(UnicodeChar, string &buf) { - if(i==str.end()) - return; + buf.append("!)", 2); +} - if(!high) - high=*i++; +UnicodeChar JisX0208::Decoder::decode_char(const string &str, string::const_iterator &i) +{ if(i==str.end()) - return; + return error("No input"); - wchar_t ucs=jisx0208_to_ucs(high<<8 | *i++); - high=0; + string::const_iterator j=i; + Kuten jis; + jis.ku=*j++-0x20; - if(!ucs) - throw CodecError("Invalid JIS X 0208 string (undefined character)"); - - append(ucs); -} + UnicodeChar result; + if(j==str.end()) + result=error("Incomplete JIS X 0208 character"); + else + { + jis.ten=*j++-0x20; + result=jisx0208_to_ucs(jis); + if(result==0) + result=error("Undefined JIS X 0208 character"); + } -void JisX0208::Decoder::sync() -{ - if(high) - throw CodecError("Sync in middle of JIS X 0208 character"); + i=j; + return result; } -wchar_t jisx0208_to_ucs(unsigned short jis) +UnicodeChar jisx0208_to_ucs(Kuten jis) { - if((jis&0xFF)<0x21 || (jis&0xFF)>0x7E || (jis&0xFF00)<0x2100 || (jis&0xFF00)>0x7E00) + if(jis.ku==0 || jis.ku>0x5E || jis.ten==0 || jis.ten>0x5E) return 0; - return jisx0208_to_ucs_table[((jis>>8)&0xFF)*94 + (jis&0xFF)-0x21*95]; + return jisx0208_to_ucs_table[jis.ku*94 + jis.ten - 95]; } -unsigned short ucs_to_jisx0208(wchar_t c) +Kuten ucs_to_jisx0208(UnicodeChar c) { - if(c&0xFFFF0000) return 0; + if(c<0 || c>0xFFFF) + return Kuten(); unsigned i=0; for(unsigned bit=0x1000; bit; bit>>=1) { if(i+bit>=ucs_to_jisx0208_table_size) continue; - if(ucs_to_jisx0208_table[i+bit].ucs<=(unsigned short)c) + if(ucs_to_jisx0208_table[i+bit].ucs<=static_cast(c)) i+=bit; } - if(ucs_to_jisx0208_table[i].ucs==(unsigned short)c) - return ucs_to_jisx0208_table[i].jis; - return 0; + Kuten result; + if(ucs_to_jisx0208_table[i].ucs==static_cast(c)) + { + result.ku=(ucs_to_jisx0208_table[i].jis>>8)+1; + result.ten=ucs_to_jisx0208_table[i].jis+1; + } + + return result; } +} // namespace Codecs } // namespace Msp