X-Git-Url: http://git.tdb.fi/?p=libs%2Fcore.git;a=blobdiff_plain;f=source%2Fjisx0208.cpp;h=6b46b1b679b5a94636e807d350510174eb0eb86f;hp=1e5aec7db0bbafb63f06c55c961e5b2648acb69f;hb=5b1368cb791cab043f0435628cacbaff36e39b7b;hpb=9da6abdcabec59f4845da256a8ad75a810ed1589 diff --git a/source/jisx0208.cpp b/source/jisx0208.cpp index 1e5aec7..6b46b1b 100644 --- a/source/jisx0208.cpp +++ b/source/jisx0208.cpp @@ -12,73 +12,82 @@ Distributed under the LGPL using namespace std; namespace Msp { +namespace Codecs { -void JisX0208::Encoder::encode_char(wchar_t ucs) +void JisX0208::Encoder::encode_char(UnicodeChar ucs, string &buf) { - unsigned short jis=ucs_to_jisx0208(ucs); + unsigned short jis = ucs_to_jisx0208(ucs); if(jis) { - char buf[2]={jis>>8, jis}; - append(buf, 2); + char jbuf[2] = {jis>>8, jis}; + buf.append(jbuf, 2); } else - error("Can't express character in JIS X 0208"); + error(ucs, buf, "Can't express character in JIS X 0208"); } - -void JisX0208::Decoder::decode_char(const string &str, string::const_iterator &i) +void JisX0208::Encoder::transliterate(UnicodeChar, string &buf) { - if(i==str.end()) - return; + buf.append("!)", 2); +} - if(!high) - high=*i++; +UnicodeChar JisX0208::Decoder::decode_char(const string &str, string::const_iterator &i) +{ if(i==str.end()) - return; + return error("No input"); - wchar_t ucs=jisx0208_to_ucs(high<<8 | *i++); - high=0; + string::const_iterator j = i; + Kuten jis; + jis.ku = *j++-0x20; - if(ucs) - append(ucs); + UnicodeChar result; + if(j==str.end()) + result = error("Incomplete JIS X 0208 character"); else - error("Invalid JIS X 0208 string (undefined character)"); -} - -void JisX0208::Decoder::sync() -{ - if(high) { - error("Sync in middle of JIS X 0208 character"); - high=0; + jis.ten = *j++-0x20; + result = jisx0208_to_ucs(jis); + if(result==0) + result = error("Undefined JIS X 0208 character"); } + + i = j; + return result; } -wchar_t jisx0208_to_ucs(unsigned short jis) + +UnicodeChar jisx0208_to_ucs(Kuten jis) { - if((jis&0xFF)<0x21 || (jis&0xFF)>0x7E || (jis&0xFF00)<0x2100 || (jis&0xFF00)>0x7E00) + if(jis.ku==0 || jis.ku>0x5E || jis.ten==0 || jis.ten>0x5E) return 0; - return jisx0208_to_ucs_table[((jis>>8)&0xFF)*94 + (jis&0xFF)-0x21*95]; + return jisx0208_to_ucs_table[jis.ku*94 + jis.ten - 95]; } -unsigned short ucs_to_jisx0208(wchar_t c) +Kuten ucs_to_jisx0208(UnicodeChar c) { - if(c&0xFFFF0000) return 0; + if(c<0 || c>0xFFFF) + return Kuten(); - unsigned i=0; + unsigned i = 0; for(unsigned bit=0x1000; bit; bit>>=1) { if(i+bit>=ucs_to_jisx0208_table_size) continue; if(ucs_to_jisx0208_table[i+bit].ucs<=static_cast(c)) - i+=bit; + i += bit; } + Kuten result; if(ucs_to_jisx0208_table[i].ucs==static_cast(c)) - return ucs_to_jisx0208_table[i].jis; - return 0; + { + result.ku = (ucs_to_jisx0208_table[i].jis>>8)+1; + result.ten = ucs_to_jisx0208_table[i].jis+1; + } + + return result; } +} // namespace Codecs } // namespace Msp