io.encodings.korean modified

db4
Yun, Jonghyouk 2009-02-28 01:34:04 +09:00
parent c3be596926
commit e36967d3b8
1 changed file with 7 additions and 36 deletions

View File

@@ -2,11 +2,10 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: assocs byte-arrays combinators io io.encodings USING: assocs byte-arrays combinators io io.encodings
io.encodings.ascii io.encodings.iana io.files kernel locals math io.encodings.ascii io.encodings.iana io.files kernel locals math
math.order math.parser memoize multiline sequences splitting math.order math.parser values multiline sequences splitting
values hashtables io.binary ; values hashtables io.binary io.encodings.asian ;
IN: io.encodings.korean IN: io.encodings.korean
! TODO: migrate to common code-table parser (by Dan).
SINGLETON: cp949 SINGLETON: cp949
@@ -14,44 +13,16 @@ cp949 "EUC-KR" register-encoding
<PRIVATE <PRIVATE
! parse cp949.txt > table VALUE: cp949-table
: cp949.txt-lines ( -- seq ) "vocab:io/encodings/korean/data/cp949.txt" <code-table>*
! "cp949.txt" from ... to: cp949-table
! <http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT>
"vocab:io/encodings/korean/data/cp949.txt"
ascii file-lines ;
! Strip "#" comments from a sequence of text lines.
! Each line is cut at its first "#" and only the text before it is kept;
! lines that are empty after the cut are then discarded by harvest.
: drop-comments ( seq -- newseq )
[ "#" split1 drop ] map harvest ;
! Split a line on tab characters and keep only the first two fields.
! NOTE(review): head is not clamped here, so a line with fewer than two
! tab-separated fields would presumably raise an error - confirm against
! the data file.
: split-column ( line -- columns )
"\t" split 2 head ;
! Parse a hexadecimal string that carries a two-character prefix.
! The first two characters are dropped (short clamps the count to the
! string length, so shorter strings do not raise an error) and the rest
! is handed to hex>.
! NOTE(review): the prefix is presumably "0x"; it is skipped, not checked.
: parse-hex ( s -- n )
2 short tail hex> ;
! Turn one data line into a two-element sequence of numbers:
! the first two tab-separated columns, each parsed with parse-hex.
: parse-line ( line -- code-unicode )
split-column [ parse-hex ] map ;
! Convert raw code-table lines into a sequence of number pairs.
! Comments and empty lines are removed first (drop-comments), then every
! remaining line is parsed with parse-line.
: process-codetable-lines ( lines -- assoc )
drop-comments [ parse-line ] map ;
! convert cp949 <> unicode
! Memoized lookup table built from the cp949.txt lines.
! Each parsed pair becomes one hashtable entry, keyed by the first column
! with the second column as its value. Being a MEMO: word, the file is read
! and parsed on the first call only; later calls return the cached table.
MEMO: cp949>unicode-table ( -- hashtable )
cp949.txt-lines process-codetable-lines >hashtable ;
! Memoized inverse of cp949>unicode-table: every key/value pair is swapped.
! NOTE(review): if two keys share the same value, only one of them can
! survive the inversion - which one is not evident from this code.
MEMO: unicode>cp949-table ( -- hashtable )
cp949>unicode-table [ swap ] assoc-map ;
! Call the word once and discard the result. Since it builds on
! cp949>unicode-table, this fills both memoized tables up front
! (presumably at load time, so the first real lookup is not slow).
unicode>cp949-table drop
: cp949>unicode ( b -- u ) : cp949>unicode ( b -- u )
cp949>unicode-table at ; cp949-table n>u ;
: unicode>cp949 ( u -- b ) : unicode>cp949 ( u -- b )
unicode>cp949-table at ; cp949-table u>n ;
: cp949-1st? ( n -- ? ) : cp949-1st? ( n -- ? )
dup [ HEX: 81 HEX: fe between? ] when ; dup [ HEX: 81 HEX: fe between? ] when ;