Merge branch 'master' of git://factorcode.org/git/factor
commit
c5e1a26ac5
|
@ -0,0 +1 @@
|
|||
Yun, Jonghyouk
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,46 @@
|
|||
! Copyright (C) 2009 Yun, Jonghyouk.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: arrays byte-arrays io io.encodings io.encodings.korean
|
||||
io.encodings.korean.private io.encodings.string io.streams.string
|
||||
kernel locals multiline namespaces sequences strings tools.test ;
|
||||
IN: io.encodings.korean.tests
|
||||
|
||||
! convert cp949 <> unicode
|
||||
|
||||
! Table lookups from CP949 code to Unicode code point:
! 0x80 and 0xff are expected to have no mapping (f), 0x7f maps to itself,
! and the two-byte codes 0x8141 and 0xc0b1 map to U+AC02 and U+C724.
[ f ] [ HEX: 80 cp949>unicode ] unit-test
|
||||
[ f ] [ HEX: ff cp949>unicode ] unit-test
|
||||
[ HEX: ac02 ] [ HEX: 8141 cp949>unicode ] unit-test
|
||||
[ HEX: 7f ] [ HEX: 7f cp949>unicode ] unit-test
|
||||
[ HEX: c724 ] [ HEX: c0b1 cp949>unicode ] unit-test
|
||||
|
||||
! Reverse lookups (Unicode code point to CP949 code) for the same
! three mapped values checked in the forward direction.
[ HEX: 8141 ] [ HEX: ac02 unicode>cp949 ] unit-test
|
||||
[ HEX: 7f ] [ HEX: 7f unicode>cp949 ] unit-test
|
||||
[ HEX: c0b1 ] [ HEX: c724 unicode>cp949 ] unit-test
|
||||
|
||||
! Sample phrase as a Unicode string: four characters, a space,
! four more characters and a trailing "!".
: phrase-unicode ( -- s ) "\u00b3d9\u00d574\u00bb3c\u00acfc \u00bc31\u00b450\u00c0b0\u00c774!" ;
|
||||
|
||||
! Sample phrase as an array of raw CP949 byte values.
! Each row below holds the bytes of one encoded character
! (two bytes per row, except the single-byte 0x20 and 0x21).
: phrase-cp949 ( -- s )
    {
        HEX: b5 HEX: bf
        HEX: c7 HEX: d8
        HEX: b9 HEX: b0
        HEX: b0 HEX: fa
        HEX: 20
        HEX: b9 HEX: e9
        HEX: b5 HEX: ce
        HEX: bb HEX: ea
        HEX: c0 HEX: cc
        HEX: 21
    } ;
|
||||
|
||||
! Encode the Unicode sample phrase with the cp949 encoding.
: phrase-unicode>cp949 ( -- s ) phrase-unicode cp949 encode ;
|
||||
|
||||
! Decode the CP949 sample bytes with the cp949 encoding.
: phrase-cp949>unicode ( -- s ) phrase-cp949 cp949 decode ;
|
||||
|
||||
! Whole-phrase checks, in order:
!  1. encoding the Unicode phrase yields the expected CP949 bytes;
!  2. decoding the CP949 bytes yields the Unicode phrase;
!  3. dropping the final single-byte "!" drops exactly one character;
!  4. dropping the last three bytes ("!" plus a whole two-byte character)
!     drops exactly two characters;
!  5. dropping the last two bytes leaves a lead byte with no trail byte,
!     which is expected to decode as the replacement character.
[ t ] [ phrase-unicode>cp949 >array phrase-cp949 = ] unit-test
|
||||
|
||||
[ t ] [ phrase-cp949>unicode phrase-unicode = ] unit-test
|
||||
|
||||
[ t ] [ phrase-cp949 1 head* cp949 decode phrase-unicode 1 head* = ] unit-test
|
||||
|
||||
[ t ] [ phrase-cp949 3 head* cp949 decode phrase-unicode 2 head* = ] unit-test
|
||||
|
||||
[ t ] [ phrase-cp949 2 head* cp949 decode phrase-unicode 2 head* CHAR: replacement-character suffix = ] unit-test
|
|
@ -0,0 +1,79 @@
|
|||
! Copyright (C) 2009 Yun, Jonghyouk.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: assocs byte-arrays combinators io io.encodings
|
||||
io.encodings.ascii io.encodings.iana io.files kernel locals math
|
||||
math.order math.parser memoize multiline sequences splitting
|
||||
values hashtables io.binary ;
|
||||
IN: io.encodings.korean
|
||||
|
||||
! Singleton class naming the encoding; the encode-char and decode-char
! methods in this file dispatch on it.
SINGLETON: cp949
|
||||
|
||||
! Register the cp949 singleton under the encoding name "EUC-KR".
cp949 "EUC-KR" register-encoding
|
||||
|
||||
<PRIVATE
|
||||
|
||||
! parse cp949.txt > table
|
||||
|
||||
! Read the bundled mapping file as ASCII and return its lines.
! The data file "cp949.txt" comes from
! <http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT>
: cp949.txt-lines ( -- seq )
    "resource:basis/io/encodings/korean/data/cp949.txt" ascii file-lines ;
|
||||
|
||||
! Cut every line at its first "#" and discard lines that end up empty.
: drop-comments ( seq -- newseq )
    [ "#" split1 drop ] map
    [ empty? not ] filter ;
|
||||
|
||||
! Split a line on tab characters and keep only its first two fields.
: split-column ( line -- columns ) "\t" split 2 head ;
|
||||
|
||||
! Skip the first two characters of the string (fewer if it is shorter)
! and parse the rest as a hexadecimal number.
: parse-hex ( s -- n )
    dup length 2 min tail hex> ;
|
||||
|
||||
! Turn one table line into a two-element sequence holding the parsed
! values of its first two tab-separated columns.
: parse-line ( line -- code-unicode ) split-column [ parse-hex ] map ;
|
||||
|
||||
! Strip comments from the raw lines, then parse each remaining line
! into a pair of numbers.
: process-codetable-lines ( lines -- assoc )
    drop-comments
    [ parse-line ] map ;
|
||||
|
||||
! convert cp949 <> unicode
|
||||
|
||||
! Memoized hashtable built from the parsed lines of the mapping file;
! keys are the first column's values, values are the second column's.
MEMO: cp949>unicode-table ( -- hashtable )
    cp949.txt-lines
    process-codetable-lines
    >hashtable ;
|
||||
|
||||
! Memoized inverse of cp949>unicode-table: every key/value pair swapped.
MEMO: unicode>cp949-table ( -- hashtable )
    cp949>unicode-table
    [ swap ] assoc-map ;
|
||||
|
||||
! Call the memoized word once and discard the result, so both tables
! are built here rather than on first use.
unicode>cp949-table drop
|
||||
|
||||
! Look up a CP949 code in the forward table; f when there is no entry.
: cp949>unicode ( b -- u ) cp949>unicode-table at ;
|
||||
|
||||
! Look up a Unicode code point in the inverse table; f when there is no entry.
: unicode>cp949 ( u -- b ) unicode>cp949-table at ;
|
||||
|
||||
! True when n lies in the range 0x81..0xfe, the byte values treated as
! the first byte of a two-byte sequence. An input of f yields f.
: cp949-1st? ( n -- ? )
    [ HEX: 81 HEX: fe between? ] [ f ] if* ;
|
||||
|
||||
! True when n fits in a single unsigned byte (0..255 inclusive).
: byte? ( n -- ? ) 0 255 between? ;
|
||||
|
||||
! Encode one character and write it to the stream.
!   char     - Unicode code point to encode
!   stream   - output stream receiving the bytes
!   encoding - the cp949 singleton (unused beyond dispatch)
! The code point is looked up once in the inverse table. A code that
! fits in one byte is written as a single byte; anything else is written
! as two bytes, high byte first. A code point with no table entry throws
! encode-error instead of failing inside the numeric range check.
M:: cp949 encode-char ( char stream encoding -- )
    char unicode>cp949 [
        dup byte?
        [ 1byte-array ]
        [ h>b/b swap 2byte-array ] if
        stream stream-write
    ] [ encode-error ] if* ;
|
||||
|
||||
! Finish decoding a two-byte sequence whose first byte c was already read.
!   c      - the first (lead) byte
!   stream - input stream from which the second byte is read
! Returns the mapped code point. replacement-char is returned both when
! the stream ends before the second byte and when the byte pair has no
! table entry; returning f for an unmapped pair would be indistinguishable
! from end of input to the caller.
: decode-char-step2 ( c stream -- char )
    stream-read1
    [ 2byte-array be> cp949>unicode replacement-char or ]
    [ drop replacement-char ] if* ;
|
||||
|
||||
! Decode one character from the stream.
! Reads a byte; f (end of input) and bytes outside the lead-byte range
! are returned unchanged, while a lead byte is handed to
! decode-char-step2 to consume the second byte.
M:: cp949 decode-char ( stream encoding -- char/f )
    stream stream-read1
    dup cp949-1st? [ stream decode-char-step2 ] when ;
|
|
@ -0,0 +1 @@
|
|||
Korean text encodings
|
|
@ -0,0 +1 @@
|
|||
text
|
Loading…
Reference in New Issue