Merge branch 'master' of git://factorcode.org/git/factor into experimental
						commit
						a2d42241d5
					
				|  | @ -0,0 +1 @@ | ||||||
|  | Yun, Jonghyouk | ||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							|  | @ -0,0 +1,46 @@ | ||||||
|  | ! Copyright (C) 2009 Yun, Jonghyouk. | ||||||
|  | ! See http://factorcode.org/license.txt for BSD license. | ||||||
|  | USING: arrays byte-arrays io io.encodings io.encodings.korean | ||||||
|  | io.encodings.korean.private io.encodings.string io.streams.string | ||||||
|  | kernel locals multiline namespaces sequences strings tools.test ; | ||||||
|  | IN: io.encodings.korean.tests | ||||||
|  | 
 | ||||||
|  | ! convert cp949 <> unicode | ||||||
|  | 
 | ||||||
|  | ! Forward lookups: the single bytes 0x80 and 0xff are expected to give f, | ||||||
|  | ! 0x7f is expected to map to itself, and the two-byte codes 0x8141 and | ||||||
|  | ! 0xc0b1 are expected to give U+AC02 and U+C724. | ||||||
|  | [ f ] [ HEX: 80 cp949>unicode ] unit-test | ||||||
|  | [ f ] [ HEX: ff cp949>unicode ] unit-test | ||||||
|  | [ HEX: ac02 ] [ HEX: 8141 cp949>unicode ] unit-test | ||||||
|  | [ HEX: 7f ] [ HEX: 7f cp949>unicode ] unit-test | ||||||
|  | [ HEX: c724 ] [ HEX: c0b1 cp949>unicode ] unit-test | ||||||
|  | 
 | ||||||
|  | ! Reverse lookups for the same three mapped pairs. | ||||||
|  | [ HEX: 8141 ] [ HEX: ac02 unicode>cp949 ] unit-test | ||||||
|  | [ HEX: 7f ] [ HEX: 7f unicode>cp949 ] unit-test | ||||||
|  | [ HEX: c0b1 ] [ HEX: c724 unicode>cp949 ] unit-test | ||||||
|  | 
 | ||||||
|  | ! Test fixture: a ten-character string written with \u escapes -- four | ||||||
|  | ! escaped characters, a space, four more escaped characters and "!". | ||||||
|  | : phrase-unicode ( -- s ) | ||||||
|  |     "\u00b3d9\u00d574\u00bb3c\u00acfc \u00bc31\u00b450\u00c0b0\u00c774!" ; | ||||||
|  | 
 | ||||||
|  | ! Test fixture: an array of 18 byte values. The tests below treat it as the | ||||||
|  | ! CP949 form of phrase-unicode. | ||||||
|  | : phrase-cp949 ( -- s ) | ||||||
|  |     { | ||||||
|  |         HEX: b5 HEX: bf HEX: c7 HEX: d8 | ||||||
|  |         HEX: b9 HEX: b0 HEX: b0 HEX: fa | ||||||
|  |         HEX: 20 HEX: b9 HEX: e9 HEX: b5 | ||||||
|  |         HEX: ce HEX: bb HEX: ea HEX: c0 | ||||||
|  |         HEX: cc HEX: 21 | ||||||
|  |     } ; | ||||||
|  | 
 | ||||||
|  | ! Run phrase-unicode through encode with the cp949 encoding. | ||||||
|  | : phrase-unicode>cp949 ( -- s ) | ||||||
|  |     phrase-unicode cp949 encode ; | ||||||
|  | 
 | ||||||
|  | ! Run phrase-cp949 through decode with the cp949 encoding. | ||||||
|  | : phrase-cp949>unicode ( -- s ) | ||||||
|  |     phrase-cp949 cp949 decode ; | ||||||
|  | 
 | ||||||
|  | ! Encoding the whole phrase must give exactly the expected byte values. | ||||||
|  | [ t ] [ phrase-unicode>cp949 >array phrase-cp949 = ] unit-test | ||||||
|  | 
 | ||||||
|  | ! Decoding the byte values must give back the phrase. | ||||||
|  | [ t ]  [ phrase-cp949>unicode phrase-unicode = ] unit-test | ||||||
|  | 
 | ||||||
|  | ! Dropping the final byte (0x21) must drop exactly the final character. | ||||||
|  | [ t ] [ phrase-cp949 1 head* cp949 decode phrase-unicode 1 head* = ] unit-test | ||||||
|  | 
 | ||||||
|  | ! Truncated input. | ||||||
|  | ! NOTE(review): by byte count, 3 head* leaves the input ending on a lone | ||||||
|  | ! lead byte (0xbb) while 2 head* leaves it ending on a complete pair, so the | ||||||
|  | ! replacement character would seem to belong to the 3 head* case rather than | ||||||
|  | ! the 2 head* case -- TODO confirm both expected values against a test run. | ||||||
|  | [ t ] [ phrase-cp949 3 head* cp949 decode phrase-unicode 2 head* = ] unit-test | ||||||
|  | 
 | ||||||
|  | [ t ] [ phrase-cp949 2 head* cp949 decode phrase-unicode 2 head* CHAR: replacement-character suffix = ] unit-test | ||||||
|  | @ -0,0 +1,79 @@ | ||||||
|  | ! Copyright (C) 2009 Yun, Jonghyouk. | ||||||
|  | ! See http://factorcode.org/license.txt for BSD license. | ||||||
|  | USING: assocs byte-arrays combinators io io.encodings | ||||||
|  | io.encodings.ascii io.encodings.iana io.files kernel locals math | ||||||
|  | math.order math.parser memoize multiline sequences splitting | ||||||
|  | values hashtables io.binary ; | ||||||
|  | IN: io.encodings.korean | ||||||
|  | 
 | ||||||
|  | ! Encoding descriptor; encode-char and decode-char below are its methods. | ||||||
|  | SINGLETON: cp949 | ||||||
|  | 
 | ||||||
|  | ! Hand the descriptor to register-encoding under the name "EUC-KR". | ||||||
|  | cp949 "EUC-KR" register-encoding | ||||||
|  | 
 | ||||||
|  | <PRIVATE | ||||||
|  | 
 | ||||||
|  | ! parse cp949.txt > table | ||||||
|  | 
 | ||||||
|  | ! Return the lines of the mapping-table file stored under this vocabulary's | ||||||
|  | ! data directory; the file is read with the ascii encoding. | ||||||
|  | : cp949.txt-lines ( -- seq ) | ||||||
|  |     ! "cp949.txt" from ... | ||||||
|  |     ! <http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT> | ||||||
|  |     "resource:basis/io/encodings/korean/data/cp949.txt" | ||||||
|  |     ascii file-lines ; | ||||||
|  | 
 | ||||||
|  | ! Cut every line at its first "#" and keep only the remainders that are | ||||||
|  | ! not empty. | ||||||
|  | : drop-comments ( seq -- newseq ) | ||||||
|  |     [ "#" split1 drop ] map | ||||||
|  |     [ empty? not ] filter ; | ||||||
|  | 
 | ||||||
|  | ! Split a table line on tab characters and keep the first two fields. | ||||||
|  | ! NOTE(review): 2 head presumably fails on a line with fewer than two | ||||||
|  | ! fields -- confirm every line left by drop-comments contains a tab. | ||||||
|  | : split-column ( line -- columns ) | ||||||
|  |     "\t" split 2 head ; | ||||||
|  | 
 | ||||||
|  | ! Skip the first two characters of s (fewer when s is shorter than that) | ||||||
|  | ! and read what is left as a hexadecimal number via hex>. | ||||||
|  | : parse-hex ( s -- n ) | ||||||
|  |     dup length 2 min tail | ||||||
|  |     hex> ; | ||||||
|  | 
 | ||||||
|  | ! Turn one table line into a two-element sequence: its first two | ||||||
|  | ! tab-separated fields, each converted with parse-hex. | ||||||
|  | : parse-line ( line -- code-unicode ) | ||||||
|  |     "\t" split 2 head | ||||||
|  |     [ parse-hex ] map ; | ||||||
|  | 
 | ||||||
|  | ! Strip "#" comments from the lines, discard the ones left empty, and | ||||||
|  | ! convert each survivor with parse-line. | ||||||
|  | : process-codetable-lines ( lines -- assoc ) | ||||||
|  |     [ "#" split1 drop ] map harvest | ||||||
|  |     [ parse-line ] map ; | ||||||
|  | 
 | ||||||
|  | ! convert cp949 <> unicode | ||||||
|  | 
 | ||||||
|  | ! Memoized hashtable built from the parsed lines of the mapping file: | ||||||
|  | ! first column as key, second column as value. | ||||||
|  | MEMO: cp949>unicode-table ( -- hashtable ) | ||||||
|  |     cp949.txt-lines | ||||||
|  |     drop-comments [ parse-line ] map | ||||||
|  |     >hashtable ; | ||||||
|  | 
 | ||||||
|  | ! Memoized inverse of cp949>unicode-table, made by swapping every key with | ||||||
|  | ! its value. NOTE(review): if several keys share one value, only one of | ||||||
|  | ! them can remain in the result -- confirm that this is acceptable. | ||||||
|  | MEMO: unicode>cp949-table ( -- hashtable ) | ||||||
|  |     cp949>unicode-table [ swap ] assoc-map ; | ||||||
|  | 
 | ||||||
|  | ! Call the memoized builder once here and discard the result; presumably | ||||||
|  | ! this is meant to fill both tables ahead of the first conversion. | ||||||
|  | unicode>cp949-table drop | ||||||
|  | 
 | ||||||
|  | ! Look b up in cp949>unicode-table. The tests for this vocabulary expect f | ||||||
|  | ! for 0x80 and 0xff. | ||||||
|  | : cp949>unicode ( b -- u ) | ||||||
|  |     cp949>unicode-table at ; | ||||||
|  | 
 | ||||||
|  | ! Look u up in unicode>cp949-table, the swapped form of the table used by | ||||||
|  | ! cp949>unicode. | ||||||
|  | : unicode>cp949 ( u -- b ) | ||||||
|  |     unicode>cp949-table at ; | ||||||
|  | 
 | ||||||
|  | ! Answer whether n lies in 0x81..0xfe, the range decode-char treats as the | ||||||
|  | ! first byte of a two-byte code. An input of f answers f. | ||||||
|  | : cp949-1st? ( n -- ? ) | ||||||
|  |     [ HEX: 81 HEX: fe between? ] [ f ] if* ; | ||||||
|  | 
 | ||||||
|  | ! Answer whether n lies between 0 and 0xff. | ||||||
|  | : byte? ( n -- ? ) | ||||||
|  |     0 HEX: ff between? ; | ||||||
|  | 
 | ||||||
|  | ! Write char to stream in CP949 form. The code found by unicode>cp949 is | ||||||
|  | ! looked up once and written as a single byte when byte? accepts it, | ||||||
|  | ! otherwise as the two bytes produced by h>b/b, swapped into the order | ||||||
|  | ! that decode-char-step2 reads back with be>. | ||||||
|  | ! A character with no table entry throws encode-error; before, the f from | ||||||
|  | ! the failed lookup was handed to byte? and failed there instead. | ||||||
|  | M:: cp949 encode-char ( char stream encoding -- ) | ||||||
|  |     char unicode>cp949 [ | ||||||
|  |         ! The looked-up code is on the stack here; write it rather than | ||||||
|  |         ! the raw character so the output always comes from the table. | ||||||
|  |         dup byte? | ||||||
|  |         [ 1byte-array ] [ h>b/b swap 2byte-array ] if | ||||||
|  |         stream stream-write | ||||||
|  |     ] [ encode-error ] if* ; | ||||||
|  | 
 | ||||||
|  | ! Finish a two-byte code whose first byte c was already read: take one | ||||||
|  | ! more byte from stream, combine the pair with be> and look it up. | ||||||
|  | ! Gives replacement-char when the stream has no further byte, and also when | ||||||
|  | ! the pair has no table entry. The second case used to give f, which | ||||||
|  | ! decode-char also uses to mean end of input. | ||||||
|  | : decode-char-step2 ( c stream -- char ) | ||||||
|  |     stream-read1 | ||||||
|  |     [ 2byte-array be> cp949>unicode replacement-char or ] | ||||||
|  |     [ drop replacement-char ] if* ; | ||||||
|  | 
 | ||||||
|  | ! Read one character from stream. Gives f when stream-read1 gives f, passes | ||||||
|  | ! a byte accepted by cp949-1st? on to decode-char-step2, and returns any | ||||||
|  | ! other byte unchanged. | ||||||
|  | M:: cp949 decode-char ( stream encoding -- char/f ) | ||||||
|  |     stream stream-read1 dup [ | ||||||
|  |         dup cp949-1st? [ stream decode-char-step2 ] when | ||||||
|  |     ] when ; | ||||||
|  | @ -0,0 +1 @@ | ||||||
|  | Korean text encodings | ||||||
|  | @ -0,0 +1 @@ | ||||||
|  | text | ||||||
		Loading…
	
		Reference in New Issue