Cleanup of various encodings implementations
parent
7512ec1110
commit
ce83be67a4
|
@ -2,28 +2,24 @@
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: sequences kernel io io.files combinators.short-circuit
|
USING: sequences kernel io io.files combinators.short-circuit
|
||||||
math.order values assocs io.encodings io.binary fry strings
|
math.order values assocs io.encodings io.binary fry strings
|
||||||
math io.encodings.utf8 arrays accessors splitting math.parser ;
|
math io.encodings.ascii arrays accessors splitting math.parser
|
||||||
|
biassocs ;
|
||||||
IN: io.encodings.japanese
|
IN: io.encodings.japanese
|
||||||
|
|
||||||
! The code page used is Microsoft Code Page 932,
|
|
||||||
! which is a set of extensions to JIS X 0208:1997
|
|
||||||
|
|
||||||
VALUE: shift-jis
|
VALUE: shift-jis
|
||||||
|
|
||||||
VALUE: windows-31j
|
VALUE: windows-31j
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
|
||||||
TUPLE: jis jis>ch ch>jis ;
|
TUPLE: jis assoc ;
|
||||||
|
|
||||||
: <jis> ( assoc -- jis )
|
: <jis> ( assoc -- jis )
|
||||||
[ nip ] assoc-filter
|
[ nip ] assoc-filter H{ } assoc-like
|
||||||
[ H{ } assoc-like ]
|
>biassoc jis boa ;
|
||||||
[ [ swap ] H{ } assoc-map-as ] bi
|
|
||||||
jis boa ;
|
|
||||||
|
|
||||||
: ch>jis ( ch tuple -- jis ) ch>jis>> at [ encode-error ] unless* ;
|
: ch>jis ( ch tuple -- jis ) assoc>> value-at [ encode-error ] unless* ;
|
||||||
: jis>ch ( jis tuple -- string ) jis>ch>> at replacement-char or ;
|
: jis>ch ( jis tuple -- string ) assoc>> at replacement-char or ;
|
||||||
|
|
||||||
: process-jis ( lines -- assoc )
|
: process-jis ( lines -- assoc )
|
||||||
[ "#" split1 drop ] map harvest [
|
[ "#" split1 drop ] map harvest [
|
||||||
|
@ -32,7 +28,7 @@ TUPLE: jis jis>ch ch>jis ;
|
||||||
] map ;
|
] map ;
|
||||||
|
|
||||||
: make-jis ( filename -- jis )
|
: make-jis ( filename -- jis )
|
||||||
utf8 file-lines process-jis <jis> ;
|
ascii file-lines process-jis <jis> ;
|
||||||
|
|
||||||
"resource:basis/io/encodings/japanese/CP932.txt"
|
"resource:basis/io/encodings/japanese/CP932.txt"
|
||||||
make-jis to: windows-31j
|
make-jis to: windows-31j
|
||||||
|
@ -45,7 +41,7 @@ make-jis to: shift-jis
|
||||||
{ [ 0 HEX: 7F between? ] [ HEX: A1 HEX: DF between? ] } 1|| ;
|
{ [ 0 HEX: 7F between? ] [ HEX: A1 HEX: DF between? ] } 1|| ;
|
||||||
|
|
||||||
: write-halfword ( stream halfword -- )
|
: write-halfword ( stream halfword -- )
|
||||||
h>b/b swap 2array >string swap stream-write ;
|
h>b/b swap B{ } 2sequence swap stream-write ;
|
||||||
|
|
||||||
M: jis encode-char
|
M: jis encode-char
|
||||||
swapd ch>jis
|
swapd ch>jis
|
||||||
|
|
|
@ -101,9 +101,9 @@ M: utf16le encode-char ( char stream encoding -- )
|
||||||
|
|
||||||
! UTF-16
|
! UTF-16
|
||||||
|
|
||||||
: bom-le B{ HEX: ff HEX: fe } ; inline
|
CONSTANT: bom-le B{ HEX: ff HEX: fe }
|
||||||
|
|
||||||
: bom-be B{ HEX: fe HEX: ff } ; inline
|
CONSTANT: bom-be B{ HEX: fe HEX: ff }
|
||||||
|
|
||||||
: bom>le/be ( bom -- le/be )
|
: bom>le/be ( bom -- le/be )
|
||||||
dup bom-le sequence= [ drop utf16le ] [
|
dup bom-le sequence= [ drop utf16le ] [
|
||||||
|
|
|
@ -40,9 +40,9 @@ M: utf32le encode-char
|
||||||
|
|
||||||
! UTF-32
|
! UTF-32
|
||||||
|
|
||||||
: bom-le B{ HEX: ff HEX: fe 0 0 } ; inline
|
CONSTANT: bom-le B{ HEX: ff HEX: fe 0 0 }
|
||||||
|
|
||||||
: bom-be B{ 0 0 HEX: fe HEX: ff } ; inline
|
CONSTANT: bom-be B{ 0 0 HEX: fe HEX: ff }
|
||||||
|
|
||||||
: bom>le/be ( bom -- le/be )
|
: bom>le/be ( bom -- le/be )
|
||||||
dup bom-le sequence= [ drop utf32le ] [
|
dup bom-le sequence= [ drop utf32le ] [
|
||||||
|
|
Loading…
Reference in New Issue