factor/basis/io/encodings/iso2022/iso2022.factor

110 lines
3.2 KiB
Factor
Raw Normal View History

2009-03-20 17:15:26 -04:00
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: io.encodings kernel sequences io simple-flat-file sets math
2011-11-29 00:26:06 -05:00
combinators.short-circuit io.binary arrays assocs namespaces
locals accessors combinators biassocs byte-arrays parser literals ;
2009-03-20 17:15:26 -04:00
IN: io.encodings.iso2022
SINGLETON: iso2022
<PRIVATE
2011-11-29 00:26:06 -05:00
SYMBOL: jis201
SYMBOL: jis208
SYMBOL: jis212
2009-03-20 17:15:26 -04:00
2011-11-29 00:26:06 -05:00
"vocab:io/encodings/iso2022/201.txt" flat-file>biassoc jis201 set-global
"vocab:io/encodings/iso2022/208.txt" flat-file>biassoc jis208 set-global
"vocab:io/encodings/iso2022/212.txt" flat-file>biassoc jis212 set-global
2009-03-20 17:15:26 -04:00
2011-11-29 00:26:06 -05:00
SYMBOL: ascii
128 iota unique >biassoc ascii set-global
2009-03-20 17:15:26 -04:00
TUPLE: iso2022-state type ;
: make-iso-coder ( encoding -- state )
2011-11-29 00:26:06 -05:00
drop ascii get-global iso2022-state boa ;
2009-03-20 17:15:26 -04:00
M: iso2022 <encoder>
make-iso-coder <encoder> ;
M: iso2022 <decoder>
make-iso-coder <decoder> ;
CONSTANT: ESC 0x16
2009-03-20 17:15:26 -04:00
CONSTANT: switch-ascii B{ $ ESC CHAR: ( CHAR: B }
CONSTANT: switch-jis201 B{ $ ESC CHAR: ( CHAR: J }
CONSTANT: switch-jis208 B{ $ ESC CHAR: $ CHAR: B }
CONSTANT: switch-jis212 B{ $ ESC CHAR: $ CHAR: ( CHAR: D }
2009-03-20 17:15:26 -04:00
: find-type ( char -- code type )
{
2011-11-29 00:26:06 -05:00
{ [ dup ascii get-global value? ] [ drop switch-ascii ascii get-global ] }
{ [ dup jis201 get-global value? ] [ drop switch-jis201 jis201 get-global ] }
{ [ dup jis208 get-global value? ] [ drop switch-jis208 jis208 get-global ] }
{ [ dup jis212 get-global value? ] [ drop switch-jis212 jis212 get-global ] }
[ throw-encode-error ]
2009-03-20 17:15:26 -04:00
} cond ;
: stream-write-num ( num stream -- )
over 256 >=
[ [ h>b/b swap 2byte-array ] dip stream-write ]
[ stream-write1 ] if ;
M:: iso2022-state encode-char ( char stream encoding -- )
char encoding type>> value? [
char find-type
[ stream stream-write ]
[ encoding type<< ] bi*
2009-03-20 17:15:26 -04:00
] unless
char encoding type>> value-at stream stream-write-num ;
: read-escape ( stream -- type/f )
dup stream-read1 {
{ CHAR: ( [
stream-read1 {
2011-11-29 00:26:06 -05:00
{ CHAR: B [ ascii get-global ] }
{ CHAR: J [ jis201 get-global ] }
2009-03-20 17:15:26 -04:00
[ drop f ]
} case
] }
{ CHAR: $ [
dup stream-read1 {
{ CHAR: @ [ drop jis208 get-global ] } ! want: JIS X 0208-1978
2011-11-29 00:26:06 -05:00
{ CHAR: B [ drop jis208 get-global ] }
2009-03-20 17:15:26 -04:00
{ CHAR: ( [
2011-11-29 00:26:06 -05:00
stream-read1 CHAR: D = jis212 get-global f ?
2009-03-20 17:15:26 -04:00
] }
[ 2drop f ]
} case
] }
[ 2drop f ]
} case ;
: double-width? ( type -- ? )
2011-11-29 00:26:06 -05:00
{ [ jis208 get-global eq? ] [ jis212 get-global eq? ] } 1|| ;
2009-03-20 17:15:26 -04:00
: finish-decode ( num encoding -- char )
type>> at replacement-char or ;
M:: iso2022-state decode-char ( stream encoding -- char )
stream stream-read1 {
{ $ ESC [
2009-03-20 17:15:26 -04:00
stream read-escape [
encoding type<<
2009-03-20 17:15:26 -04:00
stream encoding decode-char
] [ replacement-char ] if*
] }
{ f [ f ] }
[
encoding type>> double-width? [
stream stream-read1
[ 2byte-array be> encoding finish-decode ]
[ drop replacement-char ] if*
] [ encoding finish-decode ] if
]
} case ;
2013-03-10 21:45:17 -04:00
PRIVATE>