2009-03-20 17:15:26 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								! Copyright (C) 2009 Daniel Ehrenberg
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								! See http://factorcode.org/license.txt for BSD license.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								USING: io.encodings kernel sequences io simple-flat-file sets math
							 | 
						
					
						
							
								
									
										
										
										
											2011-11-29 00:26:06 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								combinators.short-circuit io.binary arrays assocs namespaces
							 | 
						
					
						
							
								
									
										
										
										
											2015-07-21 19:14:53 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								locals accessors combinators biassocs byte-arrays parser literals ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 17:15:26 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								IN: io.encodings.iso2022
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								SINGLETON: iso2022
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								<PRIVATE
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2011-11-29 00:26:06 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								SYMBOL: jis201
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								SYMBOL: jis208
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								SYMBOL: jis212
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 17:15:26 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2011-11-29 00:26:06 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								"vocab:io/encodings/iso2022/201.txt" flat-file>biassoc jis201 set-global
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								"vocab:io/encodings/iso2022/208.txt" flat-file>biassoc jis208 set-global
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								"vocab:io/encodings/iso2022/212.txt" flat-file>biassoc jis212 set-global
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 17:15:26 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2011-11-29 00:26:06 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								SYMBOL: ascii
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								128 iota unique >biassoc ascii set-global
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 17:15:26 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								TUPLE: iso2022-state type ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: make-iso-coder ( encoding -- state )
							 | 
						
					
						
							
								
									
										
										
										
											2011-11-29 00:26:06 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    drop ascii get-global iso2022-state boa ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 17:15:26 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								M: iso2022 <encoder>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    make-iso-coder <encoder> ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								M: iso2022 <decoder>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    make-iso-coder <decoder> ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2015-07-21 19:14:53 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								CONSTANT: ESC 0x16
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 17:15:26 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2015-07-21 19:14:53 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								CONSTANT: switch-ascii B{ $ ESC CHAR: ( CHAR: B }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								CONSTANT: switch-jis201 B{ $ ESC CHAR: ( CHAR: J }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								CONSTANT: switch-jis208 B{ $ ESC CHAR: $ CHAR: B }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								CONSTANT: switch-jis212 B{ $ ESC CHAR: $ CHAR: ( CHAR: D }
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 17:15:26 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: find-type ( char -- code type )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    {
							 | 
						
					
						
							
								
									
										
										
										
											2011-11-29 00:26:06 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { [ dup ascii get-global value? ] [ drop switch-ascii ascii get-global ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { [ dup jis201 get-global value? ] [ drop switch-jis201 jis201 get-global ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { [ dup jis208 get-global value? ] [ drop switch-jis208 jis208 get-global ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { [ dup jis212 get-global value? ] [ drop switch-jis212 jis212 get-global ] }
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-13 19:13:05 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        [ encode-error ]
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 17:15:26 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    } cond ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: stream-write-num ( num stream -- )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    over 256 >=
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ [ h>b/b swap 2byte-array ] dip stream-write ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ stream-write1 ] if ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								M:: iso2022-state encode-char ( char stream encoding -- )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    char encoding type>> value? [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        char find-type
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        [ stream stream-write ]
							 | 
						
					
						
							
								
									
										
										
										
											2010-05-05 16:52:54 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        [ encoding type<< ] bi*
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 17:15:26 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    ] unless
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    char encoding type>> value-at stream stream-write-num ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: read-escape ( stream -- type/f )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    dup stream-read1 {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { CHAR: ( [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            stream-read1 {
							 | 
						
					
						
							
								
									
										
										
										
											2011-11-29 00:26:06 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                { CHAR: B [ ascii get-global ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                { CHAR: J [ jis201 get-global ] }
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 17:15:26 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                [ drop f ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            } case
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { CHAR: $ [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            dup stream-read1 {
							 | 
						
					
						
							
								
									
										
										
										
											2015-07-18 23:14:22 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                { CHAR: @ [ drop jis208 get-global ] } ! want: JIS X 0208-1978
							 | 
						
					
						
							
								
									
										
										
										
											2011-11-29 00:26:06 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                { CHAR: B [ drop jis208 get-global ] }
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 17:15:26 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                { CHAR: ( [
							 | 
						
					
						
							
								
									
										
										
										
											2011-11-29 00:26:06 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    stream-read1 CHAR: D = jis212 get-global f ?
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 17:15:26 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                [ 2drop f ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            } case
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        [ 2drop f ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    } case ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: double-width? ( type -- ? )
							 | 
						
					
						
							
								
									
										
										
										
											2011-11-29 00:26:06 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    { [ jis208 get-global eq? ] [ jis212 get-global eq? ] } 1|| ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 17:15:26 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: finish-decode ( num encoding -- char )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    type>> at replacement-char or ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								M:: iso2022-state decode-char ( stream encoding -- char )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    stream stream-read1 {
							 | 
						
					
						
							
								
									
										
										
										
											2015-07-21 19:14:53 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { $ ESC [
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 17:15:26 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            stream read-escape [
							 | 
						
					
						
							
								
									
										
										
										
											2010-05-05 16:52:54 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                encoding type<<
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 17:15:26 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                stream encoding decode-char
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            ] [ replacement-char ] if*
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { f [ f ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            encoding type>> double-width? [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                stream stream-read1
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                [ 2byte-array be> encoding finish-decode ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                [ drop replacement-char ] if*
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            ] [ encoding finish-decode ] if
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    } case ;
							 | 
						
					
						
							
								
									
										
										
										
											2013-03-10 21:45:17 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								PRIVATE>
							 |