Unicode upgrade and fix
							parent
							
								
									e293856072
								
							
						
					
					
						commit
						6ff319cfcb
					
				| 
						 | 
					@ -26,6 +26,8 @@ SYMBOL: begin
 | 
				
			||||||
: start-decoding ( seq length -- buf ch state seq )
 | 
					: start-decoding ( seq length -- buf ch state seq )
 | 
				
			||||||
    <sbuf> 0 begin roll ;
 | 
					    <sbuf> 0 begin roll ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					GENERIC: decode-step ( buf byte ch state encoding -- buf ch state )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
: decode ( seq quot -- string )
 | 
					: decode ( seq quot -- string )
 | 
				
			||||||
    >r dup length start-decoding r>
 | 
					    >r dup length start-decoding r>
 | 
				
			||||||
    [ -rot ] swap compose each
 | 
					    [ -rot ] swap compose each
 | 
				
			||||||
| 
						 | 
					@ -39,26 +41,54 @@ SYMBOL: begin
 | 
				
			||||||
: end-read-loop ( buf ch state stream quot -- string/f )
 | 
					: end-read-loop ( buf ch state stream quot -- string/f )
 | 
				
			||||||
    2drop 2drop >string f like ;
 | 
					    2drop 2drop >string f like ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
: under ( a b c -- c a b c )
 | 
					: decode-read-loop ( buf ch state stream encoding -- string/f )
 | 
				
			||||||
    tuck >r swapd r> ; inline
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
: decode-read-loop ( buf ch state stream quot -- string/f )
 | 
					 | 
				
			||||||
    >r >r pick r> r> rot full?  [ end-read-loop ] [
 | 
					    >r >r pick r> r> rot full?  [ end-read-loop ] [
 | 
				
			||||||
        over stream-read1 [
 | 
					        over stream-read1 [
 | 
				
			||||||
            -rot tuck >r >r >r -rot r> call r> r> decode-read-loop
 | 
					            -rot tuck >r >r >r -rot r> decode-step r> r> decode-read-loop
 | 
				
			||||||
        ] [ end-read-loop ] if*
 | 
					        ] [ end-read-loop ] if*
 | 
				
			||||||
    ] if ; inline
 | 
					    ] if ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
: decode-read ( length stream quot -- string )
 | 
					: decode-read ( length stream encoding -- string )
 | 
				
			||||||
    >r swap start-decoding r>
 | 
					    >r swap start-decoding r>
 | 
				
			||||||
    decode-read-loop ; inline
 | 
					    decode-read-loop ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
GENERIC: init-decoding ( stream encoding -- decoded-stream )
 | 
					GENERIC: init-decoding ( stream encoding -- decoded-stream )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
: <decoding> ( stream decoding-class -- decoded-stream )
 | 
					: <decoding> ( stream decoding-class -- decoded-stream )
 | 
				
			||||||
    construct-empty init-decoding ;
 | 
					    construct-empty init-decoding <line-reader> ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
GENERIC: init-encoding ( stream encoding -- encoded-stream )
 | 
					GENERIC: init-encoding ( stream encoding -- encoded-stream )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
: <encoding> ( stream encoding-class -- encoded-stream )
 | 
					: <encoding> ( stream encoding-class -- encoded-stream )
 | 
				
			||||||
    construct-empty init-encoding ;
 | 
					    construct-empty init-encoding <plain-writer> ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					GENERIC: encode-string ( string encoding -- byte-array )
 | 
				
			||||||
 | 
					M: tuple-class encode-string construct-empty encode-string ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					MIXIN: encoding-stream
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					M: encoding-stream init-decoding ( stream encoding-stream -- encoding-stream )
 | 
				
			||||||
 | 
					    tuck set-delegate ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					M: encoding-stream init-encoding ( stream encoding-stream -- encoding-stream )
 | 
				
			||||||
 | 
					    tuck set-delegate ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					M: encoding-stream stream-read1 1 swap stream-read ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					M: encoding-stream stream-read
 | 
				
			||||||
 | 
					    [ delegate ] keep decode-read ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					M: encoding-stream stream-read-partial stream-read ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					M: encoding-stream stream-read-until
 | 
				
			||||||
 | 
					    ! Copied from { c-reader stream-read-until }!!!
 | 
				
			||||||
 | 
					    [ swap read-until-loop ] "" make
 | 
				
			||||||
 | 
					    swap over empty? over not and [ 2drop f f ] when ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					M: encoding-stream stream-write1
 | 
				
			||||||
 | 
					    >r 1string r> stream-write ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					M: encoding-stream stream-write
 | 
				
			||||||
 | 
					    [ encode-string ] keep delegate stream-write ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					M: encoding-stream dispose delegate dispose ;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -115,34 +115,16 @@ SYMBOL: ignore
 | 
				
			||||||
        { [ t ] [ decode-error ] }
 | 
					        { [ t ] [ decode-error ] }
 | 
				
			||||||
    } cond ;
 | 
					    } cond ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
! UTF16LE streams
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
TUPLE: utf16le ;
 | 
					TUPLE: utf16le ;
 | 
				
			||||||
: <utf16le> utf16le construct-delegate ;
 | 
					: <utf16le> utf16le construct-delegate ;
 | 
				
			||||||
! In the future, this should detect and ignore a BOM at the beginning
 | 
					INSTANCE: encoding-stream utf16le
 | 
				
			||||||
 | 
					
 | 
				
			||||||
M: utf16le init-decoding ( stream utf16le -- utf16le-stream )
 | 
					M: utf16le encode-string drop encode-utf16le ;
 | 
				
			||||||
    tuck set-delegate ;
 | 
					M: utf16le decode-step drop decode-utf16le-step ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
M: utf16le init-encoding ( stream utf16le -- utf16le-stream )
 | 
					TUPLE: utf16be ;
 | 
				
			||||||
    tuck set-delegate ;
 | 
					: <utf16be> utf16be construct-delegate ;
 | 
				
			||||||
 | 
					INSTANCE: encoding-stream utf16be
 | 
				
			||||||
 | 
					
 | 
				
			||||||
M: utf16le stream-read1 1 swap stream-read ;
 | 
					M: utf16be encode-string drop encode-utf16be ;
 | 
				
			||||||
 | 
					M: utf16le decode-step drop decode-utf16be-step ;
 | 
				
			||||||
M: utf16le stream-read
 | 
					 | 
				
			||||||
    delegate [ decode-utf16le-step ] decode-read ;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
M: utf16le stream-read-partial stream-read ;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
M: utf16le stream-read-until
 | 
					 | 
				
			||||||
    ! Copied from { c-reader stream-read-until }!!!
 | 
					 | 
				
			||||||
    [ swap read-until-loop ] "" make
 | 
					 | 
				
			||||||
    swap over empty? over not and [ 2drop f f ] when ;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
M: utf16le stream-write1
 | 
					 | 
				
			||||||
    >r 1string r> stream-write ;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
M: utf16le stream-write
 | 
					 | 
				
			||||||
    >r encode-utf16le r> delegate stream-write ;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
M: utf16le dispose delegate dispose ;
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -79,30 +79,8 @@ SYMBOL: quad3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
TUPLE: utf8 ;
 | 
					TUPLE: utf8 ;
 | 
				
			||||||
: <utf8> utf8 construct-delegate ;
 | 
					: <utf8> utf8 construct-delegate ;
 | 
				
			||||||
 | 
					INSTANCE: encoding-stream utf8
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					M: utf8 encode-string drop encode-utf8 ;
 | 
				
			||||||
 | 
					M: utf8 decode-step drop decode-utf8-step ;
 | 
				
			||||||
! In the future, this should detect and ignore a BOM at the beginning
 | 
					! In the future, this should detect and ignore a BOM at the beginning
 | 
				
			||||||
 | 
					 | 
				
			||||||
M: utf8 init-decoding ( stream utf8 -- utf8-stream )
 | 
					 | 
				
			||||||
    tuck set-delegate ;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
M: utf8 init-encoding ( stream utf8 -- utf8-stream )
 | 
					 | 
				
			||||||
    tuck set-delegate ;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
M: utf8 stream-read1 1 swap stream-read ;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
M: utf8 stream-read
 | 
					 | 
				
			||||||
    delegate [ decode-utf8-step ] decode-read ;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
M: utf8 stream-read-partial stream-read ;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
M: utf8 stream-read-until
 | 
					 | 
				
			||||||
    ! Copied from { c-reader stream-read-until }!!!
 | 
					 | 
				
			||||||
    [ swap read-until-loop ] "" make
 | 
					 | 
				
			||||||
    swap over empty? over not and [ 2drop f f ] when ;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
M: utf8 stream-write1
 | 
					 | 
				
			||||||
    >r 1string r> stream-write ;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
M: utf8 stream-write
 | 
					 | 
				
			||||||
    >r encode-utf8 r> delegate stream-write ;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
M: utf8 dispose delegate dispose ;
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,8 +1,16 @@
 | 
				
			||||||
USING: assocs math kernel sequences io.files hashtables
 | 
					USING: assocs math kernel sequences io.files hashtables
 | 
				
			||||||
quotations splitting arrays math.parser combinators.lib hash2
 | 
					quotations splitting arrays math.parser combinators.lib hash2
 | 
				
			||||||
byte-arrays words namespaces words compiler.units const ;
 | 
					byte-arrays words namespaces words compiler.units parser ;
 | 
				
			||||||
IN: unicode.data
 | 
					IN: unicode.data
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					<<
 | 
				
			||||||
 | 
					: VALUE:
 | 
				
			||||||
 | 
					    CREATE dup reset-generic { f } clone [ first ] curry define ; parsing
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					: set-value ( value word -- )
 | 
				
			||||||
 | 
					    word-def first set-first ;
 | 
				
			||||||
 | 
					>>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
! Convenience functions
 | 
					! Convenience functions
 | 
				
			||||||
: 1+* ( n/f _ -- n+1 )
 | 
					: 1+* ( n/f _ -- n+1 )
 | 
				
			||||||
    drop [ 1+ ] [ 0 ] if* ;
 | 
					    drop [ 1+ ] [ 0 ] if* ;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue