Unicode upgrade and fix

db4
Daniel Ehrenberg 2008-02-13 19:53:53 -06:00
parent e293856072
commit 6ff319cfcb
4 changed files with 61 additions and 63 deletions
core/io/encodings
extra/unicode/data

View File

@ -26,6 +26,8 @@ SYMBOL: begin
: start-decoding ( seq length -- buf ch state seq )
<sbuf> 0 begin roll ;
GENERIC: decode-step ( buf byte ch state encoding -- buf ch state )
: decode ( seq quot -- string )
>r dup length start-decoding r>
[ -rot ] swap compose each
@ -39,26 +41,54 @@ SYMBOL: begin
: end-read-loop ( buf ch state stream quot -- string/f )
2drop 2drop >string f like ;
: under ( a b c -- c a b c )
tuck >r swapd r> ; inline
: decode-read-loop ( buf ch state stream quot -- string/f )
: decode-read-loop ( buf ch state stream encoding -- string/f )
>r >r pick r> r> rot full? [ end-read-loop ] [
over stream-read1 [
-rot tuck >r >r >r -rot r> call r> r> decode-read-loop
-rot tuck >r >r >r -rot r> decode-step r> r> decode-read-loop
] [ end-read-loop ] if*
] if ; inline
] if ;
: decode-read ( length stream quot -- string )
: decode-read ( length stream encoding -- string )
>r swap start-decoding r>
decode-read-loop ; inline
decode-read-loop ;
GENERIC: init-decoding ( stream encoding -- decoded-stream )
: <decoding> ( stream decoding-class -- decoded-stream )
construct-empty init-decoding ;
construct-empty init-decoding <line-reader> ;
GENERIC: init-encoding ( stream encoding -- encoded-stream )
: <encoding> ( stream encoding-class -- encoded-stream )
construct-empty init-encoding ;
construct-empty init-encoding <plain-writer> ;
GENERIC: encode-string ( string encoding -- byte-array )
M: tuple-class encode-string construct-empty encode-string ;
MIXIN: encoding-stream
M: encoding-stream init-decoding ( stream encoding-stream -- encoding-stream )
tuck set-delegate ;
M: encoding-stream init-encoding ( stream encoding-stream -- encoding-stream )
tuck set-delegate ;
M: encoding-stream stream-read1 1 swap stream-read ;
M: encoding-stream stream-read
[ delegate ] keep decode-read ;
M: encoding-stream stream-read-partial stream-read ;
M: encoding-stream stream-read-until
! Copied from { c-reader stream-read-until }!!!
[ swap read-until-loop ] "" make
swap over empty? over not and [ 2drop f f ] when ;
M: encoding-stream stream-write1
>r 1string r> stream-write ;
M: encoding-stream stream-write
[ encode-string ] keep delegate stream-write ;
M: encoding-stream dispose delegate dispose ;

View File

@ -115,34 +115,16 @@ SYMBOL: ignore
{ [ t ] [ decode-error ] }
} cond ;
! UTF16LE streams
TUPLE: utf16le ;
: <utf16le> utf16le construct-delegate ;
! In the future, this should detect and ignore a BOM at the beginning
INSTANCE: encoding-stream utf16le
M: utf16le init-decoding ( stream utf16le -- utf16le-stream )
tuck set-delegate ;
M: utf16le encode-string drop encode-utf16le ;
M: utf16le decode-step drop decode-utf16le-step ;
M: utf16le init-encoding ( stream utf16le -- utf16le-stream )
tuck set-delegate ;
TUPLE: utf16be ;
: <utf16be> utf16be construct-delegate ;
INSTANCE: encoding-stream utf16be
M: utf16le stream-read1 1 swap stream-read ;
M: utf16le stream-read
delegate [ decode-utf16le-step ] decode-read ;
M: utf16le stream-read-partial stream-read ;
M: utf16le stream-read-until
! Copied from { c-reader stream-read-until }!!!
[ swap read-until-loop ] "" make
swap over empty? over not and [ 2drop f f ] when ;
M: utf16le stream-write1
>r 1string r> stream-write ;
M: utf16le stream-write
>r encode-utf16le r> delegate stream-write ;
M: utf16le dispose delegate dispose ;
M: utf16be encode-string drop encode-utf16be ;
M: utf16le decode-step drop decode-utf16be-step ;

View File

@ -79,30 +79,8 @@ SYMBOL: quad3
TUPLE: utf8 ;
: <utf8> utf8 construct-delegate ;
INSTANCE: encoding-stream utf8
M: utf8 encode-string drop encode-utf8 ;
M: utf8 decode-step drop decode-utf8-step ;
! In the future, this should detect and ignore a BOM at the beginning
M: utf8 init-decoding ( stream utf8 -- utf8-stream )
tuck set-delegate ;
M: utf8 init-encoding ( stream utf8 -- utf8-stream )
tuck set-delegate ;
M: utf8 stream-read1 1 swap stream-read ;
M: utf8 stream-read
delegate [ decode-utf8-step ] decode-read ;
M: utf8 stream-read-partial stream-read ;
M: utf8 stream-read-until
! Copied from { c-reader stream-read-until }!!!
[ swap read-until-loop ] "" make
swap over empty? over not and [ 2drop f f ] when ;
M: utf8 stream-write1
>r 1string r> stream-write ;
M: utf8 stream-write
>r encode-utf8 r> delegate stream-write ;
M: utf8 dispose delegate dispose ;

View File

@ -1,8 +1,16 @@
USING: assocs math kernel sequences io.files hashtables
quotations splitting arrays math.parser combinators.lib hash2
byte-arrays words namespaces words compiler.units const ;
byte-arrays words namespaces words compiler.units parser ;
IN: unicode.data
<<
: VALUE:
CREATE dup reset-generic { f } clone [ first ] curry define ; parsing
: set-value ( value word -- )
word-def first set-first ;
>>
! Convenience functions
: 1+* ( n/f _ -- n+1 )
drop [ 1+ ] [ 0 ] if* ;