Unicode upgrade and fix
parent
e293856072
commit
6ff319cfcb
core/io/encodings
extra/unicode/data
|
@ -26,6 +26,8 @@ SYMBOL: begin
|
|||
: start-decoding ( seq length -- buf ch state seq )
|
||||
<sbuf> 0 begin roll ;
|
||||
|
||||
GENERIC: decode-step ( buf byte ch state encoding -- buf ch state )
|
||||
|
||||
: decode ( seq quot -- string )
|
||||
>r dup length start-decoding r>
|
||||
[ -rot ] swap compose each
|
||||
|
@ -39,26 +41,54 @@ SYMBOL: begin
|
|||
: end-read-loop ( buf ch state stream quot -- string/f )
|
||||
2drop 2drop >string f like ;
|
||||
|
||||
: under ( a b c -- c a b c )
|
||||
tuck >r swapd r> ; inline
|
||||
|
||||
: decode-read-loop ( buf ch state stream quot -- string/f )
|
||||
: decode-read-loop ( buf ch state stream encoding -- string/f )
|
||||
>r >r pick r> r> rot full? [ end-read-loop ] [
|
||||
over stream-read1 [
|
||||
-rot tuck >r >r >r -rot r> call r> r> decode-read-loop
|
||||
-rot tuck >r >r >r -rot r> decode-step r> r> decode-read-loop
|
||||
] [ end-read-loop ] if*
|
||||
] if ; inline
|
||||
] if ;
|
||||
|
||||
: decode-read ( length stream quot -- string )
|
||||
: decode-read ( length stream encoding -- string )
|
||||
>r swap start-decoding r>
|
||||
decode-read-loop ; inline
|
||||
decode-read-loop ;
|
||||
|
||||
GENERIC: init-decoding ( stream encoding -- decoded-stream )
|
||||
|
||||
: <decoding> ( stream decoding-class -- decoded-stream )
|
||||
construct-empty init-decoding ;
|
||||
construct-empty init-decoding <line-reader> ;
|
||||
|
||||
GENERIC: init-encoding ( stream encoding -- encoded-stream )
|
||||
|
||||
: <encoding> ( stream encoding-class -- encoded-stream )
|
||||
construct-empty init-encoding ;
|
||||
construct-empty init-encoding <plain-writer> ;
|
||||
|
||||
GENERIC: encode-string ( string encoding -- byte-array )
|
||||
M: tuple-class encode-string construct-empty encode-string ;
|
||||
|
||||
MIXIN: encoding-stream
|
||||
|
||||
M: encoding-stream init-decoding ( stream encoding-stream -- encoding-stream )
|
||||
tuck set-delegate ;
|
||||
|
||||
M: encoding-stream init-encoding ( stream encoding-stream -- encoding-stream )
|
||||
tuck set-delegate ;
|
||||
|
||||
M: encoding-stream stream-read1 1 swap stream-read ;
|
||||
|
||||
M: encoding-stream stream-read
|
||||
[ delegate ] keep decode-read ;
|
||||
|
||||
M: encoding-stream stream-read-partial stream-read ;
|
||||
|
||||
M: encoding-stream stream-read-until
|
||||
! Copied from { c-reader stream-read-until }!!!
|
||||
[ swap read-until-loop ] "" make
|
||||
swap over empty? over not and [ 2drop f f ] when ;
|
||||
|
||||
M: encoding-stream stream-write1
|
||||
>r 1string r> stream-write ;
|
||||
|
||||
M: encoding-stream stream-write
|
||||
[ encode-string ] keep delegate stream-write ;
|
||||
|
||||
M: encoding-stream dispose delegate dispose ;
|
||||
|
|
|
@ -115,34 +115,16 @@ SYMBOL: ignore
|
|||
{ [ t ] [ decode-error ] }
|
||||
} cond ;
|
||||
|
||||
! UTF16LE streams
|
||||
|
||||
TUPLE: utf16le ;
|
||||
: <utf16le> utf16le construct-delegate ;
|
||||
! In the future, this should detect and ignore a BOM at the beginning
|
||||
INSTANCE: encoding-stream utf16le
|
||||
|
||||
M: utf16le init-decoding ( stream utf16le -- utf16le-stream )
|
||||
tuck set-delegate ;
|
||||
M: utf16le encode-string drop encode-utf16le ;
|
||||
M: utf16le decode-step drop decode-utf16le-step ;
|
||||
|
||||
M: utf16le init-encoding ( stream utf16le -- utf16le-stream )
|
||||
tuck set-delegate ;
|
||||
TUPLE: utf16be ;
|
||||
: <utf16be> utf16be construct-delegate ;
|
||||
INSTANCE: encoding-stream utf16be
|
||||
|
||||
M: utf16le stream-read1 1 swap stream-read ;
|
||||
|
||||
M: utf16le stream-read
|
||||
delegate [ decode-utf16le-step ] decode-read ;
|
||||
|
||||
M: utf16le stream-read-partial stream-read ;
|
||||
|
||||
M: utf16le stream-read-until
|
||||
! Copied from { c-reader stream-read-until }!!!
|
||||
[ swap read-until-loop ] "" make
|
||||
swap over empty? over not and [ 2drop f f ] when ;
|
||||
|
||||
M: utf16le stream-write1
|
||||
>r 1string r> stream-write ;
|
||||
|
||||
M: utf16le stream-write
|
||||
>r encode-utf16le r> delegate stream-write ;
|
||||
|
||||
M: utf16le dispose delegate dispose ;
|
||||
M: utf16be encode-string drop encode-utf16be ;
|
||||
M: utf16le decode-step drop decode-utf16be-step ;
|
||||
|
|
|
@ -79,30 +79,8 @@ SYMBOL: quad3
|
|||
|
||||
TUPLE: utf8 ;
|
||||
: <utf8> utf8 construct-delegate ;
|
||||
INSTANCE: encoding-stream utf8
|
||||
|
||||
M: utf8 encode-string drop encode-utf8 ;
|
||||
M: utf8 decode-step drop decode-utf8-step ;
|
||||
! In the future, this should detect and ignore a BOM at the beginning
|
||||
|
||||
M: utf8 init-decoding ( stream utf8 -- utf8-stream )
|
||||
tuck set-delegate ;
|
||||
|
||||
M: utf8 init-encoding ( stream utf8 -- utf8-stream )
|
||||
tuck set-delegate ;
|
||||
|
||||
M: utf8 stream-read1 1 swap stream-read ;
|
||||
|
||||
M: utf8 stream-read
|
||||
delegate [ decode-utf8-step ] decode-read ;
|
||||
|
||||
M: utf8 stream-read-partial stream-read ;
|
||||
|
||||
M: utf8 stream-read-until
|
||||
! Copied from { c-reader stream-read-until }!!!
|
||||
[ swap read-until-loop ] "" make
|
||||
swap over empty? over not and [ 2drop f f ] when ;
|
||||
|
||||
M: utf8 stream-write1
|
||||
>r 1string r> stream-write ;
|
||||
|
||||
M: utf8 stream-write
|
||||
>r encode-utf8 r> delegate stream-write ;
|
||||
|
||||
M: utf8 dispose delegate dispose ;
|
||||
|
|
|
@ -1,8 +1,16 @@
|
|||
USING: assocs math kernel sequences io.files hashtables
|
||||
quotations splitting arrays math.parser combinators.lib hash2
|
||||
byte-arrays words namespaces words compiler.units const ;
|
||||
byte-arrays words namespaces words compiler.units parser ;
|
||||
IN: unicode.data
|
||||
|
||||
<<
|
||||
: VALUE:
|
||||
CREATE dup reset-generic { f } clone [ first ] curry define ; parsing
|
||||
|
||||
: set-value ( value word -- )
|
||||
word-def first set-first ;
|
||||
>>
|
||||
|
||||
! Convenience functions
|
||||
: 1+* ( n/f _ -- n+1 )
|
||||
drop [ 1+ ] [ 0 ] if* ;
|
||||
|
|
Loading…
Reference in New Issue