Merge branch 'master' of git://factorcode.org/git/littledan

2008-02-13 17:56:30 -06:00 · 2008-02-13 17:56:30 -06:00 · 205d5ca6cb
parent 5d16e232e8 01e9a5cb1a
commit 205d5ca6cb
31 changed files with 201 additions and 64 deletions
--- a/core/io/encodings/binary/about.txt
+++ b/core/io/encodings/binary/about.txt
@ -0,0 +1 @@
+Dummy encoding for binary I/O
--- a/core/io/encodings/binary/authors.txt
+++ b/core/io/encodings/binary/authors.txt
--- a/core/io/encodings/binary/binary-docs.factor
+++ b/core/io/encodings/binary/binary-docs.factor
@ -0,0 +1,5 @@
+USING: help.syntax help.markup ;
+IN: io.encodings.binary
+
+HELP: binary
+{ $class-description "This is the encoding descriptor for binary I/O." } ;
--- a/core/io/encodings/binary/binary.factor
+++ b/core/io/encodings/binary/binary.factor
@ -0,0 +1,6 @@
+USING: kernel io.encodings ;
+
+TUPLE: binary ;
+
+M: binary init-decoding drop ;
+M: binary init-encoding drop ;
--- a/core/io/encodings/binary/tags.txt
+++ b/core/io/encodings/binary/tags.txt
@ -0,0 +1 @@
+text
--- a/core/io/encodings/encodings.factor
+++ b/core/io/encodings/encodings.factor
@ -1,7 +1,7 @@
 ! Copyright (C) 2006, 2007 Daniel Ehrenberg.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: math kernel sequences sbufs vectors
-namespaces unicode.syntax ;
+namespaces unicode.syntax growable strings io ;
 IN: io.encodings

 TUPLE: encode-error ;
@ -23,6 +23,42 @@ SYMBOL: begin
 : finish-decoding ( buf ch state -- str )
    begin eq? [ decode-error ] unless drop "" like ;

-: decode ( seq quot -- str )
-    >r [ length <sbuf> 0 begin ] keep r> each
+: start-decoding ( seq length -- buf ch state seq )
+    <sbuf> 0 begin roll ;
+
+: decode ( seq quot -- string )
+    >r dup length start-decoding r>
+    [ -rot ] swap compose each
    finish-decoding ; inline
+
+: space ( resizable -- room-left )
+    dup underlying swap [ length ] 2apply - ;
+
+: full? ( resizable -- ? ) space zero? ;
+
+: end-read-loop ( buf ch state stream quot -- string/f )
+    2drop 2drop >string f like ;
+
+: under ( a b c -- c a b c )
+    tuck >r swapd r> ; inline
+
+: decode-read-loop ( buf ch state stream quot -- string/f )
+    >r >r pick r> r> rot full?  [ end-read-loop ] [
+        over stream-read1 [
+            -rot tuck >r >r >r -rot r> call r> r> decode-read-loop
+        ] [ end-read-loop ] if*
+    ] if ; inline
+
+: decode-read ( length stream quot -- string )
+    >r swap start-decoding r>
+    decode-read-loop ; inline
+
+GENERIC: init-decoding ( stream encoding -- decoded-stream )
+
+: <decoding> ( stream decoding-class -- decoded-stream )
+    construct-empty init-decoding ;
+
+GENERIC: init-encoding ( stream encoding -- encoded-stream )
+
+: <encoding> ( stream encoding-class -- encoded-stream )
+    construct-empty init-encoding ;
--- a/core/io/encodings/latin1/about.txt
+++ b/core/io/encodings/latin1/about.txt
@ -0,0 +1 @@
+ISO 8859-1 encoding/decoding
--- a/core/io/encodings/latin1/authors.txt
+++ b/core/io/encodings/latin1/authors.txt
--- a/core/io/encodings/latin1/latin1-docs.factor
+++ b/core/io/encodings/latin1/latin1-docs.factor
@ -0,0 +1,5 @@
+USING: help.syntax help.markup ;
+IN: io.encodings.latin1
+
+HELP: latin1
+{ $class-description "This class is used for Latin 1 (ISO 8859-1) encoding and decoding" } ;
--- a/core/io/encodings/latin1/latin1.factor
+++ b/core/io/encodings/latin1/latin1.factor
@ -0,0 +1,19 @@
+USING: io.encodings strings kernel ;
+IN: io.encodings.latin1
+
+TUPLE: latin1 stream ;
+
+M: latin1 init-decoding tuck set-latin1-stream ;
+M: latin1 init-encoding drop ;
+
+M: latin1 stream-read1
+    latin1-stream stream-read1 ;
+
+M: latin1 stream-read
+    latin1-stream stream-read >string ;
+
+M: latin1 stream-read-until
+    latin1-stream stream-read-until >string ;
+
+M: latin1 stream-readln
+    latin1-stream stream-readln >string ;
--- a/core/io/encodings/latin1/tags.txt
+++ b/core/io/encodings/latin1/tags.txt
@ -0,0 +1 @@
+text
--- a/core/io/encodings/utf16/.utf16.factor.swo
+++ b/core/io/encodings/utf16/.utf16.factor.swo
--- a/core/io/encodings/utf16/about.txt
+++ b/core/io/encodings/utf16/about.txt
@ -0,0 +1 @@
+UTF-16, UTF-16LE, UTF-16BE encoding and decoding
--- a/core/io/encodings/utf16/authors.txt
+++ b/core/io/encodings/utf16/authors.txt
@ -0,0 +1 @@
+Daniel Ehrenberg
--- a/core/io/encodings/utf16/summary.txt
+++ b/core/io/encodings/utf16/summary.txt
--- a/core/io/encodings/utf16/tags.txt
+++ b/core/io/encodings/utf16/tags.txt
@ -0,0 +1 @@
+text
--- a/core/io/encodings/utf16/utf16-docs.factor
+++ b/core/io/encodings/utf16/utf16-docs.factor
@ -1,5 +1,5 @@
 USING: help.markup help.syntax io.encodings strings ;
-IN: io.utf16
+IN: io.encodings.utf16

 ARTICLE: "io.utf16" "Working with UTF16-encoded data"
 "The UTF16 encoding is a variable-width encoding. Unicode code points are encoded as 2 or 4 byte sequences."
--- a/core/io/encodings/utf16/utf16-tests.factor
+++ b/core/io/encodings/utf16/utf16-tests.factor
--- a/core/io/encodings/utf16/utf16.factor
+++ b/core/io/encodings/utf16/utf16.factor
@ -30,7 +30,7 @@ SYMBOL: ignore
        >r 2 shift r> BIN: 11 bitand bitor quad3
    ] [ 2drop do-ignore ] if ;

-: (decode-utf16be) ( buf byte ch state -- buf ch state )
+: decode-utf16be-step ( buf byte ch state -- buf ch state )
    {
        { begin [ drop begin-utf16be ] }
        { double [ end-multibyte ] }
@ -41,7 +41,7 @@ SYMBOL: ignore
    } case ;

 : decode-utf16be ( seq -- str )
-    [ -rot (decode-utf16be) ] decode ;
+    [ decode-utf16be-step ] decode ;

 : handle-double ( buf byte ch -- buf ch state )
    swap dup -3 shift BIN: 11011 = [
@ -55,7 +55,7 @@ SYMBOL: ignore
        BIN: 11 bitand append-nums HEX: 10000 + decoded
    ] [ 2drop push-replacement ] if ;

-: (decode-utf16le) ( buf byte ch state -- buf ch state )
+: decode-utf16le-step ( buf byte ch state -- buf ch state )
    {
        { begin [ drop double ] }
        { double [ handle-double ] }
@ -65,7 +65,7 @@ SYMBOL: ignore
    } case ;

 : decode-utf16le ( seq -- str )
-    [ -rot (decode-utf16le) ] decode ;
+    [ decode-utf16le-step ] decode ;

 : encode-first
    -10 shift
@ -114,3 +114,35 @@ SYMBOL: ignore
        { [ utf16be? ] [ decode-utf16be ] }
        { [ t ] [ decode-error ] }
    } cond ;
+
+! UTF16LE streams
+
+TUPLE: utf16le ;
+: <utf16le> utf16le construct-delegate ;
+! In the future, this should detect and ignore a BOM at the beginning
+
+M: utf16le init-decoding ( stream utf16le -- utf16le-stream )
+    tuck set-delegate ;
+
+M: utf16le init-encoding ( stream utf16le -- utf16le-stream )
+    tuck set-delegate ;
+
+M: utf16le stream-read1 1 swap stream-read ;
+
+M: utf16le stream-read
+    delegate [ decode-utf16le-step ] decode-read ;
+
+M: utf16le stream-read-partial stream-read ;
+
+M: utf16le stream-read-until
+    ! Copied from { c-reader stream-read-until }!!!
+    [ swap read-until-loop ] "" make
+    swap over empty? over not and [ 2drop f f ] when ;
+
+M: utf16le stream-write1
+    >r 1string r> stream-write ;
+
+M: utf16le stream-write
+    >r encode-utf16le r> delegate stream-write ;
+
+M: utf16le dispose delegate dispose ;
--- a/core/io/encodings/utf8/about.txt
+++ b/core/io/encodings/utf8/about.txt
@ -0,0 +1 @@
+UTF-8 encoding and decoding
--- a/core/io/encodings/utf8/authors.txt
+++ b/core/io/encodings/utf8/authors.txt
@ -0,0 +1 @@
+Daniel Ehrenberg
--- a/core/io/encodings/utf8/summary.txt
+++ b/core/io/encodings/utf8/summary.txt
--- a/core/io/encodings/utf8/tags.txt
+++ b/core/io/encodings/utf8/tags.txt
@ -0,0 +1 @@
+text
--- a/core/io/encodings/utf8/utf8-docs.factor
+++ b/core/io/encodings/utf8/utf8-docs.factor
@ -1,5 +1,5 @@
 USING: help.markup help.syntax io.encodings strings ;
-IN: io.utf8
+IN: io.encodings.utf8

 ARTICLE: "io.utf8" "Working with UTF8-encoded data"
 "The UTF8 encoding is a variable-width encoding. 7-bit ASCII characters are encoded as single bytes, and other Unicode code points are encoded as 2 to 4 byte sequences."
--- a/core/io/encodings/utf8/utf8-tests.factor
+++ b/core/io/encodings/utf8/utf8-tests.factor
@ -0,0 +1,23 @@
+USING: io.encodings.utf8 tools.test sbufs kernel io
+sequences strings arrays unicode.syntax ;
+
+: decode-utf8-w/stream ( array -- newarray )
+    >sbuf dup reverse-here <utf8> contents >array ;
+
+: encode-utf8-w/stream ( array -- newarray )
+    SBUF" " clone tuck <utf8> write >array ;
+
+[ { UNICHAR: replacement-character } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 11111111 } decode-utf8-w/stream >array ] unit-test
+
+[ { BIN: 101111111000000111111 } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 } decode-utf8-w/stream >array ] unit-test
+
+[ "x" ] [ "x" decode-utf8-w/stream >string ] unit-test
+
+[ { BIN: 11111000000 } ] [ { BIN: 11011111 BIN: 10000000 } decode-utf8-w/stream >array ] unit-test
+
+[ { UNICHAR: replacement-character } ] [ { BIN: 10000000 } decode-utf8-w/stream >array ] unit-test
+
+[ { BIN: 1111000000111111 } ] [ { BIN: 11101111 BIN: 10000000 BIN: 10111111 } decode-utf8-w/stream >array ] unit-test
+
+[ B{ BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 BIN: 11101111 BIN: 10000000 BIN: 10111111 BIN: 11011111 BIN: 10000000 CHAR: x } ]
+[ { BIN: 101111111000000111111 BIN: 1111000000111111 BIN: 11111000000 CHAR: x } encode-utf8 ] unit-test
--- a/core/io/encodings/utf8/utf8.factor
+++ b/core/io/encodings/utf8/utf8.factor
@ -1,8 +1,10 @@
 ! Copyright (C) 2006, 2007 Daniel Ehrenberg.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: math kernel sequences sbufs vectors
-namespaces io.encodings combinators ;
-IN: io.utf8
+USING: math kernel sequences sbufs vectors growable io continuations
+namespaces io.encodings combinators strings io.streams.c ;
+IN: io.encodings.utf8
+
+! Decoding UTF-8

 SYMBOL: double
 SYMBOL: triple
@ -31,7 +33,7 @@ SYMBOL: quad3
 : end-multibyte ( buf byte ch -- buf ch state )
    f append-nums [ decoded ] unless* ;

-: (decode-utf8) ( buf byte ch state -- buf ch state )
+: decode-utf8-step ( buf byte ch state -- buf ch state )
    {
        { begin [ drop begin-utf8 ] }
        { double [ end-multibyte ] }
@ -43,7 +45,9 @@ SYMBOL: quad3
    } case ;

 : decode-utf8 ( seq -- str )
-    [ -rot (decode-utf8) ] decode ;
+    [ decode-utf8-step ] decode ; 
+
+! Encoding UTF-8

 : encoded ( char -- )
    BIN: 111111 bitand BIN: 10000000 bitor , ;
@ -70,3 +74,35 @@ SYMBOL: quad3

 : encode-utf8 ( str -- seq )
    [ [ char>utf8 ] each ] B{ } make ;
+
+! Interface for streams
+
+TUPLE: utf8 ;
+: <utf8> utf8 construct-delegate ;
+! In the future, this should detect and ignore a BOM at the beginning
+
+M: utf8 init-decoding ( stream utf8 -- utf8-stream )
+    tuck set-delegate ;
+
+M: utf8 init-encoding ( stream utf8 -- utf8-stream )
+    tuck set-delegate ;
+
+M: utf8 stream-read1 1 swap stream-read ;
+
+M: utf8 stream-read
+    delegate [ decode-utf8-step ] decode-read ;
+
+M: utf8 stream-read-partial stream-read ;
+
+M: utf8 stream-read-until
+    ! Copied from { c-reader stream-read-until }!!!
+    [ swap read-until-loop ] "" make
+    swap over empty? over not and [ 2drop f f ] when ;
+
+M: utf8 stream-write1
+    >r 1string r> stream-write ;
+
+M: utf8 stream-write
+    >r encode-utf8 r> delegate stream-write ;
+
+M: utf8 dispose delegate dispose ;
--- a/core/io/utf8/utf8-tests.factor
+++ b/core/io/utf8/utf8-tests.factor
@ -1,16 +0,0 @@
-USING: io.utf8 tools.test strings arrays unicode.syntax ;
-
-[ { UNICHAR: replacement-character } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 11111111 } decode-utf8 >array ] unit-test
-
-[ { BIN: 101111111000000111111 } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 } decode-utf8 >array ] unit-test
-
-[ "x" ] [ "x" decode-utf8 >string ] unit-test
-
-[ { BIN: 11111000000 } ] [ { BIN: 11011111 BIN: 10000000 } decode-utf8 >array ] unit-test
-
-[ { UNICHAR: replacement-character } ] [ { BIN: 10000000 } decode-utf8 >array ] unit-test
-
-[ { BIN: 1111000000111111 } ] [ { BIN: 11101111 BIN: 10000000 BIN: 10111111 } decode-utf8 >array ] unit-test
-
-[ B{ BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 BIN: 11101111 BIN: 10000000 BIN: 10111111 BIN: 11011111 BIN: 10000000 CHAR: x } ]
-[ { BIN: 101111111000000111111 BIN: 1111000000111111 BIN: 11111000000 CHAR: x } encode-utf8 ] unit-test
--- a/extra/const/const.factor
+++ b/extra/const/const.factor
@ -1,24 +0,0 @@
-USING: kernel parser words sequences ;
-IN: const
-
-: define-const ( word value -- )
-    [ parsed ] curry dupd define
-    t "parsing" set-word-prop ;
-
-: CONST:
-    CREATE scan-word dup parsing?
-    [ execute dup pop ] when define-const ; parsing
-
-: define-enum ( words -- )
-    dup length [ define-const ] 2each ;
-
-: ENUM:
-    ";" parse-tokens [ create-in ] map define-enum ; parsing
-
-: define-value ( word -- )
-    { f } clone [ first ] curry define ;
-
-: VALUE: CREATE define-value ; parsing
-
-: set-value ( value word -- )
-    word-def first set-first ;
--- a/extra/delegate/protocols/protocols.factor
+++ b/extra/delegate/protocols/protocols.factor
@ -1,7 +1,7 @@
 ! Copyright (C) 2007 Daniel Ehrenberg
 ! See http://factorcode.org/license.txt for BSD license.
 USING: delegate sequences.private sequences assocs prettyprint.sections 
-io definitions kernel ;
+io definitions kernel continuations ;
 IN: delegate.protocols

 PROTOCOL: sequence-protocol
@ -15,7 +15,7 @@ PROTOCOL: assoc-protocol
    ! everything should work, just slower (with >alist)

 PROTOCOL: stream-protocol
-    stream-read1 stream-read stream-read-until
+    stream-read1 stream-read stream-read-until dispose
    stream-flush stream-write1 stream-write stream-format
    stream-nl make-span-stream make-block-stream stream-readln
    make-cell-stream stream-write-table ;
--- a/extra/multiline/multiline.factor
+++ b/extra/multiline/multiline.factor
@ -7,8 +7,11 @@ IN: multiline
    lexer get dup next-line lexer-line-text ;

 : (parse-here) ( -- )
-    next-line-text dup ";" =
-    [ drop lexer get next-line ] [ % "\n" % (parse-here) ] if ;
+    next-line-text [
+        dup ";" =
+        [ drop lexer get next-line ]
+        [ % "\n" % (parse-here) ] if
+    ] [ ";" unexpected-eof ] if* ;

 : parse-here ( -- str )
    [ (parse-here) ] "" make 1 head*
@ -19,11 +22,13 @@ IN: multiline
    parse-here 1quotation define-inline ; parsing

 : (parse-multiline-string) ( start-index end-text -- end-index )
-    lexer get lexer-line-text 2dup start
-    [ rot dupd >r >r swap subseq % r> r> length + ] [
-        rot tail % "\n" % 0
-        lexer get next-line swap (parse-multiline-string)
-    ] if* ;
+    lexer get lexer-line-text [
+        2dup start
+        [ rot dupd >r >r swap subseq % r> r> length + ] [
+            rot tail % "\n" % 0
+            lexer get next-line swap (parse-multiline-string)
+        ] if*
+    ] [ nip unexpected-eof ] if* ;

 : parse-multiline-string ( end-text -- str )
    [
--- a/extra/unicode/breaks/breaks.factor
+++ b/extra/unicode/breaks/breaks.factor
@ -1,7 +1,7 @@
 USING: unicode.categories kernel math combinators splitting
 sequences math.parser io.files io assocs arrays namespaces
 combinators.lib assocs.lib math.ranges unicode.normalize
-unicode.syntax unicode.data compiler.units alien.syntax const ;
+unicode.syntax unicode.data compiler.units alien.syntax ;
 IN: unicode.breaks

 C-ENUM: Any L V T Extend Control CR LF graphemes ;
				`@ -0,0 +1 @@`
				`UTF-16, UTF-16LE, UTF-16BE encoding and decoding`