Merge branch 'master' of git://factorcode.org/git/littledan

2008-02-13 17:56:30 -06:00 · 2008-02-13 17:56:30 -06:00 · 205d5ca6cb
parent 5d16e232e8 01e9a5cb1a
commit 205d5ca6cb
31 changed files with 201 additions and 64 deletions
--- a/core/io/encodings/binary/about.txt
+++ b/core/io/encodings/binary/about.txt
@ -0,0 +1 @@
 Dummy encoding for binary I/O
--- a/core/io/encodings/binary/authors.txt
+++ b/core/io/encodings/binary/authors.txt
--- a/core/io/encodings/binary/binary-docs.factor
+++ b/core/io/encodings/binary/binary-docs.factor
@ -0,0 +1,5 @@
 USING: help.syntax help.markup ;
 IN: io.encodings.binary
 HELP: binary
 { $class-description "This is the encoding descriptor for binary I/O." } ;
--- a/core/io/encodings/binary/binary.factor
+++ b/core/io/encodings/binary/binary.factor
@ -0,0 +1,6 @@
 USING: kernel io.encodings ;
 TUPLE: binary ;
 M: binary init-decoding drop ;
 M: binary init-encoding drop ;
--- a/core/io/encodings/binary/tags.txt
+++ b/core/io/encodings/binary/tags.txt
@ -0,0 +1 @@
 text
--- a/core/io/encodings/encodings.factor
+++ b/core/io/encodings/encodings.factor
@ -1,7 +1,7 @@
 ! Copyright (C) 2006, 2007 Daniel Ehrenberg.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: math kernel sequences sbufs vectors
-namespaces unicode.syntax ;
+namespaces unicode.syntax growable strings io ;
 IN: io.encodings
 TUPLE: encode-error ;
@ -23,6 +23,42 @@ SYMBOL: begin
 : finish-decoding ( buf ch state -- str )
    begin eq? [ decode-error ] unless drop "" like ;
-: decode ( seq quot -- str )
+: start-decoding ( seq length -- buf ch state seq )
-    >r [ length <sbuf> 0 begin ] keep r> each
+    <sbuf> 0 begin roll ;
 : decode ( seq quot -- string )
    >r dup length start-decoding r>
    [ -rot ] swap compose each
    finish-decoding ; inline
 : space ( resizable -- room-left )
    dup underlying swap [ length ] 2apply - ;
 : full? ( resizable -- ? ) space zero? ;
 : end-read-loop ( buf ch state stream quot -- string/f )
    2drop 2drop >string f like ;
 : under ( a b c -- c a b c )
    tuck >r swapd r> ; inline
 : decode-read-loop ( buf ch state stream quot -- string/f )
    >r >r pick r> r> rot full?  [ end-read-loop ] [
        over stream-read1 [
            -rot tuck >r >r >r -rot r> call r> r> decode-read-loop
        ] [ end-read-loop ] if*
    ] if ; inline
 : decode-read ( length stream quot -- string )
    >r swap start-decoding r>
    decode-read-loop ; inline
 GENERIC: init-decoding ( stream encoding -- decoded-stream )
 : <decoding> ( stream decoding-class -- decoded-stream )
    construct-empty init-decoding ;
 GENERIC: init-encoding ( stream encoding -- encoded-stream )
 : <encoding> ( stream encoding-class -- encoded-stream )
    construct-empty init-encoding ;
--- a/core/io/encodings/latin1/about.txt
+++ b/core/io/encodings/latin1/about.txt
@ -0,0 +1 @@
 ISO 8859-1 encoding/decoding
--- a/core/io/encodings/latin1/authors.txt
+++ b/core/io/encodings/latin1/authors.txt
--- a/core/io/encodings/latin1/latin1-docs.factor
+++ b/core/io/encodings/latin1/latin1-docs.factor
@ -0,0 +1,5 @@
 USING: help.syntax help.markup ;
 IN: io.encodings.latin1
 HELP: latin1
 { $class-description "This class is used for Latin 1 (ISO 8859-1) encoding and decoding" } ;
--- a/core/io/encodings/latin1/latin1.factor
+++ b/core/io/encodings/latin1/latin1.factor
@ -0,0 +1,19 @@
 USING: io.encodings strings kernel ;
 IN: io.encodings.latin1
 TUPLE: latin1 stream ;
 M: latin1 init-decoding tuck set-latin1-stream ;
 M: latin1 init-encoding drop ;
 M: latin1 stream-read1
    latin1-stream stream-read1 ;
 M: latin1 stream-read
    latin1-stream stream-read >string ;
 M: latin1 stream-read-until
    latin1-stream stream-read-until >string ;
 M: latin1 stream-readln
    latin1-stream stream-readln >string ;
--- a/core/io/encodings/latin1/tags.txt
+++ b/core/io/encodings/latin1/tags.txt
@ -0,0 +1 @@
 text
--- a/core/io/encodings/utf16/.utf16.factor.swo
+++ b/core/io/encodings/utf16/.utf16.factor.swo
--- a/core/io/encodings/utf16/about.txt
+++ b/core/io/encodings/utf16/about.txt
@ -0,0 +1 @@
 UTF-16, UTF-16LE, UTF-16BE encoding and decoding
--- a/core/io/encodings/utf16/authors.txt
+++ b/core/io/encodings/utf16/authors.txt
@ -0,0 +1 @@
 Daniel Ehrenberg
--- a/core/io/encodings/utf16/summary.txt
+++ b/core/io/encodings/utf16/summary.txt
--- a/core/io/encodings/utf16/tags.txt
+++ b/core/io/encodings/utf16/tags.txt
@ -0,0 +1 @@
 text
--- a/core/io/encodings/utf16/utf16-docs.factor
+++ b/core/io/encodings/utf16/utf16-docs.factor
@ -1,5 +1,5 @@
 USING: help.markup help.syntax io.encodings strings ;
-IN: io.utf16
+IN: io.encodings.utf16
 ARTICLE: "io.utf16" "Working with UTF16-encoded data"
 "The UTF16 encoding is a variable-width encoding. Unicode code points are encoded as 2 or 4 byte sequences."
--- a/core/io/encodings/utf16/utf16-tests.factor
+++ b/core/io/encodings/utf16/utf16-tests.factor
--- a/core/io/encodings/utf16/utf16.factor
+++ b/core/io/encodings/utf16/utf16.factor
@ -30,7 +30,7 @@ SYMBOL: ignore
        >r 2 shift r> BIN: 11 bitand bitor quad3
    ] [ 2drop do-ignore ] if ;
-: (decode-utf16be) ( buf byte ch state -- buf ch state )
+: decode-utf16be-step ( buf byte ch state -- buf ch state )
    {
        { begin [ drop begin-utf16be ] }
        { double [ end-multibyte ] }
@ -41,7 +41,7 @@ SYMBOL: ignore
    } case ;
 : decode-utf16be ( seq -- str )
-    [ -rot (decode-utf16be) ] decode ;
+    [ decode-utf16be-step ] decode ;
 : handle-double ( buf byte ch -- buf ch state )
    swap dup -3 shift BIN: 11011 = [
@ -55,7 +55,7 @@ SYMBOL: ignore
        BIN: 11 bitand append-nums HEX: 10000 + decoded
    ] [ 2drop push-replacement ] if ;
-: (decode-utf16le) ( buf byte ch state -- buf ch state )
+: decode-utf16le-step ( buf byte ch state -- buf ch state )
    {
        { begin [ drop double ] }
        { double [ handle-double ] }
@ -65,7 +65,7 @@ SYMBOL: ignore
    } case ;
 : decode-utf16le ( seq -- str )
-    [ -rot (decode-utf16le) ] decode ;
+    [ decode-utf16le-step ] decode ;
 : encode-first
    -10 shift
@ -114,3 +114,35 @@ SYMBOL: ignore
        { [ utf16be? ] [ decode-utf16be ] }
        { [ t ] [ decode-error ] }
    } cond ;
 ! UTF16LE streams
 TUPLE: utf16le ;
 : <utf16le> utf16le construct-delegate ;
 ! In the future, this should detect and ignore a BOM at the beginning
 M: utf16le init-decoding ( stream utf16le -- utf16le-stream )
    tuck set-delegate ;
 M: utf16le init-encoding ( stream utf16le -- utf16le-stream )
    tuck set-delegate ;
 M: utf16le stream-read1 1 swap stream-read ;
 M: utf16le stream-read
    delegate [ decode-utf16le-step ] decode-read ;
 M: utf16le stream-read-partial stream-read ;
 M: utf16le stream-read-until
    ! Copied from { c-reader stream-read-until }!!!
    [ swap read-until-loop ] "" make
    swap over empty? over not and [ 2drop f f ] when ;
 M: utf16le stream-write1
    >r 1string r> stream-write ;
 M: utf16le stream-write
    >r encode-utf16le r> delegate stream-write ;
 M: utf16le dispose delegate dispose ;
--- a/core/io/encodings/utf8/about.txt
+++ b/core/io/encodings/utf8/about.txt
@ -0,0 +1 @@
 UTF-8 encoding and decoding
--- a/core/io/encodings/utf8/authors.txt
+++ b/core/io/encodings/utf8/authors.txt
@ -0,0 +1 @@
 Daniel Ehrenberg
--- a/core/io/encodings/utf8/summary.txt
+++ b/core/io/encodings/utf8/summary.txt
--- a/core/io/encodings/utf8/tags.txt
+++ b/core/io/encodings/utf8/tags.txt
@ -0,0 +1 @@
 text
--- a/core/io/encodings/utf8/utf8-docs.factor
+++ b/core/io/encodings/utf8/utf8-docs.factor
@ -1,5 +1,5 @@
 USING: help.markup help.syntax io.encodings strings ;
-IN: io.utf8
+IN: io.encodings.utf8
 ARTICLE: "io.utf8" "Working with UTF8-encoded data"
 "The UTF8 encoding is a variable-width encoding. 7-bit ASCII characters are encoded as single bytes, and other Unicode code points are encoded as 2 to 4 byte sequences."
--- a/core/io/encodings/utf8/utf8-tests.factor
+++ b/core/io/encodings/utf8/utf8-tests.factor
@ -0,0 +1,23 @@
 USING: io.encodings.utf8 tools.test sbufs kernel io
 sequences strings arrays unicode.syntax ;
 : decode-utf8-w/stream ( array -- newarray )
    >sbuf dup reverse-here <utf8> contents >array ;
 : encode-utf8-w/stream ( array -- newarray )
    SBUF" " clone tuck <utf8> write >array ;
 [ { UNICHAR: replacement-character } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 11111111 } decode-utf8-w/stream >array ] unit-test
 [ { BIN: 101111111000000111111 } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 } decode-utf8-w/stream >array ] unit-test
 [ "x" ] [ "x" decode-utf8-w/stream >string ] unit-test
 [ { BIN: 11111000000 } ] [ { BIN: 11011111 BIN: 10000000 } decode-utf8-w/stream >array ] unit-test
 [ { UNICHAR: replacement-character } ] [ { BIN: 10000000 } decode-utf8-w/stream >array ] unit-test
 [ { BIN: 1111000000111111 } ] [ { BIN: 11101111 BIN: 10000000 BIN: 10111111 } decode-utf8-w/stream >array ] unit-test
 [ B{ BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 BIN: 11101111 BIN: 10000000 BIN: 10111111 BIN: 11011111 BIN: 10000000 CHAR: x } ]
 [ { BIN: 101111111000000111111 BIN: 1111000000111111 BIN: 11111000000 CHAR: x } encode-utf8 ] unit-test
--- a/core/io/encodings/utf8/utf8.factor
+++ b/core/io/encodings/utf8/utf8.factor
@ -1,8 +1,10 @@
 ! Copyright (C) 2006, 2007 Daniel Ehrenberg.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: math kernel sequences sbufs vectors
+USING: math kernel sequences sbufs vectors growable io continuations
-namespaces io.encodings combinators ;
+namespaces io.encodings combinators strings io.streams.c ;
-IN: io.utf8
+IN: io.encodings.utf8
 ! Decoding UTF-8
 SYMBOL: double
 SYMBOL: triple
@ -31,7 +33,7 @@ SYMBOL: quad3
 : end-multibyte ( buf byte ch -- buf ch state )
    f append-nums [ decoded ] unless* ;
-: (decode-utf8) ( buf byte ch state -- buf ch state )
+: decode-utf8-step ( buf byte ch state -- buf ch state )
    {
        { begin [ drop begin-utf8 ] }
        { double [ end-multibyte ] }
@ -43,7 +45,9 @@ SYMBOL: quad3
    } case ;
 : decode-utf8 ( seq -- str )
-    [ -rot (decode-utf8) ] decode ;
+    [ decode-utf8-step ] decode ; 
 ! Encoding UTF-8
 : encoded ( char -- )
    BIN: 111111 bitand BIN: 10000000 bitor , ;
@ -70,3 +74,35 @@ SYMBOL: quad3
 : encode-utf8 ( str -- seq )
    [ [ char>utf8 ] each ] B{ } make ;
 ! Interface for streams
 TUPLE: utf8 ;
 : <utf8> utf8 construct-delegate ;
 ! In the future, this should detect and ignore a BOM at the beginning
 M: utf8 init-decoding ( stream utf8 -- utf8-stream )
    tuck set-delegate ;
 M: utf8 init-encoding ( stream utf8 -- utf8-stream )
    tuck set-delegate ;
 M: utf8 stream-read1 1 swap stream-read ;
 M: utf8 stream-read
    delegate [ decode-utf8-step ] decode-read ;
 M: utf8 stream-read-partial stream-read ;
 M: utf8 stream-read-until
    ! Copied from { c-reader stream-read-until }!!!
    [ swap read-until-loop ] "" make
    swap over empty? over not and [ 2drop f f ] when ;
 M: utf8 stream-write1
    >r 1string r> stream-write ;
 M: utf8 stream-write
    >r encode-utf8 r> delegate stream-write ;
 M: utf8 dispose delegate dispose ;
--- a/core/io/utf8/utf8-tests.factor
+++ b/core/io/utf8/utf8-tests.factor
@ -1,16 +0,0 @@
 USING: io.utf8 tools.test strings arrays unicode.syntax ;
 [ { UNICHAR: replacement-character } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 11111111 } decode-utf8 >array ] unit-test
 [ { BIN: 101111111000000111111 } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 } decode-utf8 >array ] unit-test
 [ "x" ] [ "x" decode-utf8 >string ] unit-test
 [ { BIN: 11111000000 } ] [ { BIN: 11011111 BIN: 10000000 } decode-utf8 >array ] unit-test
 [ { UNICHAR: replacement-character } ] [ { BIN: 10000000 } decode-utf8 >array ] unit-test
 [ { BIN: 1111000000111111 } ] [ { BIN: 11101111 BIN: 10000000 BIN: 10111111 } decode-utf8 >array ] unit-test
 [ B{ BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 BIN: 11101111 BIN: 10000000 BIN: 10111111 BIN: 11011111 BIN: 10000000 CHAR: x } ]
 [ { BIN: 101111111000000111111 BIN: 1111000000111111 BIN: 11111000000 CHAR: x } encode-utf8 ] unit-test
--- a/extra/const/const.factor
+++ b/extra/const/const.factor
@ -1,24 +0,0 @@
 USING: kernel parser words sequences ;
 IN: const
 : define-const ( word value -- )
    [ parsed ] curry dupd define
    t "parsing" set-word-prop ;
 : CONST:
    CREATE scan-word dup parsing?
    [ execute dup pop ] when define-const ; parsing
 : define-enum ( words -- )
    dup length [ define-const ] 2each ;
 : ENUM:
    ";" parse-tokens [ create-in ] map define-enum ; parsing
 : define-value ( word -- )
    { f } clone [ first ] curry define ;
 : VALUE: CREATE define-value ; parsing
 : set-value ( value word -- )
    word-def first set-first ;
--- a/extra/delegate/protocols/protocols.factor
+++ b/extra/delegate/protocols/protocols.factor
@ -1,7 +1,7 @@
 ! Copyright (C) 2007 Daniel Ehrenberg
 ! See http://factorcode.org/license.txt for BSD license.
 USING: delegate sequences.private sequences assocs prettyprint.sections 
-io definitions kernel ;
+io definitions kernel continuations ;
 IN: delegate.protocols
 PROTOCOL: sequence-protocol
@ -15,7 +15,7 @@ PROTOCOL: assoc-protocol
    ! everything should work, just slower (with >alist)
 PROTOCOL: stream-protocol
-    stream-read1 stream-read stream-read-until
+    stream-read1 stream-read stream-read-until dispose
    stream-flush stream-write1 stream-write stream-format
    stream-nl make-span-stream make-block-stream stream-readln
    make-cell-stream stream-write-table ;
--- a/extra/multiline/multiline.factor
+++ b/extra/multiline/multiline.factor
@ -7,8 +7,11 @@ IN: multiline
    lexer get dup next-line lexer-line-text ;
 : (parse-here) ( -- )
-    next-line-text dup ";" =
+    next-line-text [
-    [ drop lexer get next-line ] [ % "\n" % (parse-here) ] if ;
+        dup ";" =
        [ drop lexer get next-line ]
        [ % "\n" % (parse-here) ] if
    ] [ ";" unexpected-eof ] if* ;
 : parse-here ( -- str )
    [ (parse-here) ] "" make 1 head*
@ -19,11 +22,13 @@ IN: multiline
    parse-here 1quotation define-inline ; parsing
 : (parse-multiline-string) ( start-index end-text -- end-index )
-    lexer get lexer-line-text 2dup start
+    lexer get lexer-line-text [
-    [ rot dupd >r >r swap subseq % r> r> length + ] [
+        2dup start
-        rot tail % "\n" % 0
+        [ rot dupd >r >r swap subseq % r> r> length + ] [
-        lexer get next-line swap (parse-multiline-string)
+            rot tail % "\n" % 0
-    ] if* ;
+            lexer get next-line swap (parse-multiline-string)
        ] if*
    ] [ nip unexpected-eof ] if* ;
 : parse-multiline-string ( end-text -- str )
    [
--- a/extra/unicode/breaks/breaks.factor
+++ b/extra/unicode/breaks/breaks.factor
@ -1,7 +1,7 @@
 USING: unicode.categories kernel math combinators splitting
 sequences math.parser io.files io assocs arrays namespaces
 combinators.lib assocs.lib math.ranges unicode.normalize
-unicode.syntax unicode.data compiler.units alien.syntax const ;
+unicode.syntax unicode.data compiler.units alien.syntax ;
 IN: unicode.breaks
 C-ENUM: Any L V T Extend Control CR LF graphemes ;
		`@ -0,0 +1 @@`
							`UTF-16, UTF-16LE, UTF-16BE encoding and decoding`