diff --git a/core/io/encodings/encodings-docs.factor b/core/io/encodings/encodings-docs.factor index 562e7dcd9a..e5e71b05f0 100644 --- a/core/io/encodings/encodings-docs.factor +++ b/core/io/encodings/encodings-docs.factor @@ -46,23 +46,23 @@ ARTICLE: "encodings-protocol" "Encoding protocol" "An encoding descriptor must implement the following methods. The methods are implemented on tuple classes by instantiating the class and calling the method again." { $subsection decode-step } { $subsection init-decoder } -{ $subsection encode-string } ; +{ $subsection stream-write-encoded } ; HELP: decode-step ( buf char encoding -- ) { $values { "buf" "A string buffer which characters can be pushed to" } { "char" "An octet which is read from a stream" } { "encoding" "An encoding descriptor tuple" } } -{ $description "A single step in the decoding process must be defined for the decoding descriptor. When each octet is read, this word is called, and depending on the decoder's internal state, something may be pushed to the buffer or the state may change." } ; +{ $description "A single step in the decoding process must be defined for the decoding descriptor. When each octet is read, this word is called, and depending on the decoder's internal state, something may be pushed to the buffer or the state may change. This should not be used directly." } ; -HELP: encode-string ( string encoding -- byte-array ) +HELP: stream-write-encoded ( string stream encoding -- ) { $values { "string" "a string" } - { "encoding" "an encoding descriptor" } - { "byte-array" "an encoded byte-array" } } -{ $description "Encodes the string with the given encoding descriptor, outputting the result to a byte-array." } ; + { "stream" "an output stream" } + { "encoding" "an encoding descriptor" } } +{ $description "Encodes the string with the given encoding descriptor, outputing the result to the given stream. This should not be used directly." } ; HELP: init-decoder ( stream encoding -- encoding ) { $values { "stream" "an input stream" } { "encoding" "an encoding descriptor" } } -{ $description "Initializes the decoder tuple's state. The stream is exposed so that it can be read, eg for a BOM." } ; +{ $description "Initializes the decoder tuple's state. The stream is exposed so that it can be read, eg for a BOM. This should not be used directly." } ; -{ init-decoder decode-step encode-string } related-words +{ init-decoder decode-step stream-write-encoded } related-words diff --git a/core/io/encodings/encodings.factor b/core/io/encodings/encodings.factor index 8489c46d2e..2f68334bde 100755 --- a/core/io/encodings/encodings.factor +++ b/core/io/encodings/encodings.factor @@ -15,9 +15,8 @@ GENERIC: init-decoder ( stream encoding -- encoding ) M: tuple-class init-decoder construct-empty init-decoder ; M: object init-decoder nip ; -GENERIC: encode-string ( string encoding -- byte-array ) -M: tuple-class encode-string construct-empty encode-string ; -M: object encode-string drop >byte-array ; +GENERIC: stream-write-encoded ( string stream encoding -- byte-array ) +M: object stream-write-encoded drop stream-write ; ! Decoding @@ -136,7 +135,7 @@ M: encoder stream-write1 >r 1string r> stream-write ; M: encoder stream-write - [ encoder-code encode-string ] keep delegate stream-write ; + { delegate encoder-code } get-slots stream-write-encoded ; M: encoder dispose delegate dispose ; diff --git a/core/io/encodings/string/string-docs.factor b/core/io/encodings/string/string-docs.factor index a5f92db165..0a35eee272 100644 --- a/core/io/encodings/string/string-docs.factor +++ b/core/io/encodings/string/string-docs.factor @@ -1,3 +1,5 @@ +! Copyright (C) 2008 Daniel Ehrenberg. +! See http://factorcode.org/license.txt for BSD license. USING: help.markup help.syntax byte-arrays strings ; IN: io.encodings.string diff --git a/core/io/encodings/string/string-tests.factor b/core/io/encodings/string/string-tests.factor index 8d8db84799..ddae9c8734 100644 --- a/core/io/encodings/string/string-tests.factor +++ b/core/io/encodings/string/string-tests.factor @@ -1,3 +1,5 @@ +! Copyright (C) 2008 Daniel Ehrenberg. +! See http://factorcode.org/license.txt for BSD license. USING: strings io.encodings.utf8 io.encodings.utf16 io.encodings.string tools.test ; IN: io.encodings.string.tests diff --git a/core/io/encodings/string/string.factor b/core/io/encodings/string/string.factor index c6e01121dc..5e57a943a9 100644 --- a/core/io/encodings/string/string.factor +++ b/core/io/encodings/string/string.factor @@ -1,3 +1,5 @@ +! Copyright (C) 2008 Daniel Ehrenberg. +! See http://factorcode.org/license.txt for BSD license. USING: io io.streams.byte-array ; IN: io.encodings.string diff --git a/core/io/encodings/utf8/utf8-tests.factor b/core/io/encodings/utf8/utf8-tests.factor index 0671fe2129..8f1c998f3d 100644 --- a/core/io/encodings/utf8/utf8-tests.factor +++ b/core/io/encodings/utf8/utf8-tests.factor @@ -1,21 +1,20 @@ -USING: io.encodings.utf8 tools.test sbufs kernel io io.encodings -sequences strings arrays unicode io.streams.byte-array ; +USING: io.encodings.utf8 tools.test io.encodings.string strings arrays ; : decode-utf8-w/stream ( array -- newarray ) - utf8 contents >array ; + utf8 decode >array ; : encode-utf8-w/stream ( array -- newarray ) - utf8 [ write ] with-byte-writer >array ; + utf8 encode >array ; -[ { CHAR: replacement-character } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 11111111 } decode-utf8-w/stream >array ] unit-test +[ { CHAR: replacement-character } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 11111111 } decode-utf8-w/stream ] unit-test -[ { BIN: 101111111000000111111 } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 } decode-utf8-w/stream >array ] unit-test +[ { BIN: 101111111000000111111 } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 } decode-utf8-w/stream ] unit-test [ "x" ] [ "x" decode-utf8-w/stream >string ] unit-test [ { BIN: 11111000000 } ] [ { BIN: 11011111 BIN: 10000000 } decode-utf8-w/stream >array ] unit-test -[ { CHAR: replacement-character } ] [ { BIN: 10000000 } decode-utf8-w/stream >array ] unit-test +[ { CHAR: replacement-character } ] [ { BIN: 10000000 } decode-utf8-w/stream ] unit-test [ { BIN: 1111000000111111 } ] [ { BIN: 11101111 BIN: 10000000 BIN: 10111111 } decode-utf8-w/stream >array ] unit-test diff --git a/core/io/encodings/utf8/utf8.factor b/core/io/encodings/utf8/utf8.factor index edc4663214..5887a8375e 100644 --- a/core/io/encodings/utf8/utf8.factor +++ b/core/io/encodings/utf8/utf8.factor @@ -60,29 +60,28 @@ M: utf8 init-decoder nip begin over set-utf8-state ; ! Encoding UTF-8 : encoded ( char -- ) - BIN: 111111 bitand BIN: 10000000 bitor , ; + BIN: 111111 bitand BIN: 10000000 bitor write1 ; : char>utf8 ( char -- ) { - { [ dup -7 shift zero? ] [ , ] } + { [ dup -7 shift zero? ] [ write1 ] } { [ dup -11 shift zero? ] [ - dup -6 shift BIN: 11000000 bitor , + dup -6 shift BIN: 11000000 bitor write1 encoded ] } { [ dup -16 shift zero? ] [ - dup -12 shift BIN: 11100000 bitor , + dup -12 shift BIN: 11100000 bitor write1 dup -6 shift encoded encoded ] } { [ t ] [ - dup -18 shift BIN: 11110000 bitor , + dup -18 shift BIN: 11110000 bitor write1 dup -12 shift encoded dup -6 shift encoded encoded ] } } cond ; -: encode-utf8 ( str -- seq ) - [ [ char>utf8 ] each ] B{ } make ; - -M: utf8 encode-string drop encode-utf8 ; +M: utf8 stream-write-encoded + ! For efficiency, this should be modified to avoid variable reads + drop [ [ char>utf8 ] each ] with-stream* ; diff --git a/extra/io/encodings/ascii/ascii.factor b/extra/io/encodings/ascii/ascii.factor index 6dd8d23155..f310f53e29 100644 --- a/extra/io/encodings/ascii/ascii.factor +++ b/extra/io/encodings/ascii/ascii.factor @@ -3,13 +3,13 @@ USING: io io.encodings strings kernel math sequences byte-arrays io.encodings ; IN: io.encodings.ascii -: encode-check<= ( string max -- byte-array ) - dupd [ <= ] curry all? [ >byte-array ] [ encode-error ] if ; +: encode-check<= ( string stream max -- ) + [ pick > [ encode-error ] [ stream-write1 ] if ] 2curry each ; TUPLE: ascii ; -M: ascii encode-string +M: ascii stream-write-encoded ( string stream encoding -- ) drop 127 encode-check<= ; M: ascii decode-step - drop dup 128 >= [ encode-error ] [ swap push ] if ; + drop dup 128 >= [ decode-error ] [ swap push ] if ; diff --git a/extra/io/encodings/latin1/latin1.factor b/extra/io/encodings/latin1/latin1.factor index 36e38caa1c..989f45bc64 100755 --- a/extra/io/encodings/latin1/latin1.factor +++ b/extra/io/encodings/latin1/latin1.factor @@ -5,8 +5,8 @@ IN: io.encodings.latin1 TUPLE: latin1 ; -M: latin1 encode-string +M: latin1 stream-write-encoded drop 255 encode-check<= ; M: latin1 decode-step - drop dup 256 >= [ encode-error ] [ swap push ] if ; + drop dup 256 >= [ decode-error ] [ swap push ] if ; diff --git a/extra/io/encodings/utf16/utf16-tests.factor b/extra/io/encodings/utf16/utf16-tests.factor index 7ee5c9574e..89b61a3e37 100755 --- a/extra/io/encodings/utf16/utf16-tests.factor +++ b/extra/io/encodings/utf16/utf16-tests.factor @@ -1,28 +1,22 @@ USING: kernel tools.test io.encodings.utf16 arrays sbufs -sequences io.encodings io unicode io.streams.byte-array ; +sequences io.encodings io unicode io.encodings.string ; -: decode-w/stream ( array encoding -- newarray ) - contents >array ; +[ { CHAR: x } ] [ { 0 CHAR: x } utf16be decode >array ] unit-test +[ { HEX: 1D11E } ] [ { HEX: D8 HEX: 34 HEX: DD HEX: 1E } utf16be decode >array ] unit-test +[ { CHAR: replacement-character } ] [ { BIN: 11011111 CHAR: q } utf16be decode >array ] unit-test +[ { CHAR: replacement-character } ] [ { BIN: 11011011 CHAR: x BIN: 11011011 CHAR: x } utf16be decode >array ] unit-test -: encode-w/stream ( array encoding -- newarray ) - [ write ] with-byte-writer >array ; +[ { 0 120 216 52 221 30 } ] [ { CHAR: x HEX: 1d11e } utf16be encode >array ] unit-test -[ { CHAR: x } ] [ { 0 CHAR: x } utf16be decode-w/stream ] unit-test -[ { HEX: 1D11E } ] [ { HEX: D8 HEX: 34 HEX: DD HEX: 1E } utf16be decode-w/stream ] unit-test -[ { CHAR: replacement-character } ] [ { BIN: 11011111 CHAR: q } utf16be decode-w/stream ] unit-test -[ { CHAR: replacement-character } ] [ { BIN: 11011011 CHAR: x BIN: 11011011 CHAR: x } utf16be decode-w/stream ] unit-test +[ { CHAR: x } ] [ { CHAR: x 0 } utf16le decode >array ] unit-test +[ { 119070 } ] [ { HEX: 34 HEX: D8 HEX: 1E HEX: DD } utf16le decode >array ] unit-test +[ { CHAR: replacement-character } ] [ { 0 BIN: 11011111 } utf16le decode >array ] unit-test +[ { CHAR: replacement-character } ] [ { 0 BIN: 11011011 0 0 } utf16le decode >array ] unit-test +[ { 119070 } ] [ { HEX: 34 HEX: D8 HEX: 1E HEX: DD } utf16le decode >array ] unit-test -[ { 0 120 216 52 221 30 } ] [ { CHAR: x HEX: 1d11e } utf16be encode-w/stream ] unit-test +[ { 120 0 52 216 30 221 } ] [ { CHAR: x HEX: 1d11e } utf16le encode >array ] unit-test -[ { CHAR: x } ] [ { CHAR: x 0 } utf16le decode-w/stream ] unit-test -[ { 119070 } ] [ { HEX: 34 HEX: D8 HEX: 1E HEX: DD } utf16le decode-w/stream ] unit-test -[ { CHAR: replacement-character } ] [ { 0 BIN: 11011111 } utf16le decode-w/stream ] unit-test -[ { CHAR: replacement-character } ] [ { 0 BIN: 11011011 0 0 } utf16le decode-w/stream ] unit-test -[ { 119070 } ] [ { HEX: 34 HEX: D8 HEX: 1E HEX: DD } utf16le decode-w/stream ] unit-test +[ { CHAR: x } ] [ { HEX: ff HEX: fe CHAR: x 0 } utf16 decode >array ] unit-test +[ { CHAR: x } ] [ { HEX: fe HEX: ff 0 CHAR: x } utf16 decode >array ] unit-test -[ { 120 0 52 216 30 221 } ] [ { CHAR: x HEX: 1d11e } utf16le encode-w/stream ] unit-test - -[ { CHAR: x } ] [ { HEX: ff HEX: fe CHAR: x 0 } utf16 decode-w/stream ] unit-test -[ { CHAR: x } ] [ { HEX: fe HEX: ff 0 CHAR: x } utf16 decode-w/stream ] unit-test - -[ { HEX: ff HEX: fe 120 0 52 216 30 221 } ] [ { CHAR: x HEX: 1d11e } utf16 encode-w/stream ] unit-test +[ { HEX: ff HEX: fe 120 0 52 216 30 221 } ] [ { CHAR: x HEX: 1d11e } utf16 encode >array ] unit-test diff --git a/extra/io/encodings/utf16/utf16.factor b/extra/io/encodings/utf16/utf16.factor index 8815d588ad..a501fad0bd 100755 --- a/extra/io/encodings/utf16/utf16.factor +++ b/extra/io/encodings/utf16/utf16.factor @@ -106,25 +106,28 @@ M: utf16le init-decoder nip begin over set-utf16le-state ; : char>utf16be ( char -- ) dup HEX: FFFF > [ HEX: 10000 - - dup encode-first swap , , - encode-second swap , , - ] [ h>b/b , , ] if ; + dup encode-first swap write1 write1 + encode-second swap write1 write1 + ] [ h>b/b write1 write1 ] if ; -: encode-utf16be ( str -- seq ) - [ [ char>utf16be ] each ] B{ } make ; +: stream-write-utf16be ( string stream -- ) + [ [ char>utf16be ] each ] with-stream* ; + +M: utf16be stream-write-encoded ( string stream encoding -- ) + drop stream-write-utf16be ; : char>utf16le ( char -- ) dup HEX: FFFF > [ HEX: 10000 - - dup encode-first , , - encode-second , , - ] [ h>b/b swap , , ] if ; + dup encode-first write1 write1 + encode-second write1 write1 + ] [ h>b/b swap write1 write1 ] if ; -: encode-utf16le ( str -- seq ) - [ [ char>utf16le ] each ] B{ } make ; +: stream-write-utf16le ( string stream -- ) + [ [ char>utf16le ] each ] with-stream* ; -M: utf16le encode-string drop encode-utf16le ; -M: utf16be encode-string drop encode-utf16be ; +M: utf16le stream-write-encoded ( string stream encoding -- ) + drop stream-write-utf16le ; ! UTF-16 @@ -132,19 +135,16 @@ M: utf16be encode-string drop encode-utf16be ; : bom-be B{ HEX: fe HEX: ff } ; inline -: encode-utf16 ( str -- seq ) - encode-utf16le bom-le swap append ; - : start-utf16le? ( seq1 -- seq2 ? ) bom-le ?head ; : start-utf16be? ( seq1 -- seq2 ? ) bom-be ?head ; TUPLE: utf16 started? ; -M: utf16 encode-string - >r encode-utf16le r> +M: utf16 stream-write-encoded dup utf16-started? [ drop ] - [ t swap set-utf16-started? bom-le swap append ] if ; + [ t swap set-utf16-started? bom-le over stream-write ] if + stream-write-utf16le ; : bom>le/be ( bom -- le/be ) dup bom-le sequence= [ drop utf16le ] [