io/utf8 and io/utf16 were moved to core/io/encodings

2008-02-11 17:44:14 -06:00 · 2008-02-11 17:44:14 -06:00 · 8bbc144ce7
parent 99ff43b404
commit 8bbc144ce7
10 changed files with 0 additions and 286 deletions
--- a/core/io/utf16/authors.txt
+++ b/core/io/utf16/authors.txt
@@ -1 +0,0 @@
-Daniel Ehrenberg
--- a/core/io/utf16/summary.txt
+++ b/core/io/utf16/summary.txt
@@ -1 +0,0 @@
-UTF16 encoding/decoding
--- a/core/io/utf16/utf16-docs.factor
+++ b/core/io/utf16/utf16-docs.factor
@@ -1,45 +0,0 @@
-USING: help.markup help.syntax io.encodings strings ;
-IN: io.utf16
-
-ARTICLE: "io.utf16" "Working with UTF16-encoded data"
-"The UTF16 encoding is a variable-width encoding. Unicode code points are encoded as 2 or 4 byte sequences."
-{ $subsection encode-utf16le }
-{ $subsection encode-utf16be }
-{ $subsection decode-utf16le }
-{ $subsection decode-utf16be }
-"Support for UTF16 data with a byte order mark:"
-{ $subsection encode-utf16 }
-{ $subsection decode-utf16 } ;
-
-ABOUT: "io.utf16"
-
-HELP: decode-utf16
-{ $values { "seq" "a sequence of bytes" } { "str" string } }
-{ $description "Decodes a sequence of bytes representing a Unicode string in UTF16 format. The bytes must begin with a UTF16 byte order mark, which determines if the input is in little or big endian. To decode data without a byte order mark, use " { $link decode-utf16le } " or " { $link decode-utf16be } "." }
-{ $errors "Throws a " { $link decode-error } " if the input is malformed." } ;
-
-HELP: decode-utf16be
-{ $values { "seq" "a sequence of bytes" } { "str" string } }
-{ $description "Decodes a sequence of bytes representing a Unicode string in big endian UTF16 format. The bytes must not begin with a UTF16 byte order mark. To decode data with a byte order mark, use " { $link decode-utf16 } "." }
-{ $errors "Throws a " { $link decode-error } " if the input is malformed." } ;
-
-HELP: decode-utf16le
-{ $values { "seq" "a sequence of bytes" } { "str" string } }
-{ $description "Decodes a sequence of bytes representing a Unicode string in little endian UTF16 format. The bytes must not begin with a UTF16 byte order mark. To decode data with a byte order mark, use " { $link decode-utf16 } "." }
-{ $errors "Throws a " { $link decode-error } " if the input is malformed." } ;
-
-{ decode-utf16 decode-utf16le decode-utf16be } related-words
-
-HELP: encode-utf16be
-{ $values { "str" string } { "seq" "a sequence of bytes" } }
-{ $description "Encodes a Unicode string as a sequence of bytes in big endian UTF16 format." } ;
-
-HELP: encode-utf16le
-{ $values { "str" string } { "seq" "a sequence of bytes" } }
-{ $description "Encodes a Unicode string as a sequence of bytes in little endian UTF16 format." } ;
-
-HELP: encode-utf16
-{ $values { "str" string } { "seq" "a sequence of bytes" } }
-{ $description "Encodes a Unicode string as a sequence of bytes in UTF16 format with a byte order mark." } ;
-
-{ encode-utf16 encode-utf16be encode-utf16le } related-words
--- a/core/io/utf16/utf16-tests.factor
+++ b/core/io/utf16/utf16-tests.factor
@@ -1,15 +0,0 @@
-USING: tools.test io.utf16 arrays unicode.syntax ;
-
-[ { CHAR: x } ] [ { 0 CHAR: x } decode-utf16be >array ] unit-test
-[ { HEX: 1D11E } ] [ { HEX: D8 HEX: 34 HEX: DD HEX: 1E } decode-utf16be >array ] unit-test
-[ { UNICHAR: replacement-character } ] [ { BIN: 11011111 CHAR: q } decode-utf16be >array ] unit-test
-[ { UNICHAR: replacement-character } ] [ { BIN: 11011011 CHAR: x BIN: 11011011 CHAR: x } decode-utf16be >array ] unit-test
-
-[ B{ 0 120 216 52 221 30 } ] [ { CHAR: x HEX: 1d11e } encode-utf16be ] unit-test
-
-[ { CHAR: x } ] [ { CHAR: x 0 } decode-utf16le >array ] unit-test
-[ { 119070 } ] [ { HEX: 34 HEX: D8 HEX: 1E HEX: DD } decode-utf16le >array ] unit-test
-[ { UNICHAR: replacement-character } ] [ { 0 BIN: 11011111 } decode-utf16le >array ] unit-test
-[ { UNICHAR: replacement-character } ] [ { 0 BIN: 11011011 0 0 } decode-utf16le >array ] unit-test
-
-[ B{ 120 0 52 216 30 221 } ] [ { CHAR: x HEX: 1d11e } encode-utf16le ] unit-test
--- a/core/io/utf16/utf16.factor
+++ b/core/io/utf16/utf16.factor
@@ -1,116 +0,0 @@
-! Copyright (C) 2006, 2007 Daniel Ehrenberg.
-! See http://factorcode.org/license.txt for BSD license.
-USING: math kernel sequences sbufs vectors namespaces io.binary
-io.encodings combinators splitting ;
-IN: io.utf16
-
-SYMBOL: double
-SYMBOL: quad1
-SYMBOL: quad2
-SYMBOL: quad3
-SYMBOL: ignore
-
-: do-ignore ( -- ch state ) 0 ignore ;
-
-: append-nums ( byte ch -- ch )
-    8 shift bitor ;
-
-: end-multibyte ( buf byte ch -- buf ch state )
-    append-nums decoded ;
-
-: begin-utf16be ( buf byte -- buf ch state )
-    dup -3 shift BIN: 11011 number= [
-        dup BIN: 00000100 bitand zero?
-        [ BIN: 11 bitand quad1 ]
-        [ drop do-ignore ] if
-    ] [ double ] if ;
-
-: handle-quad2be ( byte ch -- ch state )
-    swap dup -2 shift BIN: 110111 number= [
-        >r 2 shift r> BIN: 11 bitand bitor quad3
-    ] [ 2drop do-ignore ] if ;
-
-: (decode-utf16be) ( buf byte ch state -- buf ch state )
-    {
-        { begin [ drop begin-utf16be ] }
-        { double [ end-multibyte ] }
-        { quad1 [ append-nums quad2 ] }
-        { quad2 [ handle-quad2be ] }
-        { quad3 [ append-nums HEX: 10000 + decoded ] }
-        { ignore [ 2drop push-replacement ] }
-    } case ;
-
-: decode-utf16be ( seq -- str )
-    [ -rot (decode-utf16be) ] decode ;
-
-: handle-double ( buf byte ch -- buf ch state )
-    swap dup -3 shift BIN: 11011 = [
-        dup BIN: 100 bitand 0 number=
-        [ BIN: 11 bitand 8 shift bitor quad2 ]
-        [ 2drop push-replacement ] if
-    ] [ end-multibyte ] if ;
-
-: handle-quad3le ( buf byte ch -- buf ch state )
-    swap dup -2 shift BIN: 110111 = [
-        BIN: 11 bitand append-nums HEX: 10000 + decoded
-    ] [ 2drop push-replacement ] if ;
-
-: (decode-utf16le) ( buf byte ch state -- buf ch state )
-    {
-        { begin [ drop double ] }
-        { double [ handle-double ] }
-        { quad1 [ append-nums quad2 ] }
-        { quad2 [ 10 shift bitor quad3 ] }
-        { quad3 [ handle-quad3le ] }
-    } case ;
-
-: decode-utf16le ( seq -- str )
-    [ -rot (decode-utf16le) ] decode ;
-
-: encode-first
-    -10 shift
-    dup -8 shift BIN: 11011000 bitor
-    swap HEX: FF bitand ;
-
-: encode-second
-    BIN: 1111111111 bitand
-    dup -8 shift BIN: 11011100 bitor
-    swap BIN: 11111111 bitand ;
-
-: char>utf16be ( char -- )
-    dup HEX: FFFF > [
-        HEX: 10000 -
-        dup encode-first swap , ,
-        encode-second swap , ,
-    ] [ h>b/b , , ] if ;
-
-: encode-utf16be ( str -- seq )
-    [ [ char>utf16be ] each ] B{ } make ;
-
-: char>utf16le ( char -- )
-    dup HEX: FFFF > [
-        HEX: 10000 -
-        dup encode-first , ,
-        encode-second , ,
-    ] [ h>b/b swap , , ] if ; 
-
-: encode-utf16le ( str -- seq )
-    [ [ char>utf16le ] each ] B{ } make ;
-
-: bom-le B{ HEX: ff HEX: fe } ; inline
-
-: bom-be B{ HEX: fe HEX: ff } ; inline
-
-: encode-utf16 ( str -- seq )
-    encode-utf16le bom-le swap append ;
-
-: utf16le? ( seq1 -- seq2 ? ) bom-le ?head ;
-
-: utf16be? ( seq1 -- seq2 ? ) bom-be ?head ;
-
-: decode-utf16 ( seq -- str )
-    {
-        { [ utf16le? ] [ decode-utf16le ] }
-        { [ utf16be? ] [ decode-utf16be ] }
-        { [ t ] [ decode-error ] }
-    } cond ;
--- a/core/io/utf8/authors.txt
+++ b/core/io/utf8/authors.txt
@@ -1 +0,0 @@
-Daniel Ehrenberg
--- a/core/io/utf8/summary.txt
+++ b/core/io/utf8/summary.txt
@@ -1 +0,0 @@
-UTF8 encoding/decoding
--- a/core/io/utf8/utf8-docs.factor
+++ b/core/io/utf8/utf8-docs.factor
@@ -1,18 +0,0 @@
-USING: help.markup help.syntax io.encodings strings ;
-IN: io.utf8
-
-ARTICLE: "io.utf8" "Working with UTF8-encoded data"
-"The UTF8 encoding is a variable-width encoding. 7-bit ASCII characters are encoded as single bytes, and other Unicode code points are encoded as 2 to 4 byte sequences."
-{ $subsection encode-utf8 }
-{ $subsection decode-utf8 } ;
-
-ABOUT: "io.utf8"
-
-HELP: decode-utf8
-{ $values { "seq" "a sequence of bytes" } { "str" string } }
-{ $description "Decodes a sequence of bytes representing a Unicode string in UTF8 format." }
-{ $errors "Throws a " { $link decode-error } " if the input is malformed." } ;
-
-HELP: encode-utf8
-{ $values { "str" string } { "seq" "a sequence of bytes" } }
-{ $description "Encodes a Unicode string as a sequence of bytes in UTF8 format." } ;
--- a/core/io/utf8/utf8-tests.factor
+++ b/core/io/utf8/utf8-tests.factor
@@ -1,16 +0,0 @@
-USING: io.utf8 tools.test strings arrays unicode.syntax ;
-
-[ { UNICHAR: replacement-character } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 11111111 } decode-utf8 >array ] unit-test
-
-[ { BIN: 101111111000000111111 } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 } decode-utf8 >array ] unit-test
-
-[ "x" ] [ "x" decode-utf8 >string ] unit-test
-
-[ { BIN: 11111000000 } ] [ { BIN: 11011111 BIN: 10000000 } decode-utf8 >array ] unit-test
-
-[ { UNICHAR: replacement-character } ] [ { BIN: 10000000 } decode-utf8 >array ] unit-test
-
-[ { BIN: 1111000000111111 } ] [ { BIN: 11101111 BIN: 10000000 BIN: 10111111 } decode-utf8 >array ] unit-test
-
-[ B{ BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 BIN: 11101111 BIN: 10000000 BIN: 10111111 BIN: 11011111 BIN: 10000000 CHAR: x } ]
-[ { BIN: 101111111000000111111 BIN: 1111000000111111 BIN: 11111000000 CHAR: x } encode-utf8 ] unit-test
--- a/core/io/utf8/utf8.factor
+++ b/core/io/utf8/utf8.factor
@@ -1,72 +0,0 @@
-! Copyright (C) 2006, 2007 Daniel Ehrenberg.
-! See http://factorcode.org/license.txt for BSD license.
-USING: math kernel sequences sbufs vectors
-namespaces io.encodings combinators ;
-IN: io.utf8
-
-SYMBOL: double
-SYMBOL: triple
-SYMBOL: triple2
-SYMBOL: quad
-SYMBOL: quad2
-SYMBOL: quad3
-
-: starts-2? ( char -- ? )
-    -6 shift BIN: 10 number= ;
-
-: append-nums ( buf bottom top state-out -- buf num state )
-    >r over starts-2?
-    [ 6 shift swap BIN: 111111 bitand bitor r> ]
-    [ r> 3drop push-replacement ] if ;
-
-: begin-utf8 ( buf byte -- buf ch state )
-    {
-        { [ dup -7 shift zero? ] [ decoded ] }
-        { [ dup -5 shift BIN: 110 number= ] [ BIN: 11111 bitand double ] }
-        { [ dup -4 shift BIN: 1110 number= ] [ BIN: 1111 bitand triple ] }
-        { [ dup -3 shift BIN: 11110 number= ] [ BIN: 111 bitand quad ] }
-        { [ t ] [ drop push-replacement ] }
-    } cond ;
-
-: end-multibyte ( buf byte ch -- buf ch state )
-    f append-nums [ decoded ] unless* ;
-
-: (decode-utf8) ( buf byte ch state -- buf ch state )
-    {
-        { begin [ drop begin-utf8 ] }
-        { double [ end-multibyte ] }
-        { triple [ triple2 append-nums ] }
-        { triple2 [ end-multibyte ] }
-        { quad [ quad2 append-nums ] }
-        { quad2 [ quad3 append-nums ] }
-        { quad3 [ end-multibyte ] }
-    } case ;
-
-: decode-utf8 ( seq -- str )
-    [ -rot (decode-utf8) ] decode ;
-
-: encoded ( char -- )
-    BIN: 111111 bitand BIN: 10000000 bitor , ;
-
-: char>utf8 ( char -- )
-    {
-        { [ dup -7 shift zero? ] [ , ] }
-        { [ dup -11 shift zero? ] [
-            dup -6 shift BIN: 11000000 bitor ,
-            encoded
-        ] }
-        { [ dup -16 shift zero? ] [
-            dup -12 shift BIN: 11100000 bitor ,
-            dup -6 shift encoded
-            encoded
-        ] }
-        { [ t ] [
-            dup -18 shift BIN: 11110000 bitor ,
-            dup -12 shift encoded
-            dup -6 shift encoded
-            encoded
-        ] }
-    } cond ;
-
-: encode-utf8 ( str -- seq )
-    [ [ char>utf8 ] each ] B{ } make ;