Merge branch 'master' of git://factorcode.org/git/factor

2009-02-04 07:42:46 -08:00 · 2009-02-04 07:42:46 -08:00 · 7282010cec
parent bda4070abb 489019acd0
commit 7282010cec
29 changed files with 342 additions and 145 deletions
--- a/basis/http/http.factor
+++ b/basis/http/http.factor
@ -3,17 +3,11 @@
 USING: accessors kernel combinators math namespaces make assocs
 sequences splitting sorting sets strings vectors hashtables
 quotations arrays byte-arrays math.parser calendar
-calendar.format present urls
-
+calendar.format present urls fry
 io io.encodings io.encodings.iana io.encodings.binary
 io.encodings.8-bit io.crlf
-
 unicode.case unicode.categories
-
 http.parsers ;
-
-EXCLUDE: fry => , ;
-
 IN: http

 : (read-header) ( -- alist )
@ -217,5 +211,7 @@ TUPLE: post-data data params content-type content-encoding ;
    " " split harvest [ "=" split1 [ >lower ] dip ] { } map>assoc ;

 : parse-content-type ( content-type -- type encoding )
-    ";" split1 parse-content-type-attributes "charset" swap at
-    name>encoding over "text/" head? latin1 binary ? or ;
+    ";" split1
+    parse-content-type-attributes "charset" swap at
+    [ name>encoding ]
+    [ dup "text/" head? latin1 binary ? ] if* ;
--- a/basis/http/server/server-tests.factor
+++ b/basis/http/server/server-tests.factor
@ -1,6 +1,21 @@
-USING: http http.server math sequences continuations tools.test ;
+USING: http http.server math sequences continuations tools.test
+io.encodings.utf8 io.encodings.binary accessors ;
 IN: http.server.tests

 [ t ] [ [ \ + first ] [ <500> ] recover response? ] unit-test

 \ make-http-error must-infer
+
+[ "text/plain; charset=UTF-8" ] [
+    <response>
+        "text/plain" >>content-type
+        utf8 >>content-charset
+    unparse-content-type
+] unit-test
+
+[ "text/xml" ] [
+    <response>
+        "text/xml" >>content-type
+        binary >>content-charset
+    unparse-content-type
+] unit-test
--- a/basis/http/server/server.factor
+++ b/basis/http/server/server.factor
@ -97,10 +97,8 @@ GENERIC: write-full-response ( request response -- )
    tri ;

 : unparse-content-type ( request -- content-type )
-    [ content-type>> "application/octet-stream" or ]
-    [ content-charset>> encoding>name ]
-    bi
-    [ "; charset=" glue ] when* ;
+    [ content-type>> "application/octet-stream" or ] [ content-charset>> ] bi
+    dup binary eq? [ drop ] [ encoding>name "; charset=" glue ] if ;

 : ensure-domain ( cookie -- cookie )
    [
--- a/basis/io/encodings/8-bit/8-bit.factor
+++ b/basis/io/encodings/8-bit/8-bit.factor
@ -3,31 +3,33 @@
 USING: math.parser arrays io.encodings sequences kernel assocs
 hashtables io.encodings.ascii generic parser classes.tuple words
 words.symbol io io.files splitting namespaces math
-compiler.units accessors ;
+compiler.units accessors classes.singleton classes.mixin
+io.encodings.iana ;
 IN: io.encodings.8-bit

 <PRIVATE

 : mappings {
-    { "latin1" "8859-1" }
-    { "latin2" "8859-2" }
-    { "latin3" "8859-3" }
-    { "latin4" "8859-4" }
-    { "latin/cyrillic" "8859-5" }
-    { "latin/arabic" "8859-6" }
-    { "latin/greek" "8859-7" }
-    { "latin/hebrew" "8859-8" }
-    { "latin5" "8859-9" }
-    { "latin6" "8859-10" }
-    { "latin/thai" "8859-11" }
-    { "latin7" "8859-13" }
-    { "latin8" "8859-14" }
-    { "latin9" "8859-15" }
-    { "latin10" "8859-16" }
-    { "koi8-r" "KOI8-R" }
-    { "windows-1252" "CP1252" }
-    { "ebcdic" "CP037" }
-    { "mac-roman" "ROMAN" }
+    ! encoding-name iana-name file-name
+    { "latin1" "ISO_8859-1:1987" "8859-1" }
+    { "latin2" "ISO_8859-2:1987" "8859-2" }
+    { "latin3" "ISO_8859-3:1988" "8859-3" }
+    { "latin4" "ISO_8859-4:1988" "8859-4" }
+    { "latin/cyrillic" "ISO_8859-5:1988" "8859-5" }
+    { "latin/arabic" "ISO_8859-6:1987" "8859-6" }
+    { "latin/greek" "ISO_8859-7:1987" "8859-7" }
+    { "latin/hebrew" "ISO_8859-8:1988" "8859-8" }
+    { "latin5" "ISO_8859-9:1989" "8859-9" }
+    { "latin6" "ISO-8859-10" "8859-10" }
+    { "latin/thai" "TIS-620" "8859-11" }
+    { "latin7" "ISO-8859-13" "8859-13" }
+    { "latin8" "ISO-8859-14" "8859-14" }
+    { "latin9" "ISO-8859-15" "8859-15" }
+    { "latin10" "ISO-8859-16" "8859-16" }
+    { "koi8-r" "KOI8-R" "KOI8-R" }
+    { "windows-1252" "windows-1252" "CP1252" }
+    { "ebcdic" "IBM037" "CP037" }
+    { "mac-roman" "macintosh" "ROMAN" }
 } ;

 : encoding-file ( file-name -- stream )
@ -65,8 +67,7 @@ M: 8-bit encode-char encode>> encode-8-bit ;

 M: 8-bit decode-char decode>> decode-8-bit ;

-PREDICATE: 8-bit-encoding < word
-    8-bit-encodings get-global key? ;
+MIXIN: 8-bit-encoding

 M: 8-bit-encoding <encoder>
    8-bit-encodings get-global at <encoder> ;
@ -74,15 +75,21 @@ M: 8-bit-encoding <encoder>
 M: 8-bit-encoding <decoder>
    8-bit-encodings get-global at <decoder> ;

+: create-encoding ( name -- word )
+    "io.encodings.8-bit" create
+    [ define-singleton-class ]
+    [ 8-bit-encoding add-mixin-instance ]
+    [ ] tri ;
+
 PRIVATE>

 [
    mappings [
-        [ "io.encodings.8-bit" create ]
+        first3
+        [ create-encoding ]
+        [ dupd register-encoding ]
        [ encoding-file parse-file 8-bit boa ]
-        bi*
-    ] assoc-map
-    [ keys [ define-symbol ] each ]
-    [ 8-bit-encodings set-global ]
-    bi
+        tri*
+    ] H{ } map>assoc
+    8-bit-encodings set-global
 ] with-compilation-unit
--- a/basis/io/encodings/ascii/ascii.factor
+++ b/basis/io/encodings/ascii/ascii.factor
@ -1,6 +1,6 @@
 ! Copyright (C) 2008 Daniel Ehrenberg.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: io io.encodings kernel math io.encodings.private ;
+USING: io io.encodings kernel math io.encodings.private io.encodings.iana ;
 IN: io.encodings.ascii

 <PRIVATE
@ -20,3 +20,5 @@ M: ascii encode-char

 M: ascii decode-char
    128 decode-if< ;
+
+ascii "ANSI_X3.4-1968" register-encoding
--- a/basis/io/encodings/chinese/chinese.factor
+++ b/basis/io/encodings/chinese/chinese.factor
@ -1,13 +1,15 @@
 ! Copyright (C) 2009 Daniel Ehrenberg
 ! See http://factorcode.org/license.txt for BSD license.
 USING: xml xml.data kernel io io.encodings interval-maps splitting fry
-math.parser sequences combinators assocs locals accessors math 
-arrays values io.encodings.ascii ascii io.files biassocs math.order
-combinators.short-circuit io.binary ;
+math.parser sequences combinators assocs locals accessors math arrays
+byte-arrays values io.encodings.ascii ascii io.files biassocs
+math.order combinators.short-circuit io.binary io.encodings.iana ;
 IN: io.encodings.chinese

 SINGLETON: gb18030

+gb18030 "GB18030" register-encoding
+
 <PRIVATE

 ! GB to mean GB18030 is a terrible abuse of notation
@ -15,6 +17,14 @@ SINGLETON: gb18030
 ! Resource file from:
 ! http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml

+! Algorithms from:
+! http://www-128.ibm.com/developerworks/library/u-china.html
+
+: linear ( bytes -- num )
+    ! This hard-codes bMin and bMax
+    reverse first4
+    10 * + 126 * + 10 * + ; foldable
+
 TUPLE: range ufirst ulast bfirst blast ;

 : b>byte-array ( string -- byte-array )
@ -25,8 +35,8 @@ TUPLE: range ufirst ulast bfirst blast ;
        {
            [ "uFirst" attr hex> ]
            [ "uLast" attr hex> ]
-            [ "bFirst" attr b>byte-array ]
-            [ "bLast" attr b>byte-array ]
+            [ "bFirst" attr b>byte-array linear ]
+            [ "bLast" attr b>byte-array linear ]
        } cleave range boa
    ] dip push ;

@ -49,21 +59,13 @@ TUPLE: range ufirst ulast bfirst blast ;
        ] each-element mapping ranges 
    ] ;

-! Algorithms from:
-! http://www-128.ibm.com/developerworks/library/u-china.html
-
-: linear ( bytes -- num )
-    ! This hard-codes bMin and bMax
-    reverse first4
-    10 * + 126 * + 10 * + ;
-
 : unlinear ( num -- bytes )
    B{ HEX: 81 HEX: 30 HEX: 81 HEX: 30 } linear -
-    10 /mod swap [ HEX: 30 + ] dip
-    126 /mod swap [ HEX: 81 + ] dip
-    10 /mod swap [ HEX: 30 + ] dip
+    10 /mod HEX: 30 + swap
+    126 /mod HEX: 81 + swap
+    10 /mod HEX: 30 + swap
    HEX: 81 +
-    B{ } 4sequence reverse ;
+    4byte-array dup reverse-here ;

 : >interval-map-by ( start-quot end-quot value-quot seq -- interval-map )
    '[ _ [ @ 2array ] _ tri ] { } map>assoc <interval-map> ; inline
@ -72,7 +74,7 @@ TUPLE: range ufirst ulast bfirst blast ;
    [ ufirst>> ] [ ulast>> ] [ ] >interval-map-by ;

 : ranges-gb>u ( ranges -- interval-map )
-    [ bfirst>> linear ] [ blast>> linear ] [ ] >interval-map-by ;
+    [ bfirst>> ] [ blast>> ] [ ] >interval-map-by ;

 VALUE: gb>u
 VALUE: u>gb
@ -85,7 +87,7 @@ ascii <file-reader> xml>gb-data

 : lookup-range ( char -- byte-array )
    dup u>gb interval-at [
-        [ ufirst>> - ] [ bfirst>> linear ] bi + unlinear
+        [ ufirst>> - ] [ bfirst>> ] bi + unlinear
    ] [ encode-error ] if* ;

 M: gb18030 encode-char ( char stream encoding -- )
@ -107,19 +109,19 @@ M: gb18030 encode-char ( char stream encoding -- )
 : decode-quad ( byte-array -- char )
    dup mapping value-at [ ] [
        linear dup gb>u interval-at [
-            [ bfirst>> linear - ] [ ufirst>> ] bi +
+            [ bfirst>> - ] [ ufirst>> ] bi +
        ] [ drop replacement-char ] if*
    ] ?if ;

 : four-byte ( stream byte1 byte2 -- char )
    rot 2 swap stream-read dup last-bytes?
-    [ first2 B{ } 4sequence decode-quad ]
+    [ first2 4byte-array decode-quad ]
    [ 3drop replacement-char ] if ;

 : two-byte ( stream byte -- char )
    over stream-read1 {
        { [ dup not ] [ 3drop replacement-char ] }
-        { [ dup second-byte? ] [ B{ } 2sequence mapping value-at nip ] }
+        { [ dup second-byte? ] [ 2byte-array mapping value-at nip ] }
        { [ dup quad-2/4? ] [ four-byte ] }
        [ 3drop replacement-char ]
    } cond ;
@ -127,7 +129,7 @@ M: gb18030 encode-char ( char stream encoding -- )
 M: gb18030 decode-char ( stream encoding -- char )
    drop dup stream-read1 {
        { [ dup not ] [ 2drop f ] }
-        { [ dup ascii? ] [ nip 1array B{ } like mapping value-at ] }
+        { [ dup ascii? ] [ nip 1byte-array mapping value-at ] }
        { [ dup quad-1/3? ] [ two-byte ] }
        [ 2drop replacement-char ]
    } cond ;
--- a/basis/io/encodings/iana/iana-docs.factor
+++ b/basis/io/encodings/iana/iana-docs.factor
@ -1,12 +1,35 @@
-USING: help.syntax help.markup ;
+USING: help.syntax help.markup strings ;
 IN: io.encodings.iana

+ABOUT: "io.encodings.iana"
+
+ARTICLE: "io.encodings.iana" "IANA-registered encoding names"
+"The " { $vocab-link "io.encodings.iana" } " vocabulary provides words for accessing the names of encodings and the encoding descriptors corresponding to names." $nl
+"Most text encodings in common use have been registered with IANA. There is a standard set of names for each encoding. Simple conversion functions:" 
+{ $subsection name>encoding }
+{ $subsection encoding>name }
+"To let a new encoding be used with the above words, use the following:"
+{ $subsection register-encoding }
+"Exceptions when encodings or names are not found:"
+{ $subsection missing-encoding }
+{ $subsection missing-name } ;
+
+HELP: missing-encoding
+{ $error-description "The error called from " { $link name>encoding } " when there is no encoding descriptor registered corresponding to the given name." } ;
+
+HELP: missing-name
+{ $error-description "The error called from " { $link encoding>name } " when there is no name registered corresponding to the given encoding." } ;
+
 HELP: name>encoding
 { $values { "name" "an encoding name" } { "encoding" "an encoding descriptor" } }
-{ "Given an IANA-registered encoding name, find the encoding descriptor that represents it, or " { $code f } " if it is not found (either not implemented in Factor or not registered)." } ;
+{ $description "Given an IANA-registered encoding name, find the encoding descriptor that represents it, or " { $code f } " if it is not found (either not implemented in Factor or not registered)." } ;

 HELP: encoding>name
 { $values { "encoding" "an encoding descriptor" } { "name" "an encoding name" } }
-{ "Given an encoding descriptor, return the preferred IANA name." } ;
+{ $description "Given an encoding descriptor, return the preferred IANA name." } ;

 { name>encoding encoding>name } related-words
+
+HELP: register-encoding
+{ $values { "descriptor" "an encoding descriptor" } { "name" string } }
+{ $description "Registers an encoding descriptor with the given name, available for lookup through " { $link name>encoding } " and " { $link encoding>name } ". IANA-registered aliases are automatically included. The name given must be the first name in the " { $snippet "resources:basis/io/encodings/iana/character-sets" } " file." } ;
--- a/basis/io/encodings/iana/iana-tests.factor
+++ b/basis/io/encodings/iana/iana-tests.factor
@ -1,5 +1,28 @@
-USING: io.encodings.iana io.encodings.ascii tools.test ;
+USING: io.encodings.iana io.encodings.iana.private
+io.encodings.utf8 tools.test assocs namespaces ;
+IN: io.encodings.iana.tests

-[ ascii ] [ "US-ASCII" name>encoding ] unit-test
-[ ascii ] [ "ASCII" name>encoding ] unit-test
-[ "US-ASCII" ] [ ascii encoding>name ] unit-test
+[ utf8 ] [ "UTF-8" name>encoding ] unit-test
+[ utf8 ] [ "utf8" name>encoding ] unit-test
+[ "UTF-8" ] [ utf8 encoding>name ] unit-test
+
+! We will never implement EBCDIC-FI-SE-A
+SINGLETON: ebcdic-fisea
+ebcdic-fisea "EBCDIC-FI-SE-A" register-encoding
+[ ebcdic-fisea ] [ "EBCDIC-FI-SE-A" name>encoding ] unit-test
+[ ebcdic-fisea ] [ "csEBCDICFISEA" name>encoding ] unit-test
+[ "EBCDIC-FI-SE-A" ] [ ebcdic-fisea encoding>name ] unit-test
+
+! Clean up after myself
+[ ] [
+    "EBCDIC-FI-SE-A" n>e-table get delete-at
+    "csEBCDICFISEA" n>e-table get delete-at
+    ebcdic-fisea e>n-table get delete-at
+] unit-test
+[ "EBCDIC-FI-SE-A" name>encoding ] must-fail
+[ "csEBCDICFISEA" name>encoding ] must-fail
+[ ebcdic-fisea encoding>name ] must-fail
+
+[ ebcdic-fisea "foobar" register-encoding ] must-fail
+[ "foobar" name>encoding ] must-fail
+[ ebcdic-fisea encoding>name ] must-fail
--- a/basis/io/encodings/iana/iana.factor
+++ b/basis/io/encodings/iana/iana.factor
@ -1,41 +1,24 @@
 ! Copyright (C) 2008 Daniel Ehrenberg
 ! See http://factorcode.org/license.txt for BSD license.
 USING: kernel strings values io.files assocs
-splitting sequences io namespaces sets io.encodings.8-bit
-io.encodings.ascii io.encodings.utf8 io.encodings.utf16 
-io.encodings.chinese io.encodings.japanese ;
+splitting sequences io namespaces sets io.encodings.utf8 ;
 IN: io.encodings.iana

 <PRIVATE
-VALUE: n>e-table
-
-: e>n-table H{
-    { ascii "US-ASCII" }
-    { utf8 "UTF-8" }
-    { utf16 "UTF-16" }
-    { utf16be "UTF-16BE" }
-    { utf16le "UTF-16LE" }
-    { latin1 "ISO-8859-1" }
-    { latin2 "ISO-8859-2" }
-    { latin3 "ISO-8859-3" }
-    { latin4 "ISO-8859-4" }
-    { latin/cyrillic "ISO-8859-5" }
-    { latin/arabic "ISO-8859-6" }
-    { latin/greek "ISO-8859-7" }
-    { latin/hebrew "ISO-8859-8" }
-    { latin5 "ISO-8859-9" }
-    { latin6 "ISO-8859-10" }
-    { shift-jis "Shift_JIS" }
-    { windows-31j "Windows-31J" }
-    { gb18030 "GB18030" }
-} ;
+SYMBOL: n>e-table
+SYMBOL: e>n-table
+SYMBOL: aliases
 PRIVATE>

+ERROR: missing-encoding name ;
+
 : name>encoding ( name -- encoding )
-    n>e-table at ;
+    dup n>e-table get-global at [ ] [ missing-encoding ] ?if ;
+
+ERROR: missing-name encoding ;

 : encoding>name ( encoding -- name )
-    e>n-table at ;
+    dup e>n-table get-global at [ ] [ missing-name ] ?if ;

 <PRIVATE
 : parse-iana ( stream -- synonym-set )
@ -43,24 +26,33 @@ PRIVATE>
        [ " " split ] map
        [ first { "Name:" "Alias:" } member? ] filter
        [ second ] map { "None" } diff
-    ] map ;
+    ] map harvest ;

-: more-aliases ( -- assoc )
+: make-aliases ( stream -- n>e )
+    parse-iana [ [ first ] [ ] bi ] H{ } map>assoc ;
+
+: initial-n>e ( -- assoc )
    H{
        { "UTF8" utf8 }
        { "utf8" utf8 }
        { "utf-8" utf8 }
-    } ;
+        { "UTF-8" utf8 }
+    } clone ;
+
+: initial-e>n ( -- assoc )
+    H{ { utf8 "UTF-8" } } clone ;

-: make-n>e ( stream -- n>e )
-    parse-iana [ [
-        dup [
-            e>n-table value-at
-            [ swap [ set ] with each ]
-            [ drop ] if*
-        ] with each
-    ] each ] H{ } make-assoc more-aliases assoc-union ;
 PRIVATE>

 "resource:basis/io/encodings/iana/character-sets"
-ascii <file-reader> make-n>e to: n>e-table
+utf8 <file-reader> make-aliases aliases set-global
+
+n>e-table global [ initial-n>e or ] change-at
+e>n-table global [ initial-e>n or ] change-at
+
+: register-encoding ( descriptor name -- )
+    [
+        aliases get at [
+            [ n>e-table get-global set-at ] with each
+        ] [ "Bad encoding registration" throw ] if*
+    ] [ swap e>n-table get-global set-at ] 2bi ;
--- a/basis/io/encodings/japanese/japanese-docs.factor
+++ b/basis/io/encodings/japanese/japanese-docs.factor
@ -4,7 +4,7 @@ USING: help.markup help.syntax ;
 IN: io.encodings.japanese

 ARTICLE: "io.encodings.japanese" "Japanese text encodings"
-"The " { $vocab-link "io.encodings.japanese" } " vocabulary implements Japanese-specific text encodings. Several encodings are used for Japanese text besides the standard UTF encodings for Unicode strings. These are mostly based on the character set defined in the JIS X 208 standard. Current coverage of encodings is incomplete."
+"Several encodings are used for Japanese text besides the standard UTF encodings for Unicode strings. These are mostly based on the character set defined in the JIS X 208 standard. Current coverage of encodings is incomplete."
 { $subsection shift-jis }
 { $subsection windows-31j } ;

--- a/basis/io/encodings/japanese/japanese.factor
+++ b/basis/io/encodings/japanese/japanese.factor
@ -1,17 +1,31 @@
 ! Copyright (C) 2009 Daniel Ehrenberg
 ! See http://factorcode.org/license.txt for BSD license.
 USING: sequences kernel io io.files combinators.short-circuit
-math.order values assocs io.encodings io.binary fry strings
-math io.encodings.ascii arrays accessors splitting math.parser
-biassocs ;
+math.order values assocs io.encodings io.binary fry strings math
+io.encodings.ascii arrays byte-arrays accessors splitting
+math.parser biassocs io.encodings.iana ;
 IN: io.encodings.japanese

-VALUE: shift-jis
+SINGLETON: shift-jis

-VALUE: windows-31j
+shift-jis "Shift_JIS" register-encoding
+
+SINGLETON: windows-31j
+
+windows-31j "Windows-31J" register-encoding

 <PRIVATE

+VALUE: shift-jis-table
+
+M: shift-jis <encoder> drop shift-jis-table <encoder> ;
+M: shift-jis <decoder> drop shift-jis-table <decoder> ;
+
+VALUE: windows-31j-table
+
+M: windows-31j <encoder> drop windows-31j-table <encoder> ;
+M: windows-31j <decoder> drop windows-31j-table <decoder> ;
+
 TUPLE: jis assoc ;

 : <jis> ( assoc -- jis )
@ -31,17 +45,17 @@ TUPLE: jis assoc ;
    ascii file-lines process-jis <jis> ;

 "resource:basis/io/encodings/japanese/CP932.txt"
-make-jis to: windows-31j
+make-jis to: windows-31j-table

 "resource:basis/io/encodings/japanese/sjis-0208-1997-std.txt"
-make-jis to: shift-jis
+make-jis to: shift-jis-table

 : small? ( char -- ? )
    ! ASCII range or single-byte halfwidth katakana
    { [ 0 HEX: 7F between? ] [ HEX: A1 HEX: DF between? ] } 1|| ;

 : write-halfword ( stream halfword -- )
-    h>b/b swap B{ } 2sequence swap stream-write ;
+    h>b/b swap 2byte-array swap stream-write ;

 M: jis encode-char
    swapd ch>jis
--- a/basis/io/encodings/utf16/utf16.factor
+++ b/basis/io/encodings/utf16/utf16.factor
@ -1,15 +1,21 @@
-! Copyright (C) 2006, 2008 Daniel Ehrenberg.
+! Copyright (C) 2006, 2009 Daniel Ehrenberg.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: math kernel sequences sbufs vectors namespaces io.binary
-io.encodings combinators splitting io byte-arrays ;
+io.encodings combinators splitting io byte-arrays io.encodings.iana ;
 IN: io.encodings.utf16

 SINGLETON: utf16be

+utf16be "UTF-16BE" register-encoding
+
 SINGLETON: utf16le

+utf16le "UTF-16LE" register-encoding
+
 SINGLETON: utf16

+utf16 "UTF-16" register-encoding
+
 ERROR: missing-bom ;

 <PRIVATE
--- a/basis/io/encodings/utf32/utf32.factor
+++ b/basis/io/encodings/utf32/utf32.factor
@ -1,15 +1,21 @@
 ! Copyright (C) 2009 Daniel Ehrenberg.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: math kernel io.encodings combinators io io.encodings.utf16
-sequences io.binary ;
+sequences io.binary io.encodings.iana ;
 IN: io.encodings.utf32

 SINGLETON: utf32be

+utf32be "UTF-32BE" register-encoding
+
 SINGLETON: utf32le

+utf32le "UTF-32LE" register-encoding
+
 SINGLETON: utf32

+utf32 "UTF-32" register-encoding
+
 <PRIVATE

 ! Decoding
--- a/basis/stack-checker/transforms/transforms.factor
+++ b/basis/stack-checker/transforms/transforms.factor
@ -70,7 +70,7 @@ IN: stack-checker.transforms
    [
        [ no-case ]
    ] [
-        dup peek quotation? [
+        dup peek callable? [
            dup peek swap but-last
        ] [
            [ no-case ] swap
--- a/basis/wrap/authors.txt
+++ b/basis/wrap/authors.txt
@ -1 +1,2 @@
 Daniel Ehrenberg
+Slava Pestov
--- a/basis/wrap/wrap-docs.factor
+++ b/basis/wrap/wrap-docs.factor
@ -0,0 +1,41 @@
+! Copyright (C) 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: help.syntax help.markup strings math kernel ;
+IN: wrap
+
+ABOUT: "wrap"
+
+ARTICLE: "wrap" "Word wrapping"
+"The " { $vocab-link "wrap" } " vocabulary implements word wrapping. There is support for simple string wrapping, with the following words:"
+{ $subsection wrap-lines }
+{ $subsection wrap-string }
+{ $subsection wrap-indented-string }
+"Additionally, the vocabulary provides capabilities to wrap arbitrary groups of things, in units called words."
+{ $subsection wrap }
+{ $subsection word }
+{ $subsection <word> } ;
+
+HELP: wrap-lines
+{ $values { "lines" string } { "width" integer } { "newlines" "sequence of strings" } }
+{ $description "Given a string, divides it into a sequence of lines where each line has no more than " { $snippet "width" } " characters, unless there is a word longer than " { $snippet "width" } ". Linear whitespace between words is converted to a single space." } ;
+
+HELP: wrap-string
+{ $values { "string" string } { "width" integer } { "newstring" string } }
+{ $description "Given a string, alters the whitespace in the string so that each line has no more than " { $snippet "width" } " characters, unless there is a word longer than " { $snippet "width" } ". Linear whitespace between words is converted to a single space." } ;
+
+HELP: wrap-indented-string
+{ $values { "string" string } { "width" integer } { "indent" string } { "newstring" string } }
+{ $description "Given a string, alters the whitespace in the string so that each line has no more than " { $snippet "width" } " characters, unless there is a word longer than " { $snippet "width" } ". Linear whitespace between words is converted to a single space. Before each line, the indent string is added." } ;
+
+HELP: wrap
+{ $values { "words" { "a sequence of " { $instance word } "s" } } { "width" integer } { "lines" "a sequence of sequences of words" } }
+{ $description "Divides the words into lines, where the sum of the lengths of the words on a line (not counting breaks at the end of the line) is at most the given width. Every line except for the first one starts with a non-break, and every one but the last ends with a break." } ;
+
+HELP: word
+{ $class-description "A word, for the purposes of " { $vocab-link "wrap" } ", is a Factor object annotated with a length (in the " { $snippet "width" } " slot) and knowledge about whether it is an allowable position for an optional line break (in the " { $snippet "break?" } " slot). Words can be created with " { $link <word> } "." }
+{ $see-also wrap } ;
+
+HELP: <word>
+{ $values { "key" object } { "width" integer } { "break?" { { $link t } " or " { $link POSTPONE: f } } } { "word" word } }
+{ $description "Creates a " { $link word } " object with the given parameters." }
+{ $see-also wrap } ;
--- a/basis/wrap/wrap-tests.factor
+++ b/basis/wrap/wrap-tests.factor
@ -1,5 +1,7 @@
-IN: wrap.tests
+! Copyright (C) 2008, 2009 Daniel Ehrenberg, Slava Pestov
+! See http://factorcode.org/license.txt for BSD license.
 USING: tools.test wrap multiline sequences ;
+IN: wrap.tests
    
 [
    {
@ -23,6 +25,32 @@ USING: tools.test wrap multiline sequences ;
    } 35 wrap [ { } like ] map
 ] unit-test

+[
+    {
+        {
+            T{ word f 1 10 f }
+            T{ word f 2 10 f }
+            T{ word f 3 9 t }
+            T{ word f 3 9 t }
+            T{ word f 3 9 t }
+        }
+        {
+            T{ word f 4 10 f }
+            T{ word f 5 10 f }
+        }
+    }
+] [
+    {
+        T{ word f 1 10 f }
+        T{ word f 2 10 f }
+        T{ word f 3 9 t }
+        T{ word f 3 9 t }
+        T{ word f 3 9 t }
+        T{ word f 4 10 f }
+        T{ word f 5 10 f }
+    } 35 wrap [ { } like ] map
+] unit-test
+
 [
    <" This is a
 long piece
@ -45,4 +73,10 @@ word wrap.">
 ] [
    <" This is a long piece of text that we wish to word wrap."> 12
    "  " wrap-indented-string
-] unit-test
+] unit-test
+
+[ "this text\nhas lots of\nspaces" ]
+[ "this text        has lots of       spaces" 12 wrap-string ] unit-test
+
+[ "hello\nhow\nare\nyou\ntoday?" ]
+[ "hello how are you today?" 3 wrap-string ] unit-test
--- a/basis/wrap/wrap.factor
+++ b/basis/wrap/wrap.factor
@ -1,3 +1,5 @@
+! Copyright (C) 2008, 2009 Daniel Ehrenberg, Slava Pestov
+! See http://factorcode.org/license.txt for BSD license.
 USING: sequences kernel namespaces make splitting
 math math.order fry assocs accessors ;
 IN: wrap
@ -15,12 +17,25 @@ SYMBOL: width
 : break-here? ( column word -- ? )
    break?>> not [ width get > ] [ drop f ] if ;

+: walk ( n words -- n )
+    ! If on a break, take the rest of the breaks
+    ! If not on a break, go back until you hit a break
+    2dup bounds-check? [
+        2dup nth break?>>
+        [ [ break?>> not ] find-from drop ]
+        [ [ break?>> ] find-last-from drop 1+ ] if
+   ] [ drop ] if ;
+
 : find-optimal-break ( words -- n )
-    [ 0 ] dip [ [ width>> + dup ] keep break-here? ] find drop nip ;
+    [ 0 ] keep
+    [ [ width>> + dup ] keep break-here? ] find drop nip
+    [ 1 max swap walk ] [ drop f ] if* ;

 : (wrap) ( words -- )
-    dup find-optimal-break
-    [ 1 max cut-slice [ , ] [ (wrap) ] bi* ] [ , ] if* ;
+    [
+        dup find-optimal-break
+        [ cut-slice [ , ] [ (wrap) ] bi* ] [ , ] if*
+    ] unless-empty ;

 : intersperse ( seq elt -- seq' )
    [ '[ _ , ] [ , ] interleave ] { } make ;
@ -34,9 +49,7 @@ SYMBOL: width

 : join-words ( wrapped-lines -- lines )
    [
-        [ break?>> ]
-        [ trim-head-slice ]
-        [ trim-tail-slice ] bi
+        [ break?>> ] trim-slice
        [ key>> ] map concat
    ] map ;

--- a/basis/xml/tests/encodings.factor
+++ b/basis/xml/tests/encodings.factor
@ -1,4 +1,5 @@
-USING: xml xml.data xml.utilities tools.test accessors kernel ;
+USING: xml xml.data xml.utilities tools.test accessors kernel
+io.encodings.8-bit ;

 [ "\u000131" ] [ "resource:basis/xml/tests/latin5.xml" file>xml children>string ] unit-test
 [ "\u0000e9" ] [ "resource:basis/xml/tests/latin1.xml" file>xml children>string ] unit-test
@ -11,4 +12,4 @@ USING: xml xml.data xml.utilities tools.test accessors kernel ;
 [ "\u0000e9" ] [ "resource:basis/xml/tests/utf16le-bom.xml" file>xml children>string ] unit-test
 [ "\u0000e9" ] [ "resource:basis/xml/tests/prologless.xml" file>xml children>string ] unit-test
 [ "e" ] [ "resource:basis/xml/tests/ascii.xml" file>xml children>string ] unit-test
-[ "\u0000e9" "x" ] [ "resource:basis/xml/tests/unitag.xml" file>xml [ name>> main>> ] [ children>string ] bi ] unit-test
+[ "\u0000e9" "x" ] [ "resource:basis/xml/tests/unitag.xml" file>xml [ name>> main>> ] [ children>string ] bi ] unit-test
--- a/core/byte-arrays/byte-arrays-tests.factor
+++ b/core/byte-arrays/byte-arrays-tests.factor
@ -9,3 +9,5 @@ USING: tools.test byte-arrays sequences kernel ;
 [ B{ 1 2 } ] [ 2 B{ 1 2 3 4 5 6 7 8 9 } resize-byte-array ] unit-test

 [ -10 B{ } resize-byte-array ] must-fail
+
+[ B{ 123 } ] [ 123 1byte-array ] unit-test
--- a/core/byte-arrays/byte-arrays.factor
+++ b/core/byte-arrays/byte-arrays.factor
@ -1,4 +1,4 @@
-! Copyright (C) 2007, 2008 Slava Pestov.
+! Copyright (C) 2007, 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors kernel kernel.private alien.accessors sequences
 sequences.private math ;
@ -19,7 +19,7 @@ M: byte-array resize

 INSTANCE: byte-array sequence

-: 1byte-array ( x -- byte-array ) 1 <byte-array> [ set-first ] keep ; inline
+: 1byte-array ( x -- byte-array ) B{ } 1sequence ; inline

 : 2byte-array ( x y -- byte-array ) B{ } 2sequence ; inline

--- a/core/combinators/combinators-tests.factor
+++ b/core/combinators/combinators-tests.factor
@ -1,6 +1,6 @@
 USING: alien strings kernel math tools.test io prettyprint
 namespaces combinators words classes sequences accessors 
-math.functions ;
+math.functions arrays ;
 IN: combinators.tests

 ! Compiled
@ -314,3 +314,13 @@ IN: combinators.tests
 \ test-case-7 must-infer

 [ "plus" ] [ \ + test-case-7 ] unit-test
+
+! Some corner cases (no pun intended)
+DEFER: corner-case-1
+
+<< \ corner-case-1 2 [ + ] curry 1array [ case ] curry (( a -- b )) define-declared >>
+
+[ t ] [ \ corner-case-1 optimized>> ] unit-test
+[ 4 ] [ 2 corner-case-1 ] unit-test
+
+[ 4 ] [ 2 2 [ + ] curry 1array case ] unit-test
--- a/core/combinators/combinators.factor
+++ b/core/combinators/combinators.factor
@ -59,13 +59,13 @@ ERROR: no-case ;
            ] [
                dup wrapper? [ wrapped>> ] when
            ] if =
-        ] [ quotation? ] if
+        ] [ callable? ] if
    ] find nip ;

 : case ( obj assoc -- )
    case-find {
        { [ dup array? ] [ nip second call ] }
-        { [ dup quotation? ] [ call ] }
+        { [ dup callable? ] [ call ] }
        { [ dup not ] [ no-case ] }
    } cond ;

--- a/core/continuations/continuations-docs.factor
+++ b/core/continuations/continuations-docs.factor
@ -246,8 +246,8 @@ HELP: retry
 { $description "Tries the quotation up to " { $snippet "n" } " times until it returns true. Retries the quotation if an exception is thrown or if the quotation returns " { $link f } ". The quotation is expected to have side effects that may fail, such as generating a random name for a new file until successful." }
 { $examples
    "Try to get a 0 as a random number:"
-    { $unchecked-example "USING: continuations math prettyprint ;"
-        "[ 5 random 0 = ] 5 retry t"
+    { $unchecked-example "USING: continuations math prettyprint random ;"
+        "[ 5 random 0 = ] 5 retry"
        "t"
    }
 } ;
--- a/core/sequences/sequences-docs.factor
+++ b/core/sequences/sequences-docs.factor
@ -207,6 +207,10 @@ HELP: first4-unsafe
 { $values { "seq" sequence } { "first" "the first element" } { "second" "the second element" } { "third" "the third element" } { "fourth" "the fourth element" } }
 { $contract "Unsafe variant of " { $link first4 } " that does not perform bounds checks." } ;

+HELP: 1sequence
+{ $values { "obj" object } { "exemplar" sequence } { "seq" sequence } }
+{ $description "Creates a one-element sequence of the same type as " { $snippet "exemplar" } "." } ;
+
 HELP: 2sequence
 { $values { "obj1" object } { "obj2" object } { "exemplar" sequence } { "seq" sequence } }
 { $description "Creates a two-element sequence of the same type as " { $snippet "exemplar" } "." } ;
--- a/core/sequences/sequences.factor
+++ b/core/sequences/sequences.factor
@ -137,9 +137,12 @@ INSTANCE: iota immutable-sequence

 : from-end ( seq n -- seq n' ) [ dup length ] dip - ; inline

+: (1sequence) ( obj seq -- seq )
+    [ 0 swap set-nth-unsafe ] keep ; inline
+
 : (2sequence) ( obj1 obj2 seq -- seq )
    [ 1 swap set-nth-unsafe ] keep
-    [ 0 swap set-nth-unsafe ] keep ; inline
+    (1sequence) ; inline

 : (3sequence) ( obj1 obj2 obj3 seq -- seq )
    [ 2 swap set-nth-unsafe ] keep
@ -151,6 +154,9 @@ INSTANCE: iota immutable-sequence

 PRIVATE>

+: 1sequence ( obj exemplar -- seq )
+    1 swap [ (1sequence) ] new-like ; inline
+
 : 2sequence ( obj1 obj2 exemplar -- seq )
    2 swap [ (2sequence) ] new-like ; inline

--- a/core/vectors/vectors-tests.factor
+++ b/core/vectors/vectors-tests.factor
@ -97,3 +97,5 @@ IN: vectors.tests
 [ fixnum ] [ 1 >bignum V{ } new-sequence length class ] unit-test

 [ fixnum ] [ 1 >bignum [ ] V{ } map-as length class ] unit-test
+
+[ V{ "lulz" } ] [ "lulz" 1vector ] unit-test
--- a/core/vectors/vectors.factor
+++ b/core/vectors/vectors.factor
@ -40,7 +40,7 @@ M: sequence new-resizable drop <vector> ;

 INSTANCE: vector growable

-: 1vector ( x -- vector ) 1array >vector ;
+: 1vector ( x -- vector ) V{ } 1sequence ;

 : ?push ( elt seq/f -- seq )
    [ 1 <vector> ] unless* [ push ] keep ;
--- a/extra/reports/noise/noise.factor
+++ b/extra/reports/noise/noise.factor
@ -53,7 +53,6 @@ IN: reports.noise
        { nipd 3 }
        { nkeep 5 }
        { npick 6 }
-        { nrev 5 }
        { nrot 5 }
        { nslip 5 }
        { ntuck 6 }