Merge branch 'master' of git://factorcode.org/git/factor

db4
Doug Coleman 2009-01-26 09:00:48 -06:00
commit 9985b1ee2b
40 changed files with 487 additions and 343 deletions

View File

@ -57,8 +57,10 @@ HELP: >upper
 { $values { "str" "a string" } { "upper" "a string" } }
 { $description "Converts an ASCII string to upper case." } ;
-ARTICLE: "ascii" "ASCII character classes"
-"The " { $vocab-link "ascii" } " vocabulary implements traditional ASCII character classes:"
+ARTICLE: "ascii" "ASCII"
+"The " { $vocab-link "ascii" } " vocabulary implements support for the legacy ASCII character set. Most applications should use " { $link "unicode" } " instead."
+$nl
+"ASCII character classes:"
 { $subsection blank? }
 { $subsection letter? }
 { $subsection LETTER? }
@ -67,11 +69,10 @@ ARTICLE: "ascii" "ASCII character classes"
 { $subsection control? }
 { $subsection quotable? }
 { $subsection ascii? }
-"ASCII case conversion is also implemented:"
+"ASCII case conversion:"
 { $subsection ch>lower }
 { $subsection ch>upper }
 { $subsection >lower }
-{ $subsection >upper }
-"Modern applications should use Unicode 5.1 instead (" { $vocab-link "unicode.categories" } ")." ;
+{ $subsection >upper } ;

 ABOUT: "ascii"
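
A quick usage sketch of the words documented above, assuming an interactive listener with these vocabularies loaded (the values in comments are what the prettyprinter would show):

    USING: ascii prettyprint ;
    "Hello, Factor" >upper .    ! "HELLO, FACTOR"
    CHAR: q letter? .           ! t
    CHAR: 7 digit? .            ! t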

View File

@ -1,41 +1,23 @@
-! Copyright (C) 2005, 2008 Slava Pestov.
+! Copyright (C) 2005, 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: kernel math math.order sequences
-combinators.short-circuit ;
+USING: kernel math math.order sequences strings
+combinators.short-circuit hints ;
 IN: ascii

 : ascii? ( ch -- ? ) 0 127 between? ; inline
 : blank? ( ch -- ? ) " \t\n\r" member? ; inline
 : letter? ( ch -- ? ) CHAR: a CHAR: z between? ; inline
 : LETTER? ( ch -- ? ) CHAR: A CHAR: Z between? ; inline
 : digit? ( ch -- ? ) CHAR: 0 CHAR: 9 between? ; inline
 : printable? ( ch -- ? ) CHAR: \s CHAR: ~ between? ; inline
-: control? ( ch -- ? )
-    "\0\e\r\n\t\u000008\u00007f" member? ; inline
-: quotable? ( ch -- ? )
-    dup printable? [ "\"\\" member? not ] [ drop f ] if ; inline
-: Letter? ( ch -- ? )
-    [ [ letter? ] [ LETTER? ] ] 1|| ;
-: alpha? ( ch -- ? )
-    [ [ Letter? ] [ digit? ] ] 1|| ;
-: ch>lower ( ch -- lower )
-    dup CHAR: A CHAR: Z between? [ HEX: 20 + ] when ;
-: >lower ( str -- lower )
-    [ ch>lower ] map ;
-: ch>upper ( ch -- upper )
-    dup CHAR: a CHAR: z between? [ HEX: 20 - ] when ;
-: >upper ( str -- upper )
-    [ ch>upper ] map ;
+: control? ( ch -- ? ) "\0\e\r\n\t\u000008\u00007f" member? ; inline
+: quotable? ( ch -- ? ) { [ printable? ] [ "\"\\" member? not ] } 1&& ; inline
+: Letter? ( ch -- ? ) { [ letter? ] [ LETTER? ] } 1|| ; inline
+: alpha? ( ch -- ? ) { [ Letter? ] [ digit? ] } 1|| ; inline
+: ch>lower ( ch -- lower ) dup LETTER? [ HEX: 20 + ] when ; inline
+: >lower ( str -- lower ) [ ch>lower ] map ;
+: ch>upper ( ch -- upper ) dup letter? [ HEX: 20 - ] when ; inline
+: >upper ( str -- upper ) [ ch>upper ] map ;
+
+HINTS: >lower string ;
+HINTS: >upper string ;

View File

@ -1,4 +1,4 @@
! Copyright (C) 2004, 2008 Slava Pestov. ! Copyright (C) 2004, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: alien arrays byte-arrays generic assocs hashtables assocs USING: alien arrays byte-arrays generic assocs hashtables assocs
hashtables.private io io.binary io.files io.encodings.binary hashtables.private io io.binary io.files io.encodings.binary
@ -10,7 +10,7 @@ classes.tuple.private words.private vocabs
vocabs.loader source-files definitions debugger vocabs.loader source-files definitions debugger
quotations.private sequences.private combinators quotations.private sequences.private combinators
math.order math.private accessors math.order math.private accessors
slots.private compiler.units ; slots.private compiler.units fry ;
IN: bootstrap.image IN: bootstrap.image
: arch ( os cpu -- arch ) : arch ( os cpu -- arch )
@ -73,7 +73,7 @@ SYMBOL: objects
: put-object ( n obj -- ) (objects) set-at ; : put-object ( n obj -- ) (objects) set-at ;
: cache-object ( obj quot -- value ) : cache-object ( obj quot -- value )
[ (objects) ] dip [ obj>> ] prepose cache ; inline [ (objects) ] dip '[ obj>> @ ] cache ; inline
! Constants ! Constants
@ -95,7 +95,7 @@ SYMBOL: objects
SYMBOL: sub-primitives SYMBOL: sub-primitives
: make-jit ( quot rc rt offset -- quad ) : make-jit ( quot rc rt offset -- quad )
{ [ { } make ] [ ] [ ] [ ] } spread 4array ; inline [ { } make ] 3dip 4array ; inline
: jit-define ( quot rc rt offset name -- ) : jit-define ( quot rc rt offset name -- )
[ make-jit ] dip set ; inline [ make-jit ] dip set ; inline
@ -524,11 +524,9 @@ M: quotation '
! Image output ! Image output
: (write-image) ( image -- ) : (write-image) ( image -- )
-    bootstrap-cell big-endian get [
-        [ >be write ] curry each
-    ] [
-        [ >le write ] curry each
-    ] if ;
+    bootstrap-cell big-endian get
+    [ '[ _ >be write ] each ]
+    [ '[ _ >le write ] each ] if ;
: write-image ( image -- ) : write-image ( image -- )
"Writing image to " write "Writing image to " write

View File

@ -0,0 +1 @@
USE: unicode

View File

@ -1,6 +1,6 @@
! Copyright (C) 2008 Slava Pestov. ! Copyright (C) 2008, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences math ; USING: kernel sequences math fry ;
IN: deques IN: deques
GENERIC: push-front* ( obj deque -- node ) GENERIC: push-front* ( obj deque -- node )
@ -34,7 +34,8 @@ GENERIC: deque-empty? ( deque -- ? )
[ peek-back ] [ pop-back* ] bi ; [ peek-back ] [ pop-back* ] bi ;
: slurp-deque ( deque quot -- ) : slurp-deque ( deque quot -- )
-    [ drop [ deque-empty? not ] curry ]
-    [ [ pop-back ] prepose curry ] 2bi [ ] while ; inline
+    [ drop '[ _ deque-empty? not ] ]
+    [ '[ _ pop-back @ ] ]
+    2bi [ ] while ; inline
MIXIN: deque MIXIN: deque
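
Several hunks in this commit (cache-object, (write-image), slurp-deque, the dlist words) replace curry/prepose/compose pipelines with fried quotations. A minimal sketch of the equivalence being relied on, assuming a listener with the fry vocabulary loaded (the literal numbers are illustrative only; _ splices a value into the quotation, @ splices a quotation):

    USING: fry kernel math prettyprint ;
    3 '[ _ 2 + ] call .           ! 5, same as: 3 [ 2 + ] curry call .
    5 [ 1 + ] '[ @ 2 * ] call .   ! 12, same as: 5 [ 1 + ] [ 2 * ] compose call .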

View File

@ -1,8 +1,8 @@
! Copyright (C) 2007, 2008 Mackenzie Straight, Doug Coleman, ! Copyright (C) 2007, 2009 Mackenzie Straight, Doug Coleman,
! Slava Pestov. ! Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: combinators kernel math sequences accessors deques USING: combinators kernel math sequences accessors deques
search-deques summary hashtables ; search-deques summary hashtables fry ;
IN: dlists IN: dlists
<PRIVATE <PRIVATE
@ -64,7 +64,7 @@ M: dlist-node node-value obj>> ;
[ front>> ] dip (dlist-find-node) ; inline [ front>> ] dip (dlist-find-node) ; inline
: dlist-each-node ( dlist quot -- ) : dlist-each-node ( dlist quot -- )
[ f ] compose dlist-find-node 2drop ; inline '[ @ f ] dlist-find-node 2drop ; inline
: unlink-node ( dlist-node -- ) : unlink-node ( dlist-node -- )
dup prev>> over next>> set-prev-when dup prev>> over next>> set-prev-when
@ -115,8 +115,7 @@ M: dlist pop-back* ( dlist -- )
normalize-front ; normalize-front ;
: dlist-find ( dlist quot -- obj/f ? ) : dlist-find ( dlist quot -- obj/f ? )
-    [ obj>> ] prepose
-    dlist-find-node [ obj>> t ] [ drop f f ] if ; inline
+    '[ obj>> @ ] dlist-find-node [ obj>> t ] [ drop f f ] if ; inline
: dlist-contains? ( dlist quot -- ? ) : dlist-contains? ( dlist quot -- ? )
dlist-find nip ; inline dlist-find nip ; inline
@ -143,7 +142,7 @@ M: dlist delete-node ( dlist-node dlist -- )
] if ; inline ] if ; inline
: delete-node-if ( dlist quot -- obj/f ) : delete-node-if ( dlist quot -- obj/f )
[ obj>> ] prepose delete-node-if* drop ; inline '[ obj>> @ ] delete-node-if* drop ; inline
M: dlist clear-deque ( dlist -- ) M: dlist clear-deque ( dlist -- )
f >>front f >>front
@ -151,7 +150,7 @@ M: dlist clear-deque ( dlist -- )
drop ; drop ;
: dlist-each ( dlist quot -- ) : dlist-each ( dlist quot -- )
[ obj>> ] prepose dlist-each-node ; inline '[ obj>> @ ] dlist-each-node ; inline
: dlist>seq ( dlist -- seq ) : dlist>seq ( dlist -- seq )
[ ] accumulator [ dlist-each ] dip ; [ ] accumulator [ dlist-each ] dip ;
@ -159,8 +158,6 @@ M: dlist clear-deque ( dlist -- )
: 1dlist ( obj -- dlist ) <dlist> [ push-front ] keep ; : 1dlist ( obj -- dlist ) <dlist> [ push-front ] keep ;
M: dlist clone M: dlist clone
-    <dlist> [
-        [ push-back ] curry dlist-each
-    ] keep ;
+    <dlist> [ '[ _ push-back ] dlist-each ] keep ;
INSTANCE: dlist deque INSTANCE: dlist deque

View File

@ -1,7 +1,7 @@
! Copyright (C) 2005, 2009 Slava Pestov. ! Copyright (C) 2005, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel math math.order strings arrays vectors sequences USING: kernel math math.order strings arrays vectors sequences
sequences.private accessors ; sequences.private accessors fry ;
IN: grouping IN: grouping
<PRIVATE <PRIVATE
@ -94,7 +94,7 @@ INSTANCE: sliced-clumps slice-chunking
[ first2-unsafe ] dip call [ first2-unsafe ] dip call
] [ ] [
[ 2 <sliced-clumps> ] dip [ 2 <sliced-clumps> ] dip
[ first2-unsafe ] prepose all? '[ first2-unsafe @ ] all?
] if ] if
] if ; inline ] if ; inline

View File

@ -1,9 +1,11 @@
! Copyright (C) 2008 Slava Pestov. ! Copyright (C) 2008, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: io kernel macros make multiline namespaces parser USING: io kernel macros make multiline namespaces parser
present sequences strings splitting fry accessors ; present sequences strings splitting fry accessors ;
IN: interpolate IN: interpolate
<PRIVATE
TUPLE: interpolate-var name ; TUPLE: interpolate-var name ;
: (parse-interpolate) ( string -- ) : (parse-interpolate) ( string -- )
@ -20,21 +22,22 @@ TUPLE: interpolate-var name ;
: parse-interpolate ( string -- seq ) : parse-interpolate ( string -- seq )
[ (parse-interpolate) ] { } make ; [ (parse-interpolate) ] { } make ;
-MACRO: interpolate ( string -- )
-    parse-interpolate [
+: (interpolate) ( string quot -- quot' )
+    [ parse-interpolate ] dip '[
         dup interpolate-var?
-        [ name>> '[ _ get present write ] ]
+        [ name>> @ '[ _ @ present write ] ]
         [ '[ _ write ] ]
         if
-    ] map [ ] join ;
+    ] map [ ] join ; inline
+
+PRIVATE>
+
+MACRO: interpolate ( string -- )
+    [ [ get ] ] (interpolate) ;

 : interpolate-locals ( string -- quot )
-    parse-interpolate [
-        dup interpolate-var?
-        [ name>> search '[ _ present write ] ]
-        [ '[ _ write ] ]
-        if
-    ] map [ ] join ;
+    [ search [ ] ] (interpolate) ;

-: I[ "]I" parse-multiline-string
-    interpolate-locals parsed \ call parsed ; parsing
+: I[
+    "]I" parse-multiline-string
+    interpolate-locals over push-all ; parsing

View File

@ -490,4 +490,8 @@ M:: integer lambda-method-forget-test ( a -- b ) ;
[ 10 ] [ [ 10 ] [
[| | 0 '[ [let | A [ 10 ] | A _ + ] ] call ] call [| | 0 '[ [let | A [ 10 ] | A _ + ] ] call ] call
] unit-test ] unit-test
! Discovered by littledan
[ "bar" ] [ [let | a [ [let | foo [ "bar" ] | foo ] ] | a ] ] unit-test
[ 10 ] [ [let | a [ 10 ] | [let | b [ a ] | b ] ] ] unit-test

View File

@ -1,7 +1,7 @@
! Copyright (C) 2007, 2008 Slava Pestov, Eduardo Cavazos. ! Copyright (C) 2007, 2009 Slava Pestov, Eduardo Cavazos.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays combinators effects.parser USING: accessors arrays combinators effects.parser
generic.parser kernel lexer locals.errors generic.parser kernel lexer locals.errors fry
locals.rewrite.closures locals.types make namespaces parser locals.rewrite.closures locals.types make namespaces parser
quotations sequences splitting words vocabs.parser ; quotations sequences splitting words vocabs.parser ;
IN: locals.parser IN: locals.parser
@ -56,19 +56,21 @@ SYMBOL: in-lambda?
(parse-bindings) (parse-bindings)
] [ 2drop ] if ; ] [ 2drop ] if ;
: with-bindings ( quot -- words assoc )
'[
in-lambda? on
_ H{ } make-assoc
] { } make swap ; inline
 : parse-bindings ( end -- bindings vars )
-    [
-        [ (parse-bindings) ] H{ } make-assoc
-    ] { } make swap ;
+    [ (parse-bindings) ] with-bindings ;

 : parse-bindings* ( end -- words assoc )
     [
-        [
-            namespace push-locals
-            (parse-bindings)
-            namespace pop-locals
-        ] { } make-assoc
-    ] { } make swap ;
+        namespace push-locals
+        (parse-bindings)
+        namespace pop-locals
+    ] with-bindings ;

 : (parse-wbindings) ( end -- )
     dup parse-binding dup [
@ -77,9 +79,7 @@ SYMBOL: in-lambda?
     ] [ 2drop ] if ;

 : parse-wbindings ( end -- bindings vars )
-    [
-        [ (parse-wbindings) ] H{ } make-assoc
-    ] { } make swap ;
+    [ (parse-wbindings) ] with-bindings ;
: parse-locals ( -- vars assoc ) : parse-locals ( -- vars assoc )
"(" expect ")" parse-effect "(" expect ")" parse-effect
@ -88,8 +88,8 @@ SYMBOL: in-lambda?
: parse-locals-definition ( word -- word quot ) : parse-locals-definition ( word -- word quot )
parse-locals \ ; (parse-lambda) <lambda> parse-locals \ ; (parse-lambda) <lambda>
-    2dup "lambda" set-word-prop
-    rewrite-closures dup length 1 = [ first ] [ bad-lambda-rewrite ] if ;
+    [ "lambda" set-word-prop ]
+    [ rewrite-closures dup length 1 = [ first ] [ bad-lambda-rewrite ] if ] 2bi ;
: (::) ( -- word def ) CREATE-WORD parse-locals-definition ; : (::) ( -- word def ) CREATE-WORD parse-locals-definition ;

View File

@ -287,9 +287,13 @@ IN: regexp-tests
[ { "1" "2" "3" "4" } ] [ { "1" "2" "3" "4" } ]
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ re-split [ >string ] map ] unit-test [ "1ABC2DEF3GHI4" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
[ { "1" "2" "3" "4" } ] [ { "1" "2" "3" "4" "" } ]
[ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ re-split [ >string ] map ] unit-test [ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
[ { "" } ] [ "" R/ =/ re-split [ >string ] map ] unit-test
[ { "a" "" } ] [ "a=" R/ =/ re-split [ >string ] map ] unit-test
[ { "ABC" "DEF" "GHI" } ] [ { "ABC" "DEF" "GHI" } ]
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ all-matches [ >string ] map ] unit-test [ "1ABC2DEF3GHI4" R/ [A-Z]+/ all-matches [ >string ] map ] unit-test
@ -299,16 +303,16 @@ IN: regexp-tests
[ 0 ] [ 0 ]
[ "123" R/ [A-Z]+/ count-matches ] unit-test [ "123" R/ [A-Z]+/ count-matches ] unit-test
[ "1.2.3.4" ] [ "1.2.3.4." ]
[ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ "." re-replace ] unit-test [ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ "." re-replace ] unit-test
[ "-- title --" ] [ "== title ==" "=" <regexp> "-" re-replace ] unit-test [ "-- title --" ] [ "== title ==" R/ =/ "-" re-replace ] unit-test
/* /*
! FIXME ! FIXME
[ f ] [ "ab" "a(?!b)" <regexp> first-match ] unit-test [ f ] [ "ab" "a(?!b)" <regexp> first-match ] unit-test
[ "a" ] [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test [ "a" ] [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test
! [ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test [ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test [ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
[ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> first-match >string ] unit-test [ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> first-match >string ] unit-test
[ "a" ] [ "ba" "a(?<=b)(?<=b)" <regexp> first-match >string ] unit-test [ "a" ] [ "ba" "a(?<=b)(?<=b)" <regexp> first-match >string ] unit-test
@ -319,7 +323,7 @@ IN: regexp-tests
*/ */
! Bug in parsing word ! Bug in parsing word
[ t ] [ "a" R' a' matches? ] unit-test [ t ] [ "a" R' a' matches? ] unit-test
! Convert to lowercase until E ! Convert to lowercase until E
[ f ] [ "AA" R/ \LAA\E/ matches? ] unit-test [ f ] [ "AA" R/ \LAA\E/ matches? ] unit-test

View File

@ -61,8 +61,11 @@ IN: regexp
dupd first-match dupd first-match
[ split1-slice swap ] [ "" like f swap ] if* ; [ split1-slice swap ] [ "" like f swap ] if* ;
: (re-split) ( string regexp -- )
over [ [ re-cut , ] keep (re-split) ] [ 2drop ] if ;
: re-split ( string regexp -- seq ) : re-split ( string regexp -- seq )
-    [ dup length 0 > ] swap '[ _ re-cut ] [ ] produce nip ;
+    [ (re-split) ] { } make ;
: re-replace ( string regexp replacement -- result ) : re-replace ( string regexp replacement -- result )
[ re-split ] dip join ; [ re-split ] dip join ;
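
A small usage sketch of the new splitting behavior, mirroring the updated tests above (a trailing match now yields a trailing empty string), assuming a listener with these vocabularies loaded:

    USING: prettyprint regexp sequences strings ;
    "1ABC2DEF3GHI4JK" R/ [A-Z]+/ re-split [ >string ] map .
    ! { "1" "2" "3" "4" "" }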

View File

@ -1,5 +1,5 @@
USING: syndication io kernel io.files tools.test io.encodings.utf8 USING: syndication io kernel io.files tools.test io.encodings.utf8
calendar urls ; calendar urls xml.writer ;
IN: syndication.tests IN: syndication.tests
\ download-feed must-infer \ download-feed must-infer
@ -43,3 +43,4 @@ IN: syndication.tests
} }
} }
} ] [ "resource:basis/syndication/test/atom.xml" load-news-file ] unit-test } ] [ "resource:basis/syndication/test/atom.xml" load-news-file ] unit-test
[ ] [ "resource:basis/syndication/test/atom.xml" load-news-file feed>xml xml>string drop ] unit-test

View File

@ -1,10 +1,10 @@
! Copyright (C) 2006 Chris Double, Daniel Ehrenberg. ! Copyright (C) 2006 Chris Double, Daniel Ehrenberg.
! Portions copyright (C) 2008 Slava Pestov. ! Portions copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: xml.utilities kernel assocs xml.generator math.order USING: xml.utilities kernel assocs math.order
strings sequences xml.data xml.writer strings sequences xml.data xml.writer
io.streams.string combinators xml xml.entities.html io.files io io.streams.string combinators xml xml.entities.html io.files io
http.client namespaces make xml.generator hashtables http.client namespaces make xml.interpolate hashtables
calendar.format accessors continuations urls present ; calendar.format accessors continuations urls present ;
IN: syndication IN: syndication
@ -114,26 +114,31 @@ TUPLE: entry title url description date ;
http-get nip string>feed ; http-get nip string>feed ;
! Atom generation ! Atom generation
-: simple-tag, ( content name -- )
-    [ , ] tag, ;
-
-: simple-tag*, ( content name attrs -- )
-    [ , ] tag*, ;
-
-: entry, ( entry -- )
-    "entry" [
-        {
-            [ title>> "title" { { "type" "html" } } simple-tag*, ]
-            [ url>> present "href" associate "link" swap contained*, ]
-            [ date>> timestamp>rfc3339 "published" simple-tag, ]
-            [ description>> [ "content" { { "type" "html" } } simple-tag*, ] when* ]
-        } cleave
-    ] tag, ;
+: entry>xml ( entry -- xml )
+    {
+        [ title>> ]
+        [ url>> present ]
+        [ date>> timestamp>rfc3339 ]
+        [ description>> ]
+    } cleave
+    [XML
+        <entry>
+            <title type="html"><-></title>
+            <link href=<-> />
+            <published><-></published>
+            <content type="html"><-></content>
+        </entry>
+    XML] ;

 : feed>xml ( feed -- xml )
-    "feed" { { "xmlns" "http://www.w3.org/2005/Atom" } } [
-        [ title>> "title" simple-tag, ]
-        [ url>> present "href" associate "link" swap contained*, ]
-        [ entries>> [ entry, ] each ]
-        tri
-    ] make-xml* ;
+    [ title>> ]
+    [ url>> present ]
+    [ entries>> [ entry>xml ] map ] tri
+    <XML
+        <feed xmlns="http://www.w3.org/2005/Atom">
+            <title><-></title>
+            <link href=<-> />
+            <->
+        </feed>
+    XML> ;

View File

@ -1,49 +1,59 @@
! Copyright (C) 2009 Your name. ! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: help.markup help.syntax kernel ; USING: help.markup help.syntax kernel ;
IN: unicode.categories IN: unicode.categories
HELP: LETTER HELP: LETTER
{ $class-description "The class of upper cased letters" } ; { $class-description "The class of upper cased letters." } ;
HELP: Letter HELP: Letter
{ $class-description "The class of letters" } ; { $class-description "The class of letters." } ;
HELP: alpha HELP: alpha
{ $class-description "The class of code points which are alphanumeric" } ; { $class-description "The class of alphanumeric characters." } ;
HELP: blank HELP: blank
{ $class-description "The class of code points which are whitespace" } ; { $class-description "The class of whitespace characters." } ;
HELP: character HELP: character
{ $class-description "The class of numbers which are pre-defined Unicode code points" } ; { $class-description "The class of pre-defined Unicode code points." } ;
HELP: control HELP: control
{ $class-description "The class of control characters" } ; { $class-description "The class of control characters." } ;
HELP: digit HELP: digit
{ $class-description "The class of code coints which are digits" } ; { $class-description "The class of digits." } ;
HELP: letter HELP: letter
{ $class-description "The class of code points which are lower-cased letters" } ; { $class-description "The class of lower-cased letters." } ;
HELP: printable HELP: printable
{ $class-description "The class of characters which are printable, as opposed to being control or formatting characters" } ; { $class-description "The class of characters which are printable, as opposed to being control or formatting characters." } ;
HELP: uncased HELP: uncased
{ $class-description "The class of letters which don't have a case" } ; { $class-description "The class of letters which don't have a case." } ;
ARTICLE: "unicode.categories" "Character classes" ARTICLE: "unicode.categories" "Character classes"
-{ $vocab-link "unicode.categories" } " is a vocabulary which provides predicates for determining if a code point has a particular property, for example being a lower cased letter. These should be used in preference to the " { $vocab-link "ascii" } " equivalents in most cases. Below are links to classes of characters, but note that each of these also has a predicate defined, which is usually more useful."
+"The " { $vocab-link "unicode.categories" } " vocabulary implements predicates for determining if a code point has a particular property, for example being a lower cased letter. These should be used in preference to the " { $vocab-link "ascii" } " equivalents in most cases. Each character class has an associated predicate word."
{ $subsection blank } { $subsection blank }
{ $subsection blank? }
{ $subsection letter } { $subsection letter }
{ $subsection letter? }
{ $subsection LETTER } { $subsection LETTER }
{ $subsection LETTER? }
{ $subsection Letter } { $subsection Letter }
{ $subsection Letter? }
{ $subsection digit } { $subsection digit }
{ $subsection digit? }
{ $subsection printable } { $subsection printable }
{ $subsection printable? }
{ $subsection alpha } { $subsection alpha }
{ $subsection alpha? }
{ $subsection control } { $subsection control }
{ $subsection control? }
{ $subsection uncased } { $subsection uncased }
-{ $subsection character } ;
+{ $subsection uncased? }
+{ $subsection character }
+{ $subsection character? } ;
ABOUT: "unicode.categories" ABOUT: "unicode.categories"
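
A brief sketch of the documented predicates in use, assuming a listener with these vocabularies loaded (unlike their ascii counterparts, they cover the full Unicode range):

    USING: prettyprint unicode.categories ;
    CHAR: ß letter? .    ! t -- a lower-cased letter outside ASCII
    CHAR: A LETTER? .    ! t
    CHAR: \s blank? .    ! t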

View File

@ -4,7 +4,13 @@ IN: unicode.normalize
ABOUT: "unicode.normalize" ABOUT: "unicode.normalize"
 ARTICLE: "unicode.normalize" "Unicode normalization"
-"The " { $vocab-link "unicode.normalize" "unicode.normalize" } " vocabulary defines words for normalizing Unicode strings. In Unicode, it is often possible to have multiple sequences of characters which really represent exactly the same thing. For example, to represent e with an acute accent above, there are two possible strings: \"e\\u000301\" (the e character, followed by the combining acute accent character) and \"\\u0000e9\" (a single character, e with an acute accent). There are four normalization forms: NFD, NFC, NFKD, and NFKC. Basically, in NFD and NFKD, everything is expanded, whereas in NFC and NFKC, everything is contracted. In NFKD and NFKC, more things are expanded and contracted. This is a process which loses some information, so it should be done only with care. Most of the world uses NFC to communicate, but for many purposes, NFD/NFKD is easier to process. For more information, see Unicode Standard Annex #15 and section 3 of the Unicode standard."
+"The " { $vocab-link "unicode.normalize" "unicode.normalize" } " vocabulary defines words for normalizing Unicode strings."
$nl
"In Unicode, it is often possible to have multiple sequences of characters which really represent exactly the same thing. For example, to represent e with an acute accent above, there are two possible strings: " { $snippet "\"e\\u000301\"" } " (the e character, followed by the combining acute accent character) and " { $snippet "\"\\u0000e9\"" } " (a single character, e with an acute accent)."
$nl
"There are four normalization forms: NFD, NFC, NFKD, and NFKC. Basically, in NFD and NFKD, everything is expanded, whereas in NFC and NFKC, everything is contracted. In NFKD and NFKC, more things are expanded and contracted. This is a process which loses some information, so it should be done only with care."
$nl
"Most of the world uses NFC to communicate, but for many purposes, NFD/NFKD is easier to process. For more information, see Unicode Standard Annex #15 and section 3 of the Unicode standard."
{ $subsection nfc } { $subsection nfc }
{ $subsection nfd } { $subsection nfd }
{ $subsection nfkc } { $subsection nfkc }
@ -12,16 +18,16 @@ ARTICLE: "unicode.normalize" "Unicode normalization"
HELP: nfc HELP: nfc
{ $values { "string" string } { "nfc" "a string in NFC" } } { $values { "string" string } { "nfc" "a string in NFC" } }
{ $description "Converts a string to Normalization Form C" } ; { $description "Converts a string to Normalization Form C." } ;
HELP: nfd HELP: nfd
{ $values { "string" string } { "nfd" "a string in NFD" } } { $values { "string" string } { "nfd" "a string in NFD" } }
{ $description "Converts a string to Normalization Form D" } ; { $description "Converts a string to Normalization Form D." } ;
HELP: nfkc HELP: nfkc
{ $values { "string" string } { "nfkc" "a string in NFKC" } } { $values { "string" string } { "nfkc" "a string in NFKC" } }
{ $description "Converts a string to Normalization Form KC" } ; { $description "Converts a string to Normalization Form KC." } ;
HELP: nfkd HELP: nfkd
{ $values { "string" string } { "nfkd" "a string in NFKD" } } { $values { "string" string } { "nfkd" "a string in NFKD" } }
{ $description "Converts a string to Normalization Form KD" } ; { $description "Converts a string to Normalization Form KD." } ;
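
A minimal sketch of the equivalence described in the article above, assuming a listener with these vocabularies loaded:

    USING: kernel prettyprint unicode.normalize ;
    "\u0000e9" nfd "e\u000301" = .    ! t -- NFD decomposes the precomposed character
    "e\u000301" nfc "\u0000e9" = .    ! t -- NFC puts it back together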

View File

@ -1,8 +1,14 @@
USING: help.markup help.syntax ; USING: help.markup help.syntax strings ;
IN: unicode IN: unicode
 ARTICLE: "unicode" "Unicode"
-"Unicode is a set of characters, or " { $emphasis "code points" } " covering what's used in most world writing systems. Any Factor string can hold any of these code points transparently; a factor string is a sequence of Unicode code points. Unicode is accompanied by several standard algorithms for common operations like encoding in files, capitalizing a string, finding the boundaries between words, etc. When a programmer is faced with a string manipulation problem, where the string represents human language, a Unicode algorithm is often much better than the naive one. This is not in terms of efficiency, but rather internationalization. Even English text that remains in ASCII is better served by the Unicode collation algorithm than a naive algorithm. The Unicode algorithms implemented here are:"
+"The " { $vocab-link "unicode" } " vocabulary and its sub-vocabularies implement support for the Unicode 5.1 character set."
$nl
"The Unicode character set contains most of the world's writing systems. Unicode is intended as a replacement for, and is a superset of, such legacy character sets as ASCII, Latin1, MacRoman, and so on. Unicode characters are called " { $emphasis "code points" } "; Factor's " { $link "strings" } " are sequences of code points."
$nl
"The Unicode character set is accompanied by several standard algorithms for common operations like encoding text in files, capitalizing a string, finding the boundaries between words, and so on."
$nl
"The Unicode algorithms implemented by the " { $vocab-link "unicode" } " vocabulary are:"
{ $vocab-subsection "Case mapping" "unicode.case" } { $vocab-subsection "Case mapping" "unicode.case" }
{ $vocab-subsection "Collation and weak comparison" "unicode.collation" } { $vocab-subsection "Collation and weak comparison" "unicode.collation" }
{ $vocab-subsection "Character classes" "unicode.categories" } { $vocab-subsection "Character classes" "unicode.categories" }
@ -11,6 +17,6 @@ ARTICLE: "unicode" "Unicode"
"The following are mostly for internal use:" "The following are mostly for internal use:"
{ $vocab-subsection "Unicode syntax" "unicode.syntax" } { $vocab-subsection "Unicode syntax" "unicode.syntax" }
{ $vocab-subsection "Unicode data tables" "unicode.data" } { $vocab-subsection "Unicode data tables" "unicode.data" }
{ $see-also "io.encodings" } ; { $see-also "ascii" "io.encodings" } ;
ABOUT: "unicode" ABOUT: "unicode"
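
A short sketch of the kind of difference the article describes, assuming the unicode.case vocabulary linked above ("Case mapping") applies full, length-changing case mapping, including the German ß special case:

    USING: prettyprint unicode.case ;
    "straße" >upper .    ! "STRASSE" -- not a per-character table lookup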

View File

@ -1,6 +1,6 @@
USING: kernel alien.c-types alien.strings sequences math alien.syntax unix USING: kernel alien.c-types alien.strings sequences math alien.syntax unix
vectors kernel namespaces continuations threads assocs vectors vectors kernel namespaces continuations threads assocs vectors
io.backend.unix io.encodings.utf8 unix.utilities ; io.backend.unix io.encodings.utf8 unix.utilities fry ;
IN: unix.process IN: unix.process
! Low-level Unix process launching utilities. These are used ! Low-level Unix process launching utilities. These are used
@ -36,7 +36,7 @@ FUNCTION: int execve ( char* path, char** argv, char** envp ) ;
[ [ first ] [ ] bi ] dip exec-with-env ; [ [ first ] [ ] bi ] dip exec-with-env ;
: with-fork ( child parent -- ) : with-fork ( child parent -- )
[ [ fork-process dup zero? ] dip [ drop ] prepose ] dip [ [ fork-process dup zero? ] dip '[ drop @ ] ] dip
if ; inline if ; inline
CONSTANT: SIGKILL 9 CONSTANT: SIGKILL 9

View File

@ -10,8 +10,8 @@ IN: xml.autoencoding
: start-utf16le ( -- tag ) : start-utf16le ( -- tag )
utf16le decode-input-if utf16le decode-input-if
CHAR: ? expect "?\0" expect
0 expect check instruct ; check instruct ;
: 10xxxxxx? ( ch -- ? ) : 10xxxxxx? ( ch -- ? )
-6 shift 3 bitand 2 = ; -6 shift 3 bitand 2 = ;
@ -36,10 +36,10 @@ IN: xml.autoencoding
: skip-utf8-bom ( -- tag ) : skip-utf8-bom ( -- tag )
"\u0000bb\u0000bf" expect utf8 decode-input "\u0000bb\u0000bf" expect utf8 decode-input
CHAR: < expect check make-tag ; "<" expect check make-tag ;
: decode-expecting ( encoding string -- tag ) : decode-expecting ( encoding string -- tag )
[ decode-input-if next ] [ expect-string ] bi* check make-tag ; [ decode-input-if next ] [ expect ] bi* check make-tag ;
: start-utf16be ( -- tag ) : start-utf16be ( -- tag )
utf16be "<" decode-expecting ; utf16be "<" decode-expecting ;

View File

@ -150,3 +150,52 @@ HELP: assure-name
HELP: <simple-name> HELP: <simple-name>
{ $values { "string" string } { "name" name } } { $values { "string" string } { "name" name } }
{ $description "Converts a string into an XML name with an empty prefix and URL." } ; { $description "Converts a string into an XML name with an empty prefix and URL." } ;
HELP: element-decl
{ $class-description "Describes the class of element declarations, like <!ELEMENT greeting (#PCDATA)>." } ;
HELP: <element-decl>
{ $values { "name" name } { "content-spec" string } { "element-decl" element-decl } }
{ $description "Creates an element declaration object, of the class " { $link element-decl } } ;
HELP: attlist-decl
{ $class-description "Describes the class of attribute list declarations, like <!ATTLIST pre xml:space (preserve) #FIXED 'preserve'>." } ;
HELP: <attlist-decl>
{ $values { "name" name } { "att-defs" string } { "attlist-decl" attlist-decl } }
{ $description "Creates an attribute list declaration object, of the class " { $link attlist-decl } } ;
HELP: entity-decl
{ $class-description "Describes the class of entity declarations, like <!ENTITY foo 'bar'>." } ;
HELP: <entity-decl>
{ $values { "name" name } { "def" string } { "pe?" "t or f" } { "entity-decl" entity-decl } }
{ $description "Creates an entity declaration object, of the class " { $link entity-decl } ". The pe? slot should be t if the object is a DTD-internal entity, like <!ENTITY % foo 'bar'> and f if the object is like <!ENTITY foo 'bar'>, that is, it can be used outside of the DTD." } ;
HELP: system-id
{ $class-description "Describes the class of system identifiers within an XML DTD directive, such as <!DOCTYPE greeting " { $emphasis "SYSTEM 'hello.dtd'" } ">" } ;
HELP: <system-id>
{ $values { "system-literal" string } { "system-id" system-id } }
{ $description "Constructs a " { $link system-id } " tuple." } ;
HELP: public-id
{ $class-description "Describes the class of public identifiers within an XML DTD directive, such as <!DOCTYPE open-hatch " { $emphasis "PUBLIC '-//Textuality//TEXT Standard open-hatch boilerplate//EN' 'http://www.textuality.com/boilerplate/OpenHatch.xml'" } ">" } ;
HELP: <public-id>
{ $values { "pubid-literal" string } { "system-literal" string } { "public-id" public-id } }
{ $description "Constructs a " { $link public-id } " tuple." } ;
HELP: notation-decl
{ $class-description "Describes the class of notation declarations, like <!NOTATION jpg SYSTEM './jpgviewer'>." } ;
HELP: <notation-decl>
{ $values { "name" string } { "id" id } { "notation-decl" notation-decl } }
{ $description "Creates a notation declaration object, of the class " { $link notation-decl } "." } ;
HELP: doctype-decl
{ $class-description "Describes the class of doctype declarations." } ;
HELP: <doctype-decl>
{ $values { "name" name } { "external-id" id } { "internal-subset" sequence } { "doctype-decl" doctype-decl } }
{ $description "Creates a new doctype declaration object, of the class " { $link doctype-decl } ". Only one of external-id or internal-subset will be non-null." } ;
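
A hedged sketch of how these declaration classes surface in practice, using the string>dtd word reworked later in this commit (mirrors one of the updated unit tests below):

    USING: accessors prettyprint xml xml.data ;
    "<!ELEMENT br EMPTY>" string>dtd directives>> first .
    ! T{ element-decl f "br" "EMPTY" }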

View File

@ -5,6 +5,9 @@ delegate.protocols delegate vectors accessors multiline
macros words quotations combinators slots fry strings ; macros words quotations combinators slots fry strings ;
IN: xml.data IN: xml.data
TUPLE: interpolated var ;
C: <interpolated> interpolated
UNION: nullable-string string POSTPONE: f ; UNION: nullable-string string POSTPONE: f ;
TUPLE: name TUPLE: name
@ -85,11 +88,13 @@ C: <comment> comment
TUPLE: directive ; TUPLE: directive ;
TUPLE: element-decl < directive TUPLE: element-decl < directive
-    { name string } { content-spec string } ;
+    { name string }
+    { content-spec string } ;
C: <element-decl> element-decl C: <element-decl> element-decl
TUPLE: attlist-decl < directive TUPLE: attlist-decl < directive
-    { name string } { att-defs string } ;
+    { name string }
+    { att-defs string } ;
C: <attlist-decl> attlist-decl C: <attlist-decl> attlist-decl
UNION: boolean t POSTPONE: f ; UNION: boolean t POSTPONE: f ;
@ -108,13 +113,23 @@ C: <public-id> public-id
UNION: id system-id public-id POSTPONE: f ; UNION: id system-id public-id POSTPONE: f ;
TUPLE: dtd
{ directives sequence }
{ entities assoc }
{ parameter-entities assoc } ;
C: <dtd> dtd
UNION: dtd/f dtd POSTPONE: f ;
TUPLE: doctype-decl < directive TUPLE: doctype-decl < directive
{ name string } { name string }
{ external-id id } { external-id id }
{ internal-subset sequence } ; { internal-subset dtd/f } ;
C: <doctype-decl> doctype-decl C: <doctype-decl> doctype-decl
-TUPLE: notation-decl < directive name id ;
+TUPLE: notation-decl < directive
+    { name string }
+    { id string } ;
C: <notation-decl> notation-decl C: <notation-decl> notation-decl
TUPLE: instruction { text string } ; TUPLE: instruction { text string } ;

View File

@ -2,12 +2,9 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: xml.tokenize xml.data xml.state kernel sequences ascii USING: xml.tokenize xml.data xml.state kernel sequences ascii
fry xml.errors combinators hashtables namespaces xml.entities fry xml.errors combinators hashtables namespaces xml.entities
strings ; strings xml.name ;
IN: xml.dtd IN: xml.dtd
: take-word ( -- string )
[ get-char blank? ] take-until ;
: take-decl-contents ( -- first second ) : take-decl-contents ( -- first second )
pass-blank take-word pass-blank ">" take-string ; pass-blank take-word pass-blank ">" take-string ;
@ -20,36 +17,15 @@ IN: xml.dtd
: take-notation-decl ( -- notation-decl ) : take-notation-decl ( -- notation-decl )
take-decl-contents <notation-decl> ; take-decl-contents <notation-decl> ;
: take-until-one-of ( seps -- str sep )
'[ get-char _ member? ] take-until get-char ;
: take-system-id ( -- system-id )
parse-quote <system-id> close ;
: take-public-id ( -- public-id )
parse-quote parse-quote <public-id> close ;
UNION: dtd-acceptable UNION: dtd-acceptable
directive comment instruction ; directive comment instruction ;
: (take-external-id) ( token -- external-id )
pass-blank {
{ "SYSTEM" [ take-system-id ] }
{ "PUBLIC" [ take-public-id ] }
[ bad-external-id ]
} case ;
: take-external-id ( -- external-id )
take-word (take-external-id) ;
: only-blanks ( str -- )
[ blank? ] all? [ bad-decl ] unless ;
: take-entity-def ( var -- entity-name entity-def ) : take-entity-def ( var -- entity-name entity-def )
[ [
take-word pass-blank get-char { take-word pass-blank get-char {
{ CHAR: ' [ parse-quote ] } { CHAR: ' [ parse-quote ] }
{ CHAR: " [ parse-quote ] } { CHAR: " [ parse-quote ] }
[ drop take-external-id ] [ drop take-external-id close ]
} case } case
] dip '[ swap _ [ ?set-at ] change ] 2keep ; ] dip '[ swap _ [ ?set-at ] change ] 2keep ;

View File

@ -3,12 +3,26 @@
USING: kernel namespaces xml.tokenize xml.state xml.name USING: kernel namespaces xml.tokenize xml.state xml.name
xml.data accessors arrays make xml.char-classes fry assocs sequences xml.data accessors arrays make xml.char-classes fry assocs sequences
math xml.errors sets combinators io.encodings io.encodings.iana math xml.errors sets combinators io.encodings io.encodings.iana
unicode.case xml.dtd strings ; unicode.case xml.dtd strings xml.entities ;
IN: xml.elements IN: xml.elements
: take-interpolated ( quot -- interpolated )
interpolating? get [
drop get-char CHAR: > =
[ next f ] [
pass-blank " \t\r\n-" take-to
pass-blank "->" expect
] if <interpolated>
] [ call ] if ; inline
: interpolate-quote ( -- interpolated )
[ quoteless-attr ] take-interpolated ;
: parse-attr ( -- ) : parse-attr ( -- )
-    parse-name pass-blank CHAR: = expect pass-blank
-    t parse-quote* 2array , ;
+    parse-name pass-blank "=" expect pass-blank
+    get-char CHAR: < =
+    [ "<-" expect interpolate-quote ]
+    [ t parse-quote* ] if 2array , ;
: start-tag ( -- name ? ) : start-tag ( -- name ? )
#! Outputs the name and whether this is a closing tag #! Outputs the name and whether this is a closing tag
@ -31,14 +45,14 @@ IN: xml.elements
: end-tag ( name attrs-alist -- tag ) : end-tag ( name attrs-alist -- tag )
tag-ns pass-blank get-char CHAR: / = tag-ns pass-blank get-char CHAR: / =
[ pop-ns <contained> next CHAR: > expect ] [ pop-ns <contained> next ">" expect ]
[ depth inc <opener> close ] if ; [ depth inc <opener> close ] if ;
: take-comment ( -- comment ) : take-comment ( -- comment )
"--" expect-string "--" expect
"--" take-string "--" take-string
<comment> <comment>
CHAR: > expect ; ">" expect ;
: assure-no-extra ( seq -- ) : assure-no-extra ( seq -- )
[ first ] map { [ first ] map {
@ -80,7 +94,7 @@ SYMBOL: string-input?
string-input? get [ drop ] [ decode-input ] if ; string-input? get [ drop ] [ decode-input ] if ;
: parse-prolog ( -- prolog ) : parse-prolog ( -- prolog )
pass-blank middle-tag "?>" expect-string pass-blank middle-tag "?>" expect
dup assure-no-extra prolog-attrs dup assure-no-extra prolog-attrs
dup encoding>> dup "UTF-16" = dup encoding>> dup "UTF-16" =
[ drop ] [ name>encoding [ decode-input-if ] when* ] if [ drop ] [ name>encoding [ decode-input-if ] when* ] if
@ -96,45 +110,45 @@ SYMBOL: string-input?
: take-cdata ( -- string ) : take-cdata ( -- string )
depth get zero? [ bad-cdata ] when depth get zero? [ bad-cdata ] when
"[CDATA[" expect-string "]]>" take-string ; "[CDATA[" expect "]]>" take-string ;
DEFER: make-tag ! Is this unavoidable? DEFER: make-tag ! Is this unavoidable?
: expand-pe ( -- ) ; ! Make this run the contents of the pe within a DOCTYPE : expand-pe ( -- ) ; ! Make this run the contents of the pe within a DOCTYPE
-: (take-internal-subset) ( -- )
+: dtd-loop ( -- )
     pass-blank get-char {
         { CHAR: ] [ next ] }
         { CHAR: % [ expand-pe ] }
         { CHAR: < [
             next make-tag dup dtd-acceptable?
-            [ bad-doctype ] unless , (take-internal-subset)
+            [ bad-doctype ] unless , dtd-loop
         ] }
+        { f [ ] }
         [ 1string bad-doctype ]
     } case ;

-: take-internal-subset ( -- seq )
+: take-internal-subset ( -- dtd )
     [
-        H{ } pe-table set
+        H{ } clone pe-table set
         t in-dtd? set
-        (take-internal-subset)
-    ] { } make ;
+        dtd-loop
+        pe-table get
+    ] { } make swap extra-entities get swap <dtd> ;

-: nontrivial-doctype ( -- external-id internal-subset )
-    pass-blank get-char CHAR: [ = [
-        next take-internal-subset f swap close
-    ] [
-        " >" take-until-one-of {
-            { CHAR: \s [ (take-external-id) ] }
-            { CHAR: > [ only-blanks f ] }
-        } case f
-    ] if ;
+: take-optional-id ( -- id/f )
+    get-char "SP" member?
+    [ take-external-id ] [ f ] if ;
+
+: take-internal ( -- dtd/f )
+    get-char CHAR: [ =
+    [ next take-internal-subset ] [ f ] if ;

 : take-doctype-decl ( -- doctype-decl )
-    pass-blank " >" take-until-one-of {
-        { CHAR: \s [ nontrivial-doctype ] }
-        { CHAR: > [ f f ] }
-    } case <doctype-decl> ;
+    pass-blank take-name
+    pass-blank take-optional-id
+    pass-blank take-internal
+    <doctype-decl> close ;
: take-directive ( -- doctype ) : take-directive ( -- doctype )
take-name dup "DOCTYPE" = take-name dup "DOCTYPE" =
@ -151,12 +165,18 @@ DEFER: make-tag ! Is this unavoidable?
[ drop take-directive ] [ drop take-directive ]
} case ; } case ;
: normal-tag ( -- tag )
start-tag
[ dup add-ns pop-ns <closer> depth dec close ]
[ middle-tag end-tag ] if ;
: interpolate-tag ( -- interpolated )
[ "-" bad-name ] take-interpolated ;
: make-tag ( -- tag ) : make-tag ( -- tag )
{ {
{ [ get-char dup CHAR: ! = ] [ drop next direct ] } { [ get-char dup CHAR: ! = ] [ drop next direct ] }
-        { [ CHAR: ? = ] [ next instruct ] }
-        [
-            start-tag [ dup add-ns pop-ns <closer> depth dec close ]
-            [ middle-tag end-tag ] if
-        ]
+        { [ dup CHAR: ? = ] [ drop next instruct ] }
+        { [ dup CHAR: - = ] [ drop next interpolate-tag ] }
+        [ drop normal-tag ]
     } cond ;

View File

@ -1,7 +1,7 @@
! Copyright (C) 2005, 2006 Daniel Ehrenberg ! Copyright (C) 2005, 2006 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: namespaces make kernel assocs sequences fry values USING: namespaces make kernel assocs sequences fry values
io.files io.encodings.binary ; io.files io.encodings.binary xml.state ;
IN: xml.entities IN: xml.entities
: entities-out : entities-out
@ -37,7 +37,5 @@ IN: xml.entities
{ "quot" CHAR: " } { "quot" CHAR: " }
} ; } ;
SYMBOL: extra-entities
: with-entities ( entities quot -- ) : with-entities ( entities quot -- )
[ swap extra-entities set call ] with-scope ; inline [ swap extra-entities set call ] with-scope ; inline

View File

@ -1,13 +1,13 @@
! Copyright (C) 2009 Daniel Ehrenberg. ! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: assocs io.encodings.binary io.files kernel namespaces sequences USING: assocs io.encodings.binary io.files kernel namespaces sequences
values xml xml.entities ; values xml xml.entities accessors xml.state ;
IN: xml.entities.html IN: xml.entities.html
VALUE: html-entities VALUE: html-entities
: read-entities-file ( file -- table ) : read-entities-file ( file -- table )
file>dtd nip ; file>dtd entities>> ;
: get-html ( -- table ) : get-html ( -- table )
{ "lat1" "special" "symbol" } [ { "lat1" "special" "symbol" } [

View File

@ -1 +0,0 @@
Daniel Ehrenberg

View File

@ -1,3 +0,0 @@
USING: tools.test io.streams.string xml.generator xml.writer accessors ;
[ "<html><body><a href=\"blah\"/></body></html>" ]
[ "html" [ "body" [ "a" { { "href" "blah" } } contained*, ] tag, ] make-xml [ body>> write-xml-chunk ] with-string-writer ] unit-test

View File

@ -1,25 +0,0 @@
! Copyright (C) 2006, 2007 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: namespaces make kernel xml.data xml.utilities assocs
sequences ;
IN: xml.generator
: comment, ( string -- ) <comment> , ;
: instruction, ( string -- ) <instruction> , ;
: nl, ( -- ) "\n" , ;
: (tag,) ( name attrs quot -- tag )
-rot [ V{ } make ] 2dip rot <tag> ; inline
: tag*, ( name attrs quot -- )
(tag,) , ; inline
: contained*, ( name attrs -- )
f <tag> , ;
: tag, ( name quot -- ) f swap tag*, ; inline
: contained, ( name -- ) f contained*, ; inline
: make-xml* ( name attrs quot -- xml )
(tag,) build-xml ; inline
: make-xml ( name quot -- xml )
f swap make-xml* ; inline

View File

@ -1,4 +1,46 @@
! Copyright (C) 2009 Daniel Ehrenberg. ! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
-USING: tools.test xml.interpolate ;
+USING: tools.test xml.interpolate multiline kernel assocs
+sequences accessors xml.writer xml.interpolate.private
+locals splitting ;
IN: xml.interpolate.tests IN: xml.interpolate.tests
[ "a" "c" { "a" "c" f } ] [
"<?xml version='1.0'?><x><-a-><b val=<-c->/><-></x>"
interpolated-doc
[ second var>> ]
[ fourth "val" swap at var>> ]
[ extract-variables ] tri
] unit-test
[ {" <?xml version="1.0" encoding="UTF-8"?>
<x>
one
<b val="two"/>
y
<foo/>
</x>"} ] [
[let* | a [ "one" ] c [ "two" ] x [ "y" ]
d [ [XML <-x-> <foo/> XML] ] |
<XML
<x> <-a-> <b val=<-c->/> <-d-> </x>
XML> pprint-xml>string
]
] unit-test
[ {" <?xml version="1.0" encoding="UTF-8"?>
<doc>
<item>
one
</item>
<item>
two
</item>
<item>
three
</item>
</doc>"} ] [
"one two three" " " split
[ [XML <item><-></item> XML] ] map
<XML <doc><-></doc> XML> pprint-xml>string
] unit-test

View File

@ -1,4 +1,95 @@
! Copyright (C) 2009 Daniel Ehrenberg. ! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
-USING: ;
+USING: xml xml.state kernel sequences fry assocs xml.data
+accessors strings make multiline parser namespaces macros
+sequences.deep generalizations locals words combinators
+math ;
IN: xml.interpolate IN: xml.interpolate
<PRIVATE
: interpolated-chunk ( string -- chunk )
t interpolating? [ string>xml-chunk ] with-variable ;
: interpolated-doc ( string -- xml )
t interpolating? [ string>xml ] with-variable ;
DEFER: interpolate-sequence
: interpolate-attrs ( table attrs -- attrs )
swap '[ dup interpolated? [ var>> _ at ] when ] assoc-map ;
: interpolate-tag ( table tag -- tag )
[ nip name>> ]
[ attrs>> interpolate-attrs ]
[ children>> [ interpolate-sequence ] [ drop f ] if* ] 2tri
<tag> ;
GENERIC: push-item ( item -- )
M: string push-item , ;
M: object push-item , ;
M: sequence push-item % ;
GENERIC: interpolate-item ( table item -- )
M: object interpolate-item nip , ;
M: tag interpolate-item interpolate-tag , ;
M: interpolated interpolate-item
var>> swap at push-item ;
: interpolate-sequence ( table seq -- seq )
[ [ interpolate-item ] with each ] { } make ;
: interpolate-xml-doc ( table xml -- xml )
(clone) [ interpolate-tag ] change-body ;
GENERIC# (each-interpolated) 1 ( item quot -- ) inline
M: interpolated (each-interpolated) call ;
M: tag (each-interpolated)
swap attrs>> values
[ interpolated? ] filter
swap each ;
M: object (each-interpolated) 2drop ;
: each-interpolated ( xml quot -- )
'[ _ (each-interpolated) ] deep-each ; inline
:: number<-> ( doc -- doc )
0 :> n! doc [
dup var>> [ n >>var n 1+ n! ] unless drop
] each-interpolated doc ;
MACRO: interpolate-xml ( string -- doc )
interpolated-doc number<-> '[ _ interpolate-xml-doc ] ;
MACRO: interpolate-chunk ( string -- chunk )
interpolated-chunk number<-> '[ _ interpolate-sequence ] ;
: >search-hash ( seq -- hash )
[ dup search ] H{ } map>assoc ;
: extract-variables ( xml -- seq )
[ [ var>> , ] each-interpolated ] { } make ;
: collect ( accum seq -- accum )
{
{ [ dup [ ] all? ] [ >search-hash parsed ] } ! locals
{ [ dup [ not ] all? ] [ ! fry
length parsed \ narray parsed \ <enum> parsed
] }
[ drop "XML interpolation contains both fry and locals" throw ] ! mixed
} cond ;
: parse-def ( accum delimiter word -- accum )
[
parse-multiline-string
[ interpolated-chunk extract-variables collect ] keep
parsed
] dip parsed ;
PRIVATE>
: <XML
"XML>" \ interpolate-xml parse-def ; parsing
: [XML
"XML]" \ interpolate-chunk parse-def ; parsing

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel namespaces accessors xml.tokenize xml.data assocs USING: kernel namespaces accessors xml.tokenize xml.data assocs
xml.errors xml.char-classes combinators.short-circuit splitting xml.errors xml.char-classes combinators.short-circuit splitting
fry xml.state sequences ; fry xml.state sequences combinators ascii ;
IN: xml.name IN: xml.name
! XML namespace processing: ns = namespace ! XML namespace processing: ns = namespace
@ -74,3 +74,21 @@ SYMBOL: ns-stack
: parse-name-starting ( string -- name ) : parse-name-starting ( string -- name )
take-name append interpret-name ; take-name append interpret-name ;
: take-system-id ( -- system-id )
parse-quote <system-id> ;
: take-public-id ( -- public-id )
parse-quote parse-quote <public-id> ;
: (take-external-id) ( token -- external-id )
pass-blank {
{ "SYSTEM" [ take-system-id ] }
{ "PUBLIC" [ take-public-id ] }
[ bad-external-id ]
} case ;
: take-word ( -- string )
[ get-char blank? ] take-until ;
: take-external-id ( -- external-id )
take-word (take-external-id) ;

View File

@ -23,3 +23,11 @@ SYMBOL: xml-stack
SYMBOL: prolog-data SYMBOL: prolog-data
SYMBOL: depth SYMBOL: depth
SYMBOL: interpolating?
SYMBOL: in-dtd?
SYMBOL: pe-table
SYMBOL: extra-entities

View File

@ -7,6 +7,9 @@ IN: xml.test.state
: take-rest ( -- string ) : take-rest ( -- string )
[ f ] take-until ; [ f ] take-until ;
: take-char ( char -- string )
1string take-to ;
[ "hello" ] [ "hello" [ take-rest ] string-parse ] unit-test [ "hello" ] [ "hello" [ take-rest ] string-parse ] unit-test
[ 2 4 ] [ "12\n123" [ take-rest drop get-line get-column ] string-parse ] unit-test [ 2 4 ] [ "12\n123" [ take-rest drop get-line get-column ] string-parse ] unit-test
[ "hi" " how are you?" ] [ "hi how are you?" [ [ get-char blank? ] take-until take-rest ] string-parse ] unit-test [ "hi" " how are you?" ] [ "hi how are you?" [ [ get-char blank? ] take-until take-rest ] string-parse ] unit-test

View File

@ -49,10 +49,10 @@ SYMBOL: xml-file
[ "foo" ] [ "<boo><![CDATA[foo]]></boo>" string>xml children>string ] unit-test [ "foo" ] [ "<boo><![CDATA[foo]]></boo>" string>xml children>string ] unit-test
[ "<!-- B+, B, or B--->" string>xml ] must-fail [ "<!-- B+, B, or B--->" string>xml ] must-fail
[ ] [ "<?xml version='1.0'?><!-- declarations for <head> & <body> --><foo/>" string>xml drop ] unit-test [ ] [ "<?xml version='1.0'?><!-- declarations for <head> & <body> --><foo/>" string>xml drop ] unit-test
-[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>dtd drop second ] unit-test
+[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>dtd directives>> first ] unit-test
-[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>dtd drop second ] unit-test
+[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>dtd directives>> first ] unit-test
-[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>dtd drop second ] unit-test
+[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>dtd directives>> first ] unit-test
-[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>dtd drop second ] unit-test
+[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>dtd directives>> first ] unit-test
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk first ] unit-test [ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk first ] unit-test [ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk first ] unit-test [ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk first ] unit-test

View File

@ -1,8 +1,8 @@
! Copyright (C) 2008 Slava Pestov. ! Copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: xml io.encodings.utf8 io.files kernel tools.test ; USING: xml xml.data kernel tools.test ;
IN: xml.tests IN: xml.tests
-[ ] [
-    "resource:basis/xmode/xmode.dtd" file>dtd 2drop
+[ t ] [
+    "resource:basis/xmode/xmode.dtd" file>dtd dtd?
] unit-test ] unit-test

View File

@ -58,8 +58,8 @@ IN: xml.tokenize
'[ @ [ t ] [ get-char _ push f ] if ] skip-until '[ @ [ t ] [ get-char _ push f ] if ] skip-until
] keep >string ; inline ] keep >string ; inline
-: take-char ( ch -- string )
-    [ dup get-char = ] take-until nip ;
+: take-to ( seq -- string )
+    '[ get-char _ member? ] take-until ;
: pass-blank ( -- ) : pass-blank ( -- )
#! Advance code past any whitespace, including newlines #! Advance code past any whitespace, including newlines
@ -75,33 +75,29 @@ IN: xml.tokenize
dup length rot length 1- - head dup length rot length 1- - head
get-char [ missing-close ] unless next ; get-char [ missing-close ] unless next ;
-: expect ( ch -- )
-    get-char 2dup = [ 2drop ] [
-        [ 1string ] bi@ expected
-    ] if next ;
-
-: expect-string ( string -- )
+: expect ( string -- )
     dup [ get-char next ] replicate 2dup =
     [ 2drop ] [ expected ] if ;
! Suddenly XML-specific
: parse-named-entity ( string -- ) : parse-named-entity ( string -- )
dup entities at [ , ] [ dup entities at [ , ] [
dup extra-entities get at dup extra-entities get at
[ % ] [ no-entity ] ?if [ % ] [ no-entity ] ?if
] ?if ; ] ?if ;
: take-; ( -- string )
next ";" take-to next ;
: parse-entity ( -- ) : parse-entity ( -- )
-    next CHAR: ; take-char
-    "#" ?head [
+    next take-; "#" ?head [
         "x" ?head 16 10 ? base> ,
     ] [ parse-named-entity ] if ;
SYMBOL: pe-table
SYMBOL: in-dtd?
: parse-pe ( -- ) : parse-pe ( -- )
-    next CHAR: ; take-char dup
-    pe-table get at [ % ] [ no-entity ] ?if ;
+    next take-; dup pe-table get at
+    [ % ] [ no-entity ] ?if ;
:: (parse-char) ( quot: ( ch -- ? ) -- ) :: (parse-char) ( quot: ( ch -- ? ) -- )
get-char :> char get-char :> char
@ -131,7 +127,7 @@ SYMBOL: in-dtd?
] parse-char ; ] parse-char ;
: close ( -- ) : close ( -- )
pass-blank CHAR: > expect ; pass-blank ">" expect ;
: normalize-quote ( str -- str ) : normalize-quote ( str -- str )
[ dup "\t\r\n" member? [ drop CHAR: \s ] when ] map ; [ dup "\t\r\n" member? [ drop CHAR: \s ] when ] map ;

View File

@ -136,10 +136,10 @@ M: public-id write-xml-chunk
[ pubid-literal>> write "' '" write ] [ pubid-literal>> write "' '" write ]
[ system-literal>> write "'" write ] bi ; [ system-literal>> write "'" write ] bi ;
: write-internal-subset ( seq -- ) : write-internal-subset ( dtd -- )
[ [
"[" write indent "[" write indent
[ ?indent write-xml-chunk ] each directives>> [ ?indent write-xml-chunk ] each
unindent ?indent "]" write unindent ?indent "]" write
] when* ; ] when* ;

View File

@ -164,21 +164,15 @@ TUPLE: pull-xml scope ;
: file>xml ( filename -- xml ) : file>xml ( filename -- xml )
binary <file-reader> read-xml ; binary <file-reader> read-xml ;
-: (read-dtd) ( -- dtd )
-    ! should filter out blanks, throw error on non-dtd stuff
-    V{ } clone dup [ push ] curry sax-loop ;
-
-: read-dtd ( stream -- dtd entities )
+: read-dtd ( stream -- dtd )
     [
-        t in-dtd? set
         reset-prolog
         H{ } clone extra-entities set
-        (read-dtd)
-        extra-entities get
+        take-internal-subset
     ] with-state ;

-: file>dtd ( filename -- dtd entities )
+: file>dtd ( filename -- dtd )
     utf8 <file-reader> read-dtd ;

-: string>dtd ( string -- dtd entities )
+: string>dtd ( string -- dtd )
     <string-reader> read-dtd ;

View File

@ -22,9 +22,8 @@ $nl
{ $subsection 1string } { $subsection 1string }
"Since strings are sequences, basic string manipulation can be performed using sequence operations (" { $link "sequences" } "). More advanced functionality can be found in other vocabularies, including but not limited to:" "Since strings are sequences, basic string manipulation can be performed using sequence operations (" { $link "sequences" } "). More advanced functionality can be found in other vocabularies, including but not limited to:"
{ $list { $list
-{ { $vocab-link "ascii" } " - traditional ASCII character classes" }
-{ { $vocab-link "unicode.categories" } " - Unicode character classes" }
-{ { $vocab-link "unicode.case" } " - Unicode case conversion" }
+{ { $link "ascii" } " - ASCII algorithms for interoperability with legacy applications" }
+{ { $link "unicode" } " - Unicode algorithms for modern multilingual applications" }
{ { $vocab-link "regexp" } " - regular expressions" } { { $vocab-link "regexp" } " - regular expressions" }
{ { $vocab-link "peg" } " - parser expression grammars" } { { $vocab-link "peg" } " - parser expression grammars" }
} ; } ;

View File

@ -1,58 +0,0 @@
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: lexer parser splitting kernel quotations namespaces make
sequences assocs sequences.lib xml.generator xml.utilities
xml.data ;
IN: xml.syntax
: parsed-name ( accum -- accum )
scan ":" split1 [ f <name> ] [ <simple-name> ] if* parsed ;
: run-combinator ( accum quot1 quot2 -- accum )
>r [ ] like parsed r> [ parsed ] each ;
: parse-tag-contents ( accum contained? -- accum )
[ \ contained*, parsed ] [
scan-word \ [ =
[ POSTPONE: [ \ tag*, parsed ]
[ "Expected [ missing" throw ] if
] if ;
DEFER: >>
: attributes-parsed ( accum quot -- accum )
[ f parsed ] [
>r \ >r parsed r> parsed
[ H{ } make-assoc r> swap ] [ parsed ] each
] if-empty ;
: <<
parsed-name [
\ >> parse-until >quotation
attributes-parsed \ contained? get
] with-scope parse-tag-contents ; parsing
: ==
\ call parsed parsed-name \ set parsed ; parsing
: //
\ contained? on ; parsing
: parse-special ( accum end-token word -- accum )
>r parse-tokens " " join parsed r> parsed ;
: <!-- "-->" \ comment, parse-special ; parsing
: <! ">" \ directive, parse-special ; parsing
: <? "?>" \ instruction, parse-special ; parsing
: >xml-document ( seq -- xml )
dup first prolog? [ unclip-slice ] [ standard-prolog ] if swap
[ tag? ] split-around <xml> ;
DEFER: XML>
: <XML
\ XML> [ >quotation ] parse-literal
{ } parsed \ make parsed \ >xml-document parsed ; parsing