diff --git a/basis/io/streams/duplex/duplex-docs.factor b/basis/io/streams/duplex/duplex-docs.factor
index 48afafeec7..5bf33e9002 100644
--- a/basis/io/streams/duplex/duplex-docs.factor
+++ b/basis/io/streams/duplex/duplex-docs.factor
@@ -20,11 +20,11 @@ HELP: <duplex-stream>
 
 HELP: with-stream
 { $values { "stream" duplex-stream } { "quot" quotation } }
-{ $description "Calls the quotation in a new dynamic scope, with both " { $link input-stream } " and " { $link output-stream } " rebound to  " { $snippet "stream" } ". The stream is closed if the quotation returns or throws an error." } ;
+{ $description "Calls the quotation in a new dynamic scope, with both " { $link input-stream } " and " { $link output-stream } " rebound to " { $snippet "stream" } ", which must be a duplex stream. The stream is closed if the quotation returns or throws an error." } ;
 
 HELP: with-stream*
 { $values { "stream" duplex-stream } { "quot" quotation } }
-{ $description "Calls the quotation in a new dynamic scope, with both " { $link input-stream } " and " { $link output-stream } " rebound to  " { $snippet "stream" } "." }
+{ $description "Calls the quotation in a new dynamic scope, with both " { $link input-stream } " and " { $link output-stream } " rebound to " { $snippet "stream" } ", which must be a duplex stream." }
 { $notes "This word does not close the stream. Compare with " { $link with-stream } "." } ;
 
 HELP: <encoder-duplex>
diff --git a/basis/pack/pack-tests.factor b/basis/pack/pack-tests.factor
index b1a354cd4e..1be37292a0 100755
--- a/basis/pack/pack-tests.factor
+++ b/basis/pack/pack-tests.factor
@@ -46,3 +46,10 @@ pack strings tools.test ;
 [ f ] [ "" [ read-c-string ] with-string-reader ] unit-test
 [ 5 ] [ "FRAM\0\u000005\0\0\0\0\0\0\0" [ read-c-string drop read-u64 ] with-string-reader ] unit-test
 
+[ 9 ] [ "iic" packed-length ] unit-test
+[ "iii" read-packed-le ] must-infer
+[ "iii" read-packed-be ] must-infer
+[ "iii" read-packed-native ] must-infer
+[ "iii" unpack-le ] must-infer
+[ "iii" unpack-be ] must-infer
+[ "iii" unpack-native ] must-infer
diff --git a/basis/pack/pack.factor b/basis/pack/pack.factor
index 0e5cb7dbbc..136deb9ff5 100755
--- a/basis/pack/pack.factor
+++ b/basis/pack/pack.factor
@@ -1,7 +1,9 @@
+! Copyright (C) 2009 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
 USING: alien alien.c-types arrays assocs byte-arrays io
 io.binary io.streams.string kernel math math.parser namespaces
 make parser prettyprint quotations sequences strings vectors
-words macros math.functions math.bitwise ;
+words macros math.functions math.bitwise fry ;
 IN: pack
 
 SYMBOL: big-endian
@@ -9,6 +11,13 @@ SYMBOL: big-endian
 : big-endian? ( -- ? )
     1 <int> *char zero? ;
 
+<PRIVATE
+
+: set-big-endian ( -- )
+    big-endian? big-endian set ; inline
+
+PRIVATE>
+
 : >endian ( obj n -- str )
     big-endian get [ >be ] [ >le ] if ; inline
 
@@ -39,6 +48,8 @@ M: string b, ( n string -- ) heap-size b, ;
 : double, ( n -- ) double>bits 8 b, ;
 : c-string, ( str -- ) % 0 u8, ;
 
+<PRIVATE
+
 : (>128-ber) ( n -- )
     dup 0 > [
         [ HEX: 7f bitand HEX: 80 bitor , ] keep -7 shift
@@ -47,6 +58,8 @@ M: string b, ( n string -- ) heap-size b, ;
         drop
     ] if ;
 
+PRIVATE>
+
 : >128-ber ( n -- str )
     [
         [ HEX: 7f bitand , ] keep -7 shift
@@ -70,7 +83,7 @@ M: string b, ( n string -- ) heap-size b, ;
 : read-s32 ( -- n ) 4 read-signed ;
 : read-u32 ( -- n ) 4 read-unsigned ;
 : read-s64 ( -- n ) 8 read-signed ;
-: read-u64 ( -- n ) 8 read-signed ;
+: read-u64 ( -- n ) 8 read-unsigned ;
 : read-s128 ( -- n ) 16 read-signed ;
 : read-u128 ( -- n ) 16 read-unsigned ;
 
@@ -81,7 +94,7 @@ M: string b, ( n string -- ) heap-size b, ;
     8 read endian> bits>double ;
 
 : read-c-string ( -- str/f )
-    "\0" read-until [ drop f ] unless ;
+    "\0" read-until swap and ;
 
 : read-c-string* ( n -- str/f )
     read [ zero? ] trim-right [ f ] when-empty ;
@@ -94,7 +107,9 @@ M: string b, ( n string -- ) heap-size b, ;
 : read-128-ber ( -- n )
     0 (read-128-ber) ;
 
-: pack-table ( -- hash )
+<PRIVATE
+
+CONSTANT: pack-table
     H{
         { CHAR: c s8, }
         { CHAR: C u8, }
@@ -110,9 +125,9 @@ M: string b, ( n string -- ) heap-size b, ;
         { CHAR: F float, }
         { CHAR: d double, }
         { CHAR: D double, }
-    } ;
+    }
 
-: unpack-table ( -- hash )
+CONSTANT: unpack-table
     H{
         { CHAR: c read-s8 }
         { CHAR: C read-u8 }
@@ -128,47 +143,79 @@ M: string b, ( n string -- ) heap-size b, ;
         { CHAR: F read-float }
         { CHAR: d read-double }
         { CHAR: D read-double }
-    } ;
+    }
 
-MACRO: (pack) ( seq str -- quot )
-    [
-        [
-            [
-                swap , pack-table at ,
-            ] 2each
-        ] [ ] make 1quotation %
-       [ B{ } make ] %
-    ] [ ] make ;
+CONSTANT: packed-length-table
+    H{
+        { CHAR: c 1 }
+        { CHAR: C 1 }
+        { CHAR: s 2 }
+        { CHAR: S 2 }
+        { CHAR: t 3 }
+        { CHAR: T 3 }
+        { CHAR: i 4 }
+        { CHAR: I 4 }
+        { CHAR: q 8 }
+        { CHAR: Q 8 }
+        { CHAR: f 4 }
+        { CHAR: F 4 }
+        { CHAR: d 8 }
+        { CHAR: D 8 }
+    }
 
+MACRO: pack ( seq str -- quot )
+    [ pack-table at 1quotation '[ _ @ ] ] [ ] 2map-as concat
+    '[ _ B{ } make ] ;
+
+PRIVATE>
+ 
 : pack-native ( seq str -- seq )
-    [
-        big-endian? big-endian set (pack)
-    ] with-scope ;
+    [ set-big-endian pack ] with-scope ; inline
 
 : pack-be ( seq str -- seq )
-    [ big-endian on (pack) ] with-scope ;
+    [ big-endian on pack ] with-scope ; inline
 
 : pack-le ( seq str -- seq )
-    [ big-endian off (pack) ] with-scope ;
+    [ big-endian off pack ] with-scope ; inline
 
+<PRIVATE
 
-MACRO: (unpack) ( str -- quot )
-    [
-        [
-            [ unpack-table at , \ , , ] each
-        ] [ ] make
-        1quotation [ { } make ] append
-        1quotation %
-        \ with-string-reader ,
-    ] [ ] make ;
+MACRO: unpack ( str -- quot )
+    [ unpack-table at 1quotation '[ @ , ] ] { } map-as concat
+    '[ [ _ { } make ] with-string-reader ] ;
+
+PRIVATE>
 
 : unpack-native ( seq str -- seq )
-    [
-        big-endian? big-endian set (unpack)
-    ] with-scope ;
+    [ set-big-endian unpack ] with-scope ; inline
 
 : unpack-be ( seq str -- seq )
-    [ big-endian on (unpack) ] with-scope ;
+    [ big-endian on unpack ] with-scope ; inline
 
 : unpack-le ( seq str -- seq )
-    [ big-endian off (unpack) ] with-scope ;
+    [ big-endian off unpack ] with-scope ; inline
+
+: packed-length ( str -- n )
+    [ packed-length-table at ] sigma ;
+
+ERROR: packed-read-fail str bytes ;
+
+<PRIVATE
+
+: read-packed-bytes ( str -- bytes )
+    dup packed-length [ read dup length ] keep =
+    [ nip ] [ packed-read-fail ] if ; inline
+
+PRIVATE>
+
+: read-packed ( str quot -- seq )
+    [ read-packed-bytes ] swap bi ; inline
+
+: read-packed-le ( str -- seq )
+    [ unpack-le ] read-packed ; inline
+
+: read-packed-be ( str -- seq )
+    [ unpack-be ] read-packed ; inline
+
+: read-packed-native ( str -- seq )
+    [ unpack-native ] read-packed ; inline
diff --git a/basis/roman/roman.factor b/basis/roman/roman.factor
index 866ac92872..c9394b07ed 100644
--- a/basis/roman/roman.factor
+++ b/basis/roman/roman.factor
@@ -39,16 +39,14 @@ ERROR: roman-range-error n ;
 PRIVATE>
 
 : >roman ( n -- str )
-    dup roman-range-check [
-        (>roman)
-    ] "" make ;
+    dup roman-range-check
+    [ (>roman) ] "" make ;
 
 : >ROMAN ( n -- str ) >roman >upper ;
 
 : roman> ( str -- n )
-    >lower [ roman<= ] monotonic-split [
-        (roman>)
-    ] map sum ;
+    >lower [ roman<= ] monotonic-split
+    [ (roman>) ] sigma ;
 
 <PRIVATE
 
diff --git a/basis/syndication/syndication.factor b/basis/syndication/syndication.factor
index a6eaff4492..c82fe4006d 100644
--- a/basis/syndication/syndication.factor
+++ b/basis/syndication/syndication.factor
@@ -3,7 +3,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: xml.utilities kernel assocs xml.generator math.order
     strings sequences xml.data xml.writer
-    io.streams.string combinators xml xml.entities io.files io
+    io.streams.string combinators xml xml.entities.html io.files io
     http.client namespaces make xml.generator hashtables
     calendar.format accessors continuations urls present ;
 IN: syndication
diff --git a/basis/unicode/breaks/breaks.factor b/basis/unicode/breaks/breaks.factor
index 0524825d43..336d99657e 100644
--- a/basis/unicode/breaks/breaks.factor
+++ b/basis/unicode/breaks/breaks.factor
@@ -61,7 +61,7 @@ SYMBOL: table
 : eval-seq ( seq -- seq ) [ dup word? [ execute ] when ] map ;
 
 : (set-table) ( class1 class2 val -- )
-    -rot table get nth [ swap or ] change-nth ;
+    [ table get nth ] dip '[ _ or ] change-nth ;
 
 : set-table ( classes1 classes2 val -- )
     [ [ eval-seq ] bi@ ] dip
@@ -199,8 +199,8 @@ to: word-table
 : walk-down ( str i -- j )
     dupd (walk-down) [ 1- (walk-down) ] [ drop f ] if* ;
 
-: word-break? ( table-entry i str -- ? )
-    spin {
+: word-break? ( str i table-entry -- ? )
+    {
         { t [ 2drop f ] }
         { f [ 2drop t ] }
         { check-letter-after
@@ -214,10 +214,10 @@ to: word-table
     } case ;
 
 :: word-break-next ( old-class new-char i str -- next-class ? )
-    new-char dup format/extended?
-    [ drop old-class dup { 1 2 3 } member? ] [
-        word-break-prop old-class over word-table-nth
-        i str word-break?
+    new-char format/extended?
+    [ old-class dup { 1 2 3 } member? ] [
+        new-char word-break-prop old-class over word-table-nth
+        [ str i ] dip word-break?
     ] if ;
 
 PRIVATE>
diff --git a/basis/xml/entities/entities.factor b/basis/xml/entities/entities.factor
index 03de0f78d1..a3812c7723 100644
--- a/basis/xml/entities/entities.factor
+++ b/basis/xml/entities/entities.factor
@@ -1,6 +1,7 @@
 ! Copyright (C) 2005, 2006 Daniel Ehrenberg
 ! See http://factorcode.org/license.txt for BSD license.
-USING: namespaces make kernel assocs sequences fry ;
+USING: namespaces make kernel assocs sequences fry values
+io.files io.encodings.binary ;
 IN: xml.entities
 
 : entities-out
@@ -36,265 +37,7 @@ IN: xml.entities
         { "quot"  CHAR: "  }
     } ;
 
-: html-entities
-    #! generated from:
-    #! http://www.w3.org/TR/REC-html40/sgml/entities.html
-    H{
-        { "nbsp"   160 }
-        { "iexcl"  161 }
-        { "cent"   162 }
-        { "pound"  163 }
-        { "curren" 164 }
-        { "yen"    165 }
-        { "brvbar" 166 }
-        { "sect"   167 }
-        { "uml"    168 }
-        { "copy"   169 }
-        { "ordf"   170 }
-        { "laquo"  171 }
-        { "not"    172 }
-        { "shy"    173 }
-        { "reg"    174 }
-        { "macr"   175 }
-        { "deg"    176 }
-        { "plusmn" 177 }
-        { "sup2"   178 }
-        { "sup3"   179 }
-        { "acute"  180 }
-        { "micro"  181 }
-        { "para"   182 }
-        { "middot" 183 }
-        { "cedil"  184 }
-        { "sup1"   185 }
-        { "ordm"   186 }
-        { "raquo"  187 }
-        { "frac14" 188 }
-        { "frac12" 189 }
-        { "frac34" 190 }
-        { "iquest" 191 }
-        { "Agrave" 192 }
-        { "Aacute" 193 }
-        { "Acirc"  194 }
-        { "Atilde" 195 }
-        { "Auml"   196 }
-        { "Aring"  197 }
-        { "AElig"  198 }
-        { "Ccedil" 199 }
-        { "Egrave" 200 }
-        { "Eacute" 201 }
-        { "Ecirc"  202 }
-        { "Euml"   203 }
-        { "Igrave" 204 }
-        { "Iacute" 205 }
-        { "Icirc"  206 }
-        { "Iuml"   207 }
-        { "ETH"    208 }
-        { "Ntilde" 209 }
-        { "Ograve" 210 }
-        { "Oacute" 211 }
-        { "Ocirc"  212 }
-        { "Otilde" 213 }
-        { "Ouml"   214 }
-        { "times"  215 }
-        { "Oslash" 216 }
-        { "Ugrave" 217 }
-        { "Uacute" 218 }
-        { "Ucirc"  219 }
-        { "Uuml"   220 }
-        { "Yacute" 221 }
-        { "THORN"  222 }
-        { "szlig"  223 }
-        { "agrave" 224 }
-        { "aacute" 225 }
-        { "acirc"  226 }
-        { "atilde" 227 }
-        { "auml"   228 }
-        { "aring"  229 }
-        { "aelig"  230 }
-        { "ccedil" 231 }
-        { "egrave" 232 }
-        { "eacute" 233 }
-        { "ecirc"  234 }
-        { "euml"   235 }
-        { "igrave" 236 }
-        { "iacute" 237 }
-        { "icirc"  238 }
-        { "iuml"   239 }
-        { "eth"    240 }
-        { "ntilde" 241 }
-        { "ograve" 242 }
-        { "oacute" 243 }
-        { "ocirc"  244 }
-        { "otilde" 245 }
-        { "ouml"   246 }
-        { "divide" 247 }
-        { "oslash" 248 }
-        { "ugrave" 249 }
-        { "uacute" 250 }
-        { "ucirc"  251 }
-        { "uuml"   252 }
-        { "yacute" 253 }
-        { "thorn"  254 }
-        { "yuml"   255 }
-        { "fnof"     402 }
-        { "Alpha"    913 }
-        { "Beta"     914 }
-        { "Gamma"    915 }
-        { "Delta"    916 }
-        { "Epsilon"  917 }
-        { "Zeta"     918 }
-        { "Eta"      919 }
-        { "Theta"    920 }
-        { "Iota"     921 }
-        { "Kappa"    922 }
-        { "Lambda"   923 }
-        { "Mu"       924 }
-        { "Nu"       925 }
-        { "Xi"       926 }
-        { "Omicron"  927 }
-        { "Pi"       928 }
-        { "Rho"      929 }
-        { "Sigma"    931 }
-        { "Tau"      932 }
-        { "Upsilon"  933 }
-        { "Phi"      934 }
-        { "Chi"      935 }
-        { "Psi"      936 }
-        { "Omega"    937 }
-        { "alpha"    945 }
-        { "beta"     946 }
-        { "gamma"    947 }
-        { "delta"    948 }
-        { "epsilon"  949 }
-        { "zeta"     950 }
-        { "eta"      951 }
-        { "theta"    952 }
-        { "iota"     953 }
-        { "kappa"    954 }
-        { "lambda"   955 }
-        { "mu"       956 }
-        { "nu"       957 }
-        { "xi"       958 }
-        { "omicron"  959 }
-        { "pi"       960 }
-        { "rho"      961 }
-        { "sigmaf"   962 }
-        { "sigma"    963 }
-        { "tau"      964 }
-        { "upsilon"  965 }
-        { "phi"      966 }
-        { "chi"      967 }
-        { "psi"      968 }
-        { "omega"    969 }
-        { "thetasym" 977 }
-        { "upsih"    978 }
-        { "piv"      982 }
-        { "bull"     8226 }
-        { "hellip"   8230 }
-        { "prime"    8242 }
-        { "Prime"    8243 }
-        { "oline"    8254 }
-        { "frasl"    8260 }
-        { "weierp"   8472 }
-        { "image"    8465 }
-        { "real"     8476 }
-        { "trade"    8482 }
-        { "alefsym"  8501 }
-        { "larr"     8592 }
-        { "uarr"     8593 }
-        { "rarr"     8594 }
-        { "darr"     8595 }
-        { "harr"     8596 }
-        { "crarr"    8629 }
-        { "lArr"     8656 }
-        { "uArr"     8657 }
-        { "rArr"     8658 }
-        { "dArr"     8659 }
-        { "hArr"     8660 }
-        { "forall"   8704 }
-        { "part"     8706 }
-        { "exist"    8707 }
-        { "empty"    8709 }
-        { "nabla"    8711 }
-        { "isin"     8712 }
-        { "notin"    8713 }
-        { "ni"       8715 }
-        { "prod"     8719 }
-        { "sum"      8721 }
-        { "minus"    8722 }
-        { "lowast"   8727 }
-        { "radic"    8730 }
-        { "prop"     8733 }
-        { "infin"    8734 }
-        { "ang"      8736 }
-        { "and"      8743 }
-        { "or"       8744 }
-        { "cap"      8745 }
-        { "cup"      8746 }
-        { "int"      8747 }
-        { "there4"   8756 }
-        { "sim"      8764 }
-        { "cong"     8773 }
-        { "asymp"    8776 }
-        { "ne"       8800 }
-        { "equiv"    8801 }
-        { "le"       8804 }
-        { "ge"       8805 }
-        { "sub"      8834 }
-        { "sup"      8835 }
-        { "nsub"     8836 }
-        { "sube"     8838 }
-        { "supe"     8839 }
-        { "oplus"    8853 }
-        { "otimes"   8855 }
-        { "perp"     8869 }
-        { "sdot"     8901 }
-        { "lceil"    8968 }
-        { "rceil"    8969 }
-        { "lfloor"   8970 }
-        { "rfloor"   8971 }
-        { "lang"     9001 }
-        { "rang"     9002 }
-        { "loz"      9674 }
-        { "spades"   9824 }
-        { "clubs"    9827 }
-        { "hearts"   9829 }
-        { "diams"    9830 }
-        { "OElig"   338 }
-        { "oelig"   339 }
-        { "Scaron"  352 }
-        { "scaron"  353 }
-        { "Yuml"    376 }
-        { "circ"    710 }
-        { "tilde"   732 }
-        { "ensp"    8194 }
-        { "emsp"    8195 }
-        { "thinsp"  8201 }
-        { "zwnj"    8204 }
-        { "zwj"     8205 }
-        { "lrm"     8206 }
-        { "rlm"     8207 }
-        { "ndash"   8211 }
-        { "mdash"   8212 }
-        { "lsquo"   8216 }
-        { "rsquo"   8217 }
-        { "sbquo"   8218 }
-        { "ldquo"   8220 }
-        { "rdquo"   8221 }
-        { "bdquo"   8222 }
-        { "dagger"  8224 }
-        { "Dagger"  8225 }
-        { "permil"  8240 }
-        { "lsaquo"  8249 }
-        { "rsaquo"  8250 }
-        { "euro"   8364 }
-    } ;
-
 SYMBOL: extra-entities
-f extra-entities set-global
 
 : with-entities ( entities quot -- )
     [ swap extra-entities set call ] with-scope ; inline
-
-: with-html-entities ( quot -- )
-    html-entities swap with-entities ; inline
diff --git a/basis/xml/entities/html/authors.txt b/basis/xml/entities/html/authors.txt
new file mode 100644
index 0000000000..29e79639ae
--- /dev/null
+++ b/basis/xml/entities/html/authors.txt
@@ -0,0 +1 @@
+Daniel Ehrenberg
\ No newline at end of file
diff --git a/basis/xml/entities/html/html-tests.factor b/basis/xml/entities/html/html-tests.factor
new file mode 100644
index 0000000000..68b10bebe7
--- /dev/null
+++ b/basis/xml/entities/html/html-tests.factor
@@ -0,0 +1,4 @@
+! Copyright (C) 2009 Daniel Ehrenberg.
+! See http://factorcode.org/license.txt for BSD license.
+USING: tools.test xml.entities.html ;
+IN: xml.entities.html.tests
diff --git a/basis/xml/entities/html/html.factor b/basis/xml/entities/html/html.factor
new file mode 100644
index 0000000000..6f2732f1d9
--- /dev/null
+++ b/basis/xml/entities/html/html.factor
@@ -0,0 +1,22 @@
+! Copyright (C) 2009 Daniel Ehrenberg.
+! See http://factorcode.org/license.txt for BSD license.
+USING: assocs io.encodings.binary io.files kernel namespaces sequences
+values xml xml.entities ;
+IN: xml.entities.html
+
+VALUE: html-entities
+
+: read-entities-file ( file -- table )
+    f swap binary <file-reader>
+    [ 2drop extra-entities get ] sax ;
+
+: get-html ( -- table )
+    { "lat1" "special" "symbol" } [
+        "resource:basis/xml/entities/html/xhtml-"
+        swap ".ent" 3append read-entities-file
+    ] map first3 assoc-union assoc-union ;
+
+get-html to: html-entities
+
+: with-html-entities ( quot -- )
+    html-entities swap with-entities ; inline
diff --git a/basis/xml/entities/html/xhtml-lat1.ent b/basis/xml/entities/html/xhtml-lat1.ent
new file mode 100644
index 0000000000..ffee223eb1
--- /dev/null
+++ b/basis/xml/entities/html/xhtml-lat1.ent
@@ -0,0 +1,196 @@
+<!-- Portions (C) International Organization for Standardization 1986
+     Permission to copy in any form is granted for use with
+     conforming SGML systems and applications as defined in
+     ISO 8879, provided this notice is included in all copies.
+-->
+<!-- Character entity set. Typical invocation:
+    <!ENTITY % HTMLlat1 PUBLIC
+       "-//W3C//ENTITIES Latin 1 for XHTML//EN"
+       "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent">
+    %HTMLlat1;
+-->
+
+<!ENTITY nbsp   "&#160;"> <!-- no-break space = non-breaking space,
+                                  U+00A0 ISOnum -->
+<!ENTITY iexcl  "&#161;"> <!-- inverted exclamation mark, U+00A1 ISOnum -->
+<!ENTITY cent   "&#162;"> <!-- cent sign, U+00A2 ISOnum -->
+<!ENTITY pound  "&#163;"> <!-- pound sign, U+00A3 ISOnum -->
+<!ENTITY curren "&#164;"> <!-- currency sign, U+00A4 ISOnum -->
+<!ENTITY yen    "&#165;"> <!-- yen sign = yuan sign, U+00A5 ISOnum -->
+<!ENTITY brvbar "&#166;"> <!-- broken bar = broken vertical bar,
+                                  U+00A6 ISOnum -->
+<!ENTITY sect   "&#167;"> <!-- section sign, U+00A7 ISOnum -->
+<!ENTITY uml    "&#168;"> <!-- diaeresis = spacing diaeresis,
+                                  U+00A8 ISOdia -->
+<!ENTITY copy   "&#169;"> <!-- copyright sign, U+00A9 ISOnum -->
+<!ENTITY ordf   "&#170;"> <!-- feminine ordinal indicator, U+00AA ISOnum -->
+<!ENTITY laquo  "&#171;"> <!-- left-pointing double angle quotation mark
+                                  = left pointing guillemet, U+00AB ISOnum -->
+<!ENTITY not    "&#172;"> <!-- not sign = angled dash,
+                                  U+00AC ISOnum -->
+<!ENTITY shy    "&#173;"> <!-- soft hyphen = discretionary hyphen,
+                                  U+00AD ISOnum -->
+<!ENTITY reg    "&#174;"> <!-- registered sign = registered trade mark sign,
+                                  U+00AE ISOnum -->
+<!ENTITY macr   "&#175;"> <!-- macron = spacing macron = overline
+                                  = APL overbar, U+00AF ISOdia -->
+<!ENTITY deg    "&#176;"> <!-- degree sign, U+00B0 ISOnum -->
+<!ENTITY plusmn "&#177;"> <!-- plus-minus sign = plus-or-minus sign,
+                                  U+00B1 ISOnum -->
+<!ENTITY sup2   "&#178;"> <!-- superscript two = superscript digit two
+                                  = squared, U+00B2 ISOnum -->
+<!ENTITY sup3   "&#179;"> <!-- superscript three = superscript digit three
+                                  = cubed, U+00B3 ISOnum -->
+<!ENTITY acute  "&#180;"> <!-- acute accent = spacing acute,
+                                  U+00B4 ISOdia -->
+<!ENTITY micro  "&#181;"> <!-- micro sign, U+00B5 ISOnum -->
+<!ENTITY para   "&#182;"> <!-- pilcrow sign = paragraph sign,
+                                  U+00B6 ISOnum -->
+<!ENTITY middot "&#183;"> <!-- middle dot = Georgian comma
+                                  = Greek middle dot, U+00B7 ISOnum -->
+<!ENTITY cedil  "&#184;"> <!-- cedilla = spacing cedilla, U+00B8 ISOdia -->
+<!ENTITY sup1   "&#185;"> <!-- superscript one = superscript digit one,
+                                  U+00B9 ISOnum -->
+<!ENTITY ordm   "&#186;"> <!-- masculine ordinal indicator,
+                                  U+00BA ISOnum -->
+<!ENTITY raquo  "&#187;"> <!-- right-pointing double angle quotation mark
+                                  = right pointing guillemet, U+00BB ISOnum -->
+<!ENTITY frac14 "&#188;"> <!-- vulgar fraction one quarter
+                                  = fraction one quarter, U+00BC ISOnum -->
+<!ENTITY frac12 "&#189;"> <!-- vulgar fraction one half
+                                  = fraction one half, U+00BD ISOnum -->
+<!ENTITY frac34 "&#190;"> <!-- vulgar fraction three quarters
+                                  = fraction three quarters, U+00BE ISOnum -->
+<!ENTITY iquest "&#191;"> <!-- inverted question mark
+                                  = turned question mark, U+00BF ISOnum -->
+<!ENTITY Agrave "&#192;"> <!-- latin capital letter A with grave
+                                  = latin capital letter A grave,
+                                  U+00C0 ISOlat1 -->
+<!ENTITY Aacute "&#193;"> <!-- latin capital letter A with acute,
+                                  U+00C1 ISOlat1 -->
+<!ENTITY Acirc  "&#194;"> <!-- latin capital letter A with circumflex,
+                                  U+00C2 ISOlat1 -->
+<!ENTITY Atilde "&#195;"> <!-- latin capital letter A with tilde,
+                                  U+00C3 ISOlat1 -->
+<!ENTITY Auml   "&#196;"> <!-- latin capital letter A with diaeresis,
+                                  U+00C4 ISOlat1 -->
+<!ENTITY Aring  "&#197;"> <!-- latin capital letter A with ring above
+                                  = latin capital letter A ring,
+                                  U+00C5 ISOlat1 -->
+<!ENTITY AElig  "&#198;"> <!-- latin capital letter AE
+                                  = latin capital ligature AE,
+                                  U+00C6 ISOlat1 -->
+<!ENTITY Ccedil "&#199;"> <!-- latin capital letter C with cedilla,
+                                  U+00C7 ISOlat1 -->
+<!ENTITY Egrave "&#200;"> <!-- latin capital letter E with grave,
+                                  U+00C8 ISOlat1 -->
+<!ENTITY Eacute "&#201;"> <!-- latin capital letter E with acute,
+                                  U+00C9 ISOlat1 -->
+<!ENTITY Ecirc  "&#202;"> <!-- latin capital letter E with circumflex,
+                                  U+00CA ISOlat1 -->
+<!ENTITY Euml   "&#203;"> <!-- latin capital letter E with diaeresis,
+                                  U+00CB ISOlat1 -->
+<!ENTITY Igrave "&#204;"> <!-- latin capital letter I with grave,
+                                  U+00CC ISOlat1 -->
+<!ENTITY Iacute "&#205;"> <!-- latin capital letter I with acute,
+                                  U+00CD ISOlat1 -->
+<!ENTITY Icirc  "&#206;"> <!-- latin capital letter I with circumflex,
+                                  U+00CE ISOlat1 -->
+<!ENTITY Iuml   "&#207;"> <!-- latin capital letter I with diaeresis,
+                                  U+00CF ISOlat1 -->
+<!ENTITY ETH    "&#208;"> <!-- latin capital letter ETH, U+00D0 ISOlat1 -->
+<!ENTITY Ntilde "&#209;"> <!-- latin capital letter N with tilde,
+                                  U+00D1 ISOlat1 -->
+<!ENTITY Ograve "&#210;"> <!-- latin capital letter O with grave,
+                                  U+00D2 ISOlat1 -->
+<!ENTITY Oacute "&#211;"> <!-- latin capital letter O with acute,
+                                  U+00D3 ISOlat1 -->
+<!ENTITY Ocirc  "&#212;"> <!-- latin capital letter O with circumflex,
+                                  U+00D4 ISOlat1 -->
+<!ENTITY Otilde "&#213;"> <!-- latin capital letter O with tilde,
+                                  U+00D5 ISOlat1 -->
+<!ENTITY Ouml   "&#214;"> <!-- latin capital letter O with diaeresis,
+                                  U+00D6 ISOlat1 -->
+<!ENTITY times  "&#215;"> <!-- multiplication sign, U+00D7 ISOnum -->
+<!ENTITY Oslash "&#216;"> <!-- latin capital letter O with stroke
+                                  = latin capital letter O slash,
+                                  U+00D8 ISOlat1 -->
+<!ENTITY Ugrave "&#217;"> <!-- latin capital letter U with grave,
+                                  U+00D9 ISOlat1 -->
+<!ENTITY Uacute "&#218;"> <!-- latin capital letter U with acute,
+                                  U+00DA ISOlat1 -->
+<!ENTITY Ucirc  "&#219;"> <!-- latin capital letter U with circumflex,
+                                  U+00DB ISOlat1 -->
+<!ENTITY Uuml   "&#220;"> <!-- latin capital letter U with diaeresis,
+                                  U+00DC ISOlat1 -->
+<!ENTITY Yacute "&#221;"> <!-- latin capital letter Y with acute,
+                                  U+00DD ISOlat1 -->
+<!ENTITY THORN  "&#222;"> <!-- latin capital letter THORN,
+                                  U+00DE ISOlat1 -->
+<!ENTITY szlig  "&#223;"> <!-- latin small letter sharp s = ess-zed,
+                                  U+00DF ISOlat1 -->
+<!ENTITY agrave "&#224;"> <!-- latin small letter a with grave
+                                  = latin small letter a grave,
+                                  U+00E0 ISOlat1 -->
+<!ENTITY aacute "&#225;"> <!-- latin small letter a with acute,
+                                  U+00E1 ISOlat1 -->
+<!ENTITY acirc  "&#226;"> <!-- latin small letter a with circumflex,
+                                  U+00E2 ISOlat1 -->
+<!ENTITY atilde "&#227;"> <!-- latin small letter a with tilde,
+                                  U+00E3 ISOlat1 -->
+<!ENTITY auml   "&#228;"> <!-- latin small letter a with diaeresis,
+                                  U+00E4 ISOlat1 -->
+<!ENTITY aring  "&#229;"> <!-- latin small letter a with ring above
+                                  = latin small letter a ring,
+                                  U+00E5 ISOlat1 -->
+<!ENTITY aelig  "&#230;"> <!-- latin small letter ae
+                                  = latin small ligature ae, U+00E6 ISOlat1 -->
+<!ENTITY ccedil "&#231;"> <!-- latin small letter c with cedilla,
+                                  U+00E7 ISOlat1 -->
+<!ENTITY egrave "&#232;"> <!-- latin small letter e with grave,
+                                  U+00E8 ISOlat1 -->
+<!ENTITY eacute "&#233;"> <!-- latin small letter e with acute,
+                                  U+00E9 ISOlat1 -->
+<!ENTITY ecirc  "&#234;"> <!-- latin small letter e with circumflex,
+                                  U+00EA ISOlat1 -->
+<!ENTITY euml   "&#235;"> <!-- latin small letter e with diaeresis,
+                                  U+00EB ISOlat1 -->
+<!ENTITY igrave "&#236;"> <!-- latin small letter i with grave,
+                                  U+00EC ISOlat1 -->
+<!ENTITY iacute "&#237;"> <!-- latin small letter i with acute,
+                                  U+00ED ISOlat1 -->
+<!ENTITY icirc  "&#238;"> <!-- latin small letter i with circumflex,
+                                  U+00EE ISOlat1 -->
+<!ENTITY iuml   "&#239;"> <!-- latin small letter i with diaeresis,
+                                  U+00EF ISOlat1 -->
+<!ENTITY eth    "&#240;"> <!-- latin small letter eth, U+00F0 ISOlat1 -->
+<!ENTITY ntilde "&#241;"> <!-- latin small letter n with tilde,
+                                  U+00F1 ISOlat1 -->
+<!ENTITY ograve "&#242;"> <!-- latin small letter o with grave,
+                                  U+00F2 ISOlat1 -->
+<!ENTITY oacute "&#243;"> <!-- latin small letter o with acute,
+                                  U+00F3 ISOlat1 -->
+<!ENTITY ocirc  "&#244;"> <!-- latin small letter o with circumflex,
+                                  U+00F4 ISOlat1 -->
+<!ENTITY otilde "&#245;"> <!-- latin small letter o with tilde,
+                                  U+00F5 ISOlat1 -->
+<!ENTITY ouml   "&#246;"> <!-- latin small letter o with diaeresis,
+                                  U+00F6 ISOlat1 -->
+<!ENTITY divide "&#247;"> <!-- division sign, U+00F7 ISOnum -->
+<!ENTITY oslash "&#248;"> <!-- latin small letter o with stroke,
+                                  = latin small letter o slash,
+                                  U+00F8 ISOlat1 -->
+<!ENTITY ugrave "&#249;"> <!-- latin small letter u with grave,
+                                  U+00F9 ISOlat1 -->
+<!ENTITY uacute "&#250;"> <!-- latin small letter u with acute,
+                                  U+00FA ISOlat1 -->
+<!ENTITY ucirc  "&#251;"> <!-- latin small letter u with circumflex,
+                                  U+00FB ISOlat1 -->
+<!ENTITY uuml   "&#252;"> <!-- latin small letter u with diaeresis,
+                                  U+00FC ISOlat1 -->
+<!ENTITY yacute "&#253;"> <!-- latin small letter y with acute,
+                                  U+00FD ISOlat1 -->
+<!ENTITY thorn  "&#254;"> <!-- latin small letter thorn,
+                                  U+00FE ISOlat1 -->
+<!ENTITY yuml   "&#255;"> <!-- latin small letter y with diaeresis,
+                                  U+00FF ISOlat1 -->
diff --git a/basis/xml/entities/html/xhtml-special.ent b/basis/xml/entities/html/xhtml-special.ent
new file mode 100644
index 0000000000..ca358b2fec
--- /dev/null
+++ b/basis/xml/entities/html/xhtml-special.ent
@@ -0,0 +1,80 @@
+<!-- Special characters for XHTML -->
+
+<!-- Character entity set. Typical invocation:
+     <!ENTITY % HTMLspecial PUBLIC
+        "-//W3C//ENTITIES Special for XHTML//EN"
+        "http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent">
+     %HTMLspecial;
+-->
+
+<!-- Portions (C) International Organization for Standardization 1986:
+     Permission to copy in any form is granted for use with
+     conforming SGML systems and applications as defined in
+     ISO 8879, provided this notice is included in all copies.
+-->
+
+<!-- Relevant ISO entity set is given unless names are newly introduced.
+     New names (i.e., not in ISO 8879 list) do not clash with any
+     existing ISO 8879 entity names. ISO 10646 character numbers
+     are given for each character, in hex. values are decimal
+     conversions of the ISO 10646 values and refer to the document
+     character set. Names are Unicode names. 
+-->
+
+<!-- C0 Controls and Basic Latin -->
+<!ENTITY quot    "&#34;"> <!--  quotation mark, U+0022 ISOnum -->
+<!ENTITY amp     "&#38;#38;"> <!--  ampersand, U+0026 ISOnum -->
+<!ENTITY lt      "&#38;#60;"> <!--  less-than sign, U+003C ISOnum -->
+<!ENTITY gt      "&#62;"> <!--  greater-than sign, U+003E ISOnum -->
+<!ENTITY apos	 "&#39;"> <!--  apostrophe = APL quote, U+0027 ISOnum -->
+
+<!-- Latin Extended-A -->
+<!ENTITY OElig   "&#338;"> <!--  latin capital ligature OE,
+                                    U+0152 ISOlat2 -->
+<!ENTITY oelig   "&#339;"> <!--  latin small ligature oe, U+0153 ISOlat2 -->
+<!-- ligature is a misnomer, this is a separate character in some languages -->
+<!ENTITY Scaron  "&#352;"> <!--  latin capital letter S with caron,
+                                    U+0160 ISOlat2 -->
+<!ENTITY scaron  "&#353;"> <!--  latin small letter s with caron,
+                                    U+0161 ISOlat2 -->
+<!ENTITY Yuml    "&#376;"> <!--  latin capital letter Y with diaeresis,
+                                    U+0178 ISOlat2 -->
+
+<!-- Spacing Modifier Letters -->
+<!ENTITY circ    "&#710;"> <!--  modifier letter circumflex accent,
+                                    U+02C6 ISOpub -->
+<!ENTITY tilde   "&#732;"> <!--  small tilde, U+02DC ISOdia -->
+
+<!-- General Punctuation -->
+<!ENTITY ensp    "&#8194;"> <!-- en space, U+2002 ISOpub -->
+<!ENTITY emsp    "&#8195;"> <!-- em space, U+2003 ISOpub -->
+<!ENTITY thinsp  "&#8201;"> <!-- thin space, U+2009 ISOpub -->
+<!ENTITY zwnj    "&#8204;"> <!-- zero width non-joiner,
+                                    U+200C NEW RFC 2070 -->
+<!ENTITY zwj     "&#8205;"> <!-- zero width joiner, U+200D NEW RFC 2070 -->
+<!ENTITY lrm     "&#8206;"> <!-- left-to-right mark, U+200E NEW RFC 2070 -->
+<!ENTITY rlm     "&#8207;"> <!-- right-to-left mark, U+200F NEW RFC 2070 -->
+<!ENTITY ndash   "&#8211;"> <!-- en dash, U+2013 ISOpub -->
+<!ENTITY mdash   "&#8212;"> <!-- em dash, U+2014 ISOpub -->
+<!ENTITY lsquo   "&#8216;"> <!-- left single quotation mark,
+                                    U+2018 ISOnum -->
+<!ENTITY rsquo   "&#8217;"> <!-- right single quotation mark,
+                                    U+2019 ISOnum -->
+<!ENTITY sbquo   "&#8218;"> <!-- single low-9 quotation mark, U+201A NEW -->
+<!ENTITY ldquo   "&#8220;"> <!-- left double quotation mark,
+                                    U+201C ISOnum -->
+<!ENTITY rdquo   "&#8221;"> <!-- right double quotation mark,
+                                    U+201D ISOnum -->
+<!ENTITY bdquo   "&#8222;"> <!-- double low-9 quotation mark, U+201E NEW -->
+<!ENTITY dagger  "&#8224;"> <!-- dagger, U+2020 ISOpub -->
+<!ENTITY Dagger  "&#8225;"> <!-- double dagger, U+2021 ISOpub -->
+<!ENTITY permil  "&#8240;"> <!-- per mille sign, U+2030 ISOtech -->
+<!ENTITY lsaquo  "&#8249;"> <!-- single left-pointing angle quotation mark,
+                                    U+2039 ISO proposed -->
+<!-- lsaquo is proposed but not yet ISO standardized -->
+<!ENTITY rsaquo  "&#8250;"> <!-- single right-pointing angle quotation mark,
+                                    U+203A ISO proposed -->
+<!-- rsaquo is proposed but not yet ISO standardized -->
+
+<!-- Currency Symbols -->
+<!ENTITY euro   "&#8364;"> <!--  euro sign, U+20AC NEW -->
diff --git a/basis/xml/entities/html/xhtml-symbol.ent b/basis/xml/entities/html/xhtml-symbol.ent
new file mode 100644
index 0000000000..63c2abfa6f
--- /dev/null
+++ b/basis/xml/entities/html/xhtml-symbol.ent
@@ -0,0 +1,237 @@
+<!-- Mathematical, Greek and Symbolic characters for XHTML -->
+
+<!-- Character entity set. Typical invocation:
+     <!ENTITY % HTMLsymbol PUBLIC
+        "-//W3C//ENTITIES Symbols for XHTML//EN"
+        "http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent">
+     %HTMLsymbol;
+-->
+
+<!-- Portions (C) International Organization for Standardization 1986:
+     Permission to copy in any form is granted for use with
+     conforming SGML systems and applications as defined in
+     ISO 8879, provided this notice is included in all copies.
+-->
+
+<!-- Relevant ISO entity set is given unless names are newly introduced.
+     New names (i.e., not in ISO 8879 list) do not clash with any
+     existing ISO 8879 entity names. ISO 10646 character numbers
+     are given for each character, in hex. values are decimal
+     conversions of the ISO 10646 values and refer to the document
+     character set. Names are Unicode names. 
+-->
+
+<!-- Latin Extended-B -->
+<!ENTITY fnof     "&#402;"> <!-- latin small letter f with hook = function
+                                    = florin, U+0192 ISOtech -->
+
+<!-- Greek -->
+<!ENTITY Alpha    "&#913;"> <!-- greek capital letter alpha, U+0391 -->
+<!ENTITY Beta     "&#914;"> <!-- greek capital letter beta, U+0392 -->
+<!ENTITY Gamma    "&#915;"> <!-- greek capital letter gamma,
+                                    U+0393 ISOgrk3 -->
+<!ENTITY Delta    "&#916;"> <!-- greek capital letter delta,
+                                    U+0394 ISOgrk3 -->
+<!ENTITY Epsilon  "&#917;"> <!-- greek capital letter epsilon, U+0395 -->
+<!ENTITY Zeta     "&#918;"> <!-- greek capital letter zeta, U+0396 -->
+<!ENTITY Eta      "&#919;"> <!-- greek capital letter eta, U+0397 -->
+<!ENTITY Theta    "&#920;"> <!-- greek capital letter theta,
+                                    U+0398 ISOgrk3 -->
+<!ENTITY Iota     "&#921;"> <!-- greek capital letter iota, U+0399 -->
+<!ENTITY Kappa    "&#922;"> <!-- greek capital letter kappa, U+039A -->
+<!ENTITY Lambda   "&#923;"> <!-- greek capital letter lamda,
+                                    U+039B ISOgrk3 -->
+<!ENTITY Mu       "&#924;"> <!-- greek capital letter mu, U+039C -->
+<!ENTITY Nu       "&#925;"> <!-- greek capital letter nu, U+039D -->
+<!ENTITY Xi       "&#926;"> <!-- greek capital letter xi, U+039E ISOgrk3 -->
+<!ENTITY Omicron  "&#927;"> <!-- greek capital letter omicron, U+039F -->
+<!ENTITY Pi       "&#928;"> <!-- greek capital letter pi, U+03A0 ISOgrk3 -->
+<!ENTITY Rho      "&#929;"> <!-- greek capital letter rho, U+03A1 -->
+<!-- there is no Sigmaf, and no U+03A2 character either -->
+<!ENTITY Sigma    "&#931;"> <!-- greek capital letter sigma,
+                                    U+03A3 ISOgrk3 -->
+<!ENTITY Tau      "&#932;"> <!-- greek capital letter tau, U+03A4 -->
+<!ENTITY Upsilon  "&#933;"> <!-- greek capital letter upsilon,
+                                    U+03A5 ISOgrk3 -->
+<!ENTITY Phi      "&#934;"> <!-- greek capital letter phi,
+                                    U+03A6 ISOgrk3 -->
+<!ENTITY Chi      "&#935;"> <!-- greek capital letter chi, U+03A7 -->
+<!ENTITY Psi      "&#936;"> <!-- greek capital letter psi,
+                                    U+03A8 ISOgrk3 -->
+<!ENTITY Omega    "&#937;"> <!-- greek capital letter omega,
+                                    U+03A9 ISOgrk3 -->
+
+<!ENTITY alpha    "&#945;"> <!-- greek small letter alpha,
+                                    U+03B1 ISOgrk3 -->
+<!ENTITY beta     "&#946;"> <!-- greek small letter beta, U+03B2 ISOgrk3 -->
+<!ENTITY gamma    "&#947;"> <!-- greek small letter gamma,
+                                    U+03B3 ISOgrk3 -->
+<!ENTITY delta    "&#948;"> <!-- greek small letter delta,
+                                    U+03B4 ISOgrk3 -->
+<!ENTITY epsilon  "&#949;"> <!-- greek small letter epsilon,
+                                    U+03B5 ISOgrk3 -->
+<!ENTITY zeta     "&#950;"> <!-- greek small letter zeta, U+03B6 ISOgrk3 -->
+<!ENTITY eta      "&#951;"> <!-- greek small letter eta, U+03B7 ISOgrk3 -->
+<!ENTITY theta    "&#952;"> <!-- greek small letter theta,
+                                    U+03B8 ISOgrk3 -->
+<!ENTITY iota     "&#953;"> <!-- greek small letter iota, U+03B9 ISOgrk3 -->
+<!ENTITY kappa    "&#954;"> <!-- greek small letter kappa,
+                                    U+03BA ISOgrk3 -->
+<!ENTITY lambda   "&#955;"> <!-- greek small letter lamda,
+                                    U+03BB ISOgrk3 -->
+<!ENTITY mu       "&#956;"> <!-- greek small letter mu, U+03BC ISOgrk3 -->
+<!ENTITY nu       "&#957;"> <!-- greek small letter nu, U+03BD ISOgrk3 -->
+<!ENTITY xi       "&#958;"> <!-- greek small letter xi, U+03BE ISOgrk3 -->
+<!ENTITY omicron  "&#959;"> <!-- greek small letter omicron, U+03BF NEW -->
+<!ENTITY pi       "&#960;"> <!-- greek small letter pi, U+03C0 ISOgrk3 -->
+<!ENTITY rho      "&#961;"> <!-- greek small letter rho, U+03C1 ISOgrk3 -->
+<!ENTITY sigmaf   "&#962;"> <!-- greek small letter final sigma,
+                                    U+03C2 ISOgrk3 -->
+<!ENTITY sigma    "&#963;"> <!-- greek small letter sigma,
+                                    U+03C3 ISOgrk3 -->
+<!ENTITY tau      "&#964;"> <!-- greek small letter tau, U+03C4 ISOgrk3 -->
+<!ENTITY upsilon  "&#965;"> <!-- greek small letter upsilon,
+                                    U+03C5 ISOgrk3 -->
+<!ENTITY phi      "&#966;"> <!-- greek small letter phi, U+03C6 ISOgrk3 -->
+<!ENTITY chi      "&#967;"> <!-- greek small letter chi, U+03C7 ISOgrk3 -->
+<!ENTITY psi      "&#968;"> <!-- greek small letter psi, U+03C8 ISOgrk3 -->
+<!ENTITY omega    "&#969;"> <!-- greek small letter omega,
+                                    U+03C9 ISOgrk3 -->
+<!ENTITY thetasym "&#977;"> <!-- greek theta symbol,
+                                    U+03D1 NEW -->
+<!ENTITY upsih    "&#978;"> <!-- greek upsilon with hook symbol,
+                                    U+03D2 NEW -->
+<!ENTITY piv      "&#982;"> <!-- greek pi symbol, U+03D6 ISOgrk3 -->
+
+<!-- General Punctuation -->
+<!ENTITY bull     "&#8226;"> <!-- bullet = black small circle,
+                                     U+2022 ISOpub  -->
+<!-- bullet is NOT the same as bullet operator, U+2219 -->
+<!ENTITY hellip   "&#8230;"> <!-- horizontal ellipsis = three dot leader,
+                                     U+2026 ISOpub  -->
+<!ENTITY prime    "&#8242;"> <!-- prime = minutes = feet, U+2032 ISOtech -->
+<!ENTITY Prime    "&#8243;"> <!-- double prime = seconds = inches,
+                                     U+2033 ISOtech -->
+<!ENTITY oline    "&#8254;"> <!-- overline = spacing overscore,
+                                     U+203E NEW -->
+<!ENTITY frasl    "&#8260;"> <!-- fraction slash, U+2044 NEW -->
+
+<!-- Letterlike Symbols -->
+<!ENTITY weierp   "&#8472;"> <!-- script capital P = power set
+                                     = Weierstrass p, U+2118 ISOamso -->
+<!ENTITY image    "&#8465;"> <!-- black-letter capital I = imaginary part,
+                                     U+2111 ISOamso -->
+<!ENTITY real     "&#8476;"> <!-- black-letter capital R = real part symbol,
+                                     U+211C ISOamso -->
+<!ENTITY trade    "&#8482;"> <!-- trade mark sign, U+2122 ISOnum -->
+<!ENTITY alefsym  "&#8501;"> <!-- alef symbol = first transfinite cardinal,
+                                     U+2135 NEW -->
+<!-- alef symbol is NOT the same as hebrew letter alef,
+     U+05D0 although the same glyph could be used to depict both characters -->
+
+<!-- Arrows -->
+<!ENTITY larr     "&#8592;"> <!-- leftwards arrow, U+2190 ISOnum -->
+<!ENTITY uarr     "&#8593;"> <!-- upwards arrow, U+2191 ISOnum-->
+<!ENTITY rarr     "&#8594;"> <!-- rightwards arrow, U+2192 ISOnum -->
+<!ENTITY darr     "&#8595;"> <!-- downwards arrow, U+2193 ISOnum -->
+<!ENTITY harr     "&#8596;"> <!-- left right arrow, U+2194 ISOamsa -->
+<!ENTITY crarr    "&#8629;"> <!-- downwards arrow with corner leftwards
+                                     = carriage return, U+21B5 NEW -->
+<!ENTITY lArr     "&#8656;"> <!-- leftwards double arrow, U+21D0 ISOtech -->
+<!-- Unicode does not say that lArr is the same as the 'is implied by' arrow
+    but also does not have any other character for that function. So lArr can
+    be used for 'is implied by' as ISOtech suggests -->
+<!ENTITY uArr     "&#8657;"> <!-- upwards double arrow, U+21D1 ISOamsa -->
+<!ENTITY rArr     "&#8658;"> <!-- rightwards double arrow,
+                                     U+21D2 ISOtech -->
+<!-- Unicode does not say this is the 'implies' character but does not have 
+     another character with this function so rArr can be used for 'implies'
+     as ISOtech suggests -->
+<!ENTITY dArr     "&#8659;"> <!-- downwards double arrow, U+21D3 ISOamsa -->
+<!ENTITY hArr     "&#8660;"> <!-- left right double arrow,
+                                     U+21D4 ISOamsa -->
+
+<!-- Mathematical Operators -->
+<!ENTITY forall   "&#8704;"> <!-- for all, U+2200 ISOtech -->
+<!ENTITY part     "&#8706;"> <!-- partial differential, U+2202 ISOtech  -->
+<!ENTITY exist    "&#8707;"> <!-- there exists, U+2203 ISOtech -->
+<!ENTITY empty    "&#8709;"> <!-- empty set = null set, U+2205 ISOamso -->
+<!ENTITY nabla    "&#8711;"> <!-- nabla = backward difference,
+                                     U+2207 ISOtech -->
+<!ENTITY isin     "&#8712;"> <!-- element of, U+2208 ISOtech -->
+<!ENTITY notin    "&#8713;"> <!-- not an element of, U+2209 ISOtech -->
+<!ENTITY ni       "&#8715;"> <!-- contains as member, U+220B ISOtech -->
+<!ENTITY prod     "&#8719;"> <!-- n-ary product = product sign,
+                                     U+220F ISOamsb -->
+<!-- prod is NOT the same character as U+03A0 'greek capital letter pi' though
+     the same glyph might be used for both -->
+<!ENTITY sum      "&#8721;"> <!-- n-ary summation, U+2211 ISOamsb -->
+<!-- sum is NOT the same character as U+03A3 'greek capital letter sigma'
+     though the same glyph might be used for both -->
+<!ENTITY minus    "&#8722;"> <!-- minus sign, U+2212 ISOtech -->
+<!ENTITY lowast   "&#8727;"> <!-- asterisk operator, U+2217 ISOtech -->
+<!ENTITY radic    "&#8730;"> <!-- square root = radical sign,
+                                     U+221A ISOtech -->
+<!ENTITY prop     "&#8733;"> <!-- proportional to, U+221D ISOtech -->
+<!ENTITY infin    "&#8734;"> <!-- infinity, U+221E ISOtech -->
+<!ENTITY ang      "&#8736;"> <!-- angle, U+2220 ISOamso -->
+<!ENTITY and      "&#8743;"> <!-- logical and = wedge, U+2227 ISOtech -->
+<!ENTITY or       "&#8744;"> <!-- logical or = vee, U+2228 ISOtech -->
+<!ENTITY cap      "&#8745;"> <!-- intersection = cap, U+2229 ISOtech -->
+<!ENTITY cup      "&#8746;"> <!-- union = cup, U+222A ISOtech -->
+<!ENTITY int      "&#8747;"> <!-- integral, U+222B ISOtech -->
+<!ENTITY there4   "&#8756;"> <!-- therefore, U+2234 ISOtech -->
+<!ENTITY sim      "&#8764;"> <!-- tilde operator = varies with = similar to,
+                                     U+223C ISOtech -->
+<!-- tilde operator is NOT the same character as the tilde, U+007E,
+     although the same glyph might be used to represent both  -->
+<!ENTITY cong     "&#8773;"> <!-- approximately equal to, U+2245 ISOtech -->
+<!ENTITY asymp    "&#8776;"> <!-- almost equal to = asymptotic to,
+                                     U+2248 ISOamsr -->
+<!ENTITY ne       "&#8800;"> <!-- not equal to, U+2260 ISOtech -->
+<!ENTITY equiv    "&#8801;"> <!-- identical to, U+2261 ISOtech -->
+<!ENTITY le       "&#8804;"> <!-- less-than or equal to, U+2264 ISOtech -->
+<!ENTITY ge       "&#8805;"> <!-- greater-than or equal to,
+                                     U+2265 ISOtech -->
+<!ENTITY sub      "&#8834;"> <!-- subset of, U+2282 ISOtech -->
+<!ENTITY sup      "&#8835;"> <!-- superset of, U+2283 ISOtech -->
+<!ENTITY nsub     "&#8836;"> <!-- not a subset of, U+2284 ISOamsn -->
+<!ENTITY sube     "&#8838;"> <!-- subset of or equal to, U+2286 ISOtech -->
+<!ENTITY supe     "&#8839;"> <!-- superset of or equal to,
+                                     U+2287 ISOtech -->
+<!ENTITY oplus    "&#8853;"> <!-- circled plus = direct sum,
+                                     U+2295 ISOamsb -->
+<!ENTITY otimes   "&#8855;"> <!-- circled times = vector product,
+                                     U+2297 ISOamsb -->
+<!ENTITY perp     "&#8869;"> <!-- up tack = orthogonal to = perpendicular,
+                                     U+22A5 ISOtech -->
+<!ENTITY sdot     "&#8901;"> <!-- dot operator, U+22C5 ISOamsb -->
+<!-- dot operator is NOT the same character as U+00B7 middle dot -->
+
+<!-- Miscellaneous Technical -->
+<!ENTITY lceil    "&#8968;"> <!-- left ceiling = APL upstile,
+                                     U+2308 ISOamsc  -->
+<!ENTITY rceil    "&#8969;"> <!-- right ceiling, U+2309 ISOamsc  -->
+<!ENTITY lfloor   "&#8970;"> <!-- left floor = APL downstile,
+                                     U+230A ISOamsc  -->
+<!ENTITY rfloor   "&#8971;"> <!-- right floor, U+230B ISOamsc  -->
+<!ENTITY lang     "&#9001;"> <!-- left-pointing angle bracket = bra,
+                                     U+2329 ISOtech -->
+<!-- lang is NOT the same character as U+003C 'less than sign' 
+     or U+2039 'single left-pointing angle quotation mark' -->
+<!ENTITY rang     "&#9002;"> <!-- right-pointing angle bracket = ket,
+                                     U+232A ISOtech -->
+<!-- rang is NOT the same character as U+003E 'greater than sign' 
+     or U+203A 'single right-pointing angle quotation mark' -->
+
+<!-- Geometric Shapes -->
+<!ENTITY loz      "&#9674;"> <!-- lozenge, U+25CA ISOpub -->
+
+<!-- Miscellaneous Symbols -->
+<!ENTITY spades   "&#9824;"> <!-- black spade suit, U+2660 ISOpub -->
+<!-- black here seems to mean filled as opposed to hollow -->
+<!ENTITY clubs    "&#9827;"> <!-- black club suit = shamrock,
+                                     U+2663 ISOpub -->
+<!ENTITY hearts   "&#9829;"> <!-- black heart suit = valentine,
+                                     U+2665 ISOpub -->
+<!ENTITY diams    "&#9830;"> <!-- black diamond suit, U+2666 ISOpub -->
diff --git a/basis/xml/errors/errors-tests.factor b/basis/xml/errors/errors-tests.factor
index e72e465f0d..426ef57736 100644
--- a/basis/xml/errors/errors-tests.factor
+++ b/basis/xml/errors/errors-tests.factor
@@ -6,22 +6,27 @@ IN: xml.errors.tests
     '[ _ string>xml ] swap '[ _ = ] must-fail-with ;
 
 T{ no-entity f 1 10 "nbsp" } "<x>&nbsp;</x>" xml-error-test
-T{ mismatched f 1 8 T{ name f "" "x" "" } T{ name f "" "y" "" }
-} "<x></y>" xml-error-test
+T{ mismatched f 1 8 T{ name f "" "x" "" } T{ name f "" "y" "" } }
+    "<x></y>" xml-error-test
 T{ unclosed f 1 4 V{ T{ name f "" "x" "" } } } "<x>" xml-error-test
 T{ nonexist-ns f 1 5 "x" } "<x:y/>" xml-error-test
 T{ unopened f 1 5 } "</x>" xml-error-test
-T{ not-yes/no f 1 41 "maybe" } "<?xml version='1.0' standalone='maybe'?><x/>" xml-error-test
+T{ not-yes/no f 1 41 "maybe" }
+    "<?xml version='1.0' standalone='maybe'?><x/>" xml-error-test
 T{ extra-attrs f 1 32 V{ T{ name f "" "foo" f } }
 } "<?xml version='1.1' foo='bar'?><x/>" xml-error-test
-T{ bad-version f 1 28 "5 million" } "<?xml version='5 million'?><x/>" xml-error-test
+T{ bad-version f 1 28 "5 million" }
+    "<?xml version='5 million'?><x/>" xml-error-test
 T{ notags f } "" xml-error-test
 T{ multitags } "<x/><y/>" xml-error-test
-T{ bad-prolog  f 1 26 T{ prolog f "1.0" "UTF-8" f }
-} "<x/><?xml version='1.0'?>" xml-error-test
+T{ bad-prolog  f 1 26 T{ prolog f "1.0" "UTF-8" f } }
+    "<x/><?xml version='1.0'?>" xml-error-test
 T{ capitalized-prolog f 1 6 "XmL" } "<?XmL version='1.0'?><x/>"
-xml-error-test
+    xml-error-test
 T{ pre/post-content f "x" t } "x<y/>" xml-error-test
 T{ versionless-prolog f 1 8 } "<?xml?><x/>" xml-error-test
-T{ bad-instruction f 1 11 T{ instruction f "xsl" }
-} "<x><?xsl?></x>" xml-error-test
+T{ bad-instruction f 1 11 T{ instruction f "xsl" } }
+    "<x><?xsl?></x>" xml-error-test
+T{ unclosed-quote f 1 13 } "<x value='/>" xml-error-test
+T{ bad-name f 1 3 "-" } "<-/>" xml-error-test
+T{ quoteless-attr f 1 10 } "<x value=3/>" xml-error-test
\ No newline at end of file
diff --git a/basis/xml/errors/errors.factor b/basis/xml/errors/errors.factor
index 0c039d526c..9d3d8a6bb0 100644
--- a/basis/xml/errors/errors.factor
+++ b/basis/xml/errors/errors.factor
@@ -32,17 +32,6 @@ M: no-entity summary ( obj -- str )
         "Entity does not exist: &" write thing>> write ";" print
     ] with-string-writer ;
 
-TUPLE: xml-string-error < parsing-error string ; ! this should not exist
-
-: xml-string-error ( string -- * )
-    \ xml-string-error parsing-error swap >>string throw ;
-
-M: xml-string-error summary ( obj -- str )
-    [
-        dup call-next-method write
-        string>> print
-    ] with-string-writer ;
-
 TUPLE: mismatched < parsing-error open close ;
 
 : mismatched ( open close -- * )
@@ -233,7 +222,34 @@ M: misplaced-directive summary ( obj -- str )
         dir>> write-xml-chunk nl
     ] with-string-writer ;
 
+TUPLE: bad-name < parsing-error name ;
+
+: bad-name ( name -- * )
+    \ bad-name parsing-error swap >>name throw ;
+
+M: bad-name summary ( obj -- str )
+    [ call-next-method ]
+    [ "Invalid name: " swap name>> "\n" 3append ]
+    bi append ;
+
+TUPLE: unclosed-quote < parsing-error ;
+
+: unclosed-quote ( -- * )
+    \ unclosed-quote parsing-error throw ;
+
+M: unclosed-quote summary
+    call-next-method
+    "XML document ends with quote still open\n" append ;
+
+TUPLE: quoteless-attr < parsing-error ;
+
+: quoteless-attr ( -- * )
+    \ quoteless-attr parsing-error throw ;
+
+M: quoteless-attr summary
+    call-next-method "Attribute lacks quotes around value\n" append ;
+
 UNION: xml-parse-error multitags notags extra-attrs nonexist-ns
-       not-yes/no unclosed mismatched xml-string-error expected no-entity
+       not-yes/no unclosed mismatched expected no-entity
        bad-prolog versionless-prolog capitalized-prolog bad-instruction
-       bad-directive ;
+       bad-directive bad-name unclosed-quote quoteless-attr ;
diff --git a/basis/xml/tests/ascii.xml b/basis/xml/tests/ascii.xml
new file mode 100644
index 0000000000..ca1c355c81
--- /dev/null
+++ b/basis/xml/tests/ascii.xml
@@ -0,0 +1 @@
+<?xml version='1.0' encoding='ASCII'?><x>e</x>
\ No newline at end of file
diff --git a/basis/xml/tests/encodings.factor b/basis/xml/tests/encodings.factor
new file mode 100644
index 0000000000..720b04ca42
--- /dev/null
+++ b/basis/xml/tests/encodings.factor
@@ -0,0 +1,14 @@
+USING: xml xml.data xml.utilities tools.test accessors kernel ;
+
+[ "\u000131" ] [ "resource:basis/xml/tests/latin5.xml" file>xml children>string ] unit-test
+[ "\u0000e9" ] [ "resource:basis/xml/tests/latin1.xml" file>xml children>string ] unit-test
+[ "\u0000e9" ] [ "resource:basis/xml/tests/spaces.xml" file>xml children>string ] unit-test
+[ "\u0000e9" ] [ "resource:basis/xml/tests/utf8.xml" file>xml children>string ] unit-test
+[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16.xml" file>xml children>string ] unit-test
+[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16be.xml" file>xml children>string ] unit-test
+[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16le.xml" file>xml children>string ] unit-test
+[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16be-bom.xml" file>xml children>string ] unit-test
+[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16le-bom.xml" file>xml children>string ] unit-test
+[ "\u0000e9" ] [ "resource:basis/xml/tests/prologless.xml" file>xml children>string ] unit-test
+[ "e" ] [ "resource:basis/xml/tests/ascii.xml" file>xml children>string ] unit-test
+[ "\u0000e9" "x" ] [ "resource:basis/xml/tests/unitag.xml" file>xml [ name>> main>> ] [ children>string ] bi ] unit-test
\ No newline at end of file
diff --git a/basis/xml/tests/latin1.xml b/basis/xml/tests/latin1.xml
new file mode 100644
index 0000000000..f8bc6bcd61
--- /dev/null
+++ b/basis/xml/tests/latin1.xml
@@ -0,0 +1 @@
+<?xml version='1.0' encoding='ISO-8859-1'?><x>�</x>
\ No newline at end of file
diff --git a/basis/xml/tests/latin5.xml b/basis/xml/tests/latin5.xml
new file mode 100644
index 0000000000..afbcf09fc7
--- /dev/null
+++ b/basis/xml/tests/latin5.xml
@@ -0,0 +1 @@
+<?xml version='1.0' encoding='ISO-8859-9'?><x>�</x>
\ No newline at end of file
diff --git a/basis/xml/tests/prologless.xml b/basis/xml/tests/prologless.xml
new file mode 100644
index 0000000000..a60ed31cc6
--- /dev/null
+++ b/basis/xml/tests/prologless.xml
@@ -0,0 +1 @@
+<x>é</x>
\ No newline at end of file
diff --git a/basis/xml/tests/spaces.xml b/basis/xml/tests/spaces.xml
new file mode 100644
index 0000000000..dd194ff218
--- /dev/null
+++ b/basis/xml/tests/spaces.xml
@@ -0,0 +1,3 @@
+
+
+<x>é</x>
diff --git a/basis/xml/tests/test.factor b/basis/xml/tests/test.factor
index f37c3fa7ac..7a826756b6 100644
--- a/basis/xml/tests/test.factor
+++ b/basis/xml/tests/test.factor
@@ -2,7 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 IN: xml.tests
 USING: kernel xml tools.test io namespaces make sequences
-xml.errors xml.entities parser strings xml.data io.files
+xml.errors xml.entities.html parser strings xml.data io.files
 xml.writer xml.utilities state-parser continuations assocs
 sequences.deep accessors io.streams.string ;
 
@@ -53,12 +53,15 @@ SYMBOL: xml-file
 [ "<foo>         bar            </foo>" string>xml pprint-xml>string ] unit-test
 [ "<!-- B+, B, or B--->" string>xml ] must-fail
 [ ] [ "<?xml version='1.0'?><!-- declarations for <head> & <body> --><foo/>" string>xml drop ] unit-test
-[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>xml-chunk second ] unit-test
-[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>xml-chunk second ] unit-test
-[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>xml-chunk second ] unit-test
-[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>xml-chunk second ] unit-test
-[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk second ] unit-test
-[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk second ] unit-test
-[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk second ] unit-test
-[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo   SYSTEM \"blah.dtd\"   >" string>xml-chunk second ] unit-test
+[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>xml-chunk first ] unit-test
+[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>xml-chunk first ] unit-test
+[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>xml-chunk first ] unit-test
+[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>xml-chunk first ] unit-test
+[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk first ] unit-test
+[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk first ] unit-test
+[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk first ] unit-test
+[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo   SYSTEM \"blah.dtd\"   >" string>xml-chunk first ] unit-test
 [ t ] [ "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' >" dup string>xml-chunk [ write-xml-chunk ] with-string-writer = ] unit-test
+[ "foo" ] [ "<!ENTITY bar 'foo'><x>&bar;</x>" string>xml children>string ] unit-test
+[ V{ "hello" } ] [ "hello" string>xml-chunk ] unit-test
+[ 958 ] [ [ "&xi;" string>xml-chunk ] with-html-entities first first ] unit-test
\ No newline at end of file
diff --git a/basis/xml/tests/unitag.xml b/basis/xml/tests/unitag.xml
new file mode 100644
index 0000000000..b7ef6ade3c
--- /dev/null
+++ b/basis/xml/tests/unitag.xml
@@ -0,0 +1 @@
+<é>x</é>
\ No newline at end of file
diff --git a/basis/xml/tests/utf16.xml b/basis/xml/tests/utf16.xml
new file mode 100644
index 0000000000..d8775098a1
Binary files /dev/null and b/basis/xml/tests/utf16.xml differ
diff --git a/basis/xml/tests/utf16be-bom.xml b/basis/xml/tests/utf16be-bom.xml
new file mode 100644
index 0000000000..4a6f3e255c
Binary files /dev/null and b/basis/xml/tests/utf16be-bom.xml differ
diff --git a/basis/xml/tests/utf16be.xml b/basis/xml/tests/utf16be.xml
new file mode 100644
index 0000000000..c97bff7593
Binary files /dev/null and b/basis/xml/tests/utf16be.xml differ
diff --git a/basis/xml/tests/utf16le-bom.xml b/basis/xml/tests/utf16le-bom.xml
new file mode 100644
index 0000000000..ac7d8b8c70
Binary files /dev/null and b/basis/xml/tests/utf16le-bom.xml differ
diff --git a/basis/xml/tests/utf16le.xml b/basis/xml/tests/utf16le.xml
new file mode 100644
index 0000000000..5a0c7d9551
Binary files /dev/null and b/basis/xml/tests/utf16le.xml differ
diff --git a/basis/xml/tests/utf8-bom.xml b/basis/xml/tests/utf8-bom.xml
new file mode 100644
index 0000000000..5486916f73
--- /dev/null
+++ b/basis/xml/tests/utf8-bom.xml
@@ -0,0 +1 @@
+<?xml version='1.0' encoding='UTF-8'?><x/>
\ No newline at end of file
diff --git a/basis/xml/tests/utf8.xml b/basis/xml/tests/utf8.xml
new file mode 100644
index 0000000000..83b3e2d501
--- /dev/null
+++ b/basis/xml/tests/utf8.xml
@@ -0,0 +1 @@
+<?xml version='1.0' encoding='UTF-8'?><x>é</x>
\ No newline at end of file
diff --git a/basis/xml/tokenize/tokenize.factor b/basis/xml/tokenize/tokenize.factor
index 0c475c108d..a2ae9c4d58 100644
--- a/basis/xml/tokenize/tokenize.factor
+++ b/basis/xml/tokenize/tokenize.factor
@@ -1,9 +1,10 @@
 ! Copyright (C) 2005, 2006 Daniel Ehrenberg
 ! See http://factorcode.org/license.txt for BSD license.
-USING: xml.errors xml.data xml.utilities xml.char-classes sets
-xml.entities kernel state-parser kernel namespaces make strings
-math math.parser sequences assocs arrays splitting combinators
-unicode.case accessors fry ascii ;
+USING: accessors arrays ascii assocs combinators
+combinators.short-circuit fry io.encodings io.encodings.iana
+io.encodings.string io.encodings.utf16 io.encodings.utf8 kernel make
+math math.parser namespaces sequences sets splitting state-parser
+strings xml.char-classes xml.data xml.entities xml.errors hashtables ;
 IN: xml.tokenize
 
 ! XML namespace processing: ns = namespace
@@ -53,34 +54,37 @@ SYMBOL: ns-stack
 
 ! version=1.0? is calculated once and passed around for efficiency
 
-: (parse-name) ( -- str )
-    version=1.0? dup
-    get-char name-start? [
-        [ dup get-char name-char? not ] take-until nip
-    ] [
-        "Malformed name" xml-string-error
-    ] if ;
+: assure-name ( str version=1.0? -- str )
+    over {
+        [ first name-start? ]
+        [ rest-slice [ name-char? ] with all? ]
+    } 2&& [ bad-name ] unless ;
+
+: (parse-name) ( start -- str )
+    version=1.0?
+    [ [ get-char name-char? not ] curry take-until append ]
+    [ assure-name ] bi ;
+
+: parse-name-starting ( start -- name )
+    (parse-name) get-char CHAR: : =
+    [ next "" (parse-name) ] [ "" swap ] if f <name> ;
 
 : parse-name ( -- name )
-    (parse-name) get-char CHAR: : =
-    [ next (parse-name) ] [ "" swap ] if f <name> ;
+    "" parse-name-starting ;
 
 !   -- Parsing strings
 
-: (parse-entity) ( string -- )
+: parse-named-entity ( string -- )
     dup entities at [ , ] [ 
-        prolog-data get standalone>>
-        [ no-entity ] [
-            dup extra-entities get at
-            [ , ] [ no-entity ] ?if
-        ] if
+        dup extra-entities get at
+        [ dup number? [ , ] [ % ] if ] [ no-entity ] ?if ! Make less hackish
     ] ?if ;
 
 : parse-entity ( -- )
     next CHAR: ; take-char next
     "#" ?head [
         "x" ?head 16 10 ? base> ,
-    ] [ (parse-entity) ] if ;
+    ] [ parse-named-entity ] if ;
 
 : (parse-char) ( ch -- )
     get-char {
@@ -93,13 +97,9 @@ SYMBOL: ns-stack
 : parse-char ( ch -- string )
     [ (parse-char) ] "" make ;
 
-: parse-quot ( ch -- string )
-    parse-char get-char
-    [ "XML file ends in a quote" xml-string-error ] unless ;
-
 : parse-text ( -- string )
     CHAR: < parse-char ;
-
+                                   
 ! Parsing tags
 
 : start-tag ( -- name ? )
@@ -107,17 +107,18 @@ SYMBOL: ns-stack
     get-char CHAR: / = dup [ next ] when
     parse-name swap ;
 
-: parse-attr-value ( -- seq )
-    get-char dup "'\"" member? [
-        next parse-quot
-    ] [
-        "Attribute lacks quote" xml-string-error
-    ] if ;
+: (parse-quote) ( ch -- string )
+    parse-char get-char
+    [ unclosed-quote ] unless ;
+
+: parse-quote ( -- seq )
+    pass-blank get-char dup "'\"" member?
+    [ next (parse-quote) ] [ quoteless-attr ] if ;
 
 : parse-attr ( -- )
-    [ parse-name ] with-scope
-    pass-blank CHAR: = expect pass-blank
-    [ parse-attr-value ] with-scope
+    parse-name
+    pass-blank CHAR: = expect
+    parse-quote
     2array , ;
 
 : (middle-tag) ( -- )
@@ -153,7 +154,7 @@ SYMBOL: ns-stack
 : only-blanks ( str -- )
     [ blank? ] all? [ bad-doctype-decl ] unless ;
 
-: take-system-literal ( -- str )
+: take-system-literal ( -- str ) ! replace with parse-quote?
     pass-blank get-char next {
         { CHAR: ' [ "'" take-string ] }
         { CHAR: " [ "\"" take-string ] }
@@ -207,15 +208,18 @@ DEFER: direct
 
 : take-entity-def ( -- entity-name entity-def )
     " " take-string pass-blank get-char {
-        { CHAR: ' [ take-system-literal ] }
-        { CHAR: " [ take-system-literal ] }
+        { CHAR: ' [ parse-quote ] }
+        { CHAR: " [ parse-quote ] }
         [ drop take-external-id ]
     } case ;
 
+: associate-entity ( entity-name entity-def -- )
+    swap extra-entities [ ?set-at ] change ;
+
 : take-entity-decl ( -- entity-decl )
     pass-blank get-char {
         { CHAR: % [ next pass-blank take-entity-def ] }
-        [ drop take-entity-def ]
+        [ drop take-entity-def 2dup associate-entity ]
     } case
     ">" take-string only-blanks <entity-decl> ;
 
@@ -253,22 +257,36 @@ DEFER: direct
 : good-version ( version -- version )
     dup { "1.0" "1.1" } member? [ bad-version ] unless ;
 
-: prolog-attrs ( alist -- prolog )
-    [ T{ name f "" "version" f } swap at
-      [ good-version ] [ versionless-prolog ] if* ] keep
-    [ T{ name f "" "encoding" f } swap at
-      "UTF-8" or ] keep
+: prolog-version ( alist -- version )
+    T{ name f "" "version" f } swap at
+    [ good-version ] [ versionless-prolog ] if* ;
+
+: prolog-encoding ( alist -- encoding )
+    T{ name f "" "encoding" f } swap at "UTF-8" or ;
+
+: prolog-standalone ( alist -- version )
     T{ name f "" "standalone" f } swap at
-    [ yes/no>bool ] [ f ] if*
-    <prolog> ;
+    [ yes/no>bool ] [ f ] if* ;
+
+: prolog-attrs ( alist -- prolog )
+    [ prolog-version ]
+    [ prolog-encoding ]
+    [ prolog-standalone ]
+    tri <prolog> ;
+
+SYMBOL: string-input?
+: decode-input-if ( encoding -- )
+    string-input? get [ drop ] [ decode-input ] if ;
 
 : parse-prolog ( -- prolog )
     pass-blank middle-tag "?>" expect-string
     dup assure-no-extra prolog-attrs
+    dup encoding>> dup "UTF-16" =
+    [ drop ] [ name>encoding [ decode-input-if ] when* ] if
     dup prolog-data set ;
 
 : instruct ( -- instruction )
-    (parse-name) dup "xml" =
+    "" (parse-name) dup "xml" =
     [ drop parse-prolog ] [
         dup >lower "xml" =
         [ capitalized-prolog ]
@@ -278,10 +296,69 @@ DEFER: direct
 : make-tag ( -- tag )
     {
         { [ get-char dup CHAR: ! = ] [ drop next direct ] }
-        { [ CHAR: ? = ] [ next instruct ] } 
+        { [ CHAR: ? = ] [ next instruct ] }
         [
             start-tag [ dup add-ns pop-ns <closer> ]
             [ middle-tag end-tag ] if
             CHAR: > expect
         ]
     } cond ;
+
+! Autodetecting encodings
+
+: continue-make-tag ( str -- tag )
+    parse-name-starting middle-tag end-tag CHAR: > expect ;
+
+: start-utf16le ( -- tag )
+    utf16le decode-input-if
+    CHAR: ? expect
+    0 expect instruct ;
+
+: 10xxxxxx? ( ch -- ? )
+    -6 shift 3 bitand 2 = ;
+          
+: start<name ( ch -- tag )
+    ascii?
+    [ utf8 decode-input-if next make-tag ] [
+        next
+        [ get-next 10xxxxxx? not ] take-until
+        get-char suffix utf8 decode
+        utf8 decode-input-if next
+        continue-make-tag
+    ] if ;
+          
+: start< ( -- tag )
+    get-next {
+        { 0 [ next next start-utf16le ] }
+        { CHAR: ? [ next next instruct ] } ! XML prolog parsing sets the encoding
+        { CHAR: ! [ utf8 decode-input next next direct ] }
+        [ start<name ]
+    } case ;
+
+: skip-utf8-bom ( -- tag )
+    "\u0000bb\u0000bf" expect utf8 decode-input
+    CHAR: < expect make-tag ;
+
+: decode-expecting ( encoding string -- tag )
+    [ decode-input-if next ] [ expect-string ] bi* make-tag ;
+
+: start-utf16be ( -- tag )
+    utf16be "<" decode-expecting ;
+
+: skip-utf16le-bom ( -- tag )
+    utf16le "\u0000fe<" decode-expecting ;
+
+: skip-utf16be-bom ( -- tag )
+    utf16be "\u0000ff<" decode-expecting ;
+
+: start-document ( -- tag )
+    get-char {
+        { CHAR: < [ start< ] }
+        { 0 [ start-utf16be ] }
+        { HEX: EF [ skip-utf8-bom ] }
+        { HEX: FF [ skip-utf16le-bom ] }
+        { HEX: FE [ skip-utf16be-bom ] }
+        { f [ "" ] }
+        [ drop utf8 decode-input-if f ]
+        ! Same problem as with <e`>, in the case of XML chunks?
+    } case ;
diff --git a/basis/xml/xml-docs.factor b/basis/xml/xml-docs.factor
index 16a817d628..e87c32d375 100644
--- a/basis/xml/xml-docs.factor
+++ b/basis/xml/xml-docs.factor
@@ -2,7 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: help.markup help.syntax kernel xml.data xml.errors
 xml.writer state-parser xml.tokenize xml.utilities xml.entities
-strings sequences io ;
+strings sequences io xml.entities.html ;
 IN: xml
 
 HELP: string>xml
@@ -295,9 +295,6 @@ HELP: expected
 HELP: no-entity
 { $class-description "XML parsing error describing the use of an undefined entity in a case where standalone is marked yes. Delegates to " { $link parsing-error } ". Contains one slot, thing, containing a string representing the entity." } ;
 
-HELP: xml-string-error
-{ $class-description "XML parsing error that delegates to " { $link parsing-error } " and represents an other, unspecified error, which is represented by the slot string, containing a string describing the error." } ;
-
 HELP: open-tag
 { $class-description "represents a tag that does have children, ie is not a contained tag" }
 { $notes "the constructor used for this class is simply " { $link <tag> } "." }
@@ -324,6 +321,15 @@ HELP: state-parse
 HELP: pre/post-content
 { $class-description "describes the error where a non-whitespace string is used before or after the main tag in an XML document. Contains two slots: string contains the offending string, and pre? is t if it occured before the main tag and f if it occured after" } ;
 
+HELP: unclosed-quote
+{ $class-description "describes the error where a quotation for an attribute value is opened but not closed before the end of the document." } ;
+
+HELP: bad-name
+{ $class-description "describes the error where a name is used, for example in an XML tag or attribute key, which is invalid." } ;
+
+HELP: quoteless-attr
+{ $class-description "describes the error where an attribute of an XML tag is missing quotes around a value." } ;
+
 HELP: entities
 { $description "a hash table from default XML entity names (like &amp; and &lt;) to the characters they represent. This is automatically included when parsing any XML document." }
 { $see-also html-entities } ;
@@ -444,6 +450,9 @@ ARTICLE: { "xml" "errors" } "XML parsing errors"
     { $subsection expected }
     { $subsection no-entity }
     { $subsection pre/post-content }
+    { $subsection unclosed-quote }
+    { $subsection bad-name }
+    { $subsection quoteless-attr }
     "Additionally, most of these errors delegate to " { $link parsing-error } " in order to provide more information"
     $nl
     "Note that, in parsing an XML document, only the first error is reported." ;
@@ -456,7 +465,7 @@ ARTICLE: { "xml" "entities" } "XML entities"
     { $subsection with-html-entities } ;
 
 ARTICLE: "xml" "XML parser"
-"The " { $vocab-link "xml" } " vocabulary implements the XML 1.1 standard, converting strings of text into XML and vice versa."
+"The " { $vocab-link "xml" } " vocabulary implements the XML 1.0 and 1.1 standards, converting strings of text into XML and vice versa."
     { $subsection { "xml" "reading" } }
     { $subsection { "xml" "writing" } }
     { $subsection { "xml" "classes" } }
diff --git a/basis/xml/xml.factor b/basis/xml/xml.factor
index 8afcf7a33b..328a058a58 100644
--- a/basis/xml/xml.factor
+++ b/basis/xml/xml.factor
@@ -1,10 +1,9 @@
 ! Copyright (C) 2005, 2006 Daniel Ehrenberg
 ! See http://factorcode.org/license.txt for BSD license.
-USING: io io.streams.string io.files kernel math namespaces
-prettyprint sequences arrays generic strings vectors
-xml.char-classes xml.data xml.errors xml.tokenize xml.writer
-xml.utilities state-parser assocs ascii io.encodings.utf8
-accessors xml.backend ;
+USING: accessors arrays io io.encodings.binary io.files
+io.streams.string kernel namespaces sequences state-parser strings
+xml.backend xml.data xml.errors xml.tokenize ascii
+xml.writer ;
 IN: xml
 
 !   -- Overall parser with data tree
@@ -23,7 +22,7 @@ GENERIC: process ( object -- )
 M: object process add-child ;
 
 M: prolog process
-    xml-stack get V{ { f V{ "" } } } =
+    xml-stack get V{ { f V{ } } } =
     [ bad-prolog ] unless drop ;
 
 M: instruction process
@@ -101,6 +100,7 @@ TUPLE: pull-xml scope ;
         text-now? on
     ] H{ } make-assoc
     pull-xml boa ;
+! pull-xml needs to call start-document somewhere
 
 : pull-event ( pull -- xml-event/f )
     scope>> [
@@ -133,11 +133,12 @@ TUPLE: pull-xml scope ;
 : sax ( stream quot: ( xml-elem -- ) -- )
     swap [
         reset-prolog init-ns-stack
-        prolog-data get call-under
+        start-document [ call-under ] when*
         sax-loop
     ] state-parse ; inline recursive
 
 : (read-xml) ( -- )
+    start-document [ process ] when*
     [ process ] sax-loop ; inline
 
 : (read-xml-chunk) ( stream -- prolog seq )
@@ -159,11 +160,12 @@ TUPLE: pull-xml scope ;
     <string-reader> read-xml ;
 
 : string>xml-chunk ( string -- xml )
-    <string-reader> read-xml-chunk ;
+    t string-input?
+    [ <string-reader> read-xml-chunk ] with-variable ;
 
 : file>xml ( filename -- xml )
     ! Autodetect encoding!
-    utf8 <file-reader> read-xml ;
+    binary <file-reader> read-xml ;
 
 : xml-reprint ( string -- )
     string>xml print-xml ;
diff --git a/extra/fuel/fuel.factor b/extra/fuel/fuel.factor
index 46d6ba12c7..f52a34ff28 100644
--- a/extra/fuel/fuel.factor
+++ b/extra/fuel/fuel.factor
@@ -141,7 +141,7 @@ PRIVATE>
 
 : fuel-get-article ( name -- ) article fuel-eval-set-result ;
 
-MEMO: fuel-get-article-title ( name -- )
+: fuel-get-article-title ( name -- )
     articles get at [ article-title ] [ f ] if* fuel-eval-set-result ;
 
 : fuel-word-help ( name -- ) (fuel-word-help) fuel-eval-set-result ;
diff --git a/misc/fuel/README b/misc/fuel/README
index eb280d796c..706191aaa3 100644
--- a/misc/fuel/README
+++ b/misc/fuel/README
@@ -18,16 +18,11 @@ beast.
 
   (load-file "<path/to/factor/installation>/misc/fuel/fu.el")
 
-  or
-
-  (add-to-list load-path "<path/to/factor/installation>/fuel")
-  (require 'fuel)
-
   If all you want is a major mode for editing Factor code with pretty
   font colors and indentation, without running the factor listener
   inside Emacs, you can use instead:
 
-  (add-to-list load-path "<path/to/factor/installation>/fuel")
+  (add-to-list 'load-path "<path/to/factor/installation>/fuel")
   (setq factor-mode-use-fuel nil)
   (require 'factor-mode)
 
diff --git a/misc/fuel/fuel-eval.el b/misc/fuel/fuel-eval.el
index 543d23bd3f..4c34ef17b8 100644
--- a/misc/fuel/fuel-eval.el
+++ b/misc/fuel/fuel-eval.el
@@ -31,6 +31,7 @@
         ((listp sexp)
          (case (car sexp)
            (:array (factor--seq 'V{ '} (cdr sexp)))
+           (:seq (factor--seq '{ '} (cdr sexp)))
            (:quote (format "\\ %s" (factor `(:factor ,(cadr sexp)))))
            (:quotation (factor--seq '\[ '\] (cdr sexp)))
            (:using (factor `(USING: ,@(cdr sexp) :end)))
diff --git a/misc/fuel/fuel-font-lock.el b/misc/fuel/fuel-font-lock.el
index 99a7c7b8fb..10561ded68 100644
--- a/misc/fuel/fuel-font-lock.el
+++ b/misc/fuel/fuel-font-lock.el
@@ -72,11 +72,21 @@
 
 ;;; Font lock:
 
+(defun fuel-font-lock--syntactic-face (state)
+  (cond ((nth 3 state) 'factor-font-lock-string)
+        ((char-equal (char-after (nth 8 state)) ?\ )
+         (save-excursion
+           (goto-char (nth 8 state))
+           (beginning-of-line)
+           (cond ((looking-at "USING: ") 'factor-font-lock-vocabulary-name)
+                 ((looking-at "\\(TUPLE\\|SYMBOLS\\|VARS\\): ")
+                  'factor-font-lock-symbol)
+                 (t 'default))))
+        (t 'factor-font-lock-comment)))
+
 (defconst fuel-font-lock--font-lock-keywords
   `((,fuel-syntax--stack-effect-regex . 'factor-font-lock-stack-effect)
-    (,fuel-syntax--parsing-words-regex . 'factor-font-lock-parsing-word)
     (,fuel-syntax--brace-words-regex 1 'factor-font-lock-parsing-word)
-    ("\\(P\\|SBUF\\)\"" 1 'factor-font-lock-parsing-word)
     (,fuel-syntax--vocab-ref-regexp  2 'factor-font-lock-vocabulary-name)
     (,fuel-syntax--declaration-words-regex . 'factor-font-lock-declaration)
     (,fuel-syntax--word-definition-regex 2 'factor-font-lock-word)
@@ -89,24 +99,26 @@
     (,fuel-syntax--type-definition-regex 2 'factor-font-lock-type-name)
     (,fuel-syntax--method-definition-regex (1 'factor-font-lock-type-name)
                                            (2 'factor-font-lock-word))
-    (,fuel-syntax--parent-type-regex 2 'factor-font-lock-type-name)
+    (,fuel-syntax--tuple-decl-regex 2 'factor-font-lock-type-name)
     (,fuel-syntax--constructor-regex . 'factor-font-lock-constructor)
     (,fuel-syntax--setter-regex . 'factor-font-lock-setter-word)
     (,fuel-syntax--getter-regex . 'factor-font-lock-getter-word)
     (,fuel-syntax--symbol-definition-regex 2 'factor-font-lock-symbol)
-    (,fuel-syntax--bad-string-regex . 'factor-font-lock-invalid-syntax)))
+    (,fuel-syntax--bad-string-regex . 'factor-font-lock-invalid-syntax)
+    ("\\(P\\|SBUF\\)\"" 1 'factor-font-lock-parsing-word)
+    (,fuel-syntax--parsing-words-regex . 'factor-font-lock-parsing-word)))
 
 (defun fuel-font-lock--font-lock-setup (&optional keywords no-syntax)
   (set (make-local-variable 'comment-start) "! ")
   (set (make-local-variable 'parse-sexp-lookup-properties) t)
-  (set (make-local-variable 'font-lock-comment-face) 'factor-font-lock-comment)
-  (set (make-local-variable 'font-lock-string-face) 'factor-font-lock-string)
   (set (make-local-variable 'font-lock-defaults)
        `(,(or keywords 'fuel-font-lock--font-lock-keywords)
          nil nil nil nil
          ,@(if no-syntax nil
              (list (cons 'font-lock-syntactic-keywords
-                         fuel-syntax--syntactic-keywords))))))
+                         fuel-syntax--syntactic-keywords)
+                   (cons 'font-lock-syntactic-face-function
+                         'fuel-font-lock--syntactic-face))))))
 
 
 ;;; Fontify strings as Factor code:
diff --git a/misc/fuel/fuel-help.el b/misc/fuel/fuel-help.el
index 705d1469a2..a82de388da 100644
--- a/misc/fuel/fuel-help.el
+++ b/misc/fuel/fuel-help.el
@@ -137,7 +137,8 @@
 
 (defun fuel-help--get-article (name label)
   (message "Retrieving article ...")
-  (let* ((cmd `(:fuel* ((,name fuel-get-article)) "fuel" t))
+  (let* ((name (if (listp name) (cons :seq name) name))
+         (cmd `(:fuel* ((,name fuel-get-article)) "fuel" t))
          (ret (fuel-eval--send/wait cmd))
          (res (fuel-eval--retort-result ret)))
     (if (not res)
diff --git a/misc/fuel/fuel-listener.el b/misc/fuel/fuel-listener.el
index 66034225f1..aa9f05ab29 100644
--- a/misc/fuel/fuel-listener.el
+++ b/misc/fuel/fuel-listener.el
@@ -31,7 +31,12 @@
   :group 'fuel)
 
 (defcustom fuel-listener-factor-binary
-  (expand-file-name "factor" fuel-factor-root-dir)
+  (expand-file-name (cond ((eq system-type 'windows-nt)
+                           "factor.exe")
+                          ((eq system-type 'darwin)
+                           "Factor.app/Contents/MacOS/factor")
+                          (t "factor"))
+                    fuel-factor-root-dir)
   "Full path to the factor executable to use when starting a listener."
   :type '(file :must-match t)
   :group 'fuel-listener)
@@ -132,8 +137,7 @@ buffer."
 
 (defun fuel-listener--setup-completion ()
   (setq fuel-syntax--current-vocab-function 'fuel-listener--current-vocab)
-  (setq fuel-syntax--usings-function 'fuel-listener--usings)
-  (set-syntax-table fuel-syntax--syntax-table))
+  (setq fuel-syntax--usings-function 'fuel-listener--usings))
 
 
 ;;; Stack mode support
@@ -160,7 +164,6 @@ buffer."
   (set (make-local-variable 'comint-prompt-regexp) fuel-con--prompt-regex)
   (set (make-local-variable 'comint-use-prompt-regexp) t)
   (set (make-local-variable 'comint-prompt-read-only) t)
-  (set-syntax-table fuel-syntax--syntax-table)
   (fuel-listener--setup-completion)
   (fuel-listener--setup-stack-mode))
 
diff --git a/misc/fuel/fuel-markup.el b/misc/fuel/fuel-markup.el
index 696e4ff080..6a374cd5c8 100644
--- a/misc/fuel/fuel-markup.el
+++ b/misc/fuel/fuel-markup.el
@@ -61,7 +61,7 @@
 
 (defun fuel-markup--insert-button (label link type)
   (let ((label (format "%s" label))
-        (link (format "%s" link)))
+        (link (if (listp link) link (format "%s" link))))
     (insert-text-button label
                         :type 'fuel-markup--button
                         'markup-link link
@@ -70,8 +70,9 @@
                         'help-echo (format "%s (%s)" label type))))
 
 (defun fuel-markup--article-title (name)
-  (fuel-eval--retort-result
-   (fuel-eval--send/wait `(:fuel* ((,name fuel-get-article-title)) "fuel"))))
+  (let ((name (if (listp name) (cons :seq name) name)))
+    (fuel-eval--retort-result
+     (fuel-eval--send/wait `(:fuel* ((,name fuel-get-article-title)) "fuel")))))
 
 (defun fuel-markup--link-at-point ()
   (let ((button (condition-case nil (forward-button 0) (error nil))))
diff --git a/misc/fuel/fuel-syntax.el b/misc/fuel/fuel-syntax.el
index b74b0afc11..4112e3507d 100644
--- a/misc/fuel/fuel-syntax.el
+++ b/misc/fuel/fuel-syntax.el
@@ -79,7 +79,7 @@
   (regexp-opt fuel-syntax--declaration-words 'words))
 
 (defsubst fuel-syntax--second-word-regex (prefixes)
-  (format "^%s +\\([^ \r\n]+\\)" (regexp-opt prefixes t)))
+  (format "%s +\\([^ \r\n]+\\)" (regexp-opt prefixes t)))
 
 (defconst fuel-syntax--method-definition-regex
   "^M: +\\([^ ]+\\) +\\([^ ]+\\)")
@@ -87,14 +87,22 @@
 (defconst fuel-syntax--integer-regex
   "\\_<-?[0-9]+\\_>")
 
-(defconst fuel-syntax--ratio-regex
-  "\\_<-?\\([0-9]+\\+\\)?[0-9]+/-?[0-9]+\\_>")
+(defconst fuel-syntax--raw-float-regex
+  "[0-9]*\\.[0-9]*\\([eE][+-]?[0-9]+\\)?")
 
 (defconst fuel-syntax--float-regex
-  "\\_<-?[0-9]+\\.[0-9]*\\([eE][+-]?[0-9]+\\)?\\_>")
+  (format "\\_<-?%s\\_>" fuel-syntax--raw-float-regex))
+
+(defconst fuel-syntax--number-regex
+  (format "\\([0-9]+\\|%s\\)" fuel-syntax--raw-float-regex))
+
+(defconst fuel-syntax--ratio-regex
+  (format "\\_<[+-]?%s/-?%s\\_>"
+          fuel-syntax--number-regex
+          fuel-syntax--number-regex))
 
 (defconst fuel-syntax--bad-string-regex
-  "\"\\([^\"]\\|\\\\\"\\)*\n")
+  "\\_<\"[^>]\\([^\"\n]\\|\\\\\"\\)*\n")
 
 (defconst fuel-syntax--word-definition-regex
   (fuel-syntax--second-word-regex
@@ -114,8 +122,8 @@
 (defconst fuel-syntax--type-definition-regex
   (fuel-syntax--second-word-regex '("MIXIN:" "TUPLE:" "SINGLETON:" "UNION:")))
 
-(defconst fuel-syntax--parent-type-regex
-  "^\\(TUPLE\\|PREDICTE\\): +[^ ]+ +< +\\([^ ]+\\)")
+(defconst fuel-syntax--tuple-decl-regex
+  "^TUPLE: +\\([^ \n]+\\) +< +\\([^ \n]+\\)\\_>")
 
 (defconst fuel-syntax--constructor-regex "<[^ >]+>")
 
@@ -125,7 +133,8 @@
 (defconst fuel-syntax--symbol-definition-regex
   (fuel-syntax--second-word-regex '("SYMBOL:" "VAR:")))
 
-(defconst fuel-syntax--stack-effect-regex " ( .* )")
+(defconst fuel-syntax--stack-effect-regex
+  "\\( ( .* )\\)\\|\\( (( .* ))\\)")
 
 (defconst fuel-syntax--using-lines-regex "^USING: +\\([^;]+\\);")
 
@@ -163,26 +172,26 @@
           fuel-syntax--declaration-words-regex))
 
 (defconst fuel-syntax--single-liner-regex
-  (format "^%s" (regexp-opt '("ABOUT:"
-                              "ARTICLE:"
-                              "ALIAS:"
-                              "CONSTANT:" "C:"
-                              "DEFER:"
-                              "FORGET:"
-                              "GENERIC:" "GENERIC#"
-                              "HELP:" "HEX:" "HOOK:"
-                              "IN:" "INSTANCE:"
-                              "MAIN:" "MATH:" "MIXIN:"
-                              "OCT:"
-                              "POSTPONE:" "PRIVATE>" "<PRIVATE"
-                              "QUALIFIED-WITH:" "QUALIFIED:"
-                              "RENAME:"
-                              "SINGLETON:" "SLOT:" "SYMBOL:"
-                              "USE:"
-                              "VAR:"))))
+  (regexp-opt '("ABOUT:"
+                "ARTICLE:"
+                "ALIAS:"
+                "CONSTANT:" "C:"
+                "DEFER:"
+                "FORGET:"
+                "GENERIC:" "GENERIC#"
+                "HELP:" "HEX:" "HOOK:"
+                "IN:" "INSTANCE:"
+                "MAIN:" "MATH:" "MIXIN:"
+                "OCT:"
+                "POSTPONE:" "PRIVATE>" "<PRIVATE"
+                "QUALIFIED-WITH:" "QUALIFIED:"
+                "RENAME:"
+                "SINGLETON:" "SLOT:" "SYMBOL:"
+                "USE:"
+                "VAR:")))
 
 (defconst fuel-syntax--begin-of-def-regex
-  (format "^USING: \\|\\(%s\\)\\|\\(%s .*\\)"
+  (format "^USING: \\|\\(%s\\)\\|\\(^%s .*\\)"
           fuel-syntax--definition-start-regex
           fuel-syntax--single-liner-regex))
 
@@ -190,7 +199,7 @@
   (format "^.*%s" fuel-syntax--definition-end-regex))
 
 (defconst fuel-syntax--end-of-def-regex
-  (format "\\(%s\\)\\|\\(%s .*\\)"
+  (format "\\(%s\\)\\|\\(^%s .*\\)"
           fuel-syntax--end-of-def-line-regex
           fuel-syntax--single-liner-regex))
 
@@ -220,13 +229,21 @@
     table))
 
 (defconst fuel-syntax--syntactic-keywords
-  `(;; Comments:
+  `(;; CHARs:
+    ("CHAR: \\(.\\)\\( \\|$\\)" (1 "w"))
+    ;; Comments:
     ("\\_<\\(#?!\\) .*\\(\n\\|$\\)" (1 "<") (2 ">"))
     ("\\_<\\(#?!\\)\\(\n\\|$\\)" (1 "<") (2 ">"))
-    ;; CHARs:
-    ("CHAR: \\(.\\)\\( \\|$\\)" (1 "w"))
     ;; Strings
-    ("\\(\"\\)\\([^\n\r\f\\\"]\\|\\\\\"?\\)*\\(\"\\)" (1 "\"") (3 "\""))
+    ("\\_<\\(\"\\)\\([^\n\r\f\"]\\|\\\\\"\\)*\\(\"\\)\\_>" (1 "\"") (3 "\""))
+    ("\\_<<\\(\"\\)\\_>" (1 "\""))
+    ("\\_<\\(\"\\)>\\_>" (1 "\""))
+    ;; Multiline constructs
+    ("\\_<USING:\\( \\)\\(;\\)" (1 "<b") (2 ">b"))
+    ("\\_<USING:\\( \\)" (1 "<b"))
+    ("\\_<TUPLE: +\\w+? +< +\\w+? *\\( \\)" (1 "<b"))
+    ("\\_<\\(TUPLE\\|SYMBOLS\\|VARS\\): +\\w+? *\\( \\)\\([^<]\\|\\_>\\)" (2 "<b"))
+    ("\\(\n\\| \\);\\_>" (1 ">b"))
     ;; Let and lambda:
     ("\\_<\\(!(\\) .* \\()\\)" (1 "<") (2 ">"))
     ("\\(\\[\\)\\(let\\|wlet\\|let\\*\\)\\( \\|$\\)" (1 "(]"))