diff --git a/basis/xml/char-classes/char-classes.factor b/basis/xml/char-classes/char-classes.factor
index 6cba618682..03e85e3ea3 100644
--- a/basis/xml/char-classes/char-classes.factor
+++ b/basis/xml/char-classes/char-classes.factor
@@ -3,16 +3,16 @@
 USING: kernel sequences unicode.syntax math math.order combinators ;
 IN: xml.char-classes
 
-CATEGORY: 1.0name-start* Ll Lu Lo Lt Nl \u000559\u0006E5\u0006E6_ ;
+CATEGORY: 1.0name-start* Ll Lu Lo Lt Nl \u000559\u0006E5\u0006E6_: ;
 : 1.0name-start? ( char -- ? )
     dup 1.0name-start*? [ drop t ] 
     [ HEX: 2BB HEX: 2C1 between? ] if ;
 
-CATEGORY: 1.0name-char Ll Lu Lo Lt Nl Mc Me Mn Lm Nd _-.\u000387 ;
+CATEGORY: 1.0name-char Ll Lu Lo Lt Nl Mc Me Mn Lm Nd _-.\u000387: ;
 
-CATEGORY: 1.1name-start Ll Lu Lo Lm Ln Nl _ ;
+CATEGORY: 1.1name-start Ll Lu Lo Lm Ln Nl _: ;
 
-CATEGORY: 1.1name-char Ll Lu Lo Lm Ln Nl Mc Mn Nd Pc Cf _-.\u0000b7 ;
+CATEGORY: 1.1name-char Ll Lu Lo Lm Ln Nl Mc Mn Nd Pc Cf _-.\u0000b7: ;
 
 : name-start? ( 1.0? char -- ? )
     swap [ 1.0name-start? ] [ 1.1name-start? ] if ;
diff --git a/basis/xml/data/data.factor b/basis/xml/data/data.factor
index bf4e2047a7..8e6ff4bf09 100644
--- a/basis/xml/data/data.factor
+++ b/basis/xml/data/data.factor
@@ -17,10 +17,13 @@ C: <name> name
     [ [ main>> ] bi@ ?= ] 2tri and and ;
 
 : <simple-name> ( string -- name )
+    "" swap f <name> ;
+
+: <null-name> ( string -- name )
     f swap f <name> ;
 
 : assure-name ( string/name -- name )
-    dup name? [ <simple-name> ] unless ;
+    dup name? [ <null-name> ] unless ;
 
 TUPLE: opener name attrs ;
 C: <opener> opener
@@ -54,6 +57,9 @@ C: <public-id> public-id
 TUPLE: doctype-decl < directive name external-id internal-subset ;
 C: <doctype-decl> doctype-decl
 
+TUPLE: notation-decl < directive name id ;
+C: <notation-decl> notation-decl
+
 TUPLE: instruction text ;
 C: <instruction> instruction
 
diff --git a/basis/xml/errors/errors-tests.factor b/basis/xml/errors/errors-tests.factor
index 25ec54814f..72d928568b 100644
--- a/basis/xml/errors/errors-tests.factor
+++ b/basis/xml/errors/errors-tests.factor
@@ -25,11 +25,12 @@ T{ capitalized-prolog f 1 6 "XmL" } "<?XmL version='1.0'?><x/>"
     xml-error-test
 T{ pre/post-content f "x" t } "x<y/>" xml-error-test
 T{ versionless-prolog f 1 8 } "<?xml?><x/>" xml-error-test
-T{ bad-instruction f 1 11 T{ instruction f "xsl" } }
-    "<x><?xsl?></x>" xml-error-test
 T{ unclosed-quote f 1 13 } "<x value='/>" xml-error-test
 T{ bad-name f 1 3 "-" } "<-/>" xml-error-test
 T{ quoteless-attr f 1 10 } "<x value=3/>" xml-error-test
 T{ attr-w/< f 1 11 } "<x value='<'/>" xml-error-test
 T{ text-w/]]> f 1 6 } "<x>]]></x>" xml-error-test
 T{ duplicate-attr f 1 21 T{ name { space "" } { main "this" } } V{ "a" "b" } } "<x this='a' this='b'/>" xml-error-test
+T{ bad-cdata f 1 3 } "<![CDATA[]]><x/>" xml-error-test
+T{ bad-cdata f 1 7 } "<x/><![CDATA[]]>" xml-error-test
+T{ pre/post-content f "&" t } "&32;<x/>" xml-error-test
diff --git a/basis/xml/errors/errors.factor b/basis/xml/errors/errors.factor
index 83b48f1da1..0be9ade1cf 100644
--- a/basis/xml/errors/errors.factor
+++ b/basis/xml/errors/errors.factor
@@ -170,18 +170,6 @@ M: versionless-prolog summary ( obj -- str )
         "XML prolog lacks a version declaration" print
     ] with-string-writer ;
 
-TUPLE: bad-instruction < parsing-error instruction ;
-
-: bad-instruction ( instruction -- * )
-    \ bad-instruction parsing-error swap >>instruction throw ;
-
-M: bad-instruction summary ( obj -- str )
-    [
-        dup call-next-method write
-        "Misplaced processor instruction:" print
-        instruction>> write-xml-chunk nl
-    ] with-string-writer ;
-
 TUPLE: bad-directive < parsing-error dir ;
 
 : bad-directive ( directive -- * )
@@ -286,9 +274,17 @@ TUPLE: duplicate-attr < parsing-error key values ;
 M: duplicate-attr summary
     call-next-method "\nDuplicate attribute" append ;
 
+TUPLE: bad-cdata < parsing-error ;
+
+: bad-cdata ( -- * )
+    \ bad-cdata parsing-error throw ;
+
+M: bad-cdata summary
+    call-next-method "\nCDATA occurs before or after main tag" append ;
+
 UNION: xml-parse-error
     multitags notags extra-attrs nonexist-ns bad-decl
     not-yes/no unclosed mismatched expected no-entity
-    bad-prolog versionless-prolog capitalized-prolog bad-instruction
+    bad-prolog versionless-prolog capitalized-prolog
     bad-directive bad-name unclosed-quote quoteless-attr
     attr-w/< text-w/]]> duplicate-attr ;
diff --git a/basis/xml/tests/xmltest.factor b/basis/xml/tests/xmltest.factor
index 4cae34a6dd..8caa5e8a75 100644
--- a/basis/xml/tests/xmltest.factor
+++ b/basis/xml/tests/xmltest.factor
@@ -1,12 +1,12 @@
 USING: accessors assocs combinators continuations fry generalizations
 io.pathnames kernel macros sequences stack-checker tools.test xml
-xml.utilities xml.writer ;
+xml.utilities xml.writer arrays ;
 IN: xml.tests.suite
 
-TUPLE: test id uri sections description type ;
+TUPLE: xml-test id uri sections description type ;
 
-: >test ( tag -- test )
-    test new swap {
+: >xml-test ( tag -- test )
+    xml-test new swap {
         [ "TYPE" swap at >>type ]
         [ "ID" swap at >>id ]
         [ "URI" swap at >>uri ]
@@ -15,7 +15,7 @@ TUPLE: test id uri sections description type ;
     } cleave ;
 
 : parse-tests ( xml -- tests )
-    "TEST" tags-named [ >test ] map ;
+    "TEST" tags-named [ >xml-test ] map ;
 
 : base "resource:basis/xml/tests/xmltest/" ;
 
@@ -32,11 +32,22 @@ MACRO: drop-input ( quot -- newquot )
 : well-formed? ( uri -- answer )
     [ file>xml ] fails? "not-wf" "valid" ? ;
 
-: run-test ( test -- )
+: test-quots ( test -- result quot )
     [ type>> '[ _ ] ]
-    [ '[ _ uri>> base swap append-path well-formed? ] ] bi
-    unit-test ;
+    [ '[ _ uri>> base swap append-path well-formed? ] ] bi ;
 
-: run-tests ( -- )
+: xml-tests ( -- tests )
     base "xmltest.xml" append-path file>xml
-    parse-tests [ run-test ] each ;
\ No newline at end of file
+    parse-tests [ test-quots 2array ] map ;
+
+: run-xml-tests ( -- )
+    xml-tests [ unit-test ] assoc-each ;
+
+: works? ( result quot -- ? )
+    [ first ] [ call ] bi* = ;
+
+: partition-xml-tests ( -- successes failures )
+    xml-tests [ first2 works? ] partition ;
+
+: failing-valids ( -- tests )
+    partition-xml-tests nip [ second first ] map [ type>> "valid" = ] filter ;
diff --git a/basis/xml/tokenize/tokenize.factor b/basis/xml/tokenize/tokenize.factor
index dc9f7ae719..2747959831 100644
--- a/basis/xml/tokenize/tokenize.factor
+++ b/basis/xml/tokenize/tokenize.factor
@@ -13,6 +13,8 @@ IN: xml.tokenize
 ! A stack of hashtables
 SYMBOL: ns-stack
 
+SYMBOL: depth
+
 : attrs>ns ( attrs-alist -- hash )
     ! this should check to make sure URIs are valid
     [
@@ -50,25 +52,37 @@ SYMBOL: ns-stack
 
 ! Parsing names
 
-! version=1.0? is calculated once and passed around for efficiency
+: valid-name? ( str -- ? )
+    [ f ] [
+        version=1.0? swap {
+            [ first name-start? ]
+            [ rest-slice [ name-char? ] with all? ]
+        } 2&&
+    ] if-empty ;
 
-: assure-name ( str version=1.0? -- str )
-    over {
-        [ first name-start? ]
-        [ rest-slice [ name-char? ] with all? ]
-    } 2&& [ bad-name ] unless ;
+: prefixed-name ( str -- name/f )
+    ":" split dup length 2 = [
+        [ [ valid-name? ] all? ]
+        [ first2 f <name> ] bi and
+    ] [ drop f ] if ;
 
-: (parse-name) ( start -- str )
-    version=1.0?
-    [ [ get-char name-char? not ] curry take-until append ]
-    [ assure-name ] bi ;
+: interpret-name ( str -- name )
+    dup prefixed-name [ ] [
+        dup valid-name?
+        [ <simple-name> ] [ bad-name ] if
+    ] ?if ;
 
-: parse-name-starting ( start -- name )
-    (parse-name) get-char CHAR: : =
-    [ next "" (parse-name) ] [ "" swap ] if f <name> ;
+: take-name ( -- string )
+    version=1.0? '[ _ get-char name-char? not ] take-until ;
 
 : parse-name ( -- name )
-    "" parse-name-starting ;
+    take-name interpret-name ;
+
+: parse-name-starting ( string -- name )
+    take-name append interpret-name ;
+
+: parse-simple-name ( -- name )
+    take-name <simple-name> ;
 
 !   -- Parsing strings
 
@@ -99,11 +113,15 @@ SYMBOL: ns-stack
 : assure-no-]]> ( circular -- )
     "]]>" sequence= [ text-w/]]> ] when ;
 
-: parse-text ( -- string )
-    3 f <array> <circular> '[
-        _ [ push-circular ]
-        [ nip assure-no-]]> ]
-        [ drop CHAR: < = ] 2tri
+:: parse-text ( -- string )
+    3 f <array> <circular> :> circ
+    depth get zero? :> no-text [| char |
+        char circ push-circular
+        circ assure-no-]]>
+        no-text [ char blank? char CHAR: < = or [
+            char 1string t pre/post-content
+        ] unless ] when
+        char CHAR: < =
     ] parse-char ;
 
 ! Parsing tags
@@ -131,7 +149,7 @@ SYMBOL: ns-stack
     [ dup "\t\r\n" member? [ drop CHAR: \s ] when ] map ;
 
 : parse-attr ( -- )
-    parse-name CHAR: = expect
+    parse-name pass-blank CHAR: = expect pass-blank
     t parse-quote* normalize-quot 2array , ;
 
 : (middle-tag) ( -- )
@@ -148,9 +166,13 @@ SYMBOL: ns-stack
     [ (middle-tag) ] f make pass-blank
     assure-no-duplicates ;
 
+: close ( -- )
+    pass-blank CHAR: > expect ;
+
 : end-tag ( name attrs-alist -- tag )
     tag-ns pass-blank get-char CHAR: / =
-    [ pop-ns <contained> next ] [ <opener> ] if ;
+    [ pop-ns <contained> next CHAR: > expect ]
+    [ depth inc <opener> close ] if ;
 
 : take-comment ( -- comment )
     "--" expect-string
@@ -159,6 +181,7 @@ SYMBOL: ns-stack
     CHAR: > expect ;
 
 : take-cdata ( -- string )
+    depth get zero? [ bad-cdata ] when
     "[CDATA[" expect-string "]]>" take-string ;
 
 : take-word ( -- string )
@@ -173,19 +196,17 @@ SYMBOL: ns-stack
 : take-attlist-decl ( -- doctype-decl )
     take-decl-contents <attlist-decl> ;
 
+: take-notation-decl ( -- notation-decl )
+    take-decl-contents <notation-decl> ; 
+
 : take-until-one-of ( seps -- str sep )
     '[ get-char _ member? ] take-until get-char ;
 
-: expect-> ( -- )
-    pass-blank CHAR: > expect ;
-
 : take-system-id ( -- system-id )
-    parse-quote <system-id>
-    expect-> ;
+    parse-quote <system-id> close ;
 
 : take-public-id ( -- public-id )
-    parse-quote parse-quote <public-id>
-    expect-> ;
+    parse-quote parse-quote <public-id> close ;
 
 DEFER: direct
 
@@ -216,7 +237,7 @@ DEFER: direct
         { CHAR: \s [
             pass-blank get-char CHAR: [ = [
                 next take-internal-subset f swap
-                expect->
+                close
             ] [
                 " >" take-until-one-of {
                     { CHAR: \s [ (take-external-id) ] }
@@ -235,21 +256,22 @@ DEFER: direct
     } case ;
 
 : associate-entity ( entity-name entity-def -- )
-    swap extra-entities [ ?set-at ] change ;
+    swap extra-entities get set-at ;
 
 : take-entity-decl ( -- entity-decl )
     pass-blank get-char {
         { CHAR: % [ next pass-blank take-entity-def ] }
         [ drop take-entity-def 2dup associate-entity ]
     } case
-    expect-> <entity-decl> ;
+    close <entity-decl> ;
 
 : take-directive ( -- directive )
-    take-word {
+    take-name {
         { "ELEMENT" [ take-element-decl ] }
         { "ATTLIST" [ take-attlist-decl ] }
         { "DOCTYPE" [ take-doctype-decl ] }
         { "ENTITY" [ take-entity-decl ] }
+        { "NOTATION" [ take-notation-decl ] }
         [ bad-directive ]
     } case ;
 
@@ -307,28 +329,27 @@ SYMBOL: string-input?
     dup prolog-data set ;
 
 : instruct ( -- instruction )
-    "" (parse-name) dup "xml" =
-    [ drop parse-prolog ] [
-        dup >lower "xml" =
-        [ capitalized-prolog ]
-        [ "?>" take-string append <instruction> ] if
-    ] if ;
+    take-name {
+        { [ dup "xml" = ] [ drop parse-prolog ] }
+        { [ dup >lower "xml" = ] [ capitalized-prolog ] }
+        { [ dup valid-name? not ] [ bad-name ] }
+        [ "?>" take-string append <instruction> ]
+    } cond ;
 
 : make-tag ( -- tag )
     {
         { [ get-char dup CHAR: ! = ] [ drop next direct ] }
         { [ CHAR: ? = ] [ next instruct ] }
         [
-            start-tag [ dup add-ns pop-ns <closer> ]
+            start-tag [ dup add-ns pop-ns <closer> depth dec close ]
             [ middle-tag end-tag ] if
-            CHAR: > expect
         ]
     } cond ;
 
 ! Autodetecting encodings
 
 : continue-make-tag ( str -- tag )
-    parse-name-starting middle-tag end-tag CHAR: > expect ;
+    parse-name-starting middle-tag end-tag ;
 
 : start-utf16le ( -- tag )
     utf16le decode-input-if
diff --git a/basis/xml/xml.factor b/basis/xml/xml.factor
index 53d186dcd2..8ee35001a6 100644
--- a/basis/xml/xml.factor
+++ b/basis/xml/xml.factor
@@ -2,8 +2,8 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors arrays io io.encodings.binary io.files
 io.streams.string kernel namespaces sequences xml.state-parser strings
-xml.backend xml.data xml.errors xml.tokenize ascii
-xml.writer ;
+xml.backend xml.data xml.errors xml.tokenize ascii xml.entities
+xml.writer assocs ;
 IN: xml
 
 !   -- Overall parser with data tree
@@ -25,11 +25,6 @@ M: prolog process
     xml-stack get V{ { f V{ } } } =
     [ bad-prolog ] unless drop ;
 
-M: instruction process
-    xml-stack get length 1 =
-    [ bad-instruction ] unless
-    add-child ;
-
 M: directive process
     xml-stack get dup length 1 =
     swap first second [ tag? ] contains? not and
@@ -53,7 +48,9 @@ M: closer process
     <tag> add-child ;
 
 : init-xml-stack ( -- )
-    V{ } clone xml-stack set f push-xml ;
+    V{ } clone xml-stack set
+    extra-entities [ H{ } assoc-like ] change
+    f push-xml ;
 
 : default-prolog ( -- prolog )
     "1.0" "UTF-8" f <prolog> ;
@@ -150,11 +147,12 @@ TUPLE: pull-xml scope ;
     ] state-parse ;
 
 : read-xml ( stream -- xml )
-    #! Produces a tree of XML nodes
-    (read-xml-chunk) make-xml-doc ;
+    0 depth
+    [ (read-xml-chunk) make-xml-doc ] with-variable ;
 
 : read-xml-chunk ( stream -- seq )
-    (read-xml-chunk) nip ;
+    1 depth
+    [ (read-xml-chunk) nip ] with-variable ;
 
 : string>xml ( string -- xml )
     <string-reader> read-xml ;