Going further towards conformance

2009-01-20 15:37:21 -06:00 · 2009-01-20 15:37:21 -06:00 · 67dd4ca4a4
parent 8f44f5e4b3
commit 67dd4ca4a4
7 changed files with 115 additions and 82 deletions
--- a/basis/xml/char-classes/char-classes.factor
+++ b/basis/xml/char-classes/char-classes.factor
@ -3,16 +3,16 @@
 USING: kernel sequences unicode.syntax math math.order combinators ;
 IN: xml.char-classes
-CATEGORY: 1.0name-start* Ll Lu Lo Lt Nl \u000559\u0006E5\u0006E6_ ;
+CATEGORY: 1.0name-start* Ll Lu Lo Lt Nl \u000559\u0006E5\u0006E6_: ;
 : 1.0name-start? ( char -- ? )
    dup 1.0name-start*? [ drop t ] 
    [ HEX: 2BB HEX: 2C1 between? ] if ;
-CATEGORY: 1.0name-char Ll Lu Lo Lt Nl Mc Me Mn Lm Nd _-.\u000387 ;
+CATEGORY: 1.0name-char Ll Lu Lo Lt Nl Mc Me Mn Lm Nd _-.\u000387: ;
-CATEGORY: 1.1name-start Ll Lu Lo Lm Ln Nl _ ;
+CATEGORY: 1.1name-start Ll Lu Lo Lm Ln Nl _: ;
-CATEGORY: 1.1name-char Ll Lu Lo Lm Ln Nl Mc Mn Nd Pc Cf _-.\u0000b7 ;
+CATEGORY: 1.1name-char Ll Lu Lo Lm Ln Nl Mc Mn Nd Pc Cf _-.\u0000b7: ;
 : name-start? ( 1.0? char -- ? )
    swap [ 1.0name-start? ] [ 1.1name-start? ] if ;
--- a/basis/xml/data/data.factor
+++ b/basis/xml/data/data.factor
@ -17,10 +17,13 @@ C: <name> name
    [ [ main>> ] bi@ ?= ] 2tri and and ;
 : <simple-name> ( string -- name )
    "" swap f <name> ;
 : <null-name> ( string -- name )
    f swap f <name> ;
 : assure-name ( string/name -- name )
-    dup name? [ <simple-name> ] unless ;
+    dup name? [ <null-name> ] unless ;
 TUPLE: opener name attrs ;
 C: <opener> opener
@ -54,6 +57,9 @@ C: <public-id> public-id
 TUPLE: doctype-decl < directive name external-id internal-subset ;
 C: <doctype-decl> doctype-decl
 TUPLE: notation-decl < directive name id ;
 C: <notation-decl> notation-decl
 TUPLE: instruction text ;
 C: <instruction> instruction
--- a/basis/xml/errors/errors-tests.factor
+++ b/basis/xml/errors/errors-tests.factor
@ -25,11 +25,12 @@ T{ capitalized-prolog f 1 6 "XmL" } "<?XmL version='1.0'?><x/>"
    xml-error-test
 T{ pre/post-content f "x" t } "x<y/>" xml-error-test
 T{ versionless-prolog f 1 8 } "<?xml?><x/>" xml-error-test
 T{ bad-instruction f 1 11 T{ instruction f "xsl" } }
    "<x><?xsl?></x>" xml-error-test
 T{ unclosed-quote f 1 13 } "<x value='/>" xml-error-test
 T{ bad-name f 1 3 "-" } "<-/>" xml-error-test
 T{ quoteless-attr f 1 10 } "<x value=3/>" xml-error-test
 T{ attr-w/< f 1 11 } "<x value='<'/>" xml-error-test
 T{ text-w/]]> f 1 6 } "<x>]]></x>" xml-error-test
 T{ duplicate-attr f 1 21 T{ name { space "" } { main "this" } } V{ "a" "b" } } "<x this='a' this='b'/>" xml-error-test
 T{ bad-cdata f 1 3 } "<![CDATA[]]><x/>" xml-error-test
 T{ bad-cdata f 1 7 } "<x/><![CDATA[]]>" xml-error-test
 T{ pre/post-content f "&" t } "&32;<x/>" xml-error-test
--- a/basis/xml/errors/errors.factor
+++ b/basis/xml/errors/errors.factor
@ -170,18 +170,6 @@ M: versionless-prolog summary ( obj -- str )
        "XML prolog lacks a version declaration" print
    ] with-string-writer ;
 TUPLE: bad-instruction < parsing-error instruction ;
 : bad-instruction ( instruction -- * )
    \ bad-instruction parsing-error swap >>instruction throw ;
 M: bad-instruction summary ( obj -- str )
    [
        dup call-next-method write
        "Misplaced processor instruction:" print
        instruction>> write-xml-chunk nl
    ] with-string-writer ;
 TUPLE: bad-directive < parsing-error dir ;
 : bad-directive ( directive -- * )
@ -286,9 +274,17 @@ TUPLE: duplicate-attr < parsing-error key values ;
 M: duplicate-attr summary
    call-next-method "\nDuplicate attribute" append ;
 TUPLE: bad-cdata < parsing-error ;
 : bad-cdata ( -- * )
    \ bad-cdata parsing-error throw ;
 M: bad-cdata summary
    call-next-method "\nCDATA occurs before or after main tag" append ;
 UNION: xml-parse-error
    multitags notags extra-attrs nonexist-ns bad-decl
    not-yes/no unclosed mismatched expected no-entity
-    bad-prolog versionless-prolog capitalized-prolog bad-instruction
+    bad-prolog versionless-prolog capitalized-prolog
    bad-directive bad-name unclosed-quote quoteless-attr
    attr-w/< text-w/]]> duplicate-attr ;
--- a/basis/xml/tests/xmltest.factor
+++ b/basis/xml/tests/xmltest.factor
@ -1,12 +1,12 @@
 USING: accessors assocs combinators continuations fry generalizations
 io.pathnames kernel macros sequences stack-checker tools.test xml
-xml.utilities xml.writer ;
+xml.utilities xml.writer arrays ;
 IN: xml.tests.suite
-TUPLE: test id uri sections description type ;
+TUPLE: xml-test id uri sections description type ;
-: >test ( tag -- test )
+: >xml-test ( tag -- test )
-    test new swap {
+    xml-test new swap {
        [ "TYPE" swap at >>type ]
        [ "ID" swap at >>id ]
        [ "URI" swap at >>uri ]
@ -15,7 +15,7 @@ TUPLE: test id uri sections description type ;
    } cleave ;
 : parse-tests ( xml -- tests )
-    "TEST" tags-named [ >test ] map ;
+    "TEST" tags-named [ >xml-test ] map ;
 : base "resource:basis/xml/tests/xmltest/" ;
@ -32,11 +32,22 @@ MACRO: drop-input ( quot -- newquot )
 : well-formed? ( uri -- answer )
    [ file>xml ] fails? "not-wf" "valid" ? ;
-: run-test ( test -- )
+: test-quots ( test -- result quot )
    [ type>> '[ _ ] ]
-    [ '[ _ uri>> base swap append-path well-formed? ] ] bi
+    [ '[ _ uri>> base swap append-path well-formed? ] ] bi ;
    unit-test ;
-: run-tests ( -- )
+: xml-tests ( -- tests )
    base "xmltest.xml" append-path file>xml
-    parse-tests [ run-test ] each ;
+    parse-tests [ test-quots 2array ] map ;
 : run-xml-tests ( -- )
    xml-tests [ unit-test ] assoc-each ;
 : works? ( result quot -- ? )
    [ first ] [ call ] bi* = ;
 : partition-xml-tests ( -- successes failures )
    xml-tests [ first2 works? ] partition ;
 : failing-valids ( -- tests )
    partition-xml-tests nip [ second first ] map [ type>> "valid" = ] filter ;
--- a/basis/xml/tokenize/tokenize.factor
+++ b/basis/xml/tokenize/tokenize.factor
@ -13,6 +13,8 @@ IN: xml.tokenize
 ! A stack of hashtables
 SYMBOL: ns-stack
 SYMBOL: depth
 : attrs>ns ( attrs-alist -- hash )
    ! this should check to make sure URIs are valid
    [
@ -50,25 +52,37 @@ SYMBOL: ns-stack
 ! Parsing names
-! version=1.0? is calculated once and passed around for efficiency
+: valid-name? ( str -- ? )
    [ f ] [
        version=1.0? swap {
            [ first name-start? ]
            [ rest-slice [ name-char? ] with all? ]
        } 2&&
    ] if-empty ;
-: assure-name ( str version=1.0? -- str )
+: prefixed-name ( str -- name/f )
-    over {
+    ":" split dup length 2 = [
-        [ first name-start? ]
+        [ [ valid-name? ] all? ]
-        [ rest-slice [ name-char? ] with all? ]
+        [ first2 f <name> ] bi and
-    } 2&& [ bad-name ] unless ;
+    ] [ drop f ] if ;
-: (parse-name) ( start -- str )
+: interpret-name ( str -- name )
-    version=1.0?
+    dup prefixed-name [ ] [
-    [ [ get-char name-char? not ] curry take-until append ]
+        dup valid-name?
-    [ assure-name ] bi ;
+        [ <simple-name> ] [ bad-name ] if
    ] ?if ;
-: parse-name-starting ( start -- name )
+: take-name ( -- string )
-    (parse-name) get-char CHAR: : =
+    version=1.0? '[ _ get-char name-char? not ] take-until ;
    [ next "" (parse-name) ] [ "" swap ] if f <name> ;
 : parse-name ( -- name )
-    "" parse-name-starting ;
+    take-name interpret-name ;
 : parse-name-starting ( string -- name )
    take-name append interpret-name ;
 : parse-simple-name ( -- name )
    take-name <simple-name> ;
 !   -- Parsing strings
@ -99,11 +113,15 @@ SYMBOL: ns-stack
 : assure-no-]]> ( circular -- )
    "]]>" sequence= [ text-w/]]> ] when ;
-: parse-text ( -- string )
+:: parse-text ( -- string )
-    3 f <array> <circular> '[
+    3 f <array> <circular> :> circ
-        _ [ push-circular ]
+    depth get zero? :> no-text [| char |
-        [ nip assure-no-]]> ]
+        char circ push-circular
-        [ drop CHAR: < = ] 2tri
+        circ assure-no-]]>
        no-text [ char blank? char CHAR: < = or [
            char 1string t pre/post-content
        ] unless ] when
        char CHAR: < =
    ] parse-char ;
 ! Parsing tags
@ -131,7 +149,7 @@ SYMBOL: ns-stack
    [ dup "\t\r\n" member? [ drop CHAR: \s ] when ] map ;
 : parse-attr ( -- )
-    parse-name CHAR: = expect
+    parse-name pass-blank CHAR: = expect pass-blank
    t parse-quote* normalize-quot 2array , ;
 : (middle-tag) ( -- )
@ -148,9 +166,13 @@ SYMBOL: ns-stack
    [ (middle-tag) ] f make pass-blank
    assure-no-duplicates ;
 : close ( -- )
    pass-blank CHAR: > expect ;
 : end-tag ( name attrs-alist -- tag )
    tag-ns pass-blank get-char CHAR: / =
-    [ pop-ns <contained> next ] [ <opener> ] if ;
+    [ pop-ns <contained> next CHAR: > expect ]
    [ depth inc <opener> close ] if ;
 : take-comment ( -- comment )
    "--" expect-string
@ -159,6 +181,7 @@ SYMBOL: ns-stack
    CHAR: > expect ;
 : take-cdata ( -- string )
    depth get zero? [ bad-cdata ] when
    "[CDATA[" expect-string "]]>" take-string ;
 : take-word ( -- string )
@ -173,19 +196,17 @@ SYMBOL: ns-stack
 : take-attlist-decl ( -- doctype-decl )
    take-decl-contents <attlist-decl> ;
 : take-notation-decl ( -- notation-decl )
    take-decl-contents <notation-decl> ; 
 : take-until-one-of ( seps -- str sep )
    '[ get-char _ member? ] take-until get-char ;
 : expect-> ( -- )
    pass-blank CHAR: > expect ;
 : take-system-id ( -- system-id )
-    parse-quote <system-id>
+    parse-quote <system-id> close ;
    expect-> ;
 : take-public-id ( -- public-id )
-    parse-quote parse-quote <public-id>
+    parse-quote parse-quote <public-id> close ;
    expect-> ;
 DEFER: direct
@ -216,7 +237,7 @@ DEFER: direct
        { CHAR: \s [
            pass-blank get-char CHAR: [ = [
                next take-internal-subset f swap
-                expect->
+                close
            ] [
                " >" take-until-one-of {
                    { CHAR: \s [ (take-external-id) ] }
@ -235,21 +256,22 @@ DEFER: direct
    } case ;
 : associate-entity ( entity-name entity-def -- )
-    swap extra-entities [ ?set-at ] change ;
+    swap extra-entities get set-at ;
 : take-entity-decl ( -- entity-decl )
    pass-blank get-char {
        { CHAR: % [ next pass-blank take-entity-def ] }
        [ drop take-entity-def 2dup associate-entity ]
    } case
-    expect-> <entity-decl> ;
+    close <entity-decl> ;
 : take-directive ( -- directive )
-    take-word {
+    take-name {
        { "ELEMENT" [ take-element-decl ] }
        { "ATTLIST" [ take-attlist-decl ] }
        { "DOCTYPE" [ take-doctype-decl ] }
        { "ENTITY" [ take-entity-decl ] }
        { "NOTATION" [ take-notation-decl ] }
        [ bad-directive ]
    } case ;
@ -307,28 +329,27 @@ SYMBOL: string-input?
    dup prolog-data set ;
 : instruct ( -- instruction )
-    "" (parse-name) dup "xml" =
+    take-name {
-    [ drop parse-prolog ] [
+        { [ dup "xml" = ] [ drop parse-prolog ] }
-        dup >lower "xml" =
+        { [ dup >lower "xml" = ] [ capitalized-prolog ] }
-        [ capitalized-prolog ]
+        { [ dup valid-name? not ] [ bad-name ] }
-        [ "?>" take-string append <instruction> ] if
+        [ "?>" take-string append <instruction> ]
-    ] if ;
+    } cond ;
 : make-tag ( -- tag )
    {
        { [ get-char dup CHAR: ! = ] [ drop next direct ] }
        { [ CHAR: ? = ] [ next instruct ] }
        [
-            start-tag [ dup add-ns pop-ns <closer> ]
+            start-tag [ dup add-ns pop-ns <closer> depth dec close ]
            [ middle-tag end-tag ] if
            CHAR: > expect
        ]
    } cond ;
 ! Autodetecting encodings
 : continue-make-tag ( str -- tag )
-    parse-name-starting middle-tag end-tag CHAR: > expect ;
+    parse-name-starting middle-tag end-tag ;
 : start-utf16le ( -- tag )
    utf16le decode-input-if
--- a/basis/xml/xml.factor
+++ b/basis/xml/xml.factor
@ -2,8 +2,8 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors arrays io io.encodings.binary io.files
 io.streams.string kernel namespaces sequences xml.state-parser strings
-xml.backend xml.data xml.errors xml.tokenize ascii
+xml.backend xml.data xml.errors xml.tokenize ascii xml.entities
-xml.writer ;
+xml.writer assocs ;
 IN: xml
 !   -- Overall parser with data tree
@ -25,11 +25,6 @@ M: prolog process
    xml-stack get V{ { f V{ } } } =
    [ bad-prolog ] unless drop ;
 M: instruction process
    xml-stack get length 1 =
    [ bad-instruction ] unless
    add-child ;
 M: directive process
    xml-stack get dup length 1 =
    swap first second [ tag? ] contains? not and
@ -53,7 +48,9 @@ M: closer process
    <tag> add-child ;
 : init-xml-stack ( -- )
-    V{ } clone xml-stack set f push-xml ;
+    V{ } clone xml-stack set
    extra-entities [ H{ } assoc-like ] change
    f push-xml ;
 : default-prolog ( -- prolog )
    "1.0" "UTF-8" f <prolog> ;
@ -150,11 +147,12 @@ TUPLE: pull-xml scope ;
    ] state-parse ;
 : read-xml ( stream -- xml )
-    #! Produces a tree of XML nodes
+    0 depth
-    (read-xml-chunk) make-xml-doc ;
+    [ (read-xml-chunk) make-xml-doc ] with-variable ;
 : read-xml-chunk ( stream -- seq )
-    (read-xml-chunk) nip ;
+    1 depth
    [ (read-xml-chunk) nip ] with-variable ;
 : string>xml ( string -- xml )
    <string-reader> read-xml ;