Going further towards conformance
parent
8f44f5e4b3
commit
67dd4ca4a4
|
@ -3,16 +3,16 @@
|
|||
USING: kernel sequences unicode.syntax math math.order combinators ;
|
||||
IN: xml.char-classes
|
||||
|
||||
CATEGORY: 1.0name-start* Ll Lu Lo Lt Nl \u000559\u0006E5\u0006E6_ ;
|
||||
CATEGORY: 1.0name-start* Ll Lu Lo Lt Nl \u000559\u0006E5\u0006E6_: ;
|
||||
: 1.0name-start? ( char -- ? )
|
||||
dup 1.0name-start*? [ drop t ]
|
||||
[ HEX: 2BB HEX: 2C1 between? ] if ;
|
||||
|
||||
CATEGORY: 1.0name-char Ll Lu Lo Lt Nl Mc Me Mn Lm Nd _-.\u000387 ;
|
||||
CATEGORY: 1.0name-char Ll Lu Lo Lt Nl Mc Me Mn Lm Nd _-.\u000387: ;
|
||||
|
||||
CATEGORY: 1.1name-start Ll Lu Lo Lm Ln Nl _ ;
|
||||
CATEGORY: 1.1name-start Ll Lu Lo Lm Ln Nl _: ;
|
||||
|
||||
CATEGORY: 1.1name-char Ll Lu Lo Lm Ln Nl Mc Mn Nd Pc Cf _-.\u0000b7 ;
|
||||
CATEGORY: 1.1name-char Ll Lu Lo Lm Ln Nl Mc Mn Nd Pc Cf _-.\u0000b7: ;
|
||||
|
||||
: name-start? ( 1.0? char -- ? )
|
||||
swap [ 1.0name-start? ] [ 1.1name-start? ] if ;
|
||||
|
|
|
@ -17,10 +17,13 @@ C: <name> name
|
|||
[ [ main>> ] bi@ ?= ] 2tri and and ;
|
||||
|
||||
: <simple-name> ( string -- name )
|
||||
"" swap f <name> ;
|
||||
|
||||
: <null-name> ( string -- name )
|
||||
f swap f <name> ;
|
||||
|
||||
: assure-name ( string/name -- name )
|
||||
dup name? [ <simple-name> ] unless ;
|
||||
dup name? [ <null-name> ] unless ;
|
||||
|
||||
TUPLE: opener name attrs ;
|
||||
C: <opener> opener
|
||||
|
@ -54,6 +57,9 @@ C: <public-id> public-id
|
|||
TUPLE: doctype-decl < directive name external-id internal-subset ;
|
||||
C: <doctype-decl> doctype-decl
|
||||
|
||||
TUPLE: notation-decl < directive name id ;
|
||||
C: <notation-decl> notation-decl
|
||||
|
||||
TUPLE: instruction text ;
|
||||
C: <instruction> instruction
|
||||
|
||||
|
|
|
@ -25,11 +25,12 @@ T{ capitalized-prolog f 1 6 "XmL" } "<?XmL version='1.0'?><x/>"
|
|||
xml-error-test
|
||||
T{ pre/post-content f "x" t } "x<y/>" xml-error-test
|
||||
T{ versionless-prolog f 1 8 } "<?xml?><x/>" xml-error-test
|
||||
T{ bad-instruction f 1 11 T{ instruction f "xsl" } }
|
||||
"<x><?xsl?></x>" xml-error-test
|
||||
T{ unclosed-quote f 1 13 } "<x value='/>" xml-error-test
|
||||
T{ bad-name f 1 3 "-" } "<-/>" xml-error-test
|
||||
T{ quoteless-attr f 1 10 } "<x value=3/>" xml-error-test
|
||||
T{ attr-w/< f 1 11 } "<x value='<'/>" xml-error-test
|
||||
T{ text-w/]]> f 1 6 } "<x>]]></x>" xml-error-test
|
||||
T{ duplicate-attr f 1 21 T{ name { space "" } { main "this" } } V{ "a" "b" } } "<x this='a' this='b'/>" xml-error-test
|
||||
T{ bad-cdata f 1 3 } "<![CDATA[]]><x/>" xml-error-test
|
||||
T{ bad-cdata f 1 7 } "<x/><![CDATA[]]>" xml-error-test
|
||||
T{ pre/post-content f "&" t } "&32;<x/>" xml-error-test
|
||||
|
|
|
@ -170,18 +170,6 @@ M: versionless-prolog summary ( obj -- str )
|
|||
"XML prolog lacks a version declaration" print
|
||||
] with-string-writer ;
|
||||
|
||||
TUPLE: bad-instruction < parsing-error instruction ;
|
||||
|
||||
: bad-instruction ( instruction -- * )
|
||||
\ bad-instruction parsing-error swap >>instruction throw ;
|
||||
|
||||
M: bad-instruction summary ( obj -- str )
|
||||
[
|
||||
dup call-next-method write
|
||||
"Misplaced processor instruction:" print
|
||||
instruction>> write-xml-chunk nl
|
||||
] with-string-writer ;
|
||||
|
||||
TUPLE: bad-directive < parsing-error dir ;
|
||||
|
||||
: bad-directive ( directive -- * )
|
||||
|
@ -286,9 +274,17 @@ TUPLE: duplicate-attr < parsing-error key values ;
|
|||
M: duplicate-attr summary
|
||||
call-next-method "\nDuplicate attribute" append ;
|
||||
|
||||
TUPLE: bad-cdata < parsing-error ;
|
||||
|
||||
: bad-cdata ( -- * )
|
||||
\ bad-cdata parsing-error throw ;
|
||||
|
||||
M: bad-cdata summary
|
||||
call-next-method "\nCDATA occurs before or after main tag" append ;
|
||||
|
||||
UNION: xml-parse-error
|
||||
multitags notags extra-attrs nonexist-ns bad-decl
|
||||
not-yes/no unclosed mismatched expected no-entity
|
||||
bad-prolog versionless-prolog capitalized-prolog bad-instruction
|
||||
bad-prolog versionless-prolog capitalized-prolog
|
||||
bad-directive bad-name unclosed-quote quoteless-attr
|
||||
attr-w/< text-w/]]> duplicate-attr ;
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
USING: accessors assocs combinators continuations fry generalizations
|
||||
io.pathnames kernel macros sequences stack-checker tools.test xml
|
||||
xml.utilities xml.writer ;
|
||||
xml.utilities xml.writer arrays ;
|
||||
IN: xml.tests.suite
|
||||
|
||||
TUPLE: test id uri sections description type ;
|
||||
TUPLE: xml-test id uri sections description type ;
|
||||
|
||||
: >test ( tag -- test )
|
||||
test new swap {
|
||||
: >xml-test ( tag -- test )
|
||||
xml-test new swap {
|
||||
[ "TYPE" swap at >>type ]
|
||||
[ "ID" swap at >>id ]
|
||||
[ "URI" swap at >>uri ]
|
||||
|
@ -15,7 +15,7 @@ TUPLE: test id uri sections description type ;
|
|||
} cleave ;
|
||||
|
||||
: parse-tests ( xml -- tests )
|
||||
"TEST" tags-named [ >test ] map ;
|
||||
"TEST" tags-named [ >xml-test ] map ;
|
||||
|
||||
: base "resource:basis/xml/tests/xmltest/" ;
|
||||
|
||||
|
@ -32,11 +32,22 @@ MACRO: drop-input ( quot -- newquot )
|
|||
: well-formed? ( uri -- answer )
|
||||
[ file>xml ] fails? "not-wf" "valid" ? ;
|
||||
|
||||
: run-test ( test -- )
|
||||
: test-quots ( test -- result quot )
|
||||
[ type>> '[ _ ] ]
|
||||
[ '[ _ uri>> base swap append-path well-formed? ] ] bi
|
||||
unit-test ;
|
||||
[ '[ _ uri>> base swap append-path well-formed? ] ] bi ;
|
||||
|
||||
: run-tests ( -- )
|
||||
: xml-tests ( -- tests )
|
||||
base "xmltest.xml" append-path file>xml
|
||||
parse-tests [ run-test ] each ;
|
||||
parse-tests [ test-quots 2array ] map ;
|
||||
|
||||
: run-xml-tests ( -- )
|
||||
xml-tests [ unit-test ] assoc-each ;
|
||||
|
||||
: works? ( result quot -- ? )
|
||||
[ first ] [ call ] bi* = ;
|
||||
|
||||
: partition-xml-tests ( -- successes failures )
|
||||
xml-tests [ first2 works? ] partition ;
|
||||
|
||||
: failing-valids ( -- tests )
|
||||
partition-xml-tests nip [ second first ] map [ type>> "valid" = ] filter ;
|
||||
|
|
|
@ -13,6 +13,8 @@ IN: xml.tokenize
|
|||
! A stack of hashtables
|
||||
SYMBOL: ns-stack
|
||||
|
||||
SYMBOL: depth
|
||||
|
||||
: attrs>ns ( attrs-alist -- hash )
|
||||
! this should check to make sure URIs are valid
|
||||
[
|
||||
|
@ -50,25 +52,37 @@ SYMBOL: ns-stack
|
|||
|
||||
! Parsing names
|
||||
|
||||
! version=1.0? is calculated once and passed around for efficiency
|
||||
: valid-name? ( str -- ? )
|
||||
[ f ] [
|
||||
version=1.0? swap {
|
||||
[ first name-start? ]
|
||||
[ rest-slice [ name-char? ] with all? ]
|
||||
} 2&&
|
||||
] if-empty ;
|
||||
|
||||
: assure-name ( str version=1.0? -- str )
|
||||
over {
|
||||
[ first name-start? ]
|
||||
[ rest-slice [ name-char? ] with all? ]
|
||||
} 2&& [ bad-name ] unless ;
|
||||
: prefixed-name ( str -- name/f )
|
||||
":" split dup length 2 = [
|
||||
[ [ valid-name? ] all? ]
|
||||
[ first2 f <name> ] bi and
|
||||
] [ drop f ] if ;
|
||||
|
||||
: (parse-name) ( start -- str )
|
||||
version=1.0?
|
||||
[ [ get-char name-char? not ] curry take-until append ]
|
||||
[ assure-name ] bi ;
|
||||
: interpret-name ( str -- name )
|
||||
dup prefixed-name [ ] [
|
||||
dup valid-name?
|
||||
[ <simple-name> ] [ bad-name ] if
|
||||
] ?if ;
|
||||
|
||||
: parse-name-starting ( start -- name )
|
||||
(parse-name) get-char CHAR: : =
|
||||
[ next "" (parse-name) ] [ "" swap ] if f <name> ;
|
||||
: take-name ( -- string )
|
||||
version=1.0? '[ _ get-char name-char? not ] take-until ;
|
||||
|
||||
: parse-name ( -- name )
|
||||
"" parse-name-starting ;
|
||||
take-name interpret-name ;
|
||||
|
||||
: parse-name-starting ( string -- name )
|
||||
take-name append interpret-name ;
|
||||
|
||||
: parse-simple-name ( -- name )
|
||||
take-name <simple-name> ;
|
||||
|
||||
! -- Parsing strings
|
||||
|
||||
|
@ -99,11 +113,15 @@ SYMBOL: ns-stack
|
|||
: assure-no-]]> ( circular -- )
|
||||
"]]>" sequence= [ text-w/]]> ] when ;
|
||||
|
||||
: parse-text ( -- string )
|
||||
3 f <array> <circular> '[
|
||||
_ [ push-circular ]
|
||||
[ nip assure-no-]]> ]
|
||||
[ drop CHAR: < = ] 2tri
|
||||
:: parse-text ( -- string )
|
||||
3 f <array> <circular> :> circ
|
||||
depth get zero? :> no-text [| char |
|
||||
char circ push-circular
|
||||
circ assure-no-]]>
|
||||
no-text [ char blank? char CHAR: < = or [
|
||||
char 1string t pre/post-content
|
||||
] unless ] when
|
||||
char CHAR: < =
|
||||
] parse-char ;
|
||||
|
||||
! Parsing tags
|
||||
|
@ -131,7 +149,7 @@ SYMBOL: ns-stack
|
|||
[ dup "\t\r\n" member? [ drop CHAR: \s ] when ] map ;
|
||||
|
||||
: parse-attr ( -- )
|
||||
parse-name CHAR: = expect
|
||||
parse-name pass-blank CHAR: = expect pass-blank
|
||||
t parse-quote* normalize-quot 2array , ;
|
||||
|
||||
: (middle-tag) ( -- )
|
||||
|
@ -148,9 +166,13 @@ SYMBOL: ns-stack
|
|||
[ (middle-tag) ] f make pass-blank
|
||||
assure-no-duplicates ;
|
||||
|
||||
: close ( -- )
|
||||
pass-blank CHAR: > expect ;
|
||||
|
||||
: end-tag ( name attrs-alist -- tag )
|
||||
tag-ns pass-blank get-char CHAR: / =
|
||||
[ pop-ns <contained> next ] [ <opener> ] if ;
|
||||
[ pop-ns <contained> next CHAR: > expect ]
|
||||
[ depth inc <opener> close ] if ;
|
||||
|
||||
: take-comment ( -- comment )
|
||||
"--" expect-string
|
||||
|
@ -159,6 +181,7 @@ SYMBOL: ns-stack
|
|||
CHAR: > expect ;
|
||||
|
||||
: take-cdata ( -- string )
|
||||
depth get zero? [ bad-cdata ] when
|
||||
"[CDATA[" expect-string "]]>" take-string ;
|
||||
|
||||
: take-word ( -- string )
|
||||
|
@ -173,19 +196,17 @@ SYMBOL: ns-stack
|
|||
: take-attlist-decl ( -- doctype-decl )
|
||||
take-decl-contents <attlist-decl> ;
|
||||
|
||||
: take-notation-decl ( -- notation-decl )
|
||||
take-decl-contents <notation-decl> ;
|
||||
|
||||
: take-until-one-of ( seps -- str sep )
|
||||
'[ get-char _ member? ] take-until get-char ;
|
||||
|
||||
: expect-> ( -- )
|
||||
pass-blank CHAR: > expect ;
|
||||
|
||||
: take-system-id ( -- system-id )
|
||||
parse-quote <system-id>
|
||||
expect-> ;
|
||||
parse-quote <system-id> close ;
|
||||
|
||||
: take-public-id ( -- public-id )
|
||||
parse-quote parse-quote <public-id>
|
||||
expect-> ;
|
||||
parse-quote parse-quote <public-id> close ;
|
||||
|
||||
DEFER: direct
|
||||
|
||||
|
@ -216,7 +237,7 @@ DEFER: direct
|
|||
{ CHAR: \s [
|
||||
pass-blank get-char CHAR: [ = [
|
||||
next take-internal-subset f swap
|
||||
expect->
|
||||
close
|
||||
] [
|
||||
" >" take-until-one-of {
|
||||
{ CHAR: \s [ (take-external-id) ] }
|
||||
|
@ -235,21 +256,22 @@ DEFER: direct
|
|||
} case ;
|
||||
|
||||
: associate-entity ( entity-name entity-def -- )
|
||||
swap extra-entities [ ?set-at ] change ;
|
||||
swap extra-entities get set-at ;
|
||||
|
||||
: take-entity-decl ( -- entity-decl )
|
||||
pass-blank get-char {
|
||||
{ CHAR: % [ next pass-blank take-entity-def ] }
|
||||
[ drop take-entity-def 2dup associate-entity ]
|
||||
} case
|
||||
expect-> <entity-decl> ;
|
||||
close <entity-decl> ;
|
||||
|
||||
: take-directive ( -- directive )
|
||||
take-word {
|
||||
take-name {
|
||||
{ "ELEMENT" [ take-element-decl ] }
|
||||
{ "ATTLIST" [ take-attlist-decl ] }
|
||||
{ "DOCTYPE" [ take-doctype-decl ] }
|
||||
{ "ENTITY" [ take-entity-decl ] }
|
||||
{ "NOTATION" [ take-notation-decl ] }
|
||||
[ bad-directive ]
|
||||
} case ;
|
||||
|
||||
|
@ -307,28 +329,27 @@ SYMBOL: string-input?
|
|||
dup prolog-data set ;
|
||||
|
||||
: instruct ( -- instruction )
|
||||
"" (parse-name) dup "xml" =
|
||||
[ drop parse-prolog ] [
|
||||
dup >lower "xml" =
|
||||
[ capitalized-prolog ]
|
||||
[ "?>" take-string append <instruction> ] if
|
||||
] if ;
|
||||
take-name {
|
||||
{ [ dup "xml" = ] [ drop parse-prolog ] }
|
||||
{ [ dup >lower "xml" = ] [ capitalized-prolog ] }
|
||||
{ [ dup valid-name? not ] [ bad-name ] }
|
||||
[ "?>" take-string append <instruction> ]
|
||||
} cond ;
|
||||
|
||||
: make-tag ( -- tag )
|
||||
{
|
||||
{ [ get-char dup CHAR: ! = ] [ drop next direct ] }
|
||||
{ [ CHAR: ? = ] [ next instruct ] }
|
||||
[
|
||||
start-tag [ dup add-ns pop-ns <closer> ]
|
||||
start-tag [ dup add-ns pop-ns <closer> depth dec close ]
|
||||
[ middle-tag end-tag ] if
|
||||
CHAR: > expect
|
||||
]
|
||||
} cond ;
|
||||
|
||||
! Autodetecting encodings
|
||||
|
||||
: continue-make-tag ( str -- tag )
|
||||
parse-name-starting middle-tag end-tag CHAR: > expect ;
|
||||
parse-name-starting middle-tag end-tag ;
|
||||
|
||||
: start-utf16le ( -- tag )
|
||||
utf16le decode-input-if
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays io io.encodings.binary io.files
|
||||
io.streams.string kernel namespaces sequences xml.state-parser strings
|
||||
xml.backend xml.data xml.errors xml.tokenize ascii
|
||||
xml.writer ;
|
||||
xml.backend xml.data xml.errors xml.tokenize ascii xml.entities
|
||||
xml.writer assocs ;
|
||||
IN: xml
|
||||
|
||||
! -- Overall parser with data tree
|
||||
|
@ -25,11 +25,6 @@ M: prolog process
|
|||
xml-stack get V{ { f V{ } } } =
|
||||
[ bad-prolog ] unless drop ;
|
||||
|
||||
M: instruction process
|
||||
xml-stack get length 1 =
|
||||
[ bad-instruction ] unless
|
||||
add-child ;
|
||||
|
||||
M: directive process
|
||||
xml-stack get dup length 1 =
|
||||
swap first second [ tag? ] contains? not and
|
||||
|
@ -53,7 +48,9 @@ M: closer process
|
|||
<tag> add-child ;
|
||||
|
||||
: init-xml-stack ( -- )
|
||||
V{ } clone xml-stack set f push-xml ;
|
||||
V{ } clone xml-stack set
|
||||
extra-entities [ H{ } assoc-like ] change
|
||||
f push-xml ;
|
||||
|
||||
: default-prolog ( -- prolog )
|
||||
"1.0" "UTF-8" f <prolog> ;
|
||||
|
@ -150,11 +147,12 @@ TUPLE: pull-xml scope ;
|
|||
] state-parse ;
|
||||
|
||||
: read-xml ( stream -- xml )
|
||||
#! Produces a tree of XML nodes
|
||||
(read-xml-chunk) make-xml-doc ;
|
||||
0 depth
|
||||
[ (read-xml-chunk) make-xml-doc ] with-variable ;
|
||||
|
||||
: read-xml-chunk ( stream -- seq )
|
||||
(read-xml-chunk) nip ;
|
||||
1 depth
|
||||
[ (read-xml-chunk) nip ] with-variable ;
|
||||
|
||||
: string>xml ( string -- xml )
|
||||
<string-reader> read-xml ;
|
||||
|
|
Loading…
Reference in New Issue