refactor xml parser

db4
Doug Coleman 2008-05-02 17:12:09 -05:00
parent b6d8521c8c
commit a77ba70706
4 changed files with 168 additions and 167 deletions

View File

@ -0,0 +1,6 @@
! Copyright (C) 2008 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
IN: xml.backend
! A stack of { tag children } pairs
SYMBOL: xml-stack

View File

@ -1,150 +1,179 @@
! Copyright (C) 2005, 2006 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: xml.data xml.writer kernel generic io prettyprint math
debugger sequences state-parser ;
debugger sequences state-parser accessors inspector
namespaces io.streams.string xml.backend ;
IN: xml.errors
TUPLE: no-entity thing ;
: <no-entity> ( string -- error )
{ set-no-entity-thing } no-entity construct-parsing-error ;
M: no-entity error.
dup parsing-error.
"Entity does not exist: &" write no-entity-thing write ";" print ;
TUPLE: xml-string-error string ; ! this should not exist
: <xml-string-error> ( string -- xml-string-error )
{ set-xml-string-error-string }
xml-string-error construct-parsing-error ;
M: xml-string-error error.
dup parsing-error.
xml-string-error-string print ;
TUPLE: mismatched open close ;
: <mismatched>
{ set-mismatched-open set-mismatched-close }
mismatched construct-parsing-error ;
M: mismatched error.
dup parsing-error.
"Mismatched tags" print
"Opening tag: <" write dup mismatched-open print-name ">" print
"Closing tag: </" write mismatched-close print-name ">" print ;
TUPLE: unclosed tags ;
! <unclosed> is ( -- unclosed ), see presentation.factor
M: unclosed error.
"Unclosed tags" print
"Tags: " print
unclosed-tags [ " <" write print-name ">" print ] each ;
TUPLE: bad-uri string ;
: <bad-uri> ( string -- bad-uri )
{ set-bad-uri-string } bad-uri construct-parsing-error ;
M: bad-uri error.
dup parsing-error.
"Bad URI:" print bad-uri-string . ;
TUPLE: nonexist-ns name ;
: <nonexist-ns> ( name-string -- nonexist-ns )
{ set-nonexist-ns-name }
nonexist-ns construct-parsing-error ;
M: nonexist-ns error.
dup parsing-error.
"Namespace " write nonexist-ns-name write " has not been declared" print ;
TUPLE: unopened ; ! this should give which tag was unopened
: <unopened> ( -- unopened )
{ } unopened construct-parsing-error ;
M: unopened error.
parsing-error.
"Closed an unopened tag" print ;
TUPLE: not-yes/no text ;
: <not-yes/no> ( text -- not-yes/no )
{ set-not-yes/no-text } not-yes/no construct-parsing-error ;
M: not-yes/no error.
dup parsing-error.
"standalone must be either yes or no, not \"" write
not-yes/no-text write "\"." print ;
TUPLE: extra-attrs attrs ; ! this should actually print the names
: <extra-attrs> ( attrs -- extra-attrs )
{ set-extra-attrs-attrs }
extra-attrs construct-parsing-error ;
M: extra-attrs error.
dup parsing-error.
"Extra attributes included in xml version declaration:" print
extra-attrs-attrs . ;
TUPLE: bad-version num ;
: <bad-version>
{ set-bad-version-num }
bad-version construct-parsing-error ;
M: bad-version error.
"XML version must be \"1.0\" or \"1.1\". Version here was " write
bad-version-num . ;
TUPLE: notags ;
C: <notags> notags
M: notags error.
drop "XML document lacks a main tag" print ;
TUPLE: multitags ;
C: <multitags> multitags
M: multitags error.
drop "XML document contains multiple main tags" print ;
TUPLE: bad-prolog prolog ;
: <bad-prolog> ( prolog -- bad-prolog )
{ set-bad-prolog-prolog }
bad-prolog construct-parsing-error ;
M: bad-prolog error.
dup parsing-error.
"Misplaced XML prolog" print
bad-prolog-prolog write-prolog nl ;
TUPLE: capitalized-prolog name ;
: <capitalized-prolog> ( name -- capitalized-prolog )
{ set-capitalized-prolog-name }
capitalized-prolog construct-parsing-error ;
M: capitalized-prolog error.
dup parsing-error.
"XML prolog name was partially or totally capitalized, using" print
"<?" write capitalized-prolog-name write "...?>" write
" instead of <?xml...?>" print ;
M: multitags summary ( obj -- str )
drop "XML document contains multiple main tags" ;
TUPLE: pre/post-content string pre? ;
C: <pre/post-content> pre/post-content
M: pre/post-content error.
"The text string:" print
dup pre/post-content-string .
"was used " write
pre/post-content-pre? "before" "after" ? write
" the main tag." print ;
M: pre/post-content summary ( obj -- str )
[
"The text string:" print
dup string>> .
"was used " write
pre?>> "before" "after" ? write
" the main tag." print
] with-string-writer ;
TUPLE: versionless-prolog ;
TUPLE: no-entity < parsing-error thing ;
: <no-entity> ( string -- error )
\ no-entity parsing-error swap >>thing ;
M: no-entity summary ( obj -- str )
[
dup call-next-method write
"Entity does not exist: &" write thing>> write ";" print
] with-string-writer ;
TUPLE: xml-string-error < parsing-error string ; ! this should not exist
: <xml-string-error> ( string -- xml-string-error )
\ xml-string-error parsing-error swap >>string ;
M: xml-string-error summary ( obj -- str )
[
dup call-next-method write
string>> print
] with-string-writer ;
TUPLE: mismatched < parsing-error open close ;
: <mismatched>
\ mismatched parsing-error swap >>close swap >>open ;
M: mismatched summary ( obj -- str )
[
dup call-next-method write
"Mismatched tags" print
"Opening tag: <" write dup open>> print-name ">" print
"Closing tag: </" write close>> print-name ">" print
] with-string-writer ;
TUPLE: unclosed < parsing-error tags ;
: <unclosed> ( -- unclosed )
unclosed parsing-error
xml-stack get rest-slice [ first opener-name ] map >>tags ;
M: unclosed summary ( obj -- str )
[
dup call-next-method write
"Unclosed tags" print
"Tags: " print
tags>> [ " <" write print-name ">" print ] each
] with-string-writer ;
TUPLE: bad-uri < parsing-error string ;
: <bad-uri> ( string -- bad-uri )
\ bad-uri parsing-error swap >>string ;
M: bad-uri summary ( obj -- str )
[
dup call-next-method write
"Bad URI:" print string>> .
] with-string-writer ;
TUPLE: nonexist-ns < parsing-error name ;
: <nonexist-ns> ( name-string -- nonexist-ns )
\ nonexist-ns parsing-error swap >>name ;
M: nonexist-ns summary ( obj -- str )
[
dup call-next-method write
"Namespace " write name>> write " has not been declared" print
] with-string-writer ;
TUPLE: unopened < parsing-error ; ! this should give which tag was unopened
: <unopened> ( -- unopened )
\ unopened parsing-error ;
M: unopened summary ( obj -- str )
[
call-next-method write
"Closed an unopened tag" print
] with-string-writer ;
TUPLE: not-yes/no < parsing-error text ;
: <not-yes/no> ( text -- not-yes/no )
\ not-yes/no parsing-error swap >>text ;
M: not-yes/no summary ( obj -- str )
[
dup call-next-method write
"standalone must be either yes or no, not \"" write
text>> write "\"." print
] with-string-writer ;
! this should actually print the names
TUPLE: extra-attrs < parsing-error attrs ;
: <extra-attrs> ( attrs -- extra-attrs )
\ extra-attrs parsing-error swap >>attrs ;
M: extra-attrs summary ( obj -- str )
[
dup call-next-method write
"Extra attributes included in xml version declaration:" print
attrs>> .
] with-string-writer ;
TUPLE: bad-version < parsing-error num ;
: <bad-version>
\ bad-version parsing-error swap >>num ;
M: bad-version summary ( obj -- str )
[
"XML version must be \"1.0\" or \"1.1\". Version here was " write
num>> .
] with-string-writer ;
TUPLE: notags < parsing-error ;
: <notags>
\ notags parsing-error ;
M: notags summary ( obj -- str )
drop "XML document lacks a main tag" ;
TUPLE: bad-prolog < parsing-error prolog ;
: <bad-prolog> ( prolog -- bad-prolog )
\ bad-prolog parsing-error swap >>prolog ;
M: bad-prolog summary ( obj -- str )
[
dup call-next-method write
"Misplaced XML prolog" print
prolog>> write-prolog nl
] with-string-writer ;
TUPLE: capitalized-prolog < parsing-error name ;
: <capitalized-prolog> ( name -- capitalized-prolog )
\ capitalized-prolog parsing-error swap >>name ;
M: capitalized-prolog summary ( obj -- str )
[
dup call-next-method write
"XML prolog name was partially or totally capitalized, using" print
"<?" write name>> write "...?>" write
" instead of <?xml...?>" print
] with-string-writer ;
TUPLE: versionless-prolog < parsing-error ;
: <versionless-prolog> ( -- versionless-prolog )
{ } versionless-prolog construct-parsing-error ;
M: versionless-prolog error.
parsing-error.
"XML prolog lacks a version declaration" print ;
\ versionless-prolog parsing-error ;
M: versionless-prolog summary ( obj -- str )
[
call-next-method write
"XML prolog lacks a version declaration" print
] with-string-writer ;
TUPLE: bad-instruction inst ;
TUPLE: bad-instruction < parsing-error instruction ;
: <bad-instruction> ( instruction -- bad-instruction )
{ set-bad-instruction-inst }
bad-instruction construct-parsing-error ;
M: bad-instruction error.
dup parsing-error.
"Misplaced processor instruction:" print
bad-instruction-inst write-item nl ;
\ bad-instruction parsing-error swap >>instruction ;
M: bad-instruction summary ( obj -- str )
[
dup call-next-method write
"Misplaced processor instruction:" print
bad-instruction-inst write-item nl
] with-string-writer ;
TUPLE: bad-directive dir ;
TUPLE: bad-directive < parsing-error dir ;
: <bad-directive> ( directive -- bad-directive )
{ set-bad-directive-dir }
bad-directive construct-parsing-error ;
M: bad-directive error.
dup parsing-error.
"Misplaced directive:" print
bad-directive-dir write-item nl ;
\ bad-directive parsing-error swap >>dir ;
M: bad-directive summary ( obj -- str )
[
dup call-next-method write
"Misplaced directive:" print
bad-directive-dir write-item nl
] with-string-writer ;
UNION: xml-parse-error multitags notags extra-attrs nonexist-ns
not-yes/no unclosed mismatched xml-string-error expected no-entity

View File

@ -1,28 +0,0 @@
USING: continuations xml xml.errors tools.test kernel arrays xml.data state-parser quotations ;
IN: xml.tests
: xml-error-test ( expected-error xml-string -- )
[ string>xml ] curry swap [ = ] curry must-fail-with ;
T{ no-entity T{ parsing-error f 1 10 } "nbsp" } "<x>&nbsp;</x>" xml-error-test
T{ mismatched T{ parsing-error f 1 8 } T{ name f "" "x" "" } T{ name f "" "y" "" }
} "<x></y>" xml-error-test
T{ unclosed f V{ T{ name f "" "x" "" } } } "<x>" xml-error-test
T{ nonexist-ns T{ parsing-error f 1 5 } "x" } "<x:y/>" xml-error-test
T{ unopened T{ parsing-error f 1 5 } } "</x>" xml-error-test
T{ not-yes/no T{ parsing-error f 1 41 } "maybe" } "<?xml version='1.0' standalone='maybe'?><x/>" xml-error-test
T{ extra-attrs T{ parsing-error f 1 32 } V{ T{ name f "" "foo" f } }
} "<?xml version='1.1' foo='bar'?><x/>" xml-error-test
T{ bad-version T{ parsing-error f 1 28 } "5 million" } "<?xml version='5 million'?><x/>" xml-error-test
T{ notags f } "" xml-error-test
T{ multitags f } "<x/><y/>" xml-error-test
T{ bad-prolog T{ parsing-error f 1 26 } T{ prolog f "1.0" "UTF-8" f }
} "<x/><?xml version='1.0'?>" xml-error-test
T{ capitalized-prolog T{ parsing-error f 1 6 } "XmL" } "<?XmL version='1.0'?><x/>"
xml-error-test
T{ pre/post-content f "x" t } "x<y/>" xml-error-test
T{ versionless-prolog T{ parsing-error f 1 8 } } "<?xml?><x/>" xml-error-test
T{ bad-instruction T{ parsing-error f 1 11 } T{ instruction f "xsl" }
} "<x><?xsl?></x>" xml-error-test
T{ bad-directive T{ parsing-error f 1 15 } T{ directive f "DOCTYPE" }
} "<x/><!DOCTYPE>" xml-error-test

View File

@ -3,18 +3,12 @@
USING: io io.streams.string io.files kernel math namespaces
prettyprint sequences arrays generic strings vectors
xml.char-classes xml.data xml.errors xml.tokenize xml.writer
xml.utilities state-parser assocs ascii io.encodings.utf8 ;
xml.utilities state-parser assocs ascii io.encodings.utf8
accessors xml.backend ;
IN: xml
! -- Overall parser with data tree
! A stack of { tag children } pairs
SYMBOL: xml-stack
: <unclosed> ( -- unclosed )
xml-stack get rest-slice [ first opener-name ] map
{ set-unclosed-tags } unclosed construct ;
: add-child ( object -- )
xml-stack get peek second push ;