diff --git a/contrib/xml-rpc/xml-rpc.facts b/contrib/xml-rpc/xml-rpc.facts index d828849281..cc20e988cc 100644 --- a/contrib/xml-rpc/xml-rpc.facts +++ b/contrib/xml-rpc/xml-rpc.facts @@ -21,7 +21,7 @@ HELP: base64 HELP: { $values { "name" "a string" } { "params" "a sequence" } } -{ $description "creates a tuple reprsenting a method call which can be translated using" { $code send-rpc } "into an XML-RPC document" } +{ $description "creates a tuple reprsenting a method call which can be translated using send-rpc into an XML-RPC document" } { $see-also rpc-method } ; HELP: rpc-method @@ -52,7 +52,7 @@ HELP: post-rpc { $description "posts an XML-RPC document to the specified URL, receives the response and parses it as XML-RPC, returning the tuple" } ; ARTICLE: { "xml-rpc" "intro" } "XML-RPC" - "This is the XML-RPC library. XML-RPC is used instead of SOAP because it is far simpler and easier to use for most tasks. The library was implemented by Daniel Ehrenberg." + "This is the XML-RPC library. XML-RPC is used instead of SOAP because it is far simpler and easier to use for most tasks. The library was implemented by Daniel Ehrenberg. Together with XML, this is part of the F2EE framework." $terpri "The most important words that this library implements are:" { $subsection send-rpc } diff --git a/contrib/xml/example2.factor b/contrib/xml/example2.factor new file mode 100644 index 0000000000..f648ef50ae --- /dev/null +++ b/contrib/xml/example2.factor @@ -0,0 +1,36 @@ +IN: ref-template +USING: kernel xml sequences hashtables tools io arrays namespaces generic ; + +SYMBOL: ref-table + +: replace-ref ( ref -- object ) + reference-name ref-table get hash call ; + +: r-ref-string ( xml-string -- xml-string ) + xml-string-array [ + dup reference? [ replace-ref ] when + ] map ; + +GENERIC: (r-ref) ( xml -- object ) +M: any-tag (r-ref) + dup tag-props dup [ + dup [ r-ref-string swap set ] hash-each + ] bind over set-tag-props ; +M: reference (r-ref) + replace-ref ; +M: object (r-ref) ; + +: replace-refs ( xml -- xml ) + [ (r-ref) ] xml-map ; + +! Example + +: test-refs + H{ + { "foo" [ "foo" ] } + { "bar" [ [ .s ] string-out ] } + { "baz" [ "" string>xml delegate ] } + } ref-table set + "%foo;%bar;%baz;" string>xml + replace-refs ; + diff --git a/contrib/xml/parser.factor b/contrib/xml/parser.factor index 8002819dae..b3cb4a46ad 100644 --- a/contrib/xml/parser.factor +++ b/contrib/xml/parser.factor @@ -1,4 +1,4 @@ -! Copyright (C) 2005, 2006 Daniel Ehrenberg +! --> Copyright (C) 2005, 2006 Daniel Ehrenberg ! See http://factorcode.org/license.txt for BSD license. IN: xml USING: errors hashtables io kernel math namespaces prettyprint sequences @@ -13,72 +13,79 @@ TUPLE: instruction text ; : start-tag ( -- name ? ) #! Outputs the name and whether this is a closing tag - char CHAR: / = dup [ incr-spot ] when + get-char CHAR: / = dup [ incr-spot ] when parse-name swap ; -: (parse-quot) ( ch sbuf -- ) - char { - { [ dup not ] [ "File ended in quote" throw ] } - { [ 3dup nip = ] [ drop >string , drop incr-spot ] } - { [ dup CHAR: & = ] [ drop parse-entity (parse-quot) ] } - { [ dup CHAR: % = ] [ drop parse-reference (parse-quot) ] } - { [ t ] [ parsed-ch (parse-quot) ] } +: (parse-quot) ( ch -- ) + ! The similarities with (parse-text) should be factored out + get-char { + { [ dup not ] + [ "File ended in quote" throw ] } + { [ 2dup = ] + [ 2drop end-record , incr-spot ] } + { [ dup CHAR: & = ] + [ drop parse-entity (parse-quot) ] } + { [ CHAR: % = ] [ parse-reference (parse-quot) ] } + { [ t ] [ incr-spot (parse-quot) ] } } cond ; : parse-quot ( ch -- array ) - [ SBUF" " clone (parse-quot) ] { } make ; + [ new-record (parse-quot) ] { } make ; : parse-prop-value ( -- seq ) - char dup "'\"" member? [ + get-char dup "'\"" member? [ incr-spot parse-quot ] [ "Attribute lacks quote" throw ] if ; : parse-prop ( -- ) - parse-name pass-blank CHAR: = expect pass-blank - parse-prop-value swap set ; + [ parse-name ] with-scope + pass-blank CHAR: = expect pass-blank + [ parse-prop-value ] with-scope + swap set ; : (middle-tag) ( -- ) - pass-blank char name-start-char? + pass-blank get-char name-start-char? [ parse-prop (middle-tag) ] when ; : middle-tag ( -- hash ) [ (middle-tag) ] make-hash pass-blank ; : end-tag ( string hash -- tag ) - pass-blank char CHAR: / = + pass-blank get-char CHAR: / = [ incr-spot ] [ ] if ; : skip-comment ( -- comment ) "--" expect-string - "--" take-until-string + "--" take-string CHAR: > expect ; : cdata ( -- string ) - "[CDATA[" expect-string "]]>" take-until-string ; + "[CDATA[" expect-string "]]>" take-string ; : directive ( -- object ) { { [ "--" string-matches? ] [ skip-comment ] } { [ "[CDATA[" string-matches? ] [ cdata ] } - { [ t ] [ ">" take-until-string ] } + { [ t ] [ CHAR: > take-char ] } } cond ; : instruction ( -- instruction ) ! this should make sure the name doesn't include 'xml' - "?>" take-until-string ; + "?>" take-string ; : make-tag ( -- tag/f ) CHAR: < expect - { { [ char dup CHAR: ! = ] [ drop incr-spot directive ] } + { { [ get-char dup CHAR: ! = ] [ drop incr-spot directive ] } { [ CHAR: ? = ] [ incr-spot instruction ] } { [ t ] [ start-tag [ ] [ middle-tag end-tag ] if pass-blank CHAR: > expect - ] } } cond ; + ] } + } cond ; ! -- Overall parser with data tree @@ -234,9 +241,18 @@ M: extra-attrs error. throw ] unless concat ; +TUPLE: bad-version num ; +M: bad-version error. + "XML version must be \"1.0\" or \"1.1\". Version here was " write + bad-version-num . ; + +: good-version ( version -- version ) + dup { "1.0" "1.1" } member? [ throw ] unless ; + : prolog-attrs ( hash -- ) T{ name f "" "version" f } over hash [ - concat-strings prolog-data get set-prolog-version + concat-strings good-version + prolog-data get set-prolog-version ] when* T{ name f "" "encoding" f } over hash [ concat-strings prolog-data get set-prolog-encoding @@ -253,10 +269,14 @@ M: extra-attrs error. dup assure-no-extra prolog-attrs ] when ; -: init-xml ( string -- ) - code set { 0 1 1 } clone spot set +: init-xml ( stream -- ) + stdio set + { 0 0 0 "" } clone spot set + f record set f now-recording? set + incr-spot "1.0" "iso-8859-1" f prolog-data set - init-xml-stack init-ns-stack ; + init-xml-stack + init-ns-stack ; UNION: any-tag tag contained-tag ; @@ -277,9 +297,9 @@ M: multitags error. : (string>xml) ( -- ) parse-text process - more? [ make-tag process (string>xml) ] when ; inline + get-char [ make-tag process (string>xml) ] when ; -: string>xml ( string -- xml-doc ) +: stream>xml ( stream -- xml-doc ) #! Produces a tree of XML nodes [ init-xml @@ -290,5 +310,8 @@ M: multitags error. make-xml-doc ] with-scope ; +: string>xml ( string -- xml-doc ) + stream>xml ; + UNION: xml-parse-error multitags notags xml-error extra-attrs nonexist-ns not-yes/no unclosed mismatched xml-string-error expected no-entity ; diff --git a/contrib/xml/test.factor b/contrib/xml/test.factor index 7b0c27d695..30e9276907 100644 --- a/contrib/xml/test.factor +++ b/contrib/xml/test.factor @@ -8,7 +8,7 @@ USING: kernel xml test io namespaces hashtables sequences ! This is insufficient SYMBOL: xml-file [ ] [ "contrib/xml/test.xml" resource-path - contents string>xml xml-file set ] unit-test + stream>xml xml-file set ] unit-test [ "1.0" ] [ xml-file get xml-doc-prolog prolog-version ] unit-test [ f ] [ xml-file get xml-doc-prolog prolog-standalone ] unit-test [ "a" ] [ xml-file get name-space ] unit-test diff --git a/contrib/xml/tokenizer.factor b/contrib/xml/tokenizer.factor index a9fdc86f3f..54de4d71ff 100644 --- a/contrib/xml/tokenizer.factor +++ b/contrib/xml/tokenizer.factor @@ -4,16 +4,44 @@ IN: xml USING: errors hashtables io kernel math namespaces prettyprint sequences tools generic strings char-classes ; -SYMBOL: code #! Source code -SYMBOL: spot #! { index line column } -: get-index ( -- index ) spot get first ; -: set-index ( index -- ) 0 spot get set-nth ; +! -- Low-level parsing +! Code stored in stdio +! Spot is composite so it won't be lost in sub-scopes +SYMBOL: spot #! { char line column line-str } +: get-char ( -- char ) spot get first ; +: set-char ( char -- ) 0 spot get set-nth ; : get-line ( -- line ) spot get second ; : set-line ( line -- ) 1 spot get set-nth ; : get-column ( -- column ) spot get third ; : set-column ( column -- ) 2 spot get set-nth ; +: get-line-str ( -- line-str ) 3 spot get nth ; +: set-line-str ( line-str -- ) 3 spot get set-nth ; SYMBOL: prolog-data +! Record is composite so it changes in nested scopes +SYMBOL: record ! string +SYMBOL: now-recording? ! t/f +: recording? ( -- t/f ) now-recording? get ; +: get-record ( -- sbuf ) record get ; + +: push-record ( ch -- ) + get-record push ; +: new-record ( -- ) + SBUF" " clone record set + t now-recording? set + get-char [ push-record ] when* ; +: unrecord ( -- ) + record get pop* ; + +: (end-record) ( -- sbuf ) + f now-recording? set + get-record ; +: end-record* ( n -- string ) + (end-record) tuck length swap - + head-slice >string ; +: end-record ( -- string ) + 1 end-record* ; + ! -- Error reporting TUPLE: xml-error line column ; @@ -57,72 +85,79 @@ M: xml-string-error error. ! -- Basic utility words -: more? ( -- ? ) - #! Return t if spot is not at the end of code - code get length get-index = not ; +: readln-nb ( -- string ) + ! read a non-blank line + readln dup "" = [ drop readln-nb ] when ; -: char ( -- char/f ) - more? [ get-index code get nth ] [ f ] if ; +: (incr-spot) ( -- char ) + get-column get-line-str 2dup length 1- < [ + >r 1+ dup set-column r> nth + ] [ + 2drop 0 set-column + readln-nb dup set-line-str + [ first ] [ f ] if* + get-line 1+ set-line + ] if ; : incr-spot ( -- ) #! Increment spot. - get-index 1+ set-index char "\n\r" member? - [ 0 set-column get-line 1+ set-line ] - [ get-column 1+ set-column ] if ; + get-char [ + "XML document unexpectedly ended" + throw + ] unless + (incr-spot) dup set-char + recording? over and [ push-record ] [ drop ] if ; : skip-until ( quot -- ) - #! quot: ( char -- ? ) - more? [ - char swap [ call ] keep swap [ drop ] [ - incr-spot skip-until + #! quot: ( -- ? ) + get-char [ + [ call ] keep swap [ drop ] [ + incr-spot skip-until ] if - ] [ drop ] if ; inline + ] [ 2drop ] if ; inline -: take-until ( quot -- string | quot: char -- ? ) +: take-until ( quot -- string | quot: -- ? ) #! Take the substring of a string starting at spot #! from code until the quotation given is true and #! advance spot to after the substring. - get-index >r skip-until r> - get-index code get subseq ; inline + new-record skip-until end-record ; inline + +: take-char ( ch -- string ) + [ dup get-char = ] take-until nip ; : pass-blank ( -- ) #! Advance code past any whitespace, including newlines - [ blank? not ] skip-until ; + [ get-char blank? not ] skip-until ; : string-matches? ( string -- ? ) - get-index dup pick length + code get - 2dup length > [ 3drop drop f ] [ sequence= ] if ; + dup length get-column tuck + + dup get-line-str length <= + [ get-line-str sequence= ] + [ 3drop f ] if ; -: (take-until-string) ( string -- n ) - more? [ - dup string-matches? [ - drop get-index - ] [ - incr-spot (take-until-string) - ] if - ] [ "Missing closing token" throw ] if ; - -: take-until-string ( string -- string ) - [ >r get-index r> (take-until-string) code get subseq ] keep - length get-index + set-index ; +: take-string ( match -- string ) + ! match must not contain a newline + [ dup string-matches? ] take-until + get-line-str + [ "Missing closing token" throw ] unless + swap length [ incr-spot ] times ; ! -- Parsing strings : expect ( ch -- ) - char 2dup = [ 2drop ] [ + get-char 2dup = [ 2drop ] [ >r ch>string r> ch>string throw ] if incr-spot ; : expect-string* ( num -- ) - #! only skips string + #! only skips string, and only for when you're sure the string is there [ incr-spot ] times ; : expect-string ( string -- ) - >r get-index r> t over [ char incr-spot = and ] each [ - 2drop - ] [ - swap get-index code get subseq throw - ] if ; + ! TODO: add error if this isn't long enough + new-record dup length [ incr-spot ] times + end-record 2dup = [ 2drop ] + [ throw ] if ; TUPLE: prolog version encoding standalone ; ! part of xml-doc, see parser @@ -143,42 +178,47 @@ TUPLE: prolog version encoding standalone ; ! part of xml-doc, see parser TUPLE: entity name ; -: parsed-ch ( sbuf ch -- sbuf ) over push incr-spot ; +: (parse-entity) ( string -- ) + dup entities hash [ push-record ] [ + prolog-data get prolog-standalone + [ throw ] [ + end-record , , new-record + ] if + ] ?if ; -: parse-entity ( sbuf -- sbuf ) - incr-spot [ CHAR: ; = ] take-until "#" ?head [ - "x" ?head 16 10 ? base> parsed-ch - ] [ - dup entities hash [ parsed-ch ] [ - prolog-data get prolog-standalone - [ throw ] [ - >r >string , r> , incr-spot - SBUF" " clone - ] if - ] ?if - ] if ; +: parse-entity ( -- ) + unrecord + ! the following line is in a scope to shield this + ! word from the record-altering side effects of + ! take-until. + [ CHAR: ; take-char ] with-scope + "#" ?head [ + "x" ?head 16 10 ? base> + push-record incr-spot + ] [ (parse-entity) ] if ; TUPLE: reference name ; -: parse-reference ( sbuf -- sbuf ) - >string , incr-spot [ CHAR: ; = ] take-until - , SBUF" " clone incr-spot ; +: parse-reference ( -- ) + unrecord end-record , CHAR: ; take-char + , new-record incr-spot ; -: (parse-text) ( sbuf -- ) - char { - { [ dup not ] [ drop >string , ] } ! should this be an error? - { [ dup CHAR: < = ] [ drop >string , ] } +: (parse-text) ( -- ) + get-char { + { [ dup not ] + [ drop 0 end-record* , ] } + { [ dup CHAR: < = ] [ drop end-record , ] } { [ dup CHAR: & = ] [ drop parse-entity (parse-text) ] } - { [ dup CHAR: % = ] - [ drop parse-reference (parse-text) ] } - { [ t ] [ parsed-ch (parse-text) ] } + { [ CHAR: % = ] + [ parse-reference (parse-text) ] } + { [ t ] [ incr-spot (parse-text) ] } } cond ; TUPLE: xml-string array ; : parse-text ( -- array ) - [ SBUF" " clone (parse-text) ] { } make ; + [ new-record (parse-text) ] { } make ; ! -- Parsing tags @@ -199,12 +239,12 @@ C: name ( space tag -- name ) [ 1.0name-char? ] [ 1.1name-char? ] if ; : (parse-name) ( -- str ) - char dup name-start-char? [ - incr-spot ch>string [ name-char? not ] take-until append + new-record get-char name-start-char? [ + [ get-char name-char? not ] skip-until end-record ] [ "Malformed name" throw ] if ; -: parse-name ( -- str-name ) - (parse-name) char CHAR: : = +: parse-name ( -- name ) + (parse-name) get-char CHAR: : = [ incr-spot (parse-name) ] [ "" swap ] if ; diff --git a/contrib/xml/writer.factor b/contrib/xml/writer.factor index ac10b7022c..5f470b4395 100644 --- a/contrib/xml/writer.factor +++ b/contrib/xml/writer.factor @@ -19,6 +19,8 @@ M: entity write-str-elem M: reference write-str-elem CHAR: % write1 reference-name write CHAR: ; write1 ; +UNION: str-elem string entity reference ; + : print-name ( name -- ) dup name-space dup "" = [ drop ] [ write CHAR: : write1 ] if @@ -32,7 +34,7 @@ M: reference write-str-elem GENERIC: (xml>string) ( object -- ) -M: object (xml>string) ! string element +M: str-elem (xml>string) ! string element write-str-elem ; M: contained-tag (xml>string) diff --git a/contrib/xml/xml.facts b/contrib/xml/xml.facts index 45d0bd3b1d..6d243ae264 100644 --- a/contrib/xml/xml.facts +++ b/contrib/xml/xml.facts @@ -12,15 +12,22 @@ HELP: xml>string { $values { "xml-doc" "an xml document" } { "string" "a string" } } { $description "converts an xml document into a string" } { $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } -{ $see-also string>xml xml-reprint } ; +{ $see-also string>xml xml-reprint write-xml } ; HELP: xml-parse-error -{ $description "the exception class that all parsing errors in XML documents are in." } ; +{ $class-description "the exception class that all parsing errors in XML documents are in." } ; HELP: xml-reprint -{ $values { "in" "a string of XML" } { "out" "reprinted XML" } } +{ $values { "string" "a string of XML" } { "string" "reprinted XML" } } { $description "parses XML and converts it back into a string, for testing purposes" } -{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ; +{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } +{ $see-also write-xml xml>string string>xml } ; + +HELP: write-xml +{ $values { "xml-doc" "an XML document" } } +{ $description "prints the contents of an XML document to stdio" } +{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } +{ $see-also xml>string xml-reprint } ; HELP: PROCESS: { $syntax "PROCESS: word" } @@ -139,13 +146,15 @@ HELP: ( version encoding standalone -- prolog ) { $see-also prolog } ; ARTICLE: { "xml" "intro" } "XML" - "The XML module attempts to implement the XML 1.1 standard, converting strings of text into XML and vice versa. It currently is a work in progress." + "The XML module attempts to implement the XML 1.1 standard, converting strings of text into XML and vice versa. It currently is a work in progress. Together with XML-RPC, this is a component of the F2EE framework." $terpri "The XML module was implemented by Daniel Ehrenberg, with edits by Slava Pestov. Main functions implemented include:" { $subsection string>xml } { $subsection xml>string } { $subsection xml-parse-error } { $subsection xml-reprint } + { $subsection write-xml } + { $subsection init-xml } "Data types that XML documents are made of:" { $subsection name } { $subsection tag }