diff --git a/contrib/xml/char-class.factor b/contrib/xml/char-class.factor new file mode 100644 index 0000000000..4f096e4045 --- /dev/null +++ b/contrib/xml/char-class.factor @@ -0,0 +1,35 @@ +IN: char-classes +USING: kernel sequences math ; + +: in-range-seq? ( number seq -- ? ) + #! seq: { { min max } { min max }* } + [ first2 between? ] contains-with? ; + +PREDICATE: integer name-start-char + { + { CHAR: _ CHAR: _ } + { CHAR: A CHAR: Z } + { CHAR: a CHAR: z } + { HEX: C0 HEX: D6 } + { HEX: D8 HEX: F6 } + { HEX: F8 HEX: 2FF } + { HEX: 370 HEX: 37D } + { HEX: 37F HEX: 1FFF } + { HEX: 200C HEX: 200D } + { HEX: 2070 HEX: 218F } + { HEX: 2C00 HEX: 2FEF } + { HEX: 3001 HEX: D7FF } + { HEX: F900 HEX: FDCF } + { HEX: FDF0 HEX: FFFD } + { HEX: 10000 HEX: EFFFF } + } in-range-seq? ; + +PREDICATE: integer name-char + dup name-start-char? swap { + { CHAR: - CHAR: - } + { CHAR: . CHAR: . } + { CHAR: 0 CHAR: 9 } + { HEX: b7 HEX: b7 } + { HEX: 300 HEX: 36F } + { HEX: 203F HEX: 2040 } + } in-range-seq? or ; diff --git a/contrib/xml/example.factor b/contrib/xml/example.factor new file mode 100644 index 0000000000..5f05f4d70d --- /dev/null +++ b/contrib/xml/example.factor @@ -0,0 +1,24 @@ +IN: xml-stupid-math +USING: xml io kernel math sequences strings ; + +PROCESS: calculate ( tag -- n ) + +: calc-2children ( tag -- n n ) + children-tags first2 >r calculate r> calculate ; + +TAG: number calculate + children>string string>number ; +TAG: add calculate + calc-2children + ; +TAG: minus calculate + calc-2children - ; +TAG: times calculate + calc-2children * ; +TAG: divide calculate + calc-2children / ; +TAG: neg calculate + children-tags first calculate neg ; + +: calc-arith ( string -- n ) + string>xml first-child-tag calculate ; + diff --git a/contrib/xml/load.factor b/contrib/xml/load.factor index b9a7c494dc..f32d0b4325 100644 --- a/contrib/xml/load.factor +++ b/contrib/xml/load.factor @@ -2,6 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. PROVIDE: contrib/xml { +files+ { + "char-class.factor" "tokenizer.factor" "parser.factor" "writer.factor" diff --git a/contrib/xml/parser.factor b/contrib/xml/parser.factor index 2676a5df17..34ee6f875b 100644 --- a/contrib/xml/parser.factor +++ b/contrib/xml/parser.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. IN: xml USING: errors hashtables io kernel math namespaces prettyprint sequences - arrays generic strings vectors ; + arrays generic strings vectors char-classes ; TUPLE: opener name props ; TUPLE: closer name ; @@ -146,8 +146,8 @@ GENERIC: process ( object -- ) M: f process drop ; M: object process add-child ; -M: vector process [ add-child ] each ; -M: array process [ add-child ] each ; ! does this ever occur? +M: vector process [ add-child ] each ; ! does this ever occur? +M: array process [ add-child ] each ; M: contained process [ contained-name ] keep contained-props @@ -228,8 +228,7 @@ M: extra-attrs error. dup prolog-data set ; : init-xml ( string -- ) - code set - [ spot line column ] [ 0 swap set ] each + code set { 0 1 1 } spot set init-xml-stack init-ns-stack ; UNION: any-tag tag contained-tag ; diff --git a/contrib/xml/tokenizer.factor b/contrib/xml/tokenizer.factor index 1573d264ad..b8db767d72 100644 --- a/contrib/xml/tokenizer.factor +++ b/contrib/xml/tokenizer.factor @@ -2,20 +2,24 @@ ! See http://factorcode.org/license.txt for BSD license. IN: xml USING: errors hashtables io kernel math namespaces prettyprint sequences tools - generic strings ; + generic strings char-classes ; SYMBOL: code #! Source code -SYMBOL: spot #! Current index of string +SYMBOL: spot #! { index line column } +: get-index ( -- index ) spot get first ; +: set-index ( index -- ) 0 spot get set-nth ; +: get-line ( -- line ) spot get second ; +: set-line ( line -- ) 1 spot get set-nth ; +: get-column ( -- column ) 2 spot get nth ; +: set-column ( column -- ) 2 spot get set-nth ; SYMBOL: prolog-data -SYMBOL: line -SYMBOL: column ! -- Error reporting TUPLE: xml-error line column ; C: xml-error ( -- xml-error ) - [ line get swap set-xml-error-line ] keep - [ column get swap set-xml-error-column ] keep ; + [ get-line swap set-xml-error-line ] keep + [ get-column swap set-xml-error-column ] keep ; : xml-error. ( xml-error -- ) "XML error" print @@ -53,21 +57,18 @@ M: xml-string-error error. ! -- Basic utility words -: set-code ( string -- ) ! for debugging - code set [ spot line column ] [ 0 swap set ] each ; - : more? ( -- ? ) #! Return t if spot is not at the end of code - code get length spot get = not ; + code get length get-index = not ; : char ( -- char/f ) - more? [ spot get code get nth ] [ f ] if ; + more? [ get-index code get nth ] [ f ] if ; : incr-spot ( -- ) #! Increment spot. - spot inc - char "\n\r" member? [ 0 column set line ] [ column ] if - inc ; + get-index 1+ set-index char "\n\r" member? + [ 0 set-column get-line 1+ set-line ] + [ get-column 1+ set-column ] if ; : skip-until ( quot -- ) #! quot: ( char -- ? ) @@ -81,29 +82,29 @@ M: xml-string-error error. #! Take the substring of a string starting at spot #! from code until the quotation given is true and #! advance spot to after the substring. - spot get >r skip-until r> - spot get code get subseq ; inline + get-index >r skip-until r> + get-index code get subseq ; inline : pass-blank ( -- ) #! Advance code past any whitespace, including newlines [ blank? not ] skip-until ; : string-matches? ( string -- ? ) - spot get dup pick length + code get + get-index dup pick length + code get 2dup length > [ 3drop drop f ] [ sequence= ] if ; : (take-until-string) ( string -- n ) more? [ dup string-matches? [ - drop spot get + drop get-index ] [ incr-spot (take-until-string) ] if ] [ "Missing closing token" throw ] if ; : take-until-string ( string -- string ) - [ >r spot get r> (take-until-string) code get subseq ] keep - length spot [ + ] change ; + [ >r get-index r> (take-until-string) code get subseq ] keep + length get-index + set-index ; ! -- Parsing strings @@ -117,10 +118,10 @@ M: xml-string-error error. [ incr-spot ] times ; : expect-string ( string -- ) - >r spot get r> t over [ char incr-spot = and ] each [ + >r get-index r> t over [ char incr-spot = and ] each [ 2drop ] [ - swap spot get code get subseq throw + swap get-index code get subseq throw ] if ; TUPLE: prolog version encoding standalone ; ! part of xml-doc, see parser @@ -144,64 +145,39 @@ TUPLE: entity name ; : parsed-ch ( sbuf ch -- sbuf ) over push incr-spot ; -: parse-entity ( vector sbuf -- vector sbuf ) +: parse-entity ( sbuf -- sbuf ) incr-spot [ CHAR: ; = ] take-until "#" ?head [ "x" ?head 16 10 ? base> parsed-ch ] [ dup entities hash [ parsed-ch ] [ prolog-data get prolog-standalone [ throw ] [ - >r >string over push r> over push incr-spot SBUF" " + >r >string , r> , incr-spot + SBUF" " clone ] if ] ?if ] if ; -: (parse-text) ( vector sbuf -- vector ) +TUPLE: reference name ; + +: parse-reference ( sbuf -- sbuf ) + , incr-spot [ CHAR: ; = ] take-until + , SBUF" " clone incr-spot ; + +: (parse-text) ( sbuf -- ) { - { [ more? not ] [ >string over push ] } - { [ char CHAR: < = ] [ >string over push ] } + { [ more? not ] [ >string , ] } ! should this be an error? + { [ char CHAR: < = ] [ >string , ] } { [ char CHAR: & = ] [ parse-entity (parse-text) ] } + { [ char CHAR: % = ] [ parse-reference (parse-text) ] } { [ t ] [ char parsed-ch (parse-text) ] } } cond ; : parse-text ( -- array ) - V{ } clone SBUF" " clone (parse-text) ; + [ SBUF" " clone (parse-text) ] { } make ; ! -- Parsing tags -: in-range-seq? ( number seq -- ? ) - #! seq: { { min max } { min max }* } - [ first2 between? ] contains-with? ; - -: name-start-char? ( ch -- ? ) - { - { CHAR: _ CHAR: _ } - { CHAR: A CHAR: Z } - { CHAR: a CHAR: z } - { HEX: C0 HEX: D6 } - { HEX: D8 HEX: F6 } - { HEX: F8 HEX: 2FF } - { HEX: 370 HEX: 37D } - { HEX: 37F HEX: 1FFF } - { HEX: 200C HEX: 200D } - { HEX: 2070 HEX: 218F } - { HEX: 2C00 HEX: 2FEF } - { HEX: 3001 HEX: D7FF } - { HEX: F900 HEX: FDCF } - { HEX: FDF0 HEX: FFFD } - { HEX: 10000 HEX: EFFFF } - } in-range-seq? ; - -: name-char? ( ch -- ? ) - dup name-start-char? swap { - { CHAR: - CHAR: - } - { CHAR: . CHAR: . } - { CHAR: 0 CHAR: 9 } - { HEX: b7 HEX: b7 } - { HEX: 300 HEX: 36F } - { HEX: 203F HEX: 2040 } - } in-range-seq? or ; - TUPLE: name space tag url ; C: name ( space tag -- name ) [ set-name-tag ] keep diff --git a/contrib/xml/writer.factor b/contrib/xml/writer.factor index c6f245e54a..e3bec57641 100644 --- a/contrib/xml/writer.factor +++ b/contrib/xml/writer.factor @@ -51,6 +51,9 @@ M: instruction (xml>string) M: entity (xml>string) CHAR: & , entity-name % CHAR: ; , ; +M: reference (xml>string) + CHAR: % , reference-name % CHAR: ; , ; + : xml-preamble ( xml -- ) "xml { $values { "string" "a string" } { "xml-doc" "an xml document" } } { $description "converts a string into an " { $snippet "xml-doc" } - " datatype for further processing" } ; + " datatype for further processing" } +{ $see-also xml>string xml-reprint } ; HELP: xml>string { $values { "xml-doc" "an xml document" } { "string" "a string" } } { $description "converts an xml document into a string" } -{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ; +{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } +{ $see-also string>xml xml-reprint } ; HELP: xml-parse-error { $description "the exception class that all parsing errors in XML documents are in." } ; @@ -20,9 +22,26 @@ HELP: xml-reprint { $description "parses XML and converts it back into a string, for testing purposes" } { $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ; +HELP: PROCESS: +{ $syntax "PROCESS: word" } +{ $values { "word" "a new word to define" } } +{ $description "creates a new word to process XML tags" } +{ $see-also POSTPONE: TAG: } ; + +HELP: TAG: +{ $syntax "TAG: tag word definition... ;" } +{ $values { "tag" "an xml tag name" } { "word" "an XML process" } } +{ $description "defines what a process should do when it encounters a specific tag" } +{ $examples { $code "PROCESS: x ( tag -- )\nTAG: a x drop \"hi\" write ;" } } +{ $see-also POSTPONE: PROCESS: } ; + ARTICLE: { "xml" "intro" } "XML" "The XML module attempts to implement the XML 1.1 standard, converting strings of text into XML and vice versa. It currently is a work in progress." $terpri "The XML module was implemented by Daniel Ehrenberg, with edits by Slava Pestov. Main functions implemented include:" { $subsection string>xml } - { $subsection xml>string } ; + { $subsection xml>string } + { $subsection xml-parse-error } + { $subsection xml-reprint } + { $subsection POSTPONE: PROCESS: } + { $subsection POSTPONE: TAG: } ;