| 
									
										
										
										
											2009-01-21 19:16:51 -05:00
										 |  |  | ! Copyright (C) 2005, 2009 Daniel Ehrenberg | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | ! See http://factorcode.org/license.txt for BSD license. | 
					
						
							| 
									
										
										
										
											2009-01-15 01:11:23 -05:00
										 |  |  | USING: accessors arrays io io.encodings.binary io.files | 
					
						
							| 
									
										
										
										
											2009-01-22 17:31:22 -05:00
										 |  |  | io.streams.string kernel namespaces sequences strings io.encodings.utf8 | 
					
						
							| 
									
										
										
										
											2009-01-22 18:19:02 -05:00
										 |  |  | xml.data xml.errors xml.elements ascii xml.entities | 
					
						
							| 
									
										
										
										
											2009-01-29 19:25:23 -05:00
										 |  |  | xml.writer xml.state xml.autoencoding assocs xml.tokenize | 
					
						
							| 
									
										
										
										
											2009-03-16 19:28:15 -04:00
										 |  |  | combinators.short-circuit xml.name splitting io.streams.byte-array | 
					
						
							|  |  |  | combinators ;
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | IN: xml | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-01-27 15:15:00 -05:00
										 |  |  | <PRIVATE
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : add-child ( object -- )
 | 
					
						
							| 
									
										
										
										
											2009-05-25 17:38:33 -04:00
										 |  |  |     xml-stack get last second push ;
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : push-xml ( object -- )
 | 
					
						
							|  |  |  |     V{ } clone 2array xml-stack get push ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : pop-xml ( -- object )
 | 
					
						
							|  |  |  |     xml-stack get pop ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | GENERIC: process ( object -- )
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | M: object process add-child ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | M: prolog process | 
					
						
							| 
									
										
										
										
											2009-01-29 19:25:23 -05:00
										 |  |  |     xml-stack get
 | 
					
						
							|  |  |  |     { V{ { f V{ "" } } } V{ { f V{ } } } } member?
 | 
					
						
							| 
									
										
										
										
											2009-02-04 13:32:47 -05:00
										 |  |  |     [ bad-prolog ] unless add-child ;
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-01-29 19:25:23 -05:00
										 |  |  | : before-main? ( -- ? )
 | 
					
						
							|  |  |  |     xml-stack get { | 
					
						
							|  |  |  |         [ length 1 = ] | 
					
						
							| 
									
										
										
										
											2009-01-29 23:19:07 -05:00
										 |  |  |         [ first second [ tag? ] any? not ] | 
					
						
							| 
									
										
										
										
											2009-01-29 19:25:23 -05:00
										 |  |  |     } 1&& ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | M: directive process | 
					
						
							| 
									
										
										
										
											2009-01-29 19:25:23 -05:00
										 |  |  |     before-main? [ misplaced-directive ] unless add-child ;
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | M: contained process | 
					
						
							| 
									
										
										
										
											2008-08-27 18:02:54 -04:00
										 |  |  |     [ name>> ] [ attrs>> ] bi
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  |     <contained-tag> add-child ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | M: opener process push-xml ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : check-closer ( name opener -- name opener )
 | 
					
						
							| 
									
										
										
										
											2008-12-02 20:59:16 -05:00
										 |  |  |     dup [ unopened ] unless
 | 
					
						
							| 
									
										
										
										
											2008-08-27 18:02:54 -04:00
										 |  |  |     2dup name>> =
 | 
					
						
							| 
									
										
										
										
											2008-12-02 20:59:16 -05:00
										 |  |  |     [ name>> swap mismatched ] unless ;
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | M: closer process | 
					
						
							| 
									
										
										
										
											2008-08-27 18:02:54 -04:00
										 |  |  |     name>> pop-xml first2
 | 
					
						
							| 
									
										
										
										
											2008-12-02 20:59:16 -05:00
										 |  |  |     [ check-closer attrs>> ] dip
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  |     <tag> add-child ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : init-xml-stack ( -- )
 | 
					
						
							| 
									
										
										
										
											2009-01-20 16:37:21 -05:00
										 |  |  |     V{ } clone xml-stack set
 | 
					
						
							|  |  |  |     f push-xml ;
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : default-prolog ( -- prolog )
 | 
					
						
							| 
									
										
										
										
											2008-03-25 22:45:26 -04:00
										 |  |  |     "1.0" "UTF-8" f <prolog> ;
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : init-xml ( -- )
 | 
					
						
							| 
									
										
										
										
											2009-01-29 17:57:13 -05:00
										 |  |  |     init-ns-stack | 
					
						
							|  |  |  |     extra-entities [ H{ } assoc-like ] change ;
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : assert-blanks ( seq pre? -- )
 | 
					
						
							| 
									
										
										
										
											2008-04-26 00:17:08 -04:00
										 |  |  |     swap [ string? ] filter
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  |     [ | 
					
						
							|  |  |  |         dup [ blank? ] all?
 | 
					
						
							| 
									
										
										
										
											2008-12-02 20:59:16 -05:00
										 |  |  |         [ drop ] [ swap pre/post-content ] if
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  |     ] each drop ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : no-pre/post ( pre post -- pre post/* )
 | 
					
						
							|  |  |  |     ! this does *not* affect the contents of the stack | 
					
						
							| 
									
										
										
										
											2008-12-02 20:59:16 -05:00
										 |  |  |     [ dup t assert-blanks ] [ dup f assert-blanks ] bi* ;
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : no-post-tags ( post -- post/* )
 | 
					
						
							|  |  |  |     ! this does *not* affect the contents of the stack | 
					
						
							| 
									
										
										
										
											2009-01-29 23:19:07 -05:00
										 |  |  |     dup [ tag? ] any? [ multitags ] when ;  | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : assure-tags ( seq -- seq )
 | 
					
						
							|  |  |  |     ! this does *not* affect the contents of the stack | 
					
						
							| 
									
										
										
										
											2008-12-02 20:59:16 -05:00
										 |  |  |     [ notags ] unless* ;
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-02-04 13:32:47 -05:00
										 |  |  | : ?first ( seq -- elt/f ) 0 swap ?nth ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-01-29 17:57:13 -05:00
										 |  |  | : get-prolog ( seq -- prolog )
 | 
					
						
							| 
									
										
										
										
											2009-02-04 13:32:47 -05:00
										 |  |  |     { "" } ?head drop
 | 
					
						
							|  |  |  |     ?first dup prolog? | 
					
						
							|  |  |  |     [ drop default-prolog ] unless ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : cut-prolog ( seq -- newseq )
 | 
					
						
							|  |  |  |     [ [ prolog? not ] [ "" = not ] bi and ] filter ;
 | 
					
						
							| 
									
										
										
										
											2009-01-29 17:57:13 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : make-xml-doc ( seq -- xml-doc )
 | 
					
						
							|  |  |  |     [ get-prolog ] keep
 | 
					
						
							| 
									
										
										
										
											2009-02-04 13:32:47 -05:00
										 |  |  |     dup [ tag? ] find [ | 
					
						
							|  |  |  |         assure-tags cut
 | 
					
						
							|  |  |  |         [ cut-prolog ] [ rest ] bi*
 | 
					
						
							|  |  |  |         no-pre/post no-post-tags | 
					
						
							|  |  |  |     ] dip swap <xml> ;
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | ! * Views of XML | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | SYMBOL: text-now? | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-01-27 15:15:00 -05:00
										 |  |  | PRIVATE>
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | TUPLE: pull-xml scope ;
 | 
					
						
							|  |  |  | : <pull-xml> ( -- pull-xml )
 | 
					
						
							|  |  |  |     [ | 
					
						
							| 
									
										
										
										
											2009-07-22 19:49:57 -04:00
										 |  |  |         init-parser | 
					
						
							| 
									
										
										
										
											2008-05-05 03:19:25 -04:00
										 |  |  |         input-stream [ ] change ! bring var in this scope | 
					
						
							| 
									
										
										
										
											2009-01-29 17:57:13 -05:00
										 |  |  |         init-xml text-now? on
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  |     ] H{ } make-assoc
 | 
					
						
							| 
									
										
										
										
											2008-08-27 18:02:54 -04:00
										 |  |  |     pull-xml boa ;
 | 
					
						
							| 
									
										
										
										
											2009-01-15 01:11:23 -05:00
										 |  |  | ! pull-xml needs to call start-document somewhere | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : pull-event ( pull -- xml-event/f )
 | 
					
						
							| 
									
										
										
										
											2008-08-27 18:02:54 -04:00
										 |  |  |     scope>> [ | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  |         text-now? get [ parse-text f ] [ | 
					
						
							|  |  |  |             get-char [ make-tag t ] [ f f ] if
 | 
					
						
							|  |  |  |         ] if text-now? set
 | 
					
						
							|  |  |  |     ] bind ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-01-27 15:15:00 -05:00
										 |  |  | <PRIVATE
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | : done? ( -- ? )
 | 
					
						
							|  |  |  |     xml-stack get length 1 = ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : (pull-elem) ( pull -- xml-elem/f )
 | 
					
						
							|  |  |  |     dup pull-event dup closer? done? and [ nip ] [ | 
					
						
							|  |  |  |         process done? | 
					
						
							|  |  |  |         [ drop xml-stack get first second ] | 
					
						
							|  |  |  |         [ (pull-elem) ] if
 | 
					
						
							|  |  |  |     ] if ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-01-27 15:15:00 -05:00
										 |  |  | PRIVATE>
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | : pull-elem ( pull -- xml-elem/f )
 | 
					
						
							|  |  |  |     [ init-xml-stack (pull-elem) ] with-scope ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-01-27 15:15:00 -05:00
										 |  |  | <PRIVATE
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | : call-under ( quot object -- quot )
 | 
					
						
							| 
									
										
										
										
											2009-05-10 16:28:22 -04:00
										 |  |  |     swap [ call ] keep ; inline
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-01-27 15:15:00 -05:00
										 |  |  | : xml-loop ( quot: ( xml-elem -- ) -- )
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  |     parse-text call-under | 
					
						
							| 
									
										
										
										
											2009-01-27 15:15:00 -05:00
										 |  |  |     get-char [ make-tag call-under xml-loop ] | 
					
						
							| 
									
										
										
										
											2008-08-28 23:28:01 -04:00
										 |  |  |     [ drop ] if ; inline recursive
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-01-29 17:57:13 -05:00
										 |  |  | : read-seq ( stream quot n -- seq )
 | 
					
						
							|  |  |  |     rot [ | 
					
						
							|  |  |  |         depth set
 | 
					
						
							|  |  |  |         init-xml init-xml-stack | 
					
						
							|  |  |  |         call
 | 
					
						
							|  |  |  |         [ process ] xml-loop | 
					
						
							|  |  |  |         done? [ unclosed ] unless
 | 
					
						
							|  |  |  |         xml-stack get first second
 | 
					
						
							|  |  |  |     ] with-state ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-16 19:28:15 -04:00
										 |  |  | : make-xml ( stream quot -- xml )
 | 
					
						
							|  |  |  |     0 read-seq make-xml-doc ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-01-27 15:15:00 -05:00
										 |  |  | PRIVATE>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : each-element ( stream quot: ( xml-elem -- ) -- )
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  |     swap [ | 
					
						
							| 
									
										
										
										
											2009-01-29 17:57:13 -05:00
										 |  |  |         init-xml | 
					
						
							| 
									
										
										
										
											2009-01-15 23:20:24 -05:00
										 |  |  |         start-document [ call-under ] when*
 | 
					
						
							| 
									
										
										
										
											2009-01-27 15:15:00 -05:00
										 |  |  |         xml-loop | 
					
						
							|  |  |  |     ] with-state ; inline
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : read-xml ( stream -- xml )
 | 
					
						
							| 
									
										
										
										
											2009-03-16 19:28:15 -04:00
										 |  |  |     dup stream-element-type { | 
					
						
							|  |  |  |         { +character+ [ [ check ] make-xml ] } | 
					
						
							|  |  |  |         { +byte+ [ [ start-document [ process ] when* ] make-xml ] } | 
					
						
							|  |  |  |     } case ;
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-12-02 20:59:16 -05:00
										 |  |  | : read-xml-chunk ( stream -- seq )
 | 
					
						
							| 
									
										
										
										
											2009-01-29 17:57:13 -05:00
										 |  |  |     [ check ] 1 read-seq <xml-chunk> ;
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : string>xml ( string -- xml )
 | 
					
						
							| 
									
										
										
										
											2009-03-16 19:28:15 -04:00
										 |  |  |     <string-reader> read-xml ;
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-12-02 20:59:16 -05:00
										 |  |  | : string>xml-chunk ( string -- xml )
 | 
					
						
							| 
									
										
										
										
											2009-01-29 17:57:13 -05:00
										 |  |  |     <string-reader> read-xml-chunk ;
 | 
					
						
							| 
									
										
										
										
											2008-12-02 20:59:16 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | : file>xml ( filename -- xml )
 | 
					
						
							| 
									
										
										
										
											2009-01-15 01:11:23 -05:00
										 |  |  |     binary <file-reader> read-xml ;
 | 
					
						
							| 
									
										
										
										
											2009-01-22 17:31:22 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-02-13 16:10:34 -05:00
										 |  |  | : bytes>xml ( byte-array -- xml )
 | 
					
						
							|  |  |  |     binary <byte-reader> read-xml ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-01-23 16:29:28 -05:00
										 |  |  | : read-dtd ( stream -- dtd )
 | 
					
						
							| 
									
										
										
										
											2009-01-22 17:31:22 -05:00
										 |  |  |     [ | 
					
						
							|  |  |  |         H{ } clone extra-entities set
 | 
					
						
							| 
									
										
										
										
											2009-01-23 16:29:28 -05:00
										 |  |  |         take-internal-subset | 
					
						
							| 
									
										
										
										
											2009-01-22 17:31:22 -05:00
										 |  |  |     ] with-state ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-01-23 16:29:28 -05:00
										 |  |  | : file>dtd ( filename -- dtd )
 | 
					
						
							| 
									
										
										
										
											2009-01-22 17:31:22 -05:00
										 |  |  |     utf8 <file-reader> read-dtd ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-01-23 16:29:28 -05:00
										 |  |  | : string>dtd ( string -- dtd )
 | 
					
						
							| 
									
										
										
										
											2009-01-22 17:31:22 -05:00
										 |  |  |     <string-reader> read-dtd ;
 |