| 
									
										
										
										
											2007-12-29 11:36:20 -05:00
										 |  |  | ! Copyright (C) 2005, 2006 Daniel Ehrenberg | 
					
						
							|  |  |  | ! See http://factorcode.org/license.txt for BSD license. | 
					
						
							| 
									
										
										
										
											2008-06-30 04:57:00 -04:00
										 |  |  | USING: accessors kernel namespaces sequences words io assocs | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | quotations strings parser lexer arrays xml.data xml.writer debugger | 
					
						
							| 
									
										
										
										
											2008-06-27 02:30:23 -04:00
										 |  |  | splitting vectors sequences.deep combinators ;
 | 
					
						
							| 
									
										
										
										
											2007-12-29 11:36:20 -05:00
										 |  |  | IN: xml.utilities | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ! * System for words specialized on tag names | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | TUPLE: process-missing process tag ;
 | 
					
						
							|  |  |  | M: process-missing error. | 
					
						
							|  |  |  |     "Tag <" write
 | 
					
						
							| 
									
										
										
										
											2008-08-27 18:02:54 -04:00
										 |  |  |     dup tag>> print-name | 
					
						
							| 
									
										
										
										
											2007-12-29 11:36:20 -05:00
										 |  |  |     "> not implemented on process process " write
 | 
					
						
							| 
									
										
										
										
											2008-08-27 18:02:54 -04:00
										 |  |  |     name>> print ;
 | 
					
						
							| 
									
										
										
										
											2007-12-29 11:36:20 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : run-process ( tag word -- )
 | 
					
						
							|  |  |  |     2dup "xtable" word-prop | 
					
						
							| 
									
										
										
										
											2008-08-27 18:02:54 -04:00
										 |  |  |     >r dup main>> r> at* [ 2nip call ] [ | 
					
						
							| 
									
										
										
										
											2008-04-13 16:06:27 -04:00
										 |  |  |         drop \ process-missing boa throw
 | 
					
						
							| 
									
										
										
										
											2007-12-29 11:36:20 -05:00
										 |  |  |     ] if ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : PROCESS: | 
					
						
							|  |  |  |     CREATE | 
					
						
							|  |  |  |     dup H{ } clone "xtable" set-word-prop | 
					
						
							| 
									
										
										
										
											2008-01-10 22:03:34 -05:00
										 |  |  |     dup [ run-process ] curry define ; parsing | 
					
						
							| 
									
										
										
										
											2007-12-29 11:36:20 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : TAG: | 
					
						
							|  |  |  |     scan scan-word | 
					
						
							|  |  |  |     parse-definition | 
					
						
							|  |  |  |     swap "xtable" word-prop | 
					
						
							|  |  |  |     rot "/" split [ >r 2dup r> swap set-at ] each 2drop ;
 | 
					
						
							|  |  |  |     parsing | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ! * Common utility functions | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : build-tag* ( items name -- tag )
 | 
					
						
							| 
									
										
										
										
											2008-01-06 12:30:23 -05:00
										 |  |  |     assure-name swap >r f r> <tag> ;
 | 
					
						
							| 
									
										
										
										
											2007-12-29 11:36:20 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : build-tag ( item name -- tag )
 | 
					
						
							|  |  |  |     >r 1array r> build-tag* ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-01-06 12:30:23 -05:00
										 |  |  | : standard-prolog ( -- prolog )
 | 
					
						
							| 
									
										
										
										
											2008-03-25 22:45:26 -04:00
										 |  |  |     T{ prolog f "1.0" "UTF-8" f } ;
 | 
					
						
							| 
									
										
										
										
											2008-01-06 12:30:23 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-29 11:36:20 -05:00
										 |  |  | : build-xml ( tag -- xml )
 | 
					
						
							| 
									
										
										
										
											2008-01-06 12:30:23 -05:00
										 |  |  |     standard-prolog { } rot { } <xml> ;
 | 
					
						
							| 
									
										
										
										
											2007-12-29 11:36:20 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : children>string ( tag -- string )
 | 
					
						
							| 
									
										
										
										
											2008-08-27 18:02:54 -04:00
										 |  |  |     children>> { | 
					
						
							| 
									
										
										
										
											2008-06-27 02:30:23 -04:00
										 |  |  |         { [ dup empty? ] [ drop "" ] } | 
					
						
							| 
									
										
										
										
											2008-08-27 18:02:54 -04:00
										 |  |  |         { [ dup [ string? not ] contains? ] | 
					
						
							|  |  |  |           [ "XML tag unexpectedly contains non-text children" throw ] } | 
					
						
							| 
									
										
										
										
											2008-06-27 02:30:23 -04:00
										 |  |  |         [ concat ] | 
					
						
							|  |  |  |     } cond ;
 | 
					
						
							| 
									
										
										
										
											2007-12-29 11:36:20 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : children-tags ( tag -- sequence )
 | 
					
						
							| 
									
										
										
										
											2008-08-27 18:02:54 -04:00
										 |  |  |     children>> [ tag? ] filter ;
 | 
					
						
							| 
									
										
										
										
											2007-12-29 11:36:20 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : first-child-tag ( tag -- tag )
 | 
					
						
							| 
									
										
										
										
											2008-08-27 18:02:54 -04:00
										 |  |  |     children>> [ tag? ] find nip ;
 | 
					
						
							| 
									
										
										
										
											2007-12-29 11:36:20 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | ! * Accessing part of an XML document | 
					
						
							| 
									
										
										
										
											2008-01-06 12:30:23 -05:00
										 |  |  | ! for tag- words, a start means that it searches all children | 
					
						
							|  |  |  | ! and no star searches only direct children | 
					
						
							| 
									
										
										
										
											2007-12-29 11:36:20 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-01-06 12:30:23 -05:00
										 |  |  | : tag-named? ( name elem -- ? )
 | 
					
						
							| 
									
										
										
										
											2007-12-29 11:36:20 -05:00
										 |  |  |     dup tag? [ names-match? ] [ 2drop f ] if ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-01-06 13:17:50 -05:00
										 |  |  | : tags@ ( tag name -- children name )
 | 
					
						
							|  |  |  |     >r { } like r> assure-name ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : deep-tag-named ( tag name/string -- matching-tag )
 | 
					
						
							|  |  |  |     assure-name [ swap tag-named? ] curry deep-find ;
 | 
					
						
							| 
									
										
										
										
											2007-12-29 11:36:20 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-01-06 13:17:50 -05:00
										 |  |  | : deep-tags-named ( tag name/string -- tags-seq )
 | 
					
						
							| 
									
										
										
										
											2008-04-26 00:17:08 -04:00
										 |  |  |     tags@ [ swap tag-named? ] curry deep-filter ;
 | 
					
						
							| 
									
										
										
										
											2007-12-29 11:36:20 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : tag-named ( tag name/string -- matching-tag )
 | 
					
						
							|  |  |  |     ! like get-name-tag but only looks at direct children, | 
					
						
							|  |  |  |     ! not all the children down the tree. | 
					
						
							| 
									
										
										
										
											2008-01-09 17:36:30 -05:00
										 |  |  |     assure-name swap [ tag-named? ] with find nip ;
 | 
					
						
							| 
									
										
										
										
											2007-12-29 11:36:20 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : tags-named ( tag name/string -- tags-seq )
 | 
					
						
							| 
									
										
										
										
											2008-04-26 00:17:08 -04:00
										 |  |  |     tags@ swap [ tag-named? ] with filter ;
 | 
					
						
							| 
									
										
										
										
											2008-01-06 12:30:23 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : tag-with-attr? ( elem attr-value attr-name -- ? )
 | 
					
						
							| 
									
										
										
										
											2008-01-06 13:17:50 -05:00
										 |  |  |     rot dup tag? [ at = ] [ 3drop f ] if ;
 | 
					
						
							| 
									
										
										
										
											2008-01-06 12:30:23 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : tag-with-attr ( tag attr-value attr-name -- matching-tag )
 | 
					
						
							|  |  |  |     assure-name [ tag-with-attr? ] 2curry find nip ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : tags-with-attr ( tag attr-value attr-name -- tags-seq )
 | 
					
						
							| 
									
										
										
										
											2008-08-27 18:02:54 -04:00
										 |  |  |     tags@ [ tag-with-attr? ] 2curry filter children>> ;
 | 
					
						
							| 
									
										
										
										
											2008-01-06 12:30:23 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-01-06 13:17:50 -05:00
										 |  |  | : deep-tag-with-attr ( tag attr-value attr-name -- matching-tag )
 | 
					
						
							|  |  |  |     assure-name [ tag-with-attr? ] 2curry deep-find ;
 | 
					
						
							| 
									
										
										
										
											2008-01-06 12:30:23 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-01-06 13:17:50 -05:00
										 |  |  | : deep-tags-with-attr ( tag attr-value attr-name -- tags-seq )
 | 
					
						
							| 
									
										
										
										
											2008-04-26 00:17:08 -04:00
										 |  |  |     tags@ [ tag-with-attr? ] 2curry deep-filter ;
 | 
					
						
							| 
									
										
										
										
											2008-01-06 12:30:23 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : get-id ( tag id -- elem ) ! elem=tag.getElementById(id) | 
					
						
							| 
									
										
										
										
											2008-01-06 13:17:50 -05:00
										 |  |  |     "id" deep-tag-with-attr ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : deep-tags-named-with-attr ( tag tag-name attr-value attr-name -- tags )
 | 
					
						
							|  |  |  |     >r >r deep-tags-named r> r> tags-with-attr ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : assert-tag ( name name -- )
 | 
					
						
							|  |  |  |     names-match? [ "Unexpected XML tag found" throw ] unless ;
 | 
					
						
							| 
									
										
										
										
											2008-01-06 12:30:23 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-01-06 13:17:50 -05:00
										 |  |  | : insert-children ( children tag -- )
 | 
					
						
							| 
									
										
										
										
											2008-08-27 18:02:54 -04:00
										 |  |  |     dup children>> [ push-all ] | 
					
						
							|  |  |  |     [ swap V{ } like >>children drop ] if ;
 | 
					
						
							| 
									
										
										
										
											2008-01-06 13:17:50 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : insert-child ( child tag -- )
 | 
					
						
							|  |  |  |     >r 1vector r> insert-children ;
 |