2009-01-21 19:16:51 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								! Copyright (C) 2005, 2009 Daniel Ehrenberg
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								! See http://factorcode.org/license.txt for BSD license.
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 19:16:51 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								USING: namespaces xml.state kernel sequences accessors
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								xml.char-classes xml.errors math io sbufs fry strings ascii
							 | 
						
					
						
							
								
									
										
										
										
											2011-09-30 15:47:38 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								xml.entities assocs splitting math.parser
							 | 
						
					
						
							
								
									
										
										
										
											2012-07-12 22:06:37 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								locals combinators combinators.short-circuit arrays hints ;
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								IN: xml.tokenize
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 22:41:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								! * Basic utility words
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: assure-good-char ( spot ch -- )
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 00:54:33 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    [
							 | 
						
					
						
							
								
									
										
										
										
											2012-07-12 22:06:37 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        over {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            [ version-1.0?>> over text? not ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            [ check>> ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        } 1&&
							 | 
						
					
						
							
								
									
										
										
										
											2011-09-30 15:47:38 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            [ [ 1 + ] change-column drop ] dip
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 22:41:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            disallowed-char
							 | 
						
					
						
							
								
									
										
										
										
											2011-09-30 15:47:38 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        ] [ 2drop ] if
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 22:41:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    ] [ drop ] if* ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								HINTS: assure-good-char { spot fixnum } ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: record ( spot char -- spot )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    over char>> [
							 | 
						
					
						
							
								
									
										
										
										
											2011-09-30 15:47:38 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        CHAR: \n eq?
							 | 
						
					
						
							
								
									
										
										
										
											2009-08-13 20:21:44 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        [ [ 1 + ] change-line -1 ] [ dup column>> 1 + ] if
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 22:41:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        >>column
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    ] [ drop ] if ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 00:54:33 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 22:41:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								HINTS: record { spot fixnum } ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 00:54:33 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 22:41:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								:: (next) ( spot -- spot char )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    spot next>> :> old-next
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 23:17:55 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    spot stream>> stream-read1 :> new-next
							 | 
						
					
						
							
								
									
										
										
										
											2011-09-30 15:47:38 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    old-next CHAR: \r eq? [
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 22:41:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        spot CHAR: \n >>char
							 | 
						
					
						
							
								
									
										
										
										
											2011-09-30 15:47:38 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        new-next CHAR: \n eq?
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 23:17:55 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        [ spot stream>> stream-read1 >>next ]
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 22:41:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        [ new-next >>next ] if
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    ] [ spot old-next >>char new-next >>next ] if
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    spot next>> ; inline
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 00:54:33 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 22:41:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: next* ( spot -- )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    dup char>> [ unexpected-end ] unless
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    (next) [ record ] keep assure-good-char ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								HINTS: next* { spot } ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 00:54:33 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: next ( -- )
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 22:41:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    spot get next* ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 00:54:33 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 19:16:51 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: init-parser ( -- )
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 23:17:55 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    0 1 0 0 f t f <spot>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        input-stream get >>stream
							 | 
						
					
						
							
								
									
										
										
										
											2011-09-30 15:47:38 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        read1 >>next
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    spot set next ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 19:16:51 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: with-state ( stream quot -- )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    ! with-input-stream implicitly creates a new scope which we use
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    swap [ init-parser call ] with-input-stream ; inline
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2011-09-30 15:47:38 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								:: (skip-until) ( ... quot: ( ... char -- ... ? ) spot -- ... )
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 22:41:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    spot char>> [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        quot call [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            spot next* quot spot (skip-until)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ] unless
							 | 
						
					
						
							
								
									
										
										
										
											2011-09-30 15:47:38 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    ] when* ; inline recursive
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 22:41:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2011-09-30 15:47:38 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: skip-until ( ... quot: ( ... char -- ... ? ) -- ... )
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 22:41:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    spot get (skip-until) ; inline
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 00:54:33 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2011-09-30 15:47:38 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: take-until ( ... quot: ( ... char -- ... ? ) -- ... string )
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 00:54:33 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    #! Take the substring of a string starting at spot
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    #! from code until the quotation given is true and
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    #! advance spot to after the substring.
							 | 
						
					
						
							
								
									
										
										
										
											2011-09-30 15:47:38 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								   10 <sbuf> [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								       '[ _ keep over [ drop ] [ _ push ] if ] skip-until
							 | 
						
					
						
							
								
									
										
										
										
											2012-08-24 18:16:04 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								   ] keep "" like ; inline
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 00:54:33 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-25 22:06:45 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: take-to ( seq -- string )
							 | 
						
					
						
							
								
									
										
										
										
											2012-08-24 18:16:04 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    '[ _ member? ] take-until ; inline
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 00:54:33 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: pass-blank ( -- )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    #! Advance code past any whitespace, including newlines
							 | 
						
					
						
							
								
									
										
										
										
											2011-09-30 15:47:38 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    [ blank? not ] skip-until ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 00:54:33 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2011-10-16 16:01:58 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: next-matching ( pos ch str -- pos' )
							 | 
						
					
						
							
								
									
										
										
										
											2012-08-24 18:16:04 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    [ over ] dip nth eq? [ 1 + ] [ drop 0 ] if ; inline
							 | 
						
					
						
							
								
									
										
										
										
											2011-10-16 16:01:58 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2011-09-30 15:47:38 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: string-matcher ( str -- quot: ( pos char -- pos ? ) )
							 | 
						
					
						
							
								
									
										
										
										
											2011-10-16 16:01:58 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    dup length 1 - '[ _ next-matching dup _ > ] ; inline
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 00:54:33 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2014-02-07 00:12:57 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								:: (take-string) ( match spot -- sbuf matched? )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    10 <sbuf> f [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        spot char>> [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            nip over push
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            spot next*
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            dup match tail? dup not
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ] [ f ] if*
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    ] loop ; inline
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 00:54:33 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: take-string ( match -- string )
							 | 
						
					
						
							
								
									
										
										
										
											2014-02-07 00:12:57 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    [ spot get (take-string) [ missing-close ] unless ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ dupd [ length ] bi@ - over shorten "" like ] bi ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 00:54:33 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-23 16:29:28 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: expect ( string -- )
							 | 
						
					
						
							
								
									
										
										
										
											2010-01-14 10:10:13 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    dup length spot get '[ _ [ char>> ] keep next* ] "" replicate-as
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 22:41:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    2dup = [ 2drop ] [ expected ] if ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 00:54:33 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-25 22:06:45 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								! Suddenly XML-specific
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 22:41:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: parse-named-entity ( accum string -- )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    dup entities at [ swap push ] [
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-15 23:20:24 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        dup extra-entities get at
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 22:41:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        [ swap push-all ] [ no-entity ] ?if
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    ] ?if ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-25 22:06:45 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: take-; ( -- string )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    next ";" take-to next ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 22:41:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: parse-entity ( accum -- )
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-25 22:06:45 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    take-; "#" ?head [
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 22:41:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        "x" ?head 16 10 ? base> swap push
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-15 23:20:24 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    ] [ parse-named-entity ] if ;
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 22:41:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: parse-pe ( accum -- )
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-25 22:06:45 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    take-; dup pe-table get at
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 22:41:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    [ swap push-all ] [ no-entity ] ?if ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 00:54:33 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 23:17:55 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								:: (parse-char) ( quot: ( ch -- ? ) accum spot -- )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    spot char>> :> char
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-19 23:25:15 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { [ char not ] [ ] }
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 23:17:55 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { [ char quot call ] [ spot next* ] }
							 | 
						
					
						
							
								
									
										
										
										
											2011-09-30 15:47:38 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { [ char CHAR: & eq? ] [
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 23:17:55 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            accum parse-entity
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            quot accum spot (parse-char)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ] }
							 | 
						
					
						
							
								
									
										
										
										
											2012-09-11 17:43:19 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { [ char CHAR: % eq? [ in-dtd? get ] [ f ] if ] [
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 23:17:55 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            accum parse-pe
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            quot accum spot (parse-char)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            char accum push
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            spot next*
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            quot accum spot (parse-char)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ]
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-19 23:25:15 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    } cond ; inline recursive
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: parse-char ( quot: ( ch -- ? ) -- seq )
							 | 
						
					
						
							
								
									
										
										
										
											2012-08-24 18:16:04 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    512 <sbuf> [ spot get (parse-char) ] keep "" like ; inline
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2011-09-30 15:47:38 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: assure-no-]]> ( pos char -- pos' )
							 | 
						
					
						
							
								
									
										
										
										
											2012-08-24 18:16:04 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    "]]>" next-matching dup 2 > [ text-w/]]> ] when ; inline
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-20 16:37:21 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								:: parse-text ( -- string )
							 | 
						
					
						
							
								
									
										
										
										
											2011-09-30 15:47:38 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    depth get zero? :> no-text
							 | 
						
					
						
							
								
									
										
										
										
											2012-08-24 18:16:04 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    0 :> pos!
							 | 
						
					
						
							
								
									
										
										
										
											2011-09-30 15:47:38 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    [| char |
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        pos char assure-no-]]> pos!
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        no-text [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            char blank? char CHAR: < eq? or [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                char 1string t pre/post-content
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            ] unless
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ] when
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        char CHAR: < eq?
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-19 23:25:15 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    ] parse-char ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 19:16:51 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: close ( -- )
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-23 16:29:28 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    pass-blank ">" expect ;
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 00:54:33 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: normalize-quote ( str -- str )
							 | 
						
					
						
							
								
									
										
										
										
											2012-08-24 18:16:04 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    [ dup "\t\r\n" member? [ drop CHAR: \s ] when ] map! ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 00:54:33 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-19 23:25:15 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: (parse-quote) ( <-disallowed? ch -- string )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    swap '[
							 | 
						
					
						
							
								
									
										
										
										
											2011-09-30 15:47:38 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        dup _ eq? [ drop t ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        [ CHAR: < eq? _ and [ attr-w/< ] [ f ] if ] if
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-21 00:54:33 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    ] parse-char normalize-quote get-char
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-19 23:25:15 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    [ unclosed-quote ] unless ; inline
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-15 23:20:24 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-19 23:25:15 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: parse-quote* ( <-disallowed? -- seq )
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-15 23:20:24 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    pass-blank get-char dup "'\"" member?
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-19 23:25:15 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    [ next (parse-quote) ] [ quoteless-attr ] if ; inline
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: parse-quote ( -- seq )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   f parse-quote* ;
							 |