2009-01-21 19:16:51 -05:00
|
|
|
! Copyright (C) 2005, 2009 Daniel Ehrenberg
|
2007-09-20 18:09:08 -04:00
|
|
|
! See http://factorcode.org/license.txt for BSD license.
|
2009-01-15 01:11:23 -05:00
|
|
|
USING: accessors arrays io io.encodings.binary io.files
|
2009-01-22 17:31:22 -05:00
|
|
|
io.streams.string kernel namespaces sequences strings io.encodings.utf8
|
2009-01-22 18:19:02 -05:00
|
|
|
xml.data xml.errors xml.elements ascii xml.entities
|
2009-01-29 19:25:23 -05:00
|
|
|
xml.writer xml.state xml.autoencoding assocs xml.tokenize
|
|
|
|
combinators.short-circuit xml.name ;
|
2007-09-20 18:09:08 -04:00
|
|
|
IN: xml
|
|
|
|
|
2009-01-27 15:15:00 -05:00
|
|
|
<PRIVATE
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
: add-child ( object -- )
|
|
|
|
xml-stack get peek second push ;
|
|
|
|
|
|
|
|
: push-xml ( object -- )
|
|
|
|
V{ } clone 2array xml-stack get push ;
|
|
|
|
|
|
|
|
: pop-xml ( -- object )
|
|
|
|
xml-stack get pop ;
|
|
|
|
|
|
|
|
GENERIC: process ( object -- )
|
|
|
|
|
|
|
|
M: object process add-child ;
|
|
|
|
|
|
|
|
M: prolog process
|
2009-01-29 19:25:23 -05:00
|
|
|
xml-stack get
|
|
|
|
{ V{ { f V{ "" } } } V{ { f V{ } } } } member?
|
2008-12-02 20:59:16 -05:00
|
|
|
[ bad-prolog ] unless drop ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2009-01-29 19:25:23 -05:00
|
|
|
: before-main? ( -- ? )
|
|
|
|
xml-stack get {
|
|
|
|
[ length 1 = ]
|
|
|
|
[ first second [ tag? ] contains? not ]
|
|
|
|
} 1&& ;
|
|
|
|
|
2007-09-20 18:09:08 -04:00
|
|
|
M: directive process
|
2009-01-29 19:25:23 -05:00
|
|
|
before-main? [ misplaced-directive ] unless add-child ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
M: contained process
|
2008-08-27 18:02:54 -04:00
|
|
|
[ name>> ] [ attrs>> ] bi
|
2007-09-20 18:09:08 -04:00
|
|
|
<contained-tag> add-child ;
|
|
|
|
|
|
|
|
M: opener process push-xml ;
|
|
|
|
|
|
|
|
: check-closer ( name opener -- name opener )
|
2008-12-02 20:59:16 -05:00
|
|
|
dup [ unopened ] unless
|
2008-08-27 18:02:54 -04:00
|
|
|
2dup name>> =
|
2008-12-02 20:59:16 -05:00
|
|
|
[ name>> swap mismatched ] unless ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
M: closer process
|
2008-08-27 18:02:54 -04:00
|
|
|
name>> pop-xml first2
|
2008-12-02 20:59:16 -05:00
|
|
|
[ check-closer attrs>> ] dip
|
2007-09-20 18:09:08 -04:00
|
|
|
<tag> add-child ;
|
|
|
|
|
|
|
|
: init-xml-stack ( -- )
|
2009-01-20 16:37:21 -05:00
|
|
|
V{ } clone xml-stack set
|
|
|
|
f push-xml ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
: default-prolog ( -- prolog )
|
2008-03-25 22:45:26 -04:00
|
|
|
"1.0" "UTF-8" f <prolog> ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
: init-xml ( -- )
|
2009-01-29 17:57:13 -05:00
|
|
|
init-ns-stack
|
|
|
|
extra-entities [ H{ } assoc-like ] change ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
: assert-blanks ( seq pre? -- )
|
2008-04-26 00:17:08 -04:00
|
|
|
swap [ string? ] filter
|
2007-09-20 18:09:08 -04:00
|
|
|
[
|
|
|
|
dup [ blank? ] all?
|
2008-12-02 20:59:16 -05:00
|
|
|
[ drop ] [ swap pre/post-content ] if
|
2007-09-20 18:09:08 -04:00
|
|
|
] each drop ;
|
|
|
|
|
|
|
|
: no-pre/post ( pre post -- pre post/* )
|
|
|
|
! this does *not* affect the contents of the stack
|
2008-12-02 20:59:16 -05:00
|
|
|
[ dup t assert-blanks ] [ dup f assert-blanks ] bi* ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
: no-post-tags ( post -- post/* )
|
|
|
|
! this does *not* affect the contents of the stack
|
2008-12-02 20:59:16 -05:00
|
|
|
dup [ tag? ] contains? [ multitags ] when ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
: assure-tags ( seq -- seq )
|
|
|
|
! this does *not* affect the contents of the stack
|
2008-12-02 20:59:16 -05:00
|
|
|
[ notags ] unless* ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2009-01-29 17:57:13 -05:00
|
|
|
: get-prolog ( seq -- prolog )
|
|
|
|
first dup prolog? [ drop default-prolog ] unless ;
|
|
|
|
|
|
|
|
: make-xml-doc ( seq -- xml-doc )
|
|
|
|
[ get-prolog ] keep
|
2007-09-20 18:09:08 -04:00
|
|
|
dup [ tag? ] find
|
2008-12-02 20:59:16 -05:00
|
|
|
[ assure-tags cut rest no-pre/post no-post-tags ] dip
|
|
|
|
swap <xml> ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
! * Views of XML
|
|
|
|
|
|
|
|
SYMBOL: text-now?
|
|
|
|
|
2009-01-27 15:15:00 -05:00
|
|
|
PRIVATE>
|
|
|
|
|
2007-09-20 18:09:08 -04:00
|
|
|
TUPLE: pull-xml scope ;
|
|
|
|
: <pull-xml> ( -- pull-xml )
|
|
|
|
[
|
2008-05-05 03:19:25 -04:00
|
|
|
input-stream [ ] change ! bring var in this scope
|
2009-01-29 17:57:13 -05:00
|
|
|
init-xml text-now? on
|
2007-09-20 18:09:08 -04:00
|
|
|
] H{ } make-assoc
|
2008-08-27 18:02:54 -04:00
|
|
|
pull-xml boa ;
|
2009-01-15 01:11:23 -05:00
|
|
|
! pull-xml needs to call start-document somewhere
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
: pull-event ( pull -- xml-event/f )
|
2008-08-27 18:02:54 -04:00
|
|
|
scope>> [
|
2007-09-20 18:09:08 -04:00
|
|
|
text-now? get [ parse-text f ] [
|
|
|
|
get-char [ make-tag t ] [ f f ] if
|
|
|
|
] if text-now? set
|
|
|
|
] bind ;
|
|
|
|
|
2009-01-27 15:15:00 -05:00
|
|
|
<PRIVATE
|
|
|
|
|
2007-09-20 18:09:08 -04:00
|
|
|
: done? ( -- ? )
|
|
|
|
xml-stack get length 1 = ;
|
|
|
|
|
|
|
|
: (pull-elem) ( pull -- xml-elem/f )
|
|
|
|
dup pull-event dup closer? done? and [ nip ] [
|
|
|
|
process done?
|
|
|
|
[ drop xml-stack get first second ]
|
|
|
|
[ (pull-elem) ] if
|
|
|
|
] if ;
|
|
|
|
|
2009-01-27 15:15:00 -05:00
|
|
|
PRIVATE>
|
|
|
|
|
2007-09-20 18:09:08 -04:00
|
|
|
: pull-elem ( pull -- xml-elem/f )
|
|
|
|
[ init-xml-stack (pull-elem) ] with-scope ;
|
|
|
|
|
2009-01-27 15:15:00 -05:00
|
|
|
<PRIVATE
|
|
|
|
|
2007-09-20 18:09:08 -04:00
|
|
|
: call-under ( quot object -- quot )
|
|
|
|
swap dup slip ; inline
|
|
|
|
|
2009-01-27 15:15:00 -05:00
|
|
|
: xml-loop ( quot: ( xml-elem -- ) -- )
|
2007-09-20 18:09:08 -04:00
|
|
|
parse-text call-under
|
2009-01-27 15:15:00 -05:00
|
|
|
get-char [ make-tag call-under xml-loop ]
|
2008-08-28 23:28:01 -04:00
|
|
|
[ drop ] if ; inline recursive
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2009-01-29 17:57:13 -05:00
|
|
|
: read-seq ( stream quot n -- seq )
|
|
|
|
rot [
|
|
|
|
depth set
|
|
|
|
init-xml init-xml-stack
|
|
|
|
call
|
|
|
|
[ process ] xml-loop
|
|
|
|
done? [ unclosed ] unless
|
|
|
|
xml-stack get first second
|
|
|
|
] with-state ; inline
|
|
|
|
|
2009-01-27 15:15:00 -05:00
|
|
|
PRIVATE>
|
|
|
|
|
|
|
|
: each-element ( stream quot: ( xml-elem -- ) -- )
|
2007-09-20 18:09:08 -04:00
|
|
|
swap [
|
2009-01-29 17:57:13 -05:00
|
|
|
init-xml
|
2009-01-15 23:20:24 -05:00
|
|
|
start-document [ call-under ] when*
|
2009-01-27 15:15:00 -05:00
|
|
|
xml-loop
|
|
|
|
] with-state ; inline
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
: read-xml ( stream -- xml )
|
2009-01-29 17:57:13 -05:00
|
|
|
[ start-document [ process ] when* ]
|
|
|
|
0 read-seq make-xml-doc ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2008-12-02 20:59:16 -05:00
|
|
|
: read-xml-chunk ( stream -- seq )
|
2009-01-29 17:57:13 -05:00
|
|
|
[ check ] 1 read-seq <xml-chunk> ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
: string>xml ( string -- xml )
|
2009-01-29 17:57:13 -05:00
|
|
|
<string-reader> [ check ] 0 read-seq make-xml-doc ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2008-12-02 20:59:16 -05:00
|
|
|
: string>xml-chunk ( string -- xml )
|
2009-01-29 17:57:13 -05:00
|
|
|
<string-reader> read-xml-chunk ;
|
2008-12-02 20:59:16 -05:00
|
|
|
|
2007-09-20 18:09:08 -04:00
|
|
|
: file>xml ( filename -- xml )
|
2009-01-15 01:11:23 -05:00
|
|
|
binary <file-reader> read-xml ;
|
2009-01-22 17:31:22 -05:00
|
|
|
|
2009-01-23 16:29:28 -05:00
|
|
|
: read-dtd ( stream -- dtd )
|
2009-01-22 17:31:22 -05:00
|
|
|
[
|
|
|
|
H{ } clone extra-entities set
|
2009-01-23 16:29:28 -05:00
|
|
|
take-internal-subset
|
2009-01-22 17:31:22 -05:00
|
|
|
] with-state ;
|
|
|
|
|
2009-01-23 16:29:28 -05:00
|
|
|
: file>dtd ( filename -- dtd )
|
2009-01-22 17:31:22 -05:00
|
|
|
utf8 <file-reader> read-dtd ;
|
|
|
|
|
2009-01-23 16:29:28 -05:00
|
|
|
: string>dtd ( string -- dtd )
|
2009-01-22 17:31:22 -05:00
|
|
|
<string-reader> read-dtd ;
|