factor/basis/xml/xml.factor

212 lines
4.9 KiB
Factor
Raw Normal View History

2009-01-21 19:16:51 -05:00
! Copyright (C) 2005, 2009 Daniel Ehrenberg
2007-09-20 18:09:08 -04:00
! See http://factorcode.org/license.txt for BSD license.
2017-01-22 17:47:58 -05:00
USING: accessors arrays ascii assocs combinators
combinators.short-circuit io io.encodings.binary
io.encodings.utf8 io.files io.streams.byte-array
io.streams.string kernel namespaces sequences splitting strings
xml.autoencoding xml.data xml.elements xml.errors xml.name
xml.state xml.tokenize ;
2007-09-20 18:09:08 -04:00
IN: xml
<PRIVATE
2007-09-20 18:09:08 -04:00
: add-child ( object -- )
xml-stack get last second push ;
2007-09-20 18:09:08 -04:00
: push-xml ( object -- )
V{ } clone 2array xml-stack get push ;
: pop-xml ( -- object )
xml-stack get pop ;
GENERIC: process ( object -- )
M: object process add-child ;
M: prolog process
2009-01-29 19:25:23 -05:00
xml-stack get
{ V{ { f V{ "" } } } V{ { f V{ } } } } member?
[ bad-prolog ] unless add-child ;
2007-09-20 18:09:08 -04:00
2009-01-29 19:25:23 -05:00
: before-main? ( -- ? )
xml-stack get {
[ length 1 = ]
2018-02-14 14:56:31 -05:00
[ first second [ tag? ] none? ]
2009-01-29 19:25:23 -05:00
} 1&& ;
2007-09-20 18:09:08 -04:00
M: directive process
2009-01-29 19:25:23 -05:00
before-main? [ misplaced-directive ] unless add-child ;
2007-09-20 18:09:08 -04:00
M: contained process
2008-08-27 18:02:54 -04:00
[ name>> ] [ attrs>> ] bi
2007-09-20 18:09:08 -04:00
<contained-tag> add-child ;
M: opener process push-xml ;
: check-closer ( name opener -- name opener )
dup [ unopened ] unless
2008-08-27 18:02:54 -04:00
2dup name>> =
[ name>> swap mismatched ] unless ;
2007-09-20 18:09:08 -04:00
M: closer process
2008-08-27 18:02:54 -04:00
name>> pop-xml first2
[ check-closer attrs>> ] dip
2007-09-20 18:09:08 -04:00
<tag> add-child ;
: init-xml-stack ( -- )
V{ } clone xml-stack namespaces:set
2009-01-20 16:37:21 -05:00
f push-xml ;
2007-09-20 18:09:08 -04:00
: default-prolog ( -- prolog )
2008-03-25 22:45:26 -04:00
"1.0" "UTF-8" f <prolog> ;
2007-09-20 18:09:08 -04:00
: init-xml ( -- )
2009-01-29 17:57:13 -05:00
init-ns-stack
extra-entities [ H{ } assoc-like ] change ;
2007-09-20 18:09:08 -04:00
: assert-blanks ( seq pre? -- )
swap [ string? ] filter
2007-09-20 18:09:08 -04:00
[
dup [ blank? ] all?
[ drop ] [ swap pre/post-content ] if
2007-09-20 18:09:08 -04:00
] each drop ;
: no-pre/post ( pre post -- pre post/* )
! this does *not* affect the contents of the stack
[ dup t assert-blanks ] [ dup f assert-blanks ] bi* ;
2007-09-20 18:09:08 -04:00
: no-post-tags ( post -- post/* )
! this does *not* affect the contents of the stack
dup [ tag? ] any? [ multitags ] when ;
2007-09-20 18:09:08 -04:00
: assure-tags ( seq -- seq )
! this does *not* affect the contents of the stack
[ notags ] unless* ;
2007-09-20 18:09:08 -04:00
2009-01-29 17:57:13 -05:00
: get-prolog ( seq -- prolog )
{ "" } ?head drop
?first dup prolog?
[ drop default-prolog ] unless ;
: cut-prolog ( seq -- newseq )
[ { [ prolog? not ] [ "" = not ] } 1&& ] filter ;
2009-01-29 17:57:13 -05:00
: make-xml-doc ( seq -- xml-doc )
[ get-prolog ] keep
dup [ tag? ] find [
assure-tags cut
[ cut-prolog ] [ rest ] bi*
no-pre/post no-post-tags
] dip swap <xml> ;
2007-09-20 18:09:08 -04:00
! * Views of XML
SYMBOL: text-now?
: collect-variables ( -- hash )
{
input-stream
extra-entities
spot
ns-stack
text-now?
} [ dup get ] H{ } map>assoc ;
PRIVATE>
2007-09-20 18:09:08 -04:00
TUPLE: pull-xml scope ;
: <pull-xml> ( -- pull-xml )
[
init-parser init-xml text-now? on collect-variables
] with-scope pull-xml boa ;
2009-01-15 01:11:23 -05:00
! pull-xml needs to call start-document somewhere
2007-09-20 18:09:08 -04:00
: pull-event ( pull -- xml-event/f )
2008-08-27 18:02:54 -04:00
scope>> [
2007-09-20 18:09:08 -04:00
text-now? get [ parse-text f ] [
get-char [ make-tag t ] [ f f ] if
] if text-now? namespaces:set
] with-variables ;
2007-09-20 18:09:08 -04:00
<PRIVATE
2007-09-20 18:09:08 -04:00
: done? ( -- ? )
xml-stack get length 1 = ;
: (pull-elem) ( pull -- xml-elem/f )
dup pull-event dup closer? done? and [ nip ] [
process done?
[ drop xml-stack get first second ]
[ (pull-elem) ] if
] if ;
PRIVATE>
2007-09-20 18:09:08 -04:00
: pull-elem ( pull -- xml-elem/f )
[ init-xml-stack (pull-elem) ] with-scope ;
<PRIVATE
2007-09-20 18:09:08 -04:00
: call-under ( quot object -- quot )
2009-05-10 16:28:22 -04:00
swap [ call ] keep ; inline
2007-09-20 18:09:08 -04:00
: xml-loop ( quot: ( xml-elem -- ) -- )
parse-text call-under get-char
[ make-tag call-under xml-loop ]
2008-08-28 23:28:01 -04:00
[ drop ] if ; inline recursive
2007-09-20 18:09:08 -04:00
2009-01-29 17:57:13 -05:00
: read-seq ( stream quot n -- seq )
rot [
depth namespaces:set
2009-01-29 17:57:13 -05:00
init-xml init-xml-stack
call
[ process ] xml-loop
2013-03-24 02:08:15 -04:00
done? [ throw-unclosed ] unless
2009-01-29 17:57:13 -05:00
xml-stack get first second
] with-state ; inline
: make-xml ( stream quot -- xml )
0 read-seq make-xml-doc ; inline
PRIVATE>
: each-element ( stream quot: ( xml-elem -- ) -- )
2007-09-20 18:09:08 -04:00
swap [
2009-01-29 17:57:13 -05:00
init-xml
2009-01-15 23:20:24 -05:00
start-document [ call-under ] when*
xml-loop
] with-state ; inline
2007-09-20 18:09:08 -04:00
: read-xml ( stream -- xml )
dup stream-element-type {
{ +character+ [ [ check ] make-xml ] }
{ +byte+ [ [ start-document [ process ] when* ] make-xml ] }
} case ;
2007-09-20 18:09:08 -04:00
: read-xml-chunk ( stream -- seq )
2009-01-29 17:57:13 -05:00
[ check ] 1 read-seq <xml-chunk> ;
2007-09-20 18:09:08 -04:00
: string>xml ( string -- xml )
<string-reader> read-xml ;
2007-09-20 18:09:08 -04:00
: string>xml-chunk ( string -- xml )
2009-01-29 17:57:13 -05:00
<string-reader> read-xml-chunk ;
2007-09-20 18:09:08 -04:00
: file>xml ( filename -- xml )
2009-01-15 01:11:23 -05:00
binary <file-reader> read-xml ;
2009-01-22 17:31:22 -05:00
: bytes>xml ( byte-array -- xml )
binary <byte-reader> read-xml ;
: read-dtd ( stream -- dtd )
2009-01-22 17:31:22 -05:00
[
H{ } clone extra-entities namespaces:set
take-internal-subset
2009-01-22 17:31:22 -05:00
] with-state ;
: file>dtd ( filename -- dtd )
2009-01-22 17:31:22 -05:00
utf8 <file-reader> read-dtd ;
: string>dtd ( string -- dtd )
2009-01-22 17:31:22 -05:00
<string-reader> read-dtd ;