! Copyright (C) 2005, 2006 Daniel Ehrenberg ! See http://factorcode.org/license.txt for BSD license. USING: io io.streams.string io.files kernel math namespaces prettyprint sequences arrays generic strings vectors xml.char-classes xml.data xml.errors xml.tokenize xml.writer xml.utilities state-parser assocs ascii io.encodings.utf8 ; IN: xml ! -- Overall parser with data tree ! A stack of { tag children } pairs SYMBOL: xml-stack : ( -- unclosed ) xml-stack get 1 tail-slice [ first opener-name ] map { set-unclosed-tags } unclosed construct ; : add-child ( object -- ) xml-stack get peek second push ; : push-xml ( object -- ) V{ } clone 2array xml-stack get push ; : pop-xml ( -- object ) xml-stack get pop ; GENERIC: process ( object -- ) M: object process add-child ; M: prolog process xml-stack get V{ { f V{ "" } } } = [ throw ] unless drop ; M: instruction process xml-stack get length 1 = [ throw ] unless add-child ; M: directive process xml-stack get dup length 1 = swap first second [ tag? ] contains? not and [ throw ] unless add-child ; M: contained process [ contained-name ] keep contained-attrs add-child ; M: opener process push-xml ; : check-closer ( name opener -- name opener ) dup [ throw ] unless 2dup opener-name = [ opener-name swap throw ] unless ; M: closer process closer-name pop-xml first2 >r check-closer opener-attrs r> add-child ; : init-xml-stack ( -- ) V{ } clone xml-stack set f push-xml ; : default-prolog ( -- prolog ) "1.0" "UTF-8" f ; : reset-prolog ( -- ) default-prolog prolog-data set ; : init-xml ( -- ) reset-prolog init-xml-stack init-ns-stack ; : assert-blanks ( seq pre? -- ) swap [ string? ] subset [ dup [ blank? ] all? [ drop ] [ swap
 throw ] if
    ] each drop ;

: no-pre/post ( pre post -- pre post/* )
    ! this does *not* affect the contents of the stack
    >r dup t assert-blanks r>
    dup f assert-blanks ;

: no-post-tags ( post -- post/* )
    ! this does *not* affect the contents of the stack
    dup [ tag? ] contains? [  throw ] when ; 

: assure-tags ( seq -- seq )
    ! this does *not* affect the contents of the stack
    [  throw ] unless* ;

: make-xml-doc ( prolog seq -- xml-doc )
    dup [ tag? ] find
    >r assure-tags cut 1 tail
    no-pre/post no-post-tags
    r> swap  ;

! * Views of XML

SYMBOL: text-now?

TUPLE: pull-xml scope ;
:  ( -- pull-xml )
    [
        stdio [ ] change ! bring stdio var in this scope
        init-parser reset-prolog init-ns-stack
        text-now? on
    ] H{ } make-assoc
    { set-pull-xml-scope } pull-xml construct ;

: pull-event ( pull -- xml-event/f )
    pull-xml-scope [
        text-now? get [ parse-text f ] [
            get-char [ make-tag t ] [ f f ] if
        ] if text-now? set
    ] bind ;

: done? ( -- ? )
    xml-stack get length 1 = ;

: (pull-elem) ( pull -- xml-elem/f )
    dup pull-event dup closer? done? and [ nip ] [
        process done?
        [ drop xml-stack get first second ]
        [ (pull-elem) ] if
    ] if ;

: pull-elem ( pull -- xml-elem/f )
    [ init-xml-stack (pull-elem) ] with-scope ;

: call-under ( quot object -- quot )
    swap dup slip ; inline

: sax-loop ( quot -- ) ! quot: xml-elem --
    parse-text call-under
    get-char [ make-tag call-under sax-loop ]
    [ drop ] if ; inline

: sax ( stream quot -- ) ! quot: xml-elem --
    swap [
        reset-prolog init-ns-stack
        prolog-data get call-under
        sax-loop
    ] state-parse ; inline

: (read-xml) ( -- )
    [ process ] sax-loop ; inline

: (xml-chunk) ( stream -- prolog seq )
    [
        init-xml (read-xml)
        done? [  throw ] unless
        xml-stack get first second
        prolog-data get swap
    ] state-parse ;

: read-xml ( stream -- xml )
    #! Produces a tree of XML nodes
    (xml-chunk) make-xml-doc ;

: xml-chunk ( stream -- seq )
    (xml-chunk) nip ;

: string>xml ( string -- xml )
     read-xml ;

: file>xml ( filename -- xml )
    ! Autodetect encoding!
    utf8  read-xml ;

: xml-reprint ( string -- )
    string>xml print-xml ;