Merge branch 'master' of git://factorcode.org/git/factor

db4
John Benediktsson 2009-01-25 23:34:17 -08:00
commit 28dd81c685
23 changed files with 348 additions and 219 deletions

View File

@ -1,5 +1,5 @@
USING: syndication io kernel io.files tools.test io.encodings.utf8 USING: syndication io kernel io.files tools.test io.encodings.utf8
calendar urls ; calendar urls xml.writer ;
IN: syndication.tests IN: syndication.tests
\ download-feed must-infer \ download-feed must-infer
@ -43,3 +43,4 @@ IN: syndication.tests
} }
} }
} ] [ "resource:basis/syndication/test/atom.xml" load-news-file ] unit-test } ] [ "resource:basis/syndication/test/atom.xml" load-news-file ] unit-test
[ ] [ "resource:basis/syndication/test/atom.xml" load-news-file xml>string drop ] unit-test

View File

@ -1,10 +1,10 @@
! Copyright (C) 2006 Chris Double, Daniel Ehrenberg. ! Copyright (C) 2006 Chris Double, Daniel Ehrenberg.
! Portions copyright (C) 2008 Slava Pestov. ! Portions copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: xml.utilities kernel assocs xml.generator math.order USING: xml.utilities kernel assocs math.order
strings sequences xml.data xml.writer strings sequences xml.data xml.writer
io.streams.string combinators xml xml.entities.html io.files io io.streams.string combinators xml xml.entities.html io.files io
http.client namespaces make xml.generator hashtables http.client namespaces make xml.interpolate hashtables
calendar.format accessors continuations urls present ; calendar.format accessors continuations urls present ;
IN: syndication IN: syndication
@ -114,26 +114,31 @@ TUPLE: entry title url description date ;
http-get nip string>feed ; http-get nip string>feed ;
! Atom generation ! Atom generation
: simple-tag, ( content name -- )
[ , ] tag, ;
: simple-tag*, ( content name attrs -- ) : entry>xml ( entry -- xml )
[ , ] tag*, ; {
[ title>> ]
: entry, ( entry -- ) [ url>> present ]
"entry" [ [ date>> timestamp>rfc3339 ]
{ [ description>> ]
[ title>> "title" { { "type" "html" } } simple-tag*, ] } cleave
[ url>> present "href" associate "link" swap contained*, ] [XML
[ date>> timestamp>rfc3339 "published" simple-tag, ] <entry>
[ description>> [ "content" { { "type" "html" } } simple-tag*, ] when* ] <title type="html"><-></title>
} cleave <link href=<-> />
] tag, ; <published><-></published>
<content type="html"><-></content>
</entry>
XML] ;
: feed>xml ( feed -- xml ) : feed>xml ( feed -- xml )
"feed" { { "xmlns" "http://www.w3.org/2005/Atom" } } [ [ title>> ]
[ title>> "title" simple-tag, ] [ url>> present ]
[ url>> present "href" associate "link" swap contained*, ] [ entries>> [ entry>xml ] map ] tri
[ entries>> [ entry, ] each ] <XML
tri <feed xmlns="http://www.w3.org/2005/Atom">
] make-xml* ; <title><-></title>
<link href=<-> />
<->
</feed>
XML> ;

View File

@ -10,8 +10,8 @@ IN: xml.autoencoding
: start-utf16le ( -- tag ) : start-utf16le ( -- tag )
utf16le decode-input-if utf16le decode-input-if
CHAR: ? expect "?\0" expect
0 expect check instruct ; check instruct ;
: 10xxxxxx? ( ch -- ? ) : 10xxxxxx? ( ch -- ? )
-6 shift 3 bitand 2 = ; -6 shift 3 bitand 2 = ;
@ -36,10 +36,10 @@ IN: xml.autoencoding
: skip-utf8-bom ( -- tag ) : skip-utf8-bom ( -- tag )
"\u0000bb\u0000bf" expect utf8 decode-input "\u0000bb\u0000bf" expect utf8 decode-input
CHAR: < expect check make-tag ; "<" expect check make-tag ;
: decode-expecting ( encoding string -- tag ) : decode-expecting ( encoding string -- tag )
[ decode-input-if next ] [ expect-string ] bi* check make-tag ; [ decode-input-if next ] [ expect ] bi* check make-tag ;
: start-utf16be ( -- tag ) : start-utf16be ( -- tag )
utf16be "<" decode-expecting ; utf16be "<" decode-expecting ;

View File

@ -150,3 +150,52 @@ HELP: assure-name
HELP: <simple-name> HELP: <simple-name>
{ $values { "string" string } { "name" name } } { $values { "string" string } { "name" name } }
{ $description "Converts a string into an XML name with an empty prefix and URL." } ; { $description "Converts a string into an XML name with an empty prefix and URL." } ;
HELP: element-decl
{ $class-description "Describes the class of element declarations, like <!ELEMENT greeting (#PCDATA)>." } ;
HELP: <element-decl>
{ $values { "name" name } { "content-spec" string } { "element-decl" entity-decl } }
{ $description "Creates an element declaration object, of the class " { $link element-decl } } ;
HELP: attlist-decl
{ $class-description "Describes the class of element declarations, like <!ATTLIST pre xml:space (preserve) #FIXED 'preserve'>." } ;
HELP: <attlist-decl>
{ $values { "name" name } { "att-defs" string } { "attlist-decl" attlist-decl } }
{ $description "Creates an element declaration object, of the class " { $link attlist-decl } } ;
HELP: entity-decl
{ $class-description "Describes the class of element declarations, like <!ENTITY foo 'bar'>." } ;
HELP: <entity-decl>
{ $values { "name" name } { "def" string } { "pe?" "t or f" } { "entity-decl" entity-decl } }
{ $description "Creates an entity declaration object, of the class " { $link entity-decl } ". The pe? slot should be t if the object is a DTD-internal entity, like <!ENTITY % foo 'bar'> and f if the object is like <!ENTITY foo 'bar'>, that is, it can be used outside of the DTD." } ;
HELP: system-id
{ $class-description "Describes the class of system identifiers within an XML DTD directive, such as <!DOCTYPE greeting " { $emphasis "SYSTEM 'hello.dtd'" } ">" } ;
HELP: <system-id>
{ $values { "system-literal" string } { "system-id" system-id } }
{ $description "Constructs a " { $link system-id } " tuple." } ;
HELP: public-id
{ $class-description "Describes the class of public identifiers within an XML DTD directive, such as <!DOCTYPE open-hatch " { $emphasis "PUBLIC '-//Textuality//TEXT Standard open-hatch boilerplate//EN' 'http://www.textuality.com/boilerplate/OpenHatch.xml'" } ">" } ;
HELP: <public-id>
{ $values { "pubid-literal" string } { "system-literal" string } { "public-id" public-id } }
{ $description "Constructs a " { $link system-id } " tuple." } ;
HELP: notation-decl
{ $class-description "Describes the class of element declarations, like <!NOTATION jpg SYSTEM './jpgviewer'>." } ;
HELP: <notation-decl>
{ $values { "name" string } { "id" id } { "notation-decl" notation-decl } }
{ $description "Creates an notation declaration object, of the class " { $link notation-decl } "." } ;
HELP: doctype-decl
{ $class-description "Describes the class of doctype declarations." } ;
HELP: <doctype-decl>
{ $values { "name" name } { "external-id" id } { "internal-subset" sequence } { "doctype-decl" doctype-decl } }
{ $description "Creates a new doctype declaration object, of the class " { $link doctype-decl } ". Only one of external-id or internal-subset will be non-null." } ;

View File

@ -5,6 +5,9 @@ delegate.protocols delegate vectors accessors multiline
macros words quotations combinators slots fry strings ; macros words quotations combinators slots fry strings ;
IN: xml.data IN: xml.data
TUPLE: interpolated var ;
C: <interpolated> interpolated
UNION: nullable-string string POSTPONE: f ; UNION: nullable-string string POSTPONE: f ;
TUPLE: name TUPLE: name
@ -85,11 +88,13 @@ C: <comment> comment
TUPLE: directive ; TUPLE: directive ;
TUPLE: element-decl < directive TUPLE: element-decl < directive
{ name string } { content-spec string } ; { name string }
{ content-spec string } ;
C: <element-decl> element-decl C: <element-decl> element-decl
TUPLE: attlist-decl < directive TUPLE: attlist-decl < directive
{ name string } { att-defs string } ; { name string }
{ att-defs string } ;
C: <attlist-decl> attlist-decl C: <attlist-decl> attlist-decl
UNION: boolean t POSTPONE: f ; UNION: boolean t POSTPONE: f ;
@ -108,13 +113,23 @@ C: <public-id> public-id
UNION: id system-id public-id POSTPONE: f ; UNION: id system-id public-id POSTPONE: f ;
TUPLE: dtd
{ directives sequence }
{ entities assoc }
{ parameter-entities assoc } ;
C: <dtd> dtd
UNION: dtd/f dtd POSTPONE: f ;
TUPLE: doctype-decl < directive TUPLE: doctype-decl < directive
{ name string } { name string }
{ external-id id } { external-id id }
{ internal-subset sequence } ; { internal-subset dtd/f } ;
C: <doctype-decl> doctype-decl C: <doctype-decl> doctype-decl
TUPLE: notation-decl < directive name id ; TUPLE: notation-decl < directive
{ name string }
{ id string } ;
C: <notation-decl> notation-decl C: <notation-decl> notation-decl
TUPLE: instruction { text string } ; TUPLE: instruction { text string } ;

View File

@ -2,12 +2,9 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: xml.tokenize xml.data xml.state kernel sequences ascii USING: xml.tokenize xml.data xml.state kernel sequences ascii
fry xml.errors combinators hashtables namespaces xml.entities fry xml.errors combinators hashtables namespaces xml.entities
strings ; strings xml.name ;
IN: xml.dtd IN: xml.dtd
: take-word ( -- string )
[ get-char blank? ] take-until ;
: take-decl-contents ( -- first second ) : take-decl-contents ( -- first second )
pass-blank take-word pass-blank ">" take-string ; pass-blank take-word pass-blank ">" take-string ;
@ -20,36 +17,15 @@ IN: xml.dtd
: take-notation-decl ( -- notation-decl ) : take-notation-decl ( -- notation-decl )
take-decl-contents <notation-decl> ; take-decl-contents <notation-decl> ;
: take-until-one-of ( seps -- str sep )
'[ get-char _ member? ] take-until get-char ;
: take-system-id ( -- system-id )
parse-quote <system-id> close ;
: take-public-id ( -- public-id )
parse-quote parse-quote <public-id> close ;
UNION: dtd-acceptable UNION: dtd-acceptable
directive comment instruction ; directive comment instruction ;
: (take-external-id) ( token -- external-id )
pass-blank {
{ "SYSTEM" [ take-system-id ] }
{ "PUBLIC" [ take-public-id ] }
[ bad-external-id ]
} case ;
: take-external-id ( -- external-id )
take-word (take-external-id) ;
: only-blanks ( str -- )
[ blank? ] all? [ bad-decl ] unless ;
: take-entity-def ( var -- entity-name entity-def ) : take-entity-def ( var -- entity-name entity-def )
[ [
take-word pass-blank get-char { take-word pass-blank get-char {
{ CHAR: ' [ parse-quote ] } { CHAR: ' [ parse-quote ] }
{ CHAR: " [ parse-quote ] } { CHAR: " [ parse-quote ] }
[ drop take-external-id ] [ drop take-external-id close ]
} case } case
] dip '[ swap _ [ ?set-at ] change ] 2keep ; ] dip '[ swap _ [ ?set-at ] change ] 2keep ;

View File

@ -3,12 +3,26 @@
USING: kernel namespaces xml.tokenize xml.state xml.name USING: kernel namespaces xml.tokenize xml.state xml.name
xml.data accessors arrays make xml.char-classes fry assocs sequences xml.data accessors arrays make xml.char-classes fry assocs sequences
math xml.errors sets combinators io.encodings io.encodings.iana math xml.errors sets combinators io.encodings io.encodings.iana
unicode.case xml.dtd strings ; unicode.case xml.dtd strings xml.entities ;
IN: xml.elements IN: xml.elements
: take-interpolated ( quot -- interpolated )
interpolating? get [
drop get-char CHAR: > =
[ next f ] [
pass-blank " \t\r\n-" take-to
pass-blank "->" expect
] if <interpolated>
] [ call ] if ; inline
: interpolate-quote ( -- interpolated )
[ quoteless-attr ] take-interpolated ;
: parse-attr ( -- ) : parse-attr ( -- )
parse-name pass-blank CHAR: = expect pass-blank parse-name pass-blank "=" expect pass-blank
t parse-quote* 2array , ; get-char CHAR: < =
[ "<-" expect interpolate-quote ]
[ t parse-quote* ] if 2array , ;
: start-tag ( -- name ? ) : start-tag ( -- name ? )
#! Outputs the name and whether this is a closing tag #! Outputs the name and whether this is a closing tag
@ -31,14 +45,14 @@ IN: xml.elements
: end-tag ( name attrs-alist -- tag ) : end-tag ( name attrs-alist -- tag )
tag-ns pass-blank get-char CHAR: / = tag-ns pass-blank get-char CHAR: / =
[ pop-ns <contained> next CHAR: > expect ] [ pop-ns <contained> next ">" expect ]
[ depth inc <opener> close ] if ; [ depth inc <opener> close ] if ;
: take-comment ( -- comment ) : take-comment ( -- comment )
"--" expect-string "--" expect
"--" take-string "--" take-string
<comment> <comment>
CHAR: > expect ; ">" expect ;
: assure-no-extra ( seq -- ) : assure-no-extra ( seq -- )
[ first ] map { [ first ] map {
@ -80,7 +94,7 @@ SYMBOL: string-input?
string-input? get [ drop ] [ decode-input ] if ; string-input? get [ drop ] [ decode-input ] if ;
: parse-prolog ( -- prolog ) : parse-prolog ( -- prolog )
pass-blank middle-tag "?>" expect-string pass-blank middle-tag "?>" expect
dup assure-no-extra prolog-attrs dup assure-no-extra prolog-attrs
dup encoding>> dup "UTF-16" = dup encoding>> dup "UTF-16" =
[ drop ] [ name>encoding [ decode-input-if ] when* ] if [ drop ] [ name>encoding [ decode-input-if ] when* ] if
@ -96,45 +110,45 @@ SYMBOL: string-input?
: take-cdata ( -- string ) : take-cdata ( -- string )
depth get zero? [ bad-cdata ] when depth get zero? [ bad-cdata ] when
"[CDATA[" expect-string "]]>" take-string ; "[CDATA[" expect "]]>" take-string ;
DEFER: make-tag ! Is this unavoidable? DEFER: make-tag ! Is this unavoidable?
: expand-pe ( -- ) ; ! Make this run the contents of the pe within a DOCTYPE : expand-pe ( -- ) ; ! Make this run the contents of the pe within a DOCTYPE
: (take-internal-subset) ( -- ) : dtd-loop ( -- )
pass-blank get-char { pass-blank get-char {
{ CHAR: ] [ next ] } { CHAR: ] [ next ] }
{ CHAR: % [ expand-pe ] } { CHAR: % [ expand-pe ] }
{ CHAR: < [ { CHAR: < [
next make-tag dup dtd-acceptable? next make-tag dup dtd-acceptable?
[ bad-doctype ] unless , (take-internal-subset) [ bad-doctype ] unless , dtd-loop
] } ] }
{ f [ ] }
[ 1string bad-doctype ] [ 1string bad-doctype ]
} case ; } case ;
: take-internal-subset ( -- seq ) : take-internal-subset ( -- dtd )
[ [
H{ } pe-table set H{ } clone pe-table set
t in-dtd? set t in-dtd? set
(take-internal-subset) dtd-loop
] { } make ; pe-table get
] { } make swap extra-entities get swap <dtd> ;
: nontrivial-doctype ( -- external-id internal-subset ) : take-optional-id ( -- id/f )
pass-blank get-char CHAR: [ = [ get-char "SP" member?
next take-internal-subset f swap close [ take-external-id ] [ f ] if ;
] [
" >" take-until-one-of { : take-internal ( -- dtd/f )
{ CHAR: \s [ (take-external-id) ] } get-char CHAR: [ =
{ CHAR: > [ only-blanks f ] } [ next take-internal-subset ] [ f ] if ;
} case f
] if ;
: take-doctype-decl ( -- doctype-decl ) : take-doctype-decl ( -- doctype-decl )
pass-blank " >" take-until-one-of { pass-blank take-name
{ CHAR: \s [ nontrivial-doctype ] } pass-blank take-optional-id
{ CHAR: > [ f f ] } pass-blank take-internal
} case <doctype-decl> ; <doctype-decl> close ;
: take-directive ( -- doctype ) : take-directive ( -- doctype )
take-name dup "DOCTYPE" = take-name dup "DOCTYPE" =
@ -151,12 +165,18 @@ DEFER: make-tag ! Is this unavoidable?
[ drop take-directive ] [ drop take-directive ]
} case ; } case ;
: normal-tag ( -- tag )
start-tag
[ dup add-ns pop-ns <closer> depth dec close ]
[ middle-tag end-tag ] if ;
: interpolate-tag ( -- interpolated )
[ "-" bad-name ] take-interpolated ;
: make-tag ( -- tag ) : make-tag ( -- tag )
{ {
{ [ get-char dup CHAR: ! = ] [ drop next direct ] } { [ get-char dup CHAR: ! = ] [ drop next direct ] }
{ [ CHAR: ? = ] [ next instruct ] } { [ dup CHAR: ? = ] [ drop next instruct ] }
[ { [ dup CHAR: - = ] [ drop next interpolate-tag ] }
start-tag [ dup add-ns pop-ns <closer> depth dec close ] [ drop normal-tag ]
[ middle-tag end-tag ] if
]
} cond ; } cond ;

View File

@ -1,7 +1,7 @@
! Copyright (C) 2005, 2006 Daniel Ehrenberg ! Copyright (C) 2005, 2006 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: namespaces make kernel assocs sequences fry values USING: namespaces make kernel assocs sequences fry values
io.files io.encodings.binary ; io.files io.encodings.binary xml.state ;
IN: xml.entities IN: xml.entities
: entities-out : entities-out
@ -37,7 +37,5 @@ IN: xml.entities
{ "quot" CHAR: " } { "quot" CHAR: " }
} ; } ;
SYMBOL: extra-entities
: with-entities ( entities quot -- ) : with-entities ( entities quot -- )
[ swap extra-entities set call ] with-scope ; inline [ swap extra-entities set call ] with-scope ; inline

View File

@ -1,13 +1,13 @@
! Copyright (C) 2009 Daniel Ehrenberg. ! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: assocs io.encodings.binary io.files kernel namespaces sequences USING: assocs io.encodings.binary io.files kernel namespaces sequences
values xml xml.entities ; values xml xml.entities accessors xml.state ;
IN: xml.entities.html IN: xml.entities.html
VALUE: html-entities VALUE: html-entities
: read-entities-file ( file -- table ) : read-entities-file ( file -- table )
file>dtd nip ; file>dtd entities>> ;
: get-html ( -- table ) : get-html ( -- table )
{ "lat1" "special" "symbol" } [ { "lat1" "special" "symbol" } [

View File

@ -1 +0,0 @@
Daniel Ehrenberg

View File

@ -1,3 +0,0 @@
USING: tools.test io.streams.string xml.generator xml.writer accessors ;
[ "<html><body><a href=\"blah\"/></body></html>" ]
[ "html" [ "body" [ "a" { { "href" "blah" } } contained*, ] tag, ] make-xml [ body>> write-xml-chunk ] with-string-writer ] unit-test

View File

@ -1,25 +0,0 @@
! Copyright (C) 2006, 2007 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: namespaces make kernel xml.data xml.utilities assocs
sequences ;
IN: xml.generator
: comment, ( string -- ) <comment> , ;
: instruction, ( string -- ) <instruction> , ;
: nl, ( -- ) "\n" , ;
: (tag,) ( name attrs quot -- tag )
-rot [ V{ } make ] 2dip rot <tag> ; inline
: tag*, ( name attrs quot -- )
(tag,) , ; inline
: contained*, ( name attrs -- )
f <tag> , ;
: tag, ( name quot -- ) f swap tag*, ; inline
: contained, ( name -- ) f contained*, ; inline
: make-xml* ( name attrs quot -- xml )
(tag,) build-xml ; inline
: make-xml ( name quot -- xml )
f swap make-xml* ; inline

View File

@ -1,4 +1,46 @@
! Copyright (C) 2009 Daniel Ehrenberg. ! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: tools.test xml.interpolate ; USING: tools.test xml.interpolate multiline kernel assocs
sequences accessors xml.writer xml.interpolate.private
locals splitting ;
IN: xml.interpolate.tests IN: xml.interpolate.tests
[ "a" "c" { "a" "c" f } ] [
"<?xml version='1.0'?><x><-a-><b val=<-c->/><-></x>"
interpolated-doc
[ second var>> ]
[ fourth "val" swap at var>> ]
[ extract-variables ] tri
] unit-test
[ {" <?xml version="1.0" encoding="UTF-8"?>
<x>
one
<b val="two"/>
y
<foo/>
</x>"} ] [
[let* | a [ "one" ] c [ "two" ] x [ "y" ]
d [ [XML <-x-> <foo/> XML] ] |
<XML
<x> <-a-> <b val=<-c->/> <-d-> </x>
XML> pprint-xml>string
]
] unit-test
[ {" <?xml version="1.0" encoding="UTF-8"?>
<doc>
<item>
one
</item>
<item>
two
</item>
<item>
three
</item>
</doc>"} ] [
"one two three" " " split
[ [XML <item><-></item> XML] ] map
<XML <doc><-></doc> XML> pprint-xml>string
] unit-test

View File

@ -1,4 +1,95 @@
! Copyright (C) 2009 Daniel Ehrenberg. ! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: ; USING: xml xml.state kernel sequences fry assocs xml.data
accessors strings make multiline parser namespaces macros
sequences.deep generalizations locals words combinators
math ;
IN: xml.interpolate IN: xml.interpolate
<PRIVATE
: interpolated-chunk ( string -- chunk )
t interpolating? [ string>xml-chunk ] with-variable ;
: interpolated-doc ( string -- xml )
t interpolating? [ string>xml ] with-variable ;
DEFER: interpolate-sequence
: interpolate-attrs ( table attrs -- attrs )
swap '[ dup interpolated? [ var>> _ at ] when ] assoc-map ;
: interpolate-tag ( table tag -- tag )
[ nip name>> ]
[ attrs>> interpolate-attrs ]
[ children>> [ interpolate-sequence ] [ drop f ] if* ] 2tri
<tag> ;
GENERIC: push-item ( item -- )
M: string push-item , ;
M: object push-item , ;
M: sequence push-item % ;
GENERIC: interpolate-item ( table item -- )
M: object interpolate-item nip , ;
M: tag interpolate-item interpolate-tag , ;
M: interpolated interpolate-item
var>> swap at push-item ;
: interpolate-sequence ( table seq -- seq )
[ [ interpolate-item ] with each ] { } make ;
: interpolate-xml-doc ( table xml -- xml )
(clone) [ interpolate-tag ] change-body ;
GENERIC# (each-interpolated) 1 ( item quot -- ) inline
M: interpolated (each-interpolated) call ;
M: tag (each-interpolated)
swap attrs>> values
[ interpolated? ] filter
swap each ;
M: object (each-interpolated) 2drop ;
: each-interpolated ( xml quot -- )
'[ _ (each-interpolated) ] deep-each ; inline
:: number<-> ( doc -- doc )
0 :> n! doc [
dup var>> [ n >>var n 1+ n! ] unless drop
] each-interpolated doc ;
MACRO: interpolate-xml ( string -- doc )
interpolated-doc number<-> '[ _ interpolate-xml-doc ] ;
MACRO: interpolate-chunk ( string -- chunk )
interpolated-chunk number<-> '[ _ interpolate-sequence ] ;
: >search-hash ( seq -- hash )
[ dup search ] H{ } map>assoc ;
: extract-variables ( xml -- seq )
[ [ var>> , ] each-interpolated ] { } make ;
: collect ( accum seq -- accum )
{
{ [ dup [ ] all? ] [ >search-hash parsed ] } ! locals
{ [ dup [ not ] all? ] [ ! fry
length parsed \ narray parsed \ <enum> parsed
] }
[ drop "XML interpolation contains both fry and locals" throw ] ! mixed
} cond ;
: parse-def ( accum delimiter word -- accum )
[
parse-multiline-string
[ interpolated-chunk extract-variables collect ] keep
parsed
] dip parsed ;
PRIVATE>
: <XML
"XML>" \ interpolate-xml parse-def ; parsing
: [XML
"XML]" \ interpolate-chunk parse-def ; parsing

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel namespaces accessors xml.tokenize xml.data assocs USING: kernel namespaces accessors xml.tokenize xml.data assocs
xml.errors xml.char-classes combinators.short-circuit splitting xml.errors xml.char-classes combinators.short-circuit splitting
fry xml.state sequences ; fry xml.state sequences combinators ascii ;
IN: xml.name IN: xml.name
! XML namespace processing: ns = namespace ! XML namespace processing: ns = namespace
@ -74,3 +74,21 @@ SYMBOL: ns-stack
: parse-name-starting ( string -- name ) : parse-name-starting ( string -- name )
take-name append interpret-name ; take-name append interpret-name ;
: take-system-id ( -- system-id )
parse-quote <system-id> ;
: take-public-id ( -- public-id )
parse-quote parse-quote <public-id> ;
: (take-external-id) ( token -- external-id )
pass-blank {
{ "SYSTEM" [ take-system-id ] }
{ "PUBLIC" [ take-public-id ] }
[ bad-external-id ]
} case ;
: take-word ( -- string )
[ get-char blank? ] take-until ;
: take-external-id ( -- external-id )
take-word (take-external-id) ;

View File

@ -23,3 +23,11 @@ SYMBOL: xml-stack
SYMBOL: prolog-data SYMBOL: prolog-data
SYMBOL: depth SYMBOL: depth
SYMBOL: interpolating?
SYMBOL: in-dtd?
SYMBOL: pe-table
SYMBOL: extra-entities

View File

@ -7,6 +7,9 @@ IN: xml.test.state
: take-rest ( -- string ) : take-rest ( -- string )
[ f ] take-until ; [ f ] take-until ;
: take-char ( char -- string )
1string take-to ;
[ "hello" ] [ "hello" [ take-rest ] string-parse ] unit-test [ "hello" ] [ "hello" [ take-rest ] string-parse ] unit-test
[ 2 4 ] [ "12\n123" [ take-rest drop get-line get-column ] string-parse ] unit-test [ 2 4 ] [ "12\n123" [ take-rest drop get-line get-column ] string-parse ] unit-test
[ "hi" " how are you?" ] [ "hi how are you?" [ [ get-char blank? ] take-until take-rest ] string-parse ] unit-test [ "hi" " how are you?" ] [ "hi how are you?" [ [ get-char blank? ] take-until take-rest ] string-parse ] unit-test

View File

@ -49,10 +49,10 @@ SYMBOL: xml-file
[ "foo" ] [ "<boo><![CDATA[foo]]></boo>" string>xml children>string ] unit-test [ "foo" ] [ "<boo><![CDATA[foo]]></boo>" string>xml children>string ] unit-test
[ "<!-- B+, B, or B--->" string>xml ] must-fail [ "<!-- B+, B, or B--->" string>xml ] must-fail
[ ] [ "<?xml version='1.0'?><!-- declarations for <head> & <body> --><foo/>" string>xml drop ] unit-test [ ] [ "<?xml version='1.0'?><!-- declarations for <head> & <body> --><foo/>" string>xml drop ] unit-test
[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>dtd drop second ] unit-test [ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>dtd directives>> first ] unit-test
[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>dtd drop second ] unit-test [ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>dtd directives>> first ] unit-test
[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>dtd drop second ] unit-test [ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>dtd directives>> first ] unit-test
[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>dtd drop second ] unit-test [ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>dtd directives>> first ] unit-test
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk first ] unit-test [ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk first ] unit-test [ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk first ] unit-test [ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk first ] unit-test

View File

@ -1,8 +1,8 @@
! Copyright (C) 2008 Slava Pestov. ! Copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: xml io.encodings.utf8 io.files kernel tools.test ; USING: xml xml.data kernel tools.test ;
IN: xml.tests IN: xml.tests
[ ] [ [ t ] [
"resource:basis/xmode/xmode.dtd" file>dtd 2drop "resource:basis/xmode/xmode.dtd" file>dtd dtd?
] unit-test ] unit-test

View File

@ -58,8 +58,8 @@ IN: xml.tokenize
'[ @ [ t ] [ get-char _ push f ] if ] skip-until '[ @ [ t ] [ get-char _ push f ] if ] skip-until
] keep >string ; inline ] keep >string ; inline
: take-char ( ch -- string ) : take-to ( seq -- string )
[ dup get-char = ] take-until nip ; '[ get-char _ member? ] take-until ;
: pass-blank ( -- ) : pass-blank ( -- )
#! Advance code past any whitespace, including newlines #! Advance code past any whitespace, including newlines
@ -75,33 +75,29 @@ IN: xml.tokenize
dup length rot length 1- - head dup length rot length 1- - head
get-char [ missing-close ] unless next ; get-char [ missing-close ] unless next ;
: expect ( ch -- ) : expect ( string -- )
get-char 2dup = [ 2drop ] [
[ 1string ] bi@ expected
] if next ;
: expect-string ( string -- )
dup [ get-char next ] replicate 2dup = dup [ get-char next ] replicate 2dup =
[ 2drop ] [ expected ] if ; [ 2drop ] [ expected ] if ;
! Suddenly XML-specific
: parse-named-entity ( string -- ) : parse-named-entity ( string -- )
dup entities at [ , ] [ dup entities at [ , ] [
dup extra-entities get at dup extra-entities get at
[ % ] [ no-entity ] ?if [ % ] [ no-entity ] ?if
] ?if ; ] ?if ;
: take-; ( -- string )
next ";" take-to next ;
: parse-entity ( -- ) : parse-entity ( -- )
next CHAR: ; take-char next take-; "#" ?head [
"#" ?head [
"x" ?head 16 10 ? base> , "x" ?head 16 10 ? base> ,
] [ parse-named-entity ] if ; ] [ parse-named-entity ] if ;
SYMBOL: pe-table
SYMBOL: in-dtd?
: parse-pe ( -- ) : parse-pe ( -- )
next CHAR: ; take-char dup next take-; dup pe-table get at
pe-table get at [ % ] [ no-entity ] ?if ; [ % ] [ no-entity ] ?if ;
:: (parse-char) ( quot: ( ch -- ? ) -- ) :: (parse-char) ( quot: ( ch -- ? ) -- )
get-char :> char get-char :> char
@ -131,7 +127,7 @@ SYMBOL: in-dtd?
] parse-char ; ] parse-char ;
: close ( -- ) : close ( -- )
pass-blank CHAR: > expect ; pass-blank ">" expect ;
: normalize-quote ( str -- str ) : normalize-quote ( str -- str )
[ dup "\t\r\n" member? [ drop CHAR: \s ] when ] map ; [ dup "\t\r\n" member? [ drop CHAR: \s ] when ] map ;

View File

@ -136,10 +136,10 @@ M: public-id write-xml-chunk
[ pubid-literal>> write "' '" write ] [ pubid-literal>> write "' '" write ]
[ system-literal>> write "'" write ] bi ; [ system-literal>> write "'" write ] bi ;
: write-internal-subset ( seq -- ) : write-internal-subset ( dtd -- )
[ [
"[" write indent "[" write indent
[ ?indent write-xml-chunk ] each directives>> [ ?indent write-xml-chunk ] each
unindent ?indent "]" write unindent ?indent "]" write
] when* ; ] when* ;

View File

@ -164,21 +164,15 @@ TUPLE: pull-xml scope ;
: file>xml ( filename -- xml ) : file>xml ( filename -- xml )
binary <file-reader> read-xml ; binary <file-reader> read-xml ;
: (read-dtd) ( -- dtd ) : read-dtd ( stream -- dtd )
! should filter out blanks, throw error on non-dtd stuff
V{ } clone dup [ push ] curry sax-loop ;
: read-dtd ( stream -- dtd entities )
[ [
t in-dtd? set
reset-prolog reset-prolog
H{ } clone extra-entities set H{ } clone extra-entities set
(read-dtd) take-internal-subset
extra-entities get
] with-state ; ] with-state ;
: file>dtd ( filename -- dtd entities ) : file>dtd ( filename -- dtd )
utf8 <file-reader> read-dtd ; utf8 <file-reader> read-dtd ;
: string>dtd ( string -- dtd entities ) : string>dtd ( string -- dtd )
<string-reader> read-dtd ; <string-reader> read-dtd ;

View File

@ -1,58 +0,0 @@
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: lexer parser splitting kernel quotations namespaces make
sequences assocs sequences.lib xml.generator xml.utilities
xml.data ;
IN: xml.syntax
: parsed-name ( accum -- accum )
scan ":" split1 [ f <name> ] [ <simple-name> ] if* parsed ;
: run-combinator ( accum quot1 quot2 -- accum )
>r [ ] like parsed r> [ parsed ] each ;
: parse-tag-contents ( accum contained? -- accum )
[ \ contained*, parsed ] [
scan-word \ [ =
[ POSTPONE: [ \ tag*, parsed ]
[ "Expected [ missing" throw ] if
] if ;
DEFER: >>
: attributes-parsed ( accum quot -- accum )
[ f parsed ] [
>r \ >r parsed r> parsed
[ H{ } make-assoc r> swap ] [ parsed ] each
] if-empty ;
: <<
parsed-name [
\ >> parse-until >quotation
attributes-parsed \ contained? get
] with-scope parse-tag-contents ; parsing
: ==
\ call parsed parsed-name \ set parsed ; parsing
: //
\ contained? on ; parsing
: parse-special ( accum end-token word -- accum )
>r parse-tokens " " join parsed r> parsed ;
: <!-- "-->" \ comment, parse-special ; parsing
: <! ">" \ directive, parse-special ; parsing
: <? "?>" \ instruction, parse-special ; parsing
: >xml-document ( seq -- xml )
dup first prolog? [ unclip-slice ] [ standard-prolog ] if swap
[ tag? ] split-around <xml> ;
DEFER: XML>
: <XML
\ XML> [ >quotation ] parse-literal
{ } parsed \ make parsed \ >xml-document parsed ; parsing