Merge branch 'master' of git://factorcode.org/git/factor

db4
Slava Pestov 2009-01-22 20:08:41 -06:00
commit ca7bd0105d
20 changed files with 124 additions and 73 deletions

View File

@ -0,0 +1 @@
Implements the automatic detection of encodings of XML documents

View File

@ -1,6 +0,0 @@
! Copyright (C) 2008 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
IN: xml.backend
! A stack of { tag children } pairs
SYMBOL: xml-stack

View File

@ -0,0 +1 @@
XML-related character classes

View File

@ -1,11 +1,16 @@
! Copyright (C) 2005, 2006 Daniel Ehrenberg ! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences sequences.private assocs arrays USING: kernel sequences sequences.private assocs arrays
delegate.protocols delegate vectors accessors multiline delegate.protocols delegate vectors accessors multiline
macros words quotations combinators slots fry ; macros words quotations combinators slots fry strings ;
IN: xml.data IN: xml.data
TUPLE: name space main url ; UNION: nullable-string string POSTPONE: f ;
! Tuple with three typed slots. `space` and `url` accept a string or f
! (nullable-string); `main` must always be a string.
! NOTE(review): presumably space = namespace prefix, main = local part and
! url = namespace URL -- confirm against the name parser.
TUPLE: name { space nullable-string } { main string } { url nullable-string } ;
C: <name> name C: <name> name
: ?= ( object/f object/f -- ? ) : ?= ( object/f object/f -- ? )
@ -25,48 +30,7 @@ C: <name> name
: assure-name ( string/name -- name ) : assure-name ( string/name -- name )
dup name? [ <null-name> ] unless ; dup name? [ <null-name> ] unless ;
TUPLE: opener name attrs ; TUPLE: attrs { alist sequence } ;
C: <opener> opener
TUPLE: closer name ;
C: <closer> closer
TUPLE: contained name attrs ;
C: <contained> contained
TUPLE: comment text ;
C: <comment> comment
TUPLE: directive ;
TUPLE: element-decl < directive name content-spec ;
C: <element-decl> element-decl
TUPLE: attlist-decl < directive name att-defs ;
C: <attlist-decl> attlist-decl
TUPLE: entity-decl < directive name def pe? ;
C: <entity-decl> entity-decl
TUPLE: system-id system-literal ;
C: <system-id> system-id
TUPLE: public-id pubid-literal system-literal ;
C: <public-id> public-id
TUPLE: doctype-decl < directive name external-id internal-subset ;
C: <doctype-decl> doctype-decl
TUPLE: notation-decl < directive name id ;
C: <notation-decl> notation-decl
TUPLE: instruction text ;
C: <instruction> instruction
TUPLE: prolog version encoding standalone ;
C: <prolog> prolog
TUPLE: attrs alist ;
C: <attrs> attrs C: <attrs> attrs
: attr@ ( key alist -- index {key,value} ) : attr@ ( key alist -- index {key,value} )
@ -105,7 +69,66 @@ M: attrs clone
INSTANCE: attrs assoc INSTANCE: attrs assoc
TUPLE: tag name attrs children ; TUPLE: opener { name name } { attrs attrs } ;
C: <opener> opener
! Tuple with a single slot, `name`, which must be an instance of the `name`
! class. <closer> is the positional constructor ( name -- closer ).
TUPLE: closer
    { name name } ;
C: <closer> closer
! Tuple pairing a `name` instance with an `attrs` instance; both slots are
! type-checked. <contained> is the positional constructor
! ( name attrs -- contained ).
TUPLE: contained
    { name name }
    { attrs attrs } ;
C: <contained> contained
! Tuple with one slot, `text`, which must be a string.
! <comment> is the positional constructor ( text -- comment ).
TUPLE: comment
    { text string } ;
C: <comment> comment
! Slotless base class. The *-decl tuples in this vocabulary inherit from it
! with `<`, so `directive?` is true for all of them.
TUPLE: directive ;
! Subclass of `directive` with two string-typed slots, `name` and
! `content-spec`. <element-decl> is the positional constructor
! ( name content-spec -- element-decl ).
TUPLE: element-decl < directive
    { name string }
    { content-spec string } ;
C: <element-decl> element-decl
! Subclass of `directive` with two string-typed slots, `name` and `att-defs`.
! <attlist-decl> is the positional constructor
! ( name att-defs -- attlist-decl ).
TUPLE: attlist-decl < directive
    { name string }
    { att-defs string } ;
C: <attlist-decl> attlist-decl
! Union class whose members are `t` and `f`. It types the flag slots in this
! vocabulary (entity-decl's `pe?`, prolog's `standalone`).
! `f` is written as POSTPONE: f so the class word itself is added to the union.
UNION: boolean t POSTPONE: f ;
! Subclass of `directive` with string slots `name` and `def`, plus a
! `boolean` flag `pe?`. <entity-decl> is the positional constructor
! ( name def pe? -- entity-decl ).
! NOTE(review): `pe?` presumably marks a parameter entity -- confirm in the
! DTD parser.
TUPLE: entity-decl < directive { name string } { def string } { pe? boolean } ;
C: <entity-decl> entity-decl
! Tuple with one string-typed slot, `system-literal`.
! <system-id> is the positional constructor ( system-literal -- system-id ).
TUPLE: system-id
    { system-literal string } ;
C: <system-id> system-id
! Tuple with two string-typed slots, `pubid-literal` and `system-literal`.
! <public-id> is the positional constructor
! ( pubid-literal system-literal -- public-id ).
TUPLE: public-id
    { pubid-literal string }
    { system-literal string } ;
C: <public-id> public-id
! Union of `system-id`, `public-id` and `f`. It types doctype-decl's
! `external-id` slot, so that slot accepts either kind of id or f.
UNION: id system-id public-id POSTPONE: f ;
! Subclass of `directive` with three typed slots:
!   name            - a string
!   external-id     - an `id` (system-id, public-id or f)
!   internal-subset - any sequence
! <doctype-decl> is the positional constructor
! ( name external-id internal-subset -- doctype-decl ).
TUPLE: doctype-decl < directive
    { name string } { external-id id } { internal-subset sequence } ;
C: <doctype-decl> doctype-decl
! Subclass of `directive` with two slots, `name` and `id`, both untyped.
! <notation-decl> is the positional constructor
! ( name id -- notation-decl ).
! NOTE(review): every sibling declaration constrains its slots, but these two
! accept any object. Confirm what the DTD parser stores here before adding
! class constraints.
TUPLE: notation-decl < directive
    name
    id ;
C: <notation-decl> notation-decl
! Tuple with one string-typed slot, `text`.
! <instruction> is the positional constructor ( text -- instruction ).
TUPLE: instruction
    { text string } ;
C: <instruction> instruction
! Tuple with string slots `version` and `encoding` and a `boolean` slot
! `standalone`. <prolog> is the positional constructor
! ( version encoding standalone -- prolog ).
TUPLE: prolog { version string } { encoding string } { standalone boolean } ;
C: <prolog> prolog
! Tuple with three typed slots: `name` must be a `name` instance, `attrs` an
! `attrs` instance and `children` any sequence.
! The <tag> word that follows runs its first two inputs through assure-name
! and `T{ attrs } assoc-like`.
TUPLE: tag { name name } { attrs attrs } { children sequence } ;
: <tag> ( name attrs children -- tag ) : <tag> ( name attrs children -- tag )
[ assure-name ] [ T{ attrs } assoc-like ] [ ] tri* [ assure-name ] [ T{ attrs } assoc-like ] [ ] tri*
@ -137,7 +160,11 @@ MACRO: clone-slots ( class -- tuple )
M: tag clone M: tag clone
tag clone-slots ; tag clone-slots ;
TUPLE: xml prolog before body after ; TUPLE: xml
{ prolog prolog }
{ before sequence }
{ body tag }
{ after sequence } ;
C: <xml> xml C: <xml> xml
CONSULT: sequence-protocol xml body>> ; CONSULT: sequence-protocol xml body>> ;

View File

@ -0,0 +1 @@
Contains XML data types and basic tools for manipulation

View File

@ -0,0 +1 @@
Implements the parsing of directives in DTDs

View File

@ -0,0 +1 @@
Implements the parsing of XML tags

View File

@ -0,0 +1 @@
Contains built-in XML entities

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: xml.data xml.writer kernel generic io prettyprint math USING: xml.data xml.writer kernel generic io prettyprint math
debugger sequences xml.state accessors summary debugger sequences xml.state accessors summary
namespaces io.streams.string xml.backend xml.writer.private ; namespaces io.streams.string ;
IN: xml.errors IN: xml.errors
TUPLE: parsing-error line column ; TUPLE: parsing-error line column ;

View File

@ -0,0 +1 @@
XML parsing errors

View File

@ -0,0 +1 @@
Daniel Ehrenberg

View File

@ -0,0 +1 @@
Implements parsing XML names

View File

@ -17,3 +17,9 @@ C: <spot> spot
: set-next ( char -- ) spot get swap >>next drop ; : set-next ( char -- ) spot get swap >>next drop ;
: get-check ( -- ? ) spot get check>> ; : get-check ( -- ? ) spot get check>> ;
: check ( -- ) spot get t >>check drop ; : check ( -- ) spot get t >>check drop ;
! Parser-state variables.

! A stack of { tag children } pairs.
SYMBOL: xml-stack
! Read with `get` by version=1.0? in xml.tokenize, which treats an f value
! as version 1.0 and otherwise compares the stored object's version>> to "1.0".
SYMBOL: prolog-data
! NOTE(review): usage of `depth` is not visible from here -- document once
! confirmed against xml.tokenize.
SYMBOL: depth

View File

@ -0,0 +1 @@
Primitive device for storing the state of the XML parser

View File

@ -1,5 +1,5 @@
USING: kernel xml sequences assocs tools.test io arrays namespaces USING: kernel xml sequences assocs tools.test io arrays namespaces fry
accessors xml.data xml.utilities xml.writer generic sequences.deep ; accessors xml.data xml.utilities xml.writer generic sequences.deep multiline ;
IN: xml.tests IN: xml.tests
: sub-tag : sub-tag
@ -20,24 +20,39 @@ M: object (r-ref) drop ;
! Example ! Example
: sample-doc ( -- string ) STRING: sample-doc
{ <html xmlns:f='http://littledan.onigirihouse.com/namespaces/replace'>
"<html xmlns:f='http://littledan.onigirihouse.com/namespaces/replace'>" <body>
"<body>" <span f:sub='foo'/>
"<span f:sub='foo'/>" <div f:sub='bar'/>
"<div f:sub='bar'/>" <p f:sub='baz'>paragraph</p>
"<p f:sub='baz'>paragraph</p>" </body></html>
"</body></html>" ;
} concat ;
! Expected output of test-refs: sample-doc after `template` substitution,
! rendered with pprint-xml>string.
! NOTE(review): the unit test compares this literal verbatim, so confirm its
! whitespace matches pprint-xml>string output exactly.
STRING: expected-result
<?xml version="1.0" encoding="UTF-8"?>
<html xmlns:f="http://littledan.onigirihouse.com/namespaces/replace">
<body>
<span f:sub="foo">
foo
</span>
<div f:sub="bar">
blah
<a/>
</div>
<p f:sub="baz"/>
</body>
</html>
;
: test-refs ( -- string ) : test-refs ( -- string )
[ [
H{ H{
{ "foo" { "foo" } } { "foo" { "foo" } }
{ "bar" { "blah" T{ tag f T{ name f "" "a" "" } f f } } } { "bar" { "blah" T{ tag f T{ name f "" "a" "" } T{ attrs } f } } }
{ "baz" f } { "baz" f }
} ref-table set } ref-table set
sample-doc string>xml dup template xml>string sample-doc string>xml dup template pprint-xml>string
] with-scope ; ] with-scope ;
[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><html xmlns:f=\"http://littledan.onigirihouse.com/namespaces/replace\"><body><span f:sub=\"foo\">foo</span><div f:sub=\"bar\">blah<a/></div><p f:sub=\"baz\"/></body></html>" ] [ test-refs ] unit-test expected-result '[ _ ] [ test-refs ] unit-test

View File

@ -0,0 +1 @@
Basic tools for parsing XML

View File

@ -6,10 +6,6 @@ circular xml.entities assocs make splitting math.parser
locals combinators arrays ; locals combinators arrays ;
IN: xml.tokenize IN: xml.tokenize
SYMBOL: prolog-data
SYMBOL: depth
: version=1.0? ( -- ? ) : version=1.0? ( -- ? )
prolog-data get [ version>> "1.0" = ] [ t ] if* ; prolog-data get [ version>> "1.0" = ] [ t ] if* ;

View File

@ -0,0 +1 @@
Utilities for manipulating an XML DOM tree

View File

@ -0,0 +1 @@
Tools for printing XML, including prettyprinting

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays io io.encodings.binary io.files USING: accessors arrays io io.encodings.binary io.files
io.streams.string kernel namespaces sequences strings io.encodings.utf8 io.streams.string kernel namespaces sequences strings io.encodings.utf8
xml.backend xml.data xml.errors xml.elements ascii xml.entities xml.data xml.errors xml.elements ascii xml.entities
xml.writer xml.state xml.autoencoding assocs xml.tokenize xml.name ; xml.writer xml.state xml.autoencoding assocs xml.tokenize xml.name ;
IN: xml IN: xml