Merge branch 'master' of git://factorcode.org/git/factor

db4
Slava Pestov 2009-01-22 20:08:41 -06:00
commit ca7bd0105d
20 changed files with 124 additions and 73 deletions

View File

@ -0,0 +1 @@
Implements the automatic detection of encodings of XML documents

View File

@ -1,6 +0,0 @@
! Copyright (C) 2008 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
IN: xml.backend
! A stack of { tag children } pairs
SYMBOL: xml-stack

View File

@ -0,0 +1 @@
XML-related character classes

View File

@ -1,11 +1,16 @@
! Copyright (C) 2005, 2006 Daniel Ehrenberg
! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences sequences.private assocs arrays
delegate.protocols delegate vectors accessors multiline
macros words quotations combinators slots fry ;
macros words quotations combinators slots fry strings ;
IN: xml.data
TUPLE: name space main url ;
! A string, or f when the value is absent.
! POSTPONE: makes f be read as a class name instead of being run as a parsing word.
UNION: nullable-string string POSTPONE: f ;
! An XML name. space and url may each be a string or f; main must be a string.
! NOTE(review): presumably space is the namespace prefix, main the local part
! and url the namespace URL -- confirm against the name parser.
TUPLE: name
{ space nullable-string }
{ main string }
{ url nullable-string } ;
! Positional constructor: ( space main url -- name )
C: <name> name
: ?= ( object/f object/f -- ? )
@ -25,48 +30,7 @@ C: <name> name
! Coerce the input to a name: a value that already satisfies name? is returned
! unchanged; anything else is handed to <null-name> (defined outside this hunk).
: assure-name ( string/name -- name )
dup name? [ <null-name> ] unless ;
TUPLE: opener name attrs ;
C: <opener> opener
TUPLE: closer name ;
C: <closer> closer
TUPLE: contained name attrs ;
C: <contained> contained
TUPLE: comment text ;
C: <comment> comment
TUPLE: directive ;
TUPLE: element-decl < directive name content-spec ;
C: <element-decl> element-decl
TUPLE: attlist-decl < directive name att-defs ;
C: <attlist-decl> attlist-decl
TUPLE: entity-decl < directive name def pe? ;
C: <entity-decl> entity-decl
TUPLE: system-id system-literal ;
C: <system-id> system-id
TUPLE: public-id pubid-literal system-literal ;
C: <public-id> public-id
TUPLE: doctype-decl < directive name external-id internal-subset ;
C: <doctype-decl> doctype-decl
TUPLE: notation-decl < directive name id ;
C: <notation-decl> notation-decl
TUPLE: instruction text ;
C: <instruction> instruction
TUPLE: prolog version encoding standalone ;
C: <prolog> prolog
TUPLE: attrs alist ;
! Wrapper around a sequence held in the alist slot. The slot is class-declared,
! so only sequences may be stored. attrs is declared an assoc instance further
! down (INSTANCE: attrs assoc).
TUPLE: attrs { alist sequence } ;
! Positional constructor: ( alist -- attrs )
C: <attrs> attrs
: attr@ ( key alist -- index {key,value} )
@ -105,7 +69,66 @@ M: attrs clone
INSTANCE: attrs assoc
TUPLE: tag name attrs children ;
! Tag-level tuples with class-declared slots: name slots hold name tuples,
! attrs slots hold attrs tuples, text slots hold strings.
! NOTE(review): presumably these are the pieces produced while parsing, before
! tag trees are assembled -- confirm against the parser.

! A name together with its attributes.
TUPLE: opener { name name } { attrs attrs } ;
! Positional constructor: ( name attrs -- opener )
C: <opener> opener
! A name only.
TUPLE: closer { name name } ;
! Positional constructor: ( name -- closer )
C: <closer> closer
! Same slots as opener: a name together with its attributes.
TUPLE: contained { name name } { attrs attrs } ;
! Positional constructor: ( name attrs -- contained )
C: <contained> contained
! Holds the comment text as a string.
TUPLE: comment { text string } ;
! Positional constructor: ( text -- comment )
C: <comment> comment
! Slotless base class; the *-decl tuples below inherit from it.
TUPLE: directive ;
! Directive with two string slots: name and content-spec.
TUPLE: element-decl < directive
{ name string } { content-spec string } ;
! Positional constructor: ( name content-spec -- element-decl )
C: <element-decl> element-decl
! Directive with two string slots: name and att-defs.
TUPLE: attlist-decl < directive
{ name string } { att-defs string } ;
! Positional constructor: ( name att-defs -- attlist-decl )
C: <attlist-decl> attlist-decl
! Class whose instances are t and f; used to declare flag slots
! (pe? here, standalone in prolog).
UNION: boolean t POSTPONE: f ;
! Directive with string slots name and def, plus the boolean flag pe?.
! NOTE(review): pe? presumably marks a parameter entity -- confirm.
TUPLE: entity-decl < directive
{ name string }
{ def string }
{ pe? boolean } ;
! Positional constructor: ( name def pe? -- entity-decl )
C: <entity-decl> entity-decl
! Identifier carrying a single string, system-literal.
TUPLE: system-id { system-literal string } ;
! Positional constructor: ( system-literal -- system-id )
C: <system-id> system-id
! Identifier carrying two strings, pubid-literal and system-literal.
TUPLE: public-id { pubid-literal string } { system-literal string } ;
! Positional constructor: ( pubid-literal system-literal -- public-id )
C: <public-id> public-id
! Either kind of identifier, or f; used as the class of doctype-decl's
! external-id slot.
UNION: id system-id public-id POSTPONE: f ;
! Directive with a string name, an external-id (system-id, public-id or f)
! and an internal-subset that must be a sequence.
TUPLE: doctype-decl < directive
{ name string }
{ external-id id }
{ internal-subset sequence } ;
! Positional constructor: ( name external-id internal-subset -- doctype-decl )
C: <doctype-decl> doctype-decl
! Directive with two slots, name and id.
! NOTE(review): unlike the other directive tuples, these slots carry no class
! declaration, so any object may be stored in them -- confirm this is intended.
TUPLE: notation-decl < directive name id ;
! Positional constructor: ( name id -- notation-decl )
C: <notation-decl> notation-decl
! Holds the instruction text as a string.
TUPLE: instruction { text string } ;
! Positional constructor: ( text -- instruction )
C: <instruction> instruction
! Prolog data: version and encoding are strings, standalone is a boolean (t or f).
TUPLE: prolog
{ version string }
{ encoding string }
{ standalone boolean } ;
! Positional constructor: ( version encoding standalone -- prolog )
C: <prolog> prolog
! Element node: a name tuple, an attrs tuple and a sequence of children.
! The <tag> constructor that follows passes its name argument through
! assure-name and its attrs argument through T{ attrs } assoc-like.
TUPLE: tag
{ name name }
{ attrs attrs }
{ children sequence } ;
: <tag> ( name attrs children -- tag )
[ assure-name ] [ T{ attrs } assoc-like ] [ ] tri*
@ -137,7 +160,11 @@ MACRO: clone-slots ( class -- tuple )
! clone method for tag instances: applies the clone-slots macro to the tag class.
! NOTE(review): clone-slots is defined outside this hunk; presumably it clones
! each slot of the tuple -- confirm.
M: tag clone
tag clone-slots ;
TUPLE: xml prolog before body after ;
! Whole document: a prolog tuple, the body tag, and two sequences, before and after.
! NOTE(review): before/after presumably hold the material around the body tag -- confirm.
TUPLE: xml
{ prolog prolog }
{ before sequence }
{ body tag }
{ after sequence } ;
! Positional constructor: ( prolog before body after -- xml )
C: <xml> xml
! Sequence-protocol words applied to an xml object are forwarded to its body slot.
CONSULT: sequence-protocol xml body>> ;

View File

@ -0,0 +1 @@
Contains XML data types and basic tools for manipulation

View File

@ -0,0 +1 @@
Implements the parsing of directives in DTDs

View File

@ -0,0 +1 @@
Implements the parsing of XML tags

View File

@ -0,0 +1 @@
Contains built-in XML entities

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license.
USING: xml.data xml.writer kernel generic io prettyprint math
debugger sequences xml.state accessors summary
namespaces io.streams.string xml.backend xml.writer.private ;
namespaces io.streams.string ;
IN: xml.errors
! Error tuple with two undeclared slots, line and column.
! NOTE(review): presumably the position in the input where parsing failed -- confirm.
TUPLE: parsing-error line column ;

View File

@ -0,0 +1 @@
XML parsing errors

View File

@ -0,0 +1 @@
Daniel Ehrenberg

View File

@ -0,0 +1 @@
Implements parsing XML names

View File

@ -17,3 +17,9 @@ C: <spot> spot
! Store char in the next slot of the object held in the spot variable.
: set-next ( char -- ) spot get swap >>next drop ;
! Read the check slot of the object held in the spot variable.
: get-check ( -- ? ) spot get check>> ;
! Set the check slot of the object held in the spot variable to t.
: check ( -- ) spot get t >>check drop ;
! Parser state variables gathered into this vocabulary.
! xml-stack was documented in the removed xml.backend file as
! "A stack of { tag children } pairs".
SYMBOL: xml-stack
! Read by version=1.0? in xml.tokenize, which takes version>> of its value
! when that value is not f.
SYMBOL: prolog-data
! NOTE(review): no use of depth is visible in this view; presumably the current
! nesting depth -- confirm.
SYMBOL: depth

View File

@ -0,0 +1 @@
Primitive device for storing the state of the XML parser

View File

@ -1,5 +1,5 @@
USING: kernel xml sequences assocs tools.test io arrays namespaces
accessors xml.data xml.utilities xml.writer generic sequences.deep ;
USING: kernel xml sequences assocs tools.test io arrays namespaces fry
accessors xml.data xml.utilities xml.writer generic sequences.deep multiline ;
IN: xml.tests
: sub-tag
@ -20,24 +20,39 @@ M: object (r-ref) drop ;
! Example
: sample-doc ( -- string )
{
"<html xmlns:f='http://littledan.onigirihouse.com/namespaces/replace'>"
"<body>"
"<span f:sub='foo'/>"
"<div f:sub='bar'/>"
"<p f:sub='baz'>paragraph</p>"
"</body></html>"
} concat ;
! Input document for test-refs, which parses it with string>xml.
! Elements carry f:sub attributes with the values foo, bar and baz.
STRING: sample-doc
<html xmlns:f='http://littledan.onigirihouse.com/namespaces/replace'>
<body>
<span f:sub='foo'/>
<div f:sub='bar'/>
<p f:sub='baz'>paragraph</p>
</body></html>
;
! Text that the unit test below compares against the output of test-refs,
! which is produced with pprint-xml>string.
STRING: expected-result
<?xml version="1.0" encoding="UTF-8"?>
<html xmlns:f="http://littledan.onigirihouse.com/namespaces/replace">
<body>
<span f:sub="foo">
foo
</span>
<div f:sub="bar">
blah
<a/>
</div>
<p f:sub="baz"/>
</body>
</html>
;
: test-refs ( -- string )
[
H{
{ "foo" { "foo" } }
{ "bar" { "blah" T{ tag f T{ name f "" "a" "" } f f } } }
{ "bar" { "blah" T{ tag f T{ name f "" "a" "" } T{ attrs } f } } }
{ "baz" f }
} ref-table set
sample-doc string>xml dup template xml>string
sample-doc string>xml dup template pprint-xml>string
] with-scope ;
[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><html xmlns:f=\"http://littledan.onigirihouse.com/namespaces/replace\"><body><span f:sub=\"foo\">foo</span><div f:sub=\"bar\">blah<a/></div><p f:sub=\"baz\"/></body></html>" ] [ test-refs ] unit-test
expected-result '[ _ ] [ test-refs ] unit-test

View File

@ -0,0 +1 @@
Basic tools for parsing XML

View File

@ -6,10 +6,6 @@ circular xml.entities assocs make splitting math.parser
locals combinators arrays ;
IN: xml.tokenize
SYMBOL: prolog-data
SYMBOL: depth
! True when the value of prolog-data is f; otherwise true only if that value's
! version slot equals "1.0".
: version=1.0? ( -- ? )
prolog-data get [ version>> "1.0" = ] [ t ] if* ;

View File

@ -0,0 +1 @@
Utilities for manipulating an XML DOM tree

View File

@ -0,0 +1 @@
Tools for printing XML, including prettyprinting

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays io io.encodings.binary io.files
io.streams.string kernel namespaces sequences strings io.encodings.utf8
xml.backend xml.data xml.errors xml.elements ascii xml.entities
xml.data xml.errors xml.elements ascii xml.entities
xml.writer xml.state xml.autoencoding assocs xml.tokenize xml.name ;
IN: xml