combinator utilities and character classes for xml

darcs
microdan 2006-11-21 21:03:26 +00:00
parent 3716de2737
commit 50f07c1fb5
6 changed files with 457 additions and 68 deletions

View File

@ -5,7 +5,7 @@ USING: kernel sequences math ;
#! seq: { { min max } { min max }* }
[ first2 between? ] contains-with? ;
PREDICATE: integer name-start-char
PREDICATE: integer 1.1name-start-char
{
{ CHAR: _ CHAR: _ }
{ CHAR: A CHAR: Z }
@ -24,8 +24,8 @@ PREDICATE: integer name-start-char
{ HEX: 10000 HEX: EFFFF }
} in-range-seq? ;
PREDICATE: integer name-char
dup name-start-char? swap {
PREDICATE: integer 1.1name-char
dup 1.1name-start-char? swap {
{ CHAR: - CHAR: - }
{ CHAR: . CHAR: . }
{ CHAR: 0 CHAR: 9 }
@ -33,3 +33,355 @@ PREDICATE: integer name-char
{ HEX: 300 HEX: 36F }
{ HEX: 203F HEX: 2040 }
} in-range-seq? or ;
! The following 335 lines were automatically generated
! from Appendix B of the XML 1.0 standard, version 3
PREDICATE: integer base-char {
{ HEX: 0041 HEX: 005A }
{ HEX: 0061 HEX: 007A }
{ HEX: 00C0 HEX: 00D6 }
{ HEX: 00D8 HEX: 00F6 }
{ HEX: 00F8 HEX: 00FF }
{ HEX: 0100 HEX: 0131 }
{ HEX: 0134 HEX: 013E }
{ HEX: 0141 HEX: 0148 }
{ HEX: 014A HEX: 017E }
{ HEX: 0180 HEX: 01C3 }
{ HEX: 01CD HEX: 01F0 }
{ HEX: 01F4 HEX: 01F5 }
{ HEX: 01FA HEX: 0217 }
{ HEX: 0250 HEX: 02A8 }
{ HEX: 02BB HEX: 02C1 }
{ HEX: 0386 HEX: 0386 }
{ HEX: 0388 HEX: 038A }
{ HEX: 038C HEX: 038C }
{ HEX: 038E HEX: 03A1 }
{ HEX: 03A3 HEX: 03CE }
{ HEX: 03D0 HEX: 03D6 }
{ HEX: 03DA HEX: 03DA }
{ HEX: 03DC HEX: 03DC }
{ HEX: 03DE HEX: 03DE }
{ HEX: 03E0 HEX: 03E0 }
{ HEX: 03E2 HEX: 03F3 }
{ HEX: 0401 HEX: 040C }
{ HEX: 040E HEX: 044F }
{ HEX: 0451 HEX: 045C }
{ HEX: 045E HEX: 0481 }
{ HEX: 0490 HEX: 04C4 }
{ HEX: 04C7 HEX: 04C8 }
{ HEX: 04CB HEX: 04CC }
{ HEX: 04D0 HEX: 04EB }
{ HEX: 04EE HEX: 04F5 }
{ HEX: 04F8 HEX: 04F9 }
{ HEX: 0531 HEX: 0556 }
{ HEX: 0559 HEX: 0559 }
{ HEX: 0561 HEX: 0586 }
{ HEX: 05D0 HEX: 05EA }
{ HEX: 05F0 HEX: 05F2 }
{ HEX: 0621 HEX: 063A }
{ HEX: 0641 HEX: 064A }
{ HEX: 0671 HEX: 06B7 }
{ HEX: 06BA HEX: 06BE }
{ HEX: 06C0 HEX: 06CE }
{ HEX: 06D0 HEX: 06D3 }
{ HEX: 06D5 HEX: 06D5 }
{ HEX: 06E5 HEX: 06E6 }
{ HEX: 0905 HEX: 0939 }
{ HEX: 093D HEX: 093D }
{ HEX: 0958 HEX: 0961 }
{ HEX: 0985 HEX: 098C }
{ HEX: 098F HEX: 0990 }
{ HEX: 0993 HEX: 09A8 }
{ HEX: 09AA HEX: 09B0 }
{ HEX: 09B2 HEX: 09B2 }
{ HEX: 09B6 HEX: 09B9 }
{ HEX: 09DC HEX: 09DD }
{ HEX: 09DF HEX: 09E1 }
{ HEX: 09F0 HEX: 09F1 }
{ HEX: 0A05 HEX: 0A0A }
{ HEX: 0A0F HEX: 0A10 }
{ HEX: 0A13 HEX: 0A28 }
{ HEX: 0A2A HEX: 0A30 }
{ HEX: 0A32 HEX: 0A33 }
{ HEX: 0A35 HEX: 0A36 }
{ HEX: 0A38 HEX: 0A39 }
{ HEX: 0A59 HEX: 0A5C }
{ HEX: 0A5E HEX: 0A5E }
{ HEX: 0A72 HEX: 0A74 }
{ HEX: 0A85 HEX: 0A8B }
{ HEX: 0A8D HEX: 0A8D }
{ HEX: 0A8F HEX: 0A91 }
{ HEX: 0A93 HEX: 0AA8 }
{ HEX: 0AAA HEX: 0AB0 }
{ HEX: 0AB2 HEX: 0AB3 }
{ HEX: 0AB5 HEX: 0AB9 }
{ HEX: 0ABD HEX: 0ABD }
{ HEX: 0AE0 HEX: 0AE0 }
{ HEX: 0B05 HEX: 0B0C }
{ HEX: 0B0F HEX: 0B10 }
{ HEX: 0B13 HEX: 0B28 }
{ HEX: 0B2A HEX: 0B30 }
{ HEX: 0B32 HEX: 0B33 }
{ HEX: 0B36 HEX: 0B39 }
{ HEX: 0B3D HEX: 0B3D }
{ HEX: 0B5C HEX: 0B5D }
{ HEX: 0B5F HEX: 0B61 }
{ HEX: 0B85 HEX: 0B8A }
{ HEX: 0B8E HEX: 0B90 }
{ HEX: 0B92 HEX: 0B95 }
{ HEX: 0B99 HEX: 0B9A }
{ HEX: 0B9C HEX: 0B9C }
{ HEX: 0B9E HEX: 0B9F }
{ HEX: 0BA3 HEX: 0BA4 }
{ HEX: 0BA8 HEX: 0BAA }
{ HEX: 0BAE HEX: 0BB5 }
{ HEX: 0BB7 HEX: 0BB9 }
{ HEX: 0C05 HEX: 0C0C }
{ HEX: 0C0E HEX: 0C10 }
{ HEX: 0C12 HEX: 0C28 }
{ HEX: 0C2A HEX: 0C33 }
{ HEX: 0C35 HEX: 0C39 }
{ HEX: 0C60 HEX: 0C61 }
{ HEX: 0C85 HEX: 0C8C }
{ HEX: 0C8E HEX: 0C90 }
{ HEX: 0C92 HEX: 0CA8 }
{ HEX: 0CAA HEX: 0CB3 }
{ HEX: 0CB5 HEX: 0CB9 }
{ HEX: 0CDE HEX: 0CDE }
{ HEX: 0CE0 HEX: 0CE1 }
{ HEX: 0D05 HEX: 0D0C }
{ HEX: 0D0E HEX: 0D10 }
{ HEX: 0D12 HEX: 0D28 }
{ HEX: 0D2A HEX: 0D39 }
{ HEX: 0D60 HEX: 0D61 }
{ HEX: 0E01 HEX: 0E2E }
{ HEX: 0E30 HEX: 0E30 }
{ HEX: 0E32 HEX: 0E33 }
{ HEX: 0E40 HEX: 0E45 }
{ HEX: 0E81 HEX: 0E82 }
{ HEX: 0E84 HEX: 0E84 }
{ HEX: 0E87 HEX: 0E88 }
{ HEX: 0E8A HEX: 0E8A }
{ HEX: 0E8D HEX: 0E8D }
{ HEX: 0E94 HEX: 0E97 }
{ HEX: 0E99 HEX: 0E9F }
{ HEX: 0EA1 HEX: 0EA3 }
{ HEX: 0EA5 HEX: 0EA5 }
{ HEX: 0EA7 HEX: 0EA7 }
{ HEX: 0EAA HEX: 0EAB }
{ HEX: 0EAD HEX: 0EAE }
{ HEX: 0EB0 HEX: 0EB0 }
{ HEX: 0EB2 HEX: 0EB3 }
{ HEX: 0EBD HEX: 0EBD }
{ HEX: 0EC0 HEX: 0EC4 }
{ HEX: 0F40 HEX: 0F47 }
{ HEX: 0F49 HEX: 0F69 }
{ HEX: 10A0 HEX: 10C5 }
{ HEX: 10D0 HEX: 10F6 }
{ HEX: 1100 HEX: 1100 }
{ HEX: 1102 HEX: 1103 }
{ HEX: 1105 HEX: 1107 }
{ HEX: 1109 HEX: 1109 }
{ HEX: 110B HEX: 110C }
{ HEX: 110E HEX: 1112 }
{ HEX: 113C HEX: 113C }
{ HEX: 113E HEX: 113E }
{ HEX: 1140 HEX: 1140 }
{ HEX: 114C HEX: 114C }
{ HEX: 114E HEX: 114E }
{ HEX: 1150 HEX: 1150 }
{ HEX: 1154 HEX: 1155 }
{ HEX: 1159 HEX: 1159 }
{ HEX: 115F HEX: 1161 }
{ HEX: 1163 HEX: 1163 }
{ HEX: 1165 HEX: 1165 }
{ HEX: 1167 HEX: 1167 }
{ HEX: 1169 HEX: 1169 }
{ HEX: 116D HEX: 116E }
{ HEX: 1172 HEX: 1173 }
{ HEX: 1175 HEX: 1175 }
{ HEX: 119E HEX: 119E }
{ HEX: 11A8 HEX: 11A8 }
{ HEX: 11AB HEX: 11AB }
{ HEX: 11AE HEX: 11AF }
{ HEX: 11B7 HEX: 11B8 }
{ HEX: 11BA HEX: 11BA }
{ HEX: 11BC HEX: 11C2 }
{ HEX: 11EB HEX: 11EB }
{ HEX: 11F0 HEX: 11F0 }
{ HEX: 11F9 HEX: 11F9 }
{ HEX: 1E00 HEX: 1E9B }
{ HEX: 1EA0 HEX: 1EF9 }
{ HEX: 1F00 HEX: 1F15 }
{ HEX: 1F18 HEX: 1F1D }
{ HEX: 1F20 HEX: 1F45 }
{ HEX: 1F48 HEX: 1F4D }
{ HEX: 1F50 HEX: 1F57 }
{ HEX: 1F59 HEX: 1F59 }
{ HEX: 1F5B HEX: 1F5B }
{ HEX: 1F5D HEX: 1F5D }
{ HEX: 1F5F HEX: 1F7D }
{ HEX: 1F80 HEX: 1FB4 }
{ HEX: 1FB6 HEX: 1FBC }
{ HEX: 1FBE HEX: 1FBE }
{ HEX: 1FC2 HEX: 1FC4 }
{ HEX: 1FC6 HEX: 1FCC }
{ HEX: 1FD0 HEX: 1FD3 }
{ HEX: 1FD6 HEX: 1FDB }
{ HEX: 1FE0 HEX: 1FEC }
{ HEX: 1FF2 HEX: 1FF4 }
{ HEX: 1FF6 HEX: 1FFC }
{ HEX: 2126 HEX: 2126 }
{ HEX: 212A HEX: 212B }
{ HEX: 212E HEX: 212E }
{ HEX: 2180 HEX: 2182 }
{ HEX: 3041 HEX: 3094 }
{ HEX: 30A1 HEX: 30FA }
{ HEX: 3105 HEX: 312C }
{ HEX: AC00 HEX: D7A3 } } in-range-seq? ;
PREDICATE: integer ideographic {
{ HEX: 4E00 HEX: 9FA5 }
{ HEX: 3007 HEX: 3007 }
{ HEX: 3021 HEX: 3029 } } in-range-seq? ;
PREDICATE: integer combining-char {
{ HEX: 0300 HEX: 0345 }
{ HEX: 0360 HEX: 0361 }
{ HEX: 0483 HEX: 0486 }
{ HEX: 0591 HEX: 05A1 }
{ HEX: 05A3 HEX: 05B9 }
{ HEX: 05BB HEX: 05BD }
{ HEX: 05BF HEX: 05BF }
{ HEX: 05C1 HEX: 05C2 }
{ HEX: 05C4 HEX: 05C4 }
{ HEX: 064B HEX: 0652 }
{ HEX: 0670 HEX: 0670 }
{ HEX: 06D6 HEX: 06DC }
{ HEX: 06DD HEX: 06DF }
{ HEX: 06E0 HEX: 06E4 }
{ HEX: 06E7 HEX: 06E8 }
{ HEX: 06EA HEX: 06ED }
{ HEX: 0901 HEX: 0903 }
{ HEX: 093C HEX: 093C }
{ HEX: 093E HEX: 094C }
{ HEX: 094D HEX: 094D }
{ HEX: 0951 HEX: 0954 }
{ HEX: 0962 HEX: 0963 }
{ HEX: 0981 HEX: 0983 }
{ HEX: 09BC HEX: 09BC }
{ HEX: 09BE HEX: 09BE }
{ HEX: 09BF HEX: 09BF }
{ HEX: 09C0 HEX: 09C4 }
{ HEX: 09C7 HEX: 09C8 }
{ HEX: 09CB HEX: 09CD }
{ HEX: 09D7 HEX: 09D7 }
{ HEX: 09E2 HEX: 09E3 }
{ HEX: 0A02 HEX: 0A02 }
{ HEX: 0A3C HEX: 0A3C }
{ HEX: 0A3E HEX: 0A3E }
{ HEX: 0A3F HEX: 0A3F }
{ HEX: 0A40 HEX: 0A42 }
{ HEX: 0A47 HEX: 0A48 }
{ HEX: 0A4B HEX: 0A4D }
{ HEX: 0A70 HEX: 0A71 }
{ HEX: 0A81 HEX: 0A83 }
{ HEX: 0ABC HEX: 0ABC }
{ HEX: 0ABE HEX: 0AC5 }
{ HEX: 0AC7 HEX: 0AC9 }
{ HEX: 0ACB HEX: 0ACD }
{ HEX: 0B01 HEX: 0B03 }
{ HEX: 0B3C HEX: 0B3C }
{ HEX: 0B3E HEX: 0B43 }
{ HEX: 0B47 HEX: 0B48 }
{ HEX: 0B4B HEX: 0B4D }
{ HEX: 0B56 HEX: 0B57 }
{ HEX: 0B82 HEX: 0B83 }
{ HEX: 0BBE HEX: 0BC2 }
{ HEX: 0BC6 HEX: 0BC8 }
{ HEX: 0BCA HEX: 0BCD }
{ HEX: 0BD7 HEX: 0BD7 }
{ HEX: 0C01 HEX: 0C03 }
{ HEX: 0C3E HEX: 0C44 }
{ HEX: 0C46 HEX: 0C48 }
{ HEX: 0C4A HEX: 0C4D }
{ HEX: 0C55 HEX: 0C56 }
{ HEX: 0C82 HEX: 0C83 }
{ HEX: 0CBE HEX: 0CC4 }
{ HEX: 0CC6 HEX: 0CC8 }
{ HEX: 0CCA HEX: 0CCD }
{ HEX: 0CD5 HEX: 0CD6 }
{ HEX: 0D02 HEX: 0D03 }
{ HEX: 0D3E HEX: 0D43 }
{ HEX: 0D46 HEX: 0D48 }
{ HEX: 0D4A HEX: 0D4D }
{ HEX: 0D57 HEX: 0D57 }
{ HEX: 0E31 HEX: 0E31 }
{ HEX: 0E34 HEX: 0E3A }
{ HEX: 0E47 HEX: 0E4E }
{ HEX: 0EB1 HEX: 0EB1 }
{ HEX: 0EB4 HEX: 0EB9 }
{ HEX: 0EBB HEX: 0EBC }
{ HEX: 0EC8 HEX: 0ECD }
{ HEX: 0F18 HEX: 0F19 }
{ HEX: 0F35 HEX: 0F35 }
{ HEX: 0F37 HEX: 0F37 }
{ HEX: 0F39 HEX: 0F39 }
{ HEX: 0F3E HEX: 0F3E }
{ HEX: 0F3F HEX: 0F3F }
{ HEX: 0F71 HEX: 0F84 }
{ HEX: 0F86 HEX: 0F8B }
{ HEX: 0F90 HEX: 0F95 }
{ HEX: 0F97 HEX: 0F97 }
{ HEX: 0F99 HEX: 0FAD }
{ HEX: 0FB1 HEX: 0FB7 }
{ HEX: 0FB9 HEX: 0FB9 }
{ HEX: 20D0 HEX: 20DC }
{ HEX: 20E1 HEX: 20E1 }
{ HEX: 302A HEX: 302F }
{ HEX: 3099 HEX: 3099 }
{ HEX: 309A HEX: 309A } } in-range-seq? ;
PREDICATE: integer unicode-digit {
{ HEX: 0030 HEX: 0039 }
{ HEX: 0660 HEX: 0669 }
{ HEX: 06F0 HEX: 06F9 }
{ HEX: 0966 HEX: 096F }
{ HEX: 09E6 HEX: 09EF }
{ HEX: 0A66 HEX: 0A6F }
{ HEX: 0AE6 HEX: 0AEF }
{ HEX: 0B66 HEX: 0B6F }
{ HEX: 0BE7 HEX: 0BEF }
{ HEX: 0C66 HEX: 0C6F }
{ HEX: 0CE6 HEX: 0CEF }
{ HEX: 0D66 HEX: 0D6F }
{ HEX: 0E50 HEX: 0E59 }
{ HEX: 0ED0 HEX: 0ED9 }
{ HEX: 0F20 HEX: 0F29 } } in-range-seq? ;
PREDICATE: integer extender {
{ HEX: 00B7 HEX: 00B7 }
{ HEX: 02D0 HEX: 02D0 }
{ HEX: 02D1 HEX: 02D1 }
{ HEX: 0387 HEX: 0387 }
{ HEX: 0640 HEX: 0640 }
{ HEX: 0E46 HEX: 0E46 }
{ HEX: 0EC6 HEX: 0EC6 }
{ HEX: 3005 HEX: 3005 }
{ HEX: 3031 HEX: 3035 }
{ HEX: 309D HEX: 309E }
{ HEX: 30FC HEX: 30FE } } in-range-seq? ;
! end automatically generated code
UNION: 1.0letter base-char ideographic ;
PREDICATE: integer 1.0name-start-char
dup 1.0letter? swap CHAR: _ = or ;
PREDICATE: integer 1.0other-name-chars
{ CHAR: . CHAR: - CHAR: _ } member? ;
UNION: 1.0name-char
1.0letter unicode-digit 1.0other-name-chars
combining-char extender ;

View File

@ -16,34 +16,35 @@ TUPLE: instruction text ;
char CHAR: / = dup [ incr-spot ] when
parse-name swap ;
: (parse-quot) ( ch vector sbuf -- vector )
{
{ [ more? not ] [ "File ended in quote" <xml-string-error> throw ] }
{ [ char >r pick r> swap = ] [ >string over push nip incr-spot ] }
{ [ char CHAR: & = ] [ parse-entity (parse-quot) ] }
{ [ t ] [ char parsed-ch (parse-quot) ] }
: (parse-quot) ( ch sbuf -- )
char {
{ [ dup not ] [ "File ended in quote" <xml-string-error> throw ] }
{ [ 3dup nip = ] [ drop >string , drop incr-spot ] }
{ [ dup CHAR: & = ] [ drop parse-entity (parse-quot) ] }
{ [ dup CHAR: % = ] [ drop parse-reference (parse-quot) ] }
{ [ t ] [ parsed-ch (parse-quot) ] }
} cond ;
: parse-quot ( ch -- array )
V{ } clone SBUF" " clone (parse-quot) ;
[ SBUF" " clone (parse-quot) ] { } make ;
: parse-prop-value ( -- str )
: parse-prop-value ( -- seq )
char dup "'\"" member? [
incr-spot parse-quot
] [
"Attribute lacks quote" <xml-string-error> throw
] if ;
: parse-prop ( -- seq )
: parse-prop ( -- )
parse-name pass-blank CHAR: = expect pass-blank
parse-prop-value 2array ;
parse-prop-value swap set ;
: (middle-tag) ( seq -- seq )
pass-blank char name-char?
[ parse-prop over push (middle-tag) ] when ;
: (middle-tag) ( -- )
pass-blank char name-start-char?
[ parse-prop (middle-tag) ] when ;
: middle-tag ( -- hash )
V{ } clone (middle-tag) alist>hash pass-blank ;
[ (middle-tag) ] make-hash pass-blank ;
: end-tag ( string hash -- tag )
pass-blank char CHAR: / =
@ -81,7 +82,11 @@ TUPLE: instruction text ;
! -- Overall parser with data tree
TUPLE: tag name props children ;
TUPLE: tag props children ;
C: tag ( name props children -- tag )
[ set-tag-children ] keep
[ set-tag-props ] keep
[ set-delegate ] keep ;
TUPLE: contained-tag ;
C: contained-tag ( name props -- contained-tag )
@ -212,37 +217,41 @@ M: extra-attrs error.
<xml-string-error> throw ] unless
concat ;
: prolog-attr ( hash name default -- value )
>r "" swap <name> swap ?hash concat-strings
[ r> drop ] [ r> ] if* ;
: prolog-attrs ( hash -- )
T{ name f "" "version" f } over hash [
concat-strings prolog-data get set-prolog-version
] when*
T{ name f "" "encoding" f } over hash [
concat-strings prolog-data get set-prolog-encoding
] when*
T{ name f "" "standalone" f } swap hash [
concat-strings yes/no>bool
prolog-data get set-prolog-standalone
] when* ;
: parse-prolog ( -- prolog )
: parse-prolog ( -- )
"<?xml" string-matches? [
5 expect-string*
pass-blank middle-tag "?>" expect-string
dup assure-no-extra
] [ f ] if
[ "version" "1.0" prolog-attr ] keep
[ "encoding" "iso-8859-1" prolog-attr ] keep
"standalone" "no" prolog-attr yes/no>bool
<prolog> dup prolog-data set ;
dup assure-no-extra prolog-attrs
] when ;
: init-xml ( string -- )
code set { 0 1 1 } clone spot set
code set { 0 1 1 } spot set
init-xml-stack init-ns-stack ;
UNION: any-tag tag contained-tag ;
TUPLE: notags ;
M: notags error.
drop "XML document lacks a main tag" print ;
"XML document lacks a main tag" print ;
TUPLE: multitags ;
M: multitags error.
drop "XML document contains multiple main tags" print ;
"XML document contains multiple tags" print ;
: make-xml-doc ( prolog seq -- xml-doc )
dup [ any-tag? ] find
: make-xml-doc ( seq -- xml-doc )
prolog-data get swap dup [ any-tag? ] find
>r dup -1 = [ <notags> throw ] when
swap cut 1 tail
dup [ any-tag? ] contains? [ <multitags> throw ] when r>
@ -260,7 +269,8 @@ M: multitags error.
xml-stack get
dup length 1 = [ <unclosed> throw ] unless
first second
] with-scope make-xml-doc ;
make-xml-doc
] with-scope ;
UNION: xml-parse-error multitags notags xml-error extra-attrs nonexist-ns
not-yes/no unclosed mismatched xml-string-error expected no-entity ;

View File

@ -9,9 +9,9 @@ SYMBOL: xml-file
contents string>xml xml-file set ] unit-test
[ "1.0" ] [ xml-file get xml-doc-prolog prolog-version ] unit-test
[ f ] [ xml-file get xml-doc-prolog prolog-standalone ] unit-test
[ "a" ] [ xml-file get tag-name name-space ] unit-test
[ "http://www.hello.com" ] [ xml-file get tag-name name-url ] unit-test
[ V{ "that" } ] [ T{ name f "" "this" "http://d.de" } xml-file get
[ "a" ] [ xml-file get name-space ] unit-test
[ "http://www.hello.com" ] [ xml-file get name-url ] unit-test
[ { "that" } ] [ T{ name f "" "this" "http://d.de" } xml-file get
tag-props hash ] unit-test
[ t ] [ xml-file get tag-children second contained-tag? ] unit-test
[ t ] [ [ "<a></b>" string>xml ] catch xml-parse-error? ] unit-test

View File

@ -165,12 +165,14 @@ TUPLE: reference name ;
<reference> , SBUF" " clone incr-spot ;
: (parse-text) ( sbuf -- )
{
{ [ more? not ] [ >string , ] } ! should this be an error?
{ [ char CHAR: < = ] [ >string , ] }
{ [ char CHAR: & = ] [ parse-entity (parse-text) ] }
{ [ char CHAR: % = ] [ parse-reference (parse-text) ] }
{ [ t ] [ char parsed-ch (parse-text) ] }
char {
{ [ dup not ] [ drop >string , ] } ! should this be an error?
{ [ dup CHAR: < = ] [ drop >string , ] }
{ [ dup CHAR: & = ]
[ drop parse-entity (parse-text) ] }
{ [ dup CHAR: % = ]
[ drop parse-reference (parse-text) ] }
{ [ t ] [ parsed-ch (parse-text) ] }
} cond ;
: parse-text ( -- array )
@ -183,6 +185,17 @@ C: name ( space tag -- name )
[ set-name-tag ] keep
[ set-name-space ] keep ;
: get-version ( -- string )
prolog-data get prolog-version ;
: name-start-char? ( char -- ? )
get-version "1.0" =
[ 1.0name-start-char? ] [ 1.1name-start-char? ] if ;
: name-char? ( char -- ? )
get-version "1.0" =
[ 1.0name-char? ] [ 1.1name-char? ] if ;
: (parse-name) ( -- str )
char dup name-start-char? [
incr-spot ch>string [ name-char? not ] take-until append

View File

@ -2,39 +2,20 @@
! See http://factorcode.org/license.txt for BSD license.
IN: xml
USING: kernel namespaces sequences words io errors hashtables
strings parser arrays ;
! * Easy XML generation for more literal things
! should this be rewritten?
: text ( string -- )
chars>entities add-child ;
: tag ( string attr-quot contents-quot -- )
>r swap >r make-hash r> swap r>
-rot dupd <opener> process
slip
<closer> process ; inline
: comment ( string -- )
<comment> add-child ;
: make-xml ( quot -- vector )
#! Produces a tree of XML from a quotation to generate it
[ init-xml call xml-stack get first second ] with-scope ; inline
strings parser arrays generic ;
! * System for words specialized on tag names
TUPLE: process-missing process tag ;
M: process-missing error.
"Tag <" write
process-missing-tag tag-name write
process-missing-tag print-name
"> not implemented on process process " write
dup process-missing-process word-name print ;
: run-process ( tag word -- )
2dup "xtable" word-prop
>r dup tag-name name-tag r> hash* [ 2nip call ] [
>r dup name-tag r> hash* [ 2nip call ] [
drop <process-missing> throw
] if ;
@ -70,3 +51,36 @@ M: process-missing error.
: first-child-tag ( tag -- tag )
tag-children [ any-tag? ] find nip ;
! * Utilities for searching through XML documents
! These all work from the outside in, top to bottom.
: with-delegate ( object quot -- object )
over clone >r >r delegate r> call r>
[ set-delegate ] keep ; inline
GENERIC: (xml-each) ( quot tag -- ) inline
M: tag (xml-each)
[ swap call ] 2keep
tag-children [ (xml-each) ] each-with ;
M: object (xml-each)
swap call ;
M: xml-doc (xml-each)
delegate (xml-each) ;
: xml-each ( tag quot -- ) swap (xml-each) ; inline
GENERIC: (xml-map) ( quot tag -- tag ) inline
M: tag (xml-map)
clone over >r swap call r>
swap [ tag-children [ (xml-map) ] map-with ] keep
[ set-tag-children ] keep ;
M: object (xml-map)
swap call ;
M: xml-doc (xml-map)
[ (xml-map) ] with-delegate ;
: xml-map ( tag quot -- tag ) swap (xml-map) ; inline
! : xml-subset ( tag quot -- tag )
! V{ } clone rot [ ! this is wrong
! [ swap >r call [ r> push ] [ r> 2drop ] if ] 3keep drop
! ] xml-each 2drop ;

View File

@ -24,17 +24,17 @@ M: string (xml>string) chars>entities % ;
M: contained-tag (xml>string)
CHAR: < ,
dup tag-name print-name
dup print-name
tag-props print-props
"/>" % ;
M: tag (xml>string)
CHAR: < ,
dup tag-name print-name
dup print-name
dup tag-props print-props
CHAR: > ,
dup tag-children [ (xml>string) ] each
"</" % tag-name print-name CHAR: > , ;
"</" % print-name CHAR: > , ;
M: comment (xml>string)
"<!--" % comment-text % "-->" % ;