XML parses entities now
parent
96d729c464
commit
72a3d309f9
|
@ -3,7 +3,7 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: xml.utilities kernel assocs xml.generator math.order
|
||||
strings sequences xml.data xml.writer
|
||||
io.streams.string combinators xml xml.entities io.files io
|
||||
io.streams.string combinators xml xml.entities.html io.files io
|
||||
http.client namespaces make xml.generator hashtables
|
||||
calendar.format accessors continuations urls present ;
|
||||
IN: syndication
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
! Copyright (C) 2005, 2006 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: namespaces make kernel assocs sequences fry ;
|
||||
USING: namespaces make kernel assocs sequences fry values
|
||||
io.files io.encodings.binary ;
|
||||
IN: xml.entities
|
||||
|
||||
: entities-out
|
||||
|
@ -36,265 +37,7 @@ IN: xml.entities
|
|||
{ "quot" CHAR: " }
|
||||
} ;
|
||||
|
||||
: html-entities
|
||||
#! generated from:
|
||||
#! http://www.w3.org/TR/REC-html40/sgml/entities.html
|
||||
H{
|
||||
{ "nbsp" 160 }
|
||||
{ "iexcl" 161 }
|
||||
{ "cent" 162 }
|
||||
{ "pound" 163 }
|
||||
{ "curren" 164 }
|
||||
{ "yen" 165 }
|
||||
{ "brvbar" 166 }
|
||||
{ "sect" 167 }
|
||||
{ "uml" 168 }
|
||||
{ "copy" 169 }
|
||||
{ "ordf" 170 }
|
||||
{ "laquo" 171 }
|
||||
{ "not" 172 }
|
||||
{ "shy" 173 }
|
||||
{ "reg" 174 }
|
||||
{ "macr" 175 }
|
||||
{ "deg" 176 }
|
||||
{ "plusmn" 177 }
|
||||
{ "sup2" 178 }
|
||||
{ "sup3" 179 }
|
||||
{ "acute" 180 }
|
||||
{ "micro" 181 }
|
||||
{ "para" 182 }
|
||||
{ "middot" 183 }
|
||||
{ "cedil" 184 }
|
||||
{ "sup1" 185 }
|
||||
{ "ordm" 186 }
|
||||
{ "raquo" 187 }
|
||||
{ "frac14" 188 }
|
||||
{ "frac12" 189 }
|
||||
{ "frac34" 190 }
|
||||
{ "iquest" 191 }
|
||||
{ "Agrave" 192 }
|
||||
{ "Aacute" 193 }
|
||||
{ "Acirc" 194 }
|
||||
{ "Atilde" 195 }
|
||||
{ "Auml" 196 }
|
||||
{ "Aring" 197 }
|
||||
{ "AElig" 198 }
|
||||
{ "Ccedil" 199 }
|
||||
{ "Egrave" 200 }
|
||||
{ "Eacute" 201 }
|
||||
{ "Ecirc" 202 }
|
||||
{ "Euml" 203 }
|
||||
{ "Igrave" 204 }
|
||||
{ "Iacute" 205 }
|
||||
{ "Icirc" 206 }
|
||||
{ "Iuml" 207 }
|
||||
{ "ETH" 208 }
|
||||
{ "Ntilde" 209 }
|
||||
{ "Ograve" 210 }
|
||||
{ "Oacute" 211 }
|
||||
{ "Ocirc" 212 }
|
||||
{ "Otilde" 213 }
|
||||
{ "Ouml" 214 }
|
||||
{ "times" 215 }
|
||||
{ "Oslash" 216 }
|
||||
{ "Ugrave" 217 }
|
||||
{ "Uacute" 218 }
|
||||
{ "Ucirc" 219 }
|
||||
{ "Uuml" 220 }
|
||||
{ "Yacute" 221 }
|
||||
{ "THORN" 222 }
|
||||
{ "szlig" 223 }
|
||||
{ "agrave" 224 }
|
||||
{ "aacute" 225 }
|
||||
{ "acirc" 226 }
|
||||
{ "atilde" 227 }
|
||||
{ "auml" 228 }
|
||||
{ "aring" 229 }
|
||||
{ "aelig" 230 }
|
||||
{ "ccedil" 231 }
|
||||
{ "egrave" 232 }
|
||||
{ "eacute" 233 }
|
||||
{ "ecirc" 234 }
|
||||
{ "euml" 235 }
|
||||
{ "igrave" 236 }
|
||||
{ "iacute" 237 }
|
||||
{ "icirc" 238 }
|
||||
{ "iuml" 239 }
|
||||
{ "eth" 240 }
|
||||
{ "ntilde" 241 }
|
||||
{ "ograve" 242 }
|
||||
{ "oacute" 243 }
|
||||
{ "ocirc" 244 }
|
||||
{ "otilde" 245 }
|
||||
{ "ouml" 246 }
|
||||
{ "divide" 247 }
|
||||
{ "oslash" 248 }
|
||||
{ "ugrave" 249 }
|
||||
{ "uacute" 250 }
|
||||
{ "ucirc" 251 }
|
||||
{ "uuml" 252 }
|
||||
{ "yacute" 253 }
|
||||
{ "thorn" 254 }
|
||||
{ "yuml" 255 }
|
||||
{ "fnof" 402 }
|
||||
{ "Alpha" 913 }
|
||||
{ "Beta" 914 }
|
||||
{ "Gamma" 915 }
|
||||
{ "Delta" 916 }
|
||||
{ "Epsilon" 917 }
|
||||
{ "Zeta" 918 }
|
||||
{ "Eta" 919 }
|
||||
{ "Theta" 920 }
|
||||
{ "Iota" 921 }
|
||||
{ "Kappa" 922 }
|
||||
{ "Lambda" 923 }
|
||||
{ "Mu" 924 }
|
||||
{ "Nu" 925 }
|
||||
{ "Xi" 926 }
|
||||
{ "Omicron" 927 }
|
||||
{ "Pi" 928 }
|
||||
{ "Rho" 929 }
|
||||
{ "Sigma" 931 }
|
||||
{ "Tau" 932 }
|
||||
{ "Upsilon" 933 }
|
||||
{ "Phi" 934 }
|
||||
{ "Chi" 935 }
|
||||
{ "Psi" 936 }
|
||||
{ "Omega" 937 }
|
||||
{ "alpha" 945 }
|
||||
{ "beta" 946 }
|
||||
{ "gamma" 947 }
|
||||
{ "delta" 948 }
|
||||
{ "epsilon" 949 }
|
||||
{ "zeta" 950 }
|
||||
{ "eta" 951 }
|
||||
{ "theta" 952 }
|
||||
{ "iota" 953 }
|
||||
{ "kappa" 954 }
|
||||
{ "lambda" 955 }
|
||||
{ "mu" 956 }
|
||||
{ "nu" 957 }
|
||||
{ "xi" 958 }
|
||||
{ "omicron" 959 }
|
||||
{ "pi" 960 }
|
||||
{ "rho" 961 }
|
||||
{ "sigmaf" 962 }
|
||||
{ "sigma" 963 }
|
||||
{ "tau" 964 }
|
||||
{ "upsilon" 965 }
|
||||
{ "phi" 966 }
|
||||
{ "chi" 967 }
|
||||
{ "psi" 968 }
|
||||
{ "omega" 969 }
|
||||
{ "thetasym" 977 }
|
||||
{ "upsih" 978 }
|
||||
{ "piv" 982 }
|
||||
{ "bull" 8226 }
|
||||
{ "hellip" 8230 }
|
||||
{ "prime" 8242 }
|
||||
{ "Prime" 8243 }
|
||||
{ "oline" 8254 }
|
||||
{ "frasl" 8260 }
|
||||
{ "weierp" 8472 }
|
||||
{ "image" 8465 }
|
||||
{ "real" 8476 }
|
||||
{ "trade" 8482 }
|
||||
{ "alefsym" 8501 }
|
||||
{ "larr" 8592 }
|
||||
{ "uarr" 8593 }
|
||||
{ "rarr" 8594 }
|
||||
{ "darr" 8595 }
|
||||
{ "harr" 8596 }
|
||||
{ "crarr" 8629 }
|
||||
{ "lArr" 8656 }
|
||||
{ "uArr" 8657 }
|
||||
{ "rArr" 8658 }
|
||||
{ "dArr" 8659 }
|
||||
{ "hArr" 8660 }
|
||||
{ "forall" 8704 }
|
||||
{ "part" 8706 }
|
||||
{ "exist" 8707 }
|
||||
{ "empty" 8709 }
|
||||
{ "nabla" 8711 }
|
||||
{ "isin" 8712 }
|
||||
{ "notin" 8713 }
|
||||
{ "ni" 8715 }
|
||||
{ "prod" 8719 }
|
||||
{ "sum" 8721 }
|
||||
{ "minus" 8722 }
|
||||
{ "lowast" 8727 }
|
||||
{ "radic" 8730 }
|
||||
{ "prop" 8733 }
|
||||
{ "infin" 8734 }
|
||||
{ "ang" 8736 }
|
||||
{ "and" 8743 }
|
||||
{ "or" 8744 }
|
||||
{ "cap" 8745 }
|
||||
{ "cup" 8746 }
|
||||
{ "int" 8747 }
|
||||
{ "there4" 8756 }
|
||||
{ "sim" 8764 }
|
||||
{ "cong" 8773 }
|
||||
{ "asymp" 8776 }
|
||||
{ "ne" 8800 }
|
||||
{ "equiv" 8801 }
|
||||
{ "le" 8804 }
|
||||
{ "ge" 8805 }
|
||||
{ "sub" 8834 }
|
||||
{ "sup" 8835 }
|
||||
{ "nsub" 8836 }
|
||||
{ "sube" 8838 }
|
||||
{ "supe" 8839 }
|
||||
{ "oplus" 8853 }
|
||||
{ "otimes" 8855 }
|
||||
{ "perp" 8869 }
|
||||
{ "sdot" 8901 }
|
||||
{ "lceil" 8968 }
|
||||
{ "rceil" 8969 }
|
||||
{ "lfloor" 8970 }
|
||||
{ "rfloor" 8971 }
|
||||
{ "lang" 9001 }
|
||||
{ "rang" 9002 }
|
||||
{ "loz" 9674 }
|
||||
{ "spades" 9824 }
|
||||
{ "clubs" 9827 }
|
||||
{ "hearts" 9829 }
|
||||
{ "diams" 9830 }
|
||||
{ "OElig" 338 }
|
||||
{ "oelig" 339 }
|
||||
{ "Scaron" 352 }
|
||||
{ "scaron" 353 }
|
||||
{ "Yuml" 376 }
|
||||
{ "circ" 710 }
|
||||
{ "tilde" 732 }
|
||||
{ "ensp" 8194 }
|
||||
{ "emsp" 8195 }
|
||||
{ "thinsp" 8201 }
|
||||
{ "zwnj" 8204 }
|
||||
{ "zwj" 8205 }
|
||||
{ "lrm" 8206 }
|
||||
{ "rlm" 8207 }
|
||||
{ "ndash" 8211 }
|
||||
{ "mdash" 8212 }
|
||||
{ "lsquo" 8216 }
|
||||
{ "rsquo" 8217 }
|
||||
{ "sbquo" 8218 }
|
||||
{ "ldquo" 8220 }
|
||||
{ "rdquo" 8221 }
|
||||
{ "bdquo" 8222 }
|
||||
{ "dagger" 8224 }
|
||||
{ "Dagger" 8225 }
|
||||
{ "permil" 8240 }
|
||||
{ "lsaquo" 8249 }
|
||||
{ "rsaquo" 8250 }
|
||||
{ "euro" 8364 }
|
||||
} ;
|
||||
|
||||
SYMBOL: extra-entities
|
||||
f extra-entities set-global
|
||||
|
||||
! Runs quot inside with-scope after storing `entities` in the
! extra-entities variable: the quotation passed to with-scope swaps the
! two inputs, sets extra-entities to `entities`, then calls quot.
: with-entities ( entities quot -- )
|
||||
[ swap extra-entities set call ] with-scope ; inline
|
||||
|
||||
! Convenience wrapper: pushes html-entities beneath quot and delegates
! to with-entities, so quot runs with html-entities as the entities input.
: with-html-entities ( quot -- )
|
||||
html-entities swap with-entities ; inline
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Daniel Ehrenberg
|
|
@ -0,0 +1,4 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: tools.test xml.entities.html ;
|
||||
IN: xml.entities.html.tests
|
|
@ -0,0 +1,22 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: assocs io.encodings.binary io.files kernel namespaces sequences
|
||||
values xml xml.entities ;
|
||||
IN: xml.entities.html
|
||||
|
||||
VALUE: html-entities
|
||||
|
||||
! Opens `file` with a binary <file-reader> and passes the stream to sax,
! with `f` placed beneath the stream on the stack. The quotation given to
! sax drops two stack values and pushes the current value of
! extra-entities.
! NOTE(review): presumably the parser fills extra-entities from the
! <!ENTITY ...> declarations it reads and that value ends up as `table`;
! the meaning of the leading `f` is not visible here -- confirm against
! the xml vocabulary.
: read-entities-file ( file -- table )
|
||||
f swap binary <file-reader>
|
||||
[ 2drop extra-entities get ] sax ;
|
||||
|
||||
! For each of "lat1", "special" and "symbol", builds the path
! resource:basis/xml/entities/html/xhtml-<name>.ent with 3append, reads
! it with read-entities-file, and merges the three resulting tables into
! one with two assoc-union calls.
: get-html ( -- table )
|
||||
{ "lat1" "special" "symbol" } [
|
||||
"resource:basis/xml/entities/html/xhtml-"
|
||||
swap ".ent" 3append read-entities-file
|
||||
] map first3 assoc-union assoc-union ;
|
||||
|
||||
get-html to: html-entities
|
||||
|
||||
! Convenience wrapper: pushes the html-entities value beneath quot and
! delegates to with-entities, so quot runs with html-entities as the
! entities input.
: with-html-entities ( quot -- )
|
||||
html-entities swap with-entities ; inline
|
|
@ -0,0 +1,196 @@
|
|||
<!-- Portions (C) International Organization for Standardization 1986
|
||||
Permission to copy in any form is granted for use with
|
||||
conforming SGML systems and applications as defined in
|
||||
ISO 8879, provided this notice is included in all copies.
|
||||
-->
|
||||
<!-- Character entity set. Typical invocation:
|
||||
<!ENTITY % HTMLlat1 PUBLIC
|
||||
"-//W3C//ENTITIES Latin 1 for XHTML//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent">
|
||||
%HTMLlat1;
|
||||
-->
|
||||
|
||||
<!ENTITY nbsp "&#160;"> <!-- no-break space = non-breaking space,
|
||||
U+00A0 ISOnum -->
|
||||
<!ENTITY iexcl "¡"> <!-- inverted exclamation mark, U+00A1 ISOnum -->
|
||||
<!ENTITY cent "¢"> <!-- cent sign, U+00A2 ISOnum -->
|
||||
<!ENTITY pound "£"> <!-- pound sign, U+00A3 ISOnum -->
|
||||
<!ENTITY curren "¤"> <!-- currency sign, U+00A4 ISOnum -->
|
||||
<!ENTITY yen "¥"> <!-- yen sign = yuan sign, U+00A5 ISOnum -->
|
||||
<!ENTITY brvbar "¦"> <!-- broken bar = broken vertical bar,
|
||||
U+00A6 ISOnum -->
|
||||
<!ENTITY sect "§"> <!-- section sign, U+00A7 ISOnum -->
|
||||
<!ENTITY uml "¨"> <!-- diaeresis = spacing diaeresis,
|
||||
U+00A8 ISOdia -->
|
||||
<!ENTITY copy "©"> <!-- copyright sign, U+00A9 ISOnum -->
|
||||
<!ENTITY ordf "ª"> <!-- feminine ordinal indicator, U+00AA ISOnum -->
|
||||
<!ENTITY laquo "«"> <!-- left-pointing double angle quotation mark
|
||||
= left pointing guillemet, U+00AB ISOnum -->
|
||||
<!ENTITY not "¬"> <!-- not sign = angled dash,
|
||||
U+00AC ISOnum -->
|
||||
<!ENTITY shy "&#173;"> <!-- soft hyphen = discretionary hyphen,
|
||||
U+00AD ISOnum -->
|
||||
<!ENTITY reg "®"> <!-- registered sign = registered trade mark sign,
|
||||
U+00AE ISOnum -->
|
||||
<!ENTITY macr "¯"> <!-- macron = spacing macron = overline
|
||||
= APL overbar, U+00AF ISOdia -->
|
||||
<!ENTITY deg "°"> <!-- degree sign, U+00B0 ISOnum -->
|
||||
<!ENTITY plusmn "±"> <!-- plus-minus sign = plus-or-minus sign,
|
||||
U+00B1 ISOnum -->
|
||||
<!ENTITY sup2 "²"> <!-- superscript two = superscript digit two
|
||||
= squared, U+00B2 ISOnum -->
|
||||
<!ENTITY sup3 "³"> <!-- superscript three = superscript digit three
|
||||
= cubed, U+00B3 ISOnum -->
|
||||
<!ENTITY acute "´"> <!-- acute accent = spacing acute,
|
||||
U+00B4 ISOdia -->
|
||||
<!ENTITY micro "µ"> <!-- micro sign, U+00B5 ISOnum -->
|
||||
<!ENTITY para "¶"> <!-- pilcrow sign = paragraph sign,
|
||||
U+00B6 ISOnum -->
|
||||
<!ENTITY middot "·"> <!-- middle dot = Georgian comma
|
||||
= Greek middle dot, U+00B7 ISOnum -->
|
||||
<!ENTITY cedil "¸"> <!-- cedilla = spacing cedilla, U+00B8 ISOdia -->
|
||||
<!ENTITY sup1 "¹"> <!-- superscript one = superscript digit one,
|
||||
U+00B9 ISOnum -->
|
||||
<!ENTITY ordm "º"> <!-- masculine ordinal indicator,
|
||||
U+00BA ISOnum -->
|
||||
<!ENTITY raquo "»"> <!-- right-pointing double angle quotation mark
|
||||
= right pointing guillemet, U+00BB ISOnum -->
|
||||
<!ENTITY frac14 "¼"> <!-- vulgar fraction one quarter
|
||||
= fraction one quarter, U+00BC ISOnum -->
|
||||
<!ENTITY frac12 "½"> <!-- vulgar fraction one half
|
||||
= fraction one half, U+00BD ISOnum -->
|
||||
<!ENTITY frac34 "¾"> <!-- vulgar fraction three quarters
|
||||
= fraction three quarters, U+00BE ISOnum -->
|
||||
<!ENTITY iquest "¿"> <!-- inverted question mark
|
||||
= turned question mark, U+00BF ISOnum -->
|
||||
<!ENTITY Agrave "À"> <!-- latin capital letter A with grave
|
||||
= latin capital letter A grave,
|
||||
U+00C0 ISOlat1 -->
|
||||
<!ENTITY Aacute "Á"> <!-- latin capital letter A with acute,
|
||||
U+00C1 ISOlat1 -->
|
||||
<!ENTITY Acirc "Â"> <!-- latin capital letter A with circumflex,
|
||||
U+00C2 ISOlat1 -->
|
||||
<!ENTITY Atilde "Ã"> <!-- latin capital letter A with tilde,
|
||||
U+00C3 ISOlat1 -->
|
||||
<!ENTITY Auml "Ä"> <!-- latin capital letter A with diaeresis,
|
||||
U+00C4 ISOlat1 -->
|
||||
<!ENTITY Aring "Å"> <!-- latin capital letter A with ring above
|
||||
= latin capital letter A ring,
|
||||
U+00C5 ISOlat1 -->
|
||||
<!ENTITY AElig "Æ"> <!-- latin capital letter AE
|
||||
= latin capital ligature AE,
|
||||
U+00C6 ISOlat1 -->
|
||||
<!ENTITY Ccedil "Ç"> <!-- latin capital letter C with cedilla,
|
||||
U+00C7 ISOlat1 -->
|
||||
<!ENTITY Egrave "È"> <!-- latin capital letter E with grave,
|
||||
U+00C8 ISOlat1 -->
|
||||
<!ENTITY Eacute "É"> <!-- latin capital letter E with acute,
|
||||
U+00C9 ISOlat1 -->
|
||||
<!ENTITY Ecirc "Ê"> <!-- latin capital letter E with circumflex,
|
||||
U+00CA ISOlat1 -->
|
||||
<!ENTITY Euml "Ë"> <!-- latin capital letter E with diaeresis,
|
||||
U+00CB ISOlat1 -->
|
||||
<!ENTITY Igrave "Ì"> <!-- latin capital letter I with grave,
|
||||
U+00CC ISOlat1 -->
|
||||
<!ENTITY Iacute "Í"> <!-- latin capital letter I with acute,
|
||||
U+00CD ISOlat1 -->
|
||||
<!ENTITY Icirc "Î"> <!-- latin capital letter I with circumflex,
|
||||
U+00CE ISOlat1 -->
|
||||
<!ENTITY Iuml "Ï"> <!-- latin capital letter I with diaeresis,
|
||||
U+00CF ISOlat1 -->
|
||||
<!ENTITY ETH "Ð"> <!-- latin capital letter ETH, U+00D0 ISOlat1 -->
|
||||
<!ENTITY Ntilde "Ñ"> <!-- latin capital letter N with tilde,
|
||||
U+00D1 ISOlat1 -->
|
||||
<!ENTITY Ograve "Ò"> <!-- latin capital letter O with grave,
|
||||
U+00D2 ISOlat1 -->
|
||||
<!ENTITY Oacute "Ó"> <!-- latin capital letter O with acute,
|
||||
U+00D3 ISOlat1 -->
|
||||
<!ENTITY Ocirc "Ô"> <!-- latin capital letter O with circumflex,
|
||||
U+00D4 ISOlat1 -->
|
||||
<!ENTITY Otilde "Õ"> <!-- latin capital letter O with tilde,
|
||||
U+00D5 ISOlat1 -->
|
||||
<!ENTITY Ouml "Ö"> <!-- latin capital letter O with diaeresis,
|
||||
U+00D6 ISOlat1 -->
|
||||
<!ENTITY times "×"> <!-- multiplication sign, U+00D7 ISOnum -->
|
||||
<!ENTITY Oslash "Ø"> <!-- latin capital letter O with stroke
|
||||
= latin capital letter O slash,
|
||||
U+00D8 ISOlat1 -->
|
||||
<!ENTITY Ugrave "Ù"> <!-- latin capital letter U with grave,
|
||||
U+00D9 ISOlat1 -->
|
||||
<!ENTITY Uacute "Ú"> <!-- latin capital letter U with acute,
|
||||
U+00DA ISOlat1 -->
|
||||
<!ENTITY Ucirc "Û"> <!-- latin capital letter U with circumflex,
|
||||
U+00DB ISOlat1 -->
|
||||
<!ENTITY Uuml "Ü"> <!-- latin capital letter U with diaeresis,
|
||||
U+00DC ISOlat1 -->
|
||||
<!ENTITY Yacute "Ý"> <!-- latin capital letter Y with acute,
|
||||
U+00DD ISOlat1 -->
|
||||
<!ENTITY THORN "Þ"> <!-- latin capital letter THORN,
|
||||
U+00DE ISOlat1 -->
|
||||
<!ENTITY szlig "ß"> <!-- latin small letter sharp s = ess-zed,
|
||||
U+00DF ISOlat1 -->
|
||||
<!ENTITY agrave "à"> <!-- latin small letter a with grave
|
||||
= latin small letter a grave,
|
||||
U+00E0 ISOlat1 -->
|
||||
<!ENTITY aacute "á"> <!-- latin small letter a with acute,
|
||||
U+00E1 ISOlat1 -->
|
||||
<!ENTITY acirc "â"> <!-- latin small letter a with circumflex,
|
||||
U+00E2 ISOlat1 -->
|
||||
<!ENTITY atilde "ã"> <!-- latin small letter a with tilde,
|
||||
U+00E3 ISOlat1 -->
|
||||
<!ENTITY auml "ä"> <!-- latin small letter a with diaeresis,
|
||||
U+00E4 ISOlat1 -->
|
||||
<!ENTITY aring "å"> <!-- latin small letter a with ring above
|
||||
= latin small letter a ring,
|
||||
U+00E5 ISOlat1 -->
|
||||
<!ENTITY aelig "æ"> <!-- latin small letter ae
|
||||
= latin small ligature ae, U+00E6 ISOlat1 -->
|
||||
<!ENTITY ccedil "ç"> <!-- latin small letter c with cedilla,
|
||||
U+00E7 ISOlat1 -->
|
||||
<!ENTITY egrave "è"> <!-- latin small letter e with grave,
|
||||
U+00E8 ISOlat1 -->
|
||||
<!ENTITY eacute "é"> <!-- latin small letter e with acute,
|
||||
U+00E9 ISOlat1 -->
|
||||
<!ENTITY ecirc "ê"> <!-- latin small letter e with circumflex,
|
||||
U+00EA ISOlat1 -->
|
||||
<!ENTITY euml "ë"> <!-- latin small letter e with diaeresis,
|
||||
U+00EB ISOlat1 -->
|
||||
<!ENTITY igrave "ì"> <!-- latin small letter i with grave,
|
||||
U+00EC ISOlat1 -->
|
||||
<!ENTITY iacute "í"> <!-- latin small letter i with acute,
|
||||
U+00ED ISOlat1 -->
|
||||
<!ENTITY icirc "î"> <!-- latin small letter i with circumflex,
|
||||
U+00EE ISOlat1 -->
|
||||
<!ENTITY iuml "ï"> <!-- latin small letter i with diaeresis,
|
||||
U+00EF ISOlat1 -->
|
||||
<!ENTITY eth "ð"> <!-- latin small letter eth, U+00F0 ISOlat1 -->
|
||||
<!ENTITY ntilde "ñ"> <!-- latin small letter n with tilde,
|
||||
U+00F1 ISOlat1 -->
|
||||
<!ENTITY ograve "ò"> <!-- latin small letter o with grave,
|
||||
U+00F2 ISOlat1 -->
|
||||
<!ENTITY oacute "ó"> <!-- latin small letter o with acute,
|
||||
U+00F3 ISOlat1 -->
|
||||
<!ENTITY ocirc "ô"> <!-- latin small letter o with circumflex,
|
||||
U+00F4 ISOlat1 -->
|
||||
<!ENTITY otilde "õ"> <!-- latin small letter o with tilde,
|
||||
U+00F5 ISOlat1 -->
|
||||
<!ENTITY ouml "ö"> <!-- latin small letter o with diaeresis,
|
||||
U+00F6 ISOlat1 -->
|
||||
<!ENTITY divide "÷"> <!-- division sign, U+00F7 ISOnum -->
|
||||
<!ENTITY oslash "ø"> <!-- latin small letter o with stroke,
|
||||
= latin small letter o slash,
|
||||
U+00F8 ISOlat1 -->
|
||||
<!ENTITY ugrave "ù"> <!-- latin small letter u with grave,
|
||||
U+00F9 ISOlat1 -->
|
||||
<!ENTITY uacute "ú"> <!-- latin small letter u with acute,
|
||||
U+00FA ISOlat1 -->
|
||||
<!ENTITY ucirc "û"> <!-- latin small letter u with circumflex,
|
||||
U+00FB ISOlat1 -->
|
||||
<!ENTITY uuml "ü"> <!-- latin small letter u with diaeresis,
|
||||
U+00FC ISOlat1 -->
|
||||
<!ENTITY yacute "ý"> <!-- latin small letter y with acute,
|
||||
U+00FD ISOlat1 -->
|
||||
<!ENTITY thorn "þ"> <!-- latin small letter thorn,
|
||||
U+00FE ISOlat1 -->
|
||||
<!ENTITY yuml "ÿ"> <!-- latin small letter y with diaeresis,
|
||||
U+00FF ISOlat1 -->
|
|
@ -0,0 +1,80 @@
|
|||
<!-- Special characters for XHTML -->
|
||||
|
||||
<!-- Character entity set. Typical invocation:
|
||||
<!ENTITY % HTMLspecial PUBLIC
|
||||
"-//W3C//ENTITIES Special for XHTML//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent">
|
||||
%HTMLspecial;
|
||||
-->
|
||||
|
||||
<!-- Portions (C) International Organization for Standardization 1986:
|
||||
Permission to copy in any form is granted for use with
|
||||
conforming SGML systems and applications as defined in
|
||||
ISO 8879, provided this notice is included in all copies.
|
||||
-->
|
||||
|
||||
<!-- Relevant ISO entity set is given unless names are newly introduced.
|
||||
New names (i.e., not in ISO 8879 list) do not clash with any
|
||||
existing ISO 8879 entity names. ISO 10646 character numbers
|
||||
are given for each character, in hex. values are decimal
|
||||
conversions of the ISO 10646 values and refer to the document
|
||||
character set. Names are Unicode names.
|
||||
-->
|
||||
|
||||
<!-- C0 Controls and Basic Latin -->
|
||||
<!ENTITY quot "&#34;"> <!-- quotation mark, U+0022 ISOnum -->
|
||||
<!ENTITY amp "&#38;#38;"> <!-- ampersand, U+0026 ISOnum -->
|
||||
<!ENTITY lt "&#38;#60;"> <!-- less-than sign, U+003C ISOnum -->
|
||||
<!ENTITY gt "&#62;"> <!-- greater-than sign, U+003E ISOnum -->
|
||||
<!ENTITY apos "&#39;"> <!-- apostrophe = APL quote, U+0027 ISOnum -->
|
||||
|
||||
<!-- Latin Extended-A -->
|
||||
<!ENTITY OElig "Œ"> <!-- latin capital ligature OE,
|
||||
U+0152 ISOlat2 -->
|
||||
<!ENTITY oelig "œ"> <!-- latin small ligature oe, U+0153 ISOlat2 -->
|
||||
<!-- ligature is a misnomer, this is a separate character in some languages -->
|
||||
<!ENTITY Scaron "Š"> <!-- latin capital letter S with caron,
|
||||
U+0160 ISOlat2 -->
|
||||
<!ENTITY scaron "š"> <!-- latin small letter s with caron,
|
||||
U+0161 ISOlat2 -->
|
||||
<!ENTITY Yuml "Ÿ"> <!-- latin capital letter Y with diaeresis,
|
||||
U+0178 ISOlat2 -->
|
||||
|
||||
<!-- Spacing Modifier Letters -->
|
||||
<!ENTITY circ "ˆ"> <!-- modifier letter circumflex accent,
|
||||
U+02C6 ISOpub -->
|
||||
<!ENTITY tilde "˜"> <!-- small tilde, U+02DC ISOdia -->
|
||||
|
||||
<!-- General Punctuation -->
|
||||
<!ENTITY ensp "&#8194;"> <!-- en space, U+2002 ISOpub -->
|
||||
<!ENTITY emsp "&#8195;"> <!-- em space, U+2003 ISOpub -->
|
||||
<!ENTITY thinsp "&#8201;"> <!-- thin space, U+2009 ISOpub -->
|
||||
<!ENTITY zwnj "&#8204;"> <!-- zero width non-joiner,
|
||||
U+200C NEW RFC 2070 -->
|
||||
<!ENTITY zwj "&#8205;"> <!-- zero width joiner, U+200D NEW RFC 2070 -->
|
||||
<!ENTITY lrm "&#8206;"> <!-- left-to-right mark, U+200E NEW RFC 2070 -->
|
||||
<!ENTITY rlm "&#8207;"> <!-- right-to-left mark, U+200F NEW RFC 2070 -->
|
||||
<!ENTITY ndash "–"> <!-- en dash, U+2013 ISOpub -->
|
||||
<!ENTITY mdash "—"> <!-- em dash, U+2014 ISOpub -->
|
||||
<!ENTITY lsquo "‘"> <!-- left single quotation mark,
|
||||
U+2018 ISOnum -->
|
||||
<!ENTITY rsquo "’"> <!-- right single quotation mark,
|
||||
U+2019 ISOnum -->
|
||||
<!ENTITY sbquo "‚"> <!-- single low-9 quotation mark, U+201A NEW -->
|
||||
<!ENTITY ldquo "“"> <!-- left double quotation mark,
|
||||
U+201C ISOnum -->
|
||||
<!ENTITY rdquo "”"> <!-- right double quotation mark,
|
||||
U+201D ISOnum -->
|
||||
<!ENTITY bdquo "„"> <!-- double low-9 quotation mark, U+201E NEW -->
|
||||
<!ENTITY dagger "†"> <!-- dagger, U+2020 ISOpub -->
|
||||
<!ENTITY Dagger "‡"> <!-- double dagger, U+2021 ISOpub -->
|
||||
<!ENTITY permil "‰"> <!-- per mille sign, U+2030 ISOtech -->
|
||||
<!ENTITY lsaquo "‹"> <!-- single left-pointing angle quotation mark,
|
||||
U+2039 ISO proposed -->
|
||||
<!-- lsaquo is proposed but not yet ISO standardized -->
|
||||
<!ENTITY rsaquo "›"> <!-- single right-pointing angle quotation mark,
|
||||
U+203A ISO proposed -->
|
||||
<!-- rsaquo is proposed but not yet ISO standardized -->
|
||||
|
||||
<!-- Currency Symbols -->
|
||||
<!ENTITY euro "€"> <!-- euro sign, U+20AC NEW -->
|
|
@ -0,0 +1,237 @@
|
|||
<!-- Mathematical, Greek and Symbolic characters for XHTML -->
|
||||
|
||||
<!-- Character entity set. Typical invocation:
|
||||
<!ENTITY % HTMLsymbol PUBLIC
|
||||
"-//W3C//ENTITIES Symbols for XHTML//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent">
|
||||
%HTMLsymbol;
|
||||
-->
|
||||
|
||||
<!-- Portions (C) International Organization for Standardization 1986:
|
||||
Permission to copy in any form is granted for use with
|
||||
conforming SGML systems and applications as defined in
|
||||
ISO 8879, provided this notice is included in all copies.
|
||||
-->
|
||||
|
||||
<!-- Relevant ISO entity set is given unless names are newly introduced.
|
||||
New names (i.e., not in ISO 8879 list) do not clash with any
|
||||
existing ISO 8879 entity names. ISO 10646 character numbers
|
||||
are given for each character, in hex. values are decimal
|
||||
conversions of the ISO 10646 values and refer to the document
|
||||
character set. Names are Unicode names.
|
||||
-->
|
||||
|
||||
<!-- Latin Extended-B -->
|
||||
<!ENTITY fnof "ƒ"> <!-- latin small letter f with hook = function
|
||||
= florin, U+0192 ISOtech -->
|
||||
|
||||
<!-- Greek -->
|
||||
<!ENTITY Alpha "Α"> <!-- greek capital letter alpha, U+0391 -->
|
||||
<!ENTITY Beta "Β"> <!-- greek capital letter beta, U+0392 -->
|
||||
<!ENTITY Gamma "Γ"> <!-- greek capital letter gamma,
|
||||
U+0393 ISOgrk3 -->
|
||||
<!ENTITY Delta "Δ"> <!-- greek capital letter delta,
|
||||
U+0394 ISOgrk3 -->
|
||||
<!ENTITY Epsilon "Ε"> <!-- greek capital letter epsilon, U+0395 -->
|
||||
<!ENTITY Zeta "Ζ"> <!-- greek capital letter zeta, U+0396 -->
|
||||
<!ENTITY Eta "Η"> <!-- greek capital letter eta, U+0397 -->
|
||||
<!ENTITY Theta "Θ"> <!-- greek capital letter theta,
|
||||
U+0398 ISOgrk3 -->
|
||||
<!ENTITY Iota "Ι"> <!-- greek capital letter iota, U+0399 -->
|
||||
<!ENTITY Kappa "Κ"> <!-- greek capital letter kappa, U+039A -->
|
||||
<!ENTITY Lambda "Λ"> <!-- greek capital letter lamda,
|
||||
U+039B ISOgrk3 -->
|
||||
<!ENTITY Mu "Μ"> <!-- greek capital letter mu, U+039C -->
|
||||
<!ENTITY Nu "Ν"> <!-- greek capital letter nu, U+039D -->
|
||||
<!ENTITY Xi "Ξ"> <!-- greek capital letter xi, U+039E ISOgrk3 -->
|
||||
<!ENTITY Omicron "Ο"> <!-- greek capital letter omicron, U+039F -->
|
||||
<!ENTITY Pi "Π"> <!-- greek capital letter pi, U+03A0 ISOgrk3 -->
|
||||
<!ENTITY Rho "Ρ"> <!-- greek capital letter rho, U+03A1 -->
|
||||
<!-- there is no Sigmaf, and no U+03A2 character either -->
|
||||
<!ENTITY Sigma "Σ"> <!-- greek capital letter sigma,
|
||||
U+03A3 ISOgrk3 -->
|
||||
<!ENTITY Tau "Τ"> <!-- greek capital letter tau, U+03A4 -->
|
||||
<!ENTITY Upsilon "Υ"> <!-- greek capital letter upsilon,
|
||||
U+03A5 ISOgrk3 -->
|
||||
<!ENTITY Phi "Φ"> <!-- greek capital letter phi,
|
||||
U+03A6 ISOgrk3 -->
|
||||
<!ENTITY Chi "Χ"> <!-- greek capital letter chi, U+03A7 -->
|
||||
<!ENTITY Psi "Ψ"> <!-- greek capital letter psi,
|
||||
U+03A8 ISOgrk3 -->
|
||||
<!ENTITY Omega "Ω"> <!-- greek capital letter omega,
|
||||
U+03A9 ISOgrk3 -->
|
||||
|
||||
<!ENTITY alpha "α"> <!-- greek small letter alpha,
|
||||
U+03B1 ISOgrk3 -->
|
||||
<!ENTITY beta "β"> <!-- greek small letter beta, U+03B2 ISOgrk3 -->
|
||||
<!ENTITY gamma "γ"> <!-- greek small letter gamma,
|
||||
U+03B3 ISOgrk3 -->
|
||||
<!ENTITY delta "δ"> <!-- greek small letter delta,
|
||||
U+03B4 ISOgrk3 -->
|
||||
<!ENTITY epsilon "ε"> <!-- greek small letter epsilon,
|
||||
U+03B5 ISOgrk3 -->
|
||||
<!ENTITY zeta "ζ"> <!-- greek small letter zeta, U+03B6 ISOgrk3 -->
|
||||
<!ENTITY eta "η"> <!-- greek small letter eta, U+03B7 ISOgrk3 -->
|
||||
<!ENTITY theta "θ"> <!-- greek small letter theta,
|
||||
U+03B8 ISOgrk3 -->
|
||||
<!ENTITY iota "ι"> <!-- greek small letter iota, U+03B9 ISOgrk3 -->
|
||||
<!ENTITY kappa "κ"> <!-- greek small letter kappa,
|
||||
U+03BA ISOgrk3 -->
|
||||
<!ENTITY lambda "λ"> <!-- greek small letter lamda,
|
||||
U+03BB ISOgrk3 -->
|
||||
<!ENTITY mu "μ"> <!-- greek small letter mu, U+03BC ISOgrk3 -->
|
||||
<!ENTITY nu "ν"> <!-- greek small letter nu, U+03BD ISOgrk3 -->
|
||||
<!ENTITY xi "ξ"> <!-- greek small letter xi, U+03BE ISOgrk3 -->
|
||||
<!ENTITY omicron "ο"> <!-- greek small letter omicron, U+03BF NEW -->
|
||||
<!ENTITY pi "π"> <!-- greek small letter pi, U+03C0 ISOgrk3 -->
|
||||
<!ENTITY rho "ρ"> <!-- greek small letter rho, U+03C1 ISOgrk3 -->
|
||||
<!ENTITY sigmaf "ς"> <!-- greek small letter final sigma,
|
||||
U+03C2 ISOgrk3 -->
|
||||
<!ENTITY sigma "σ"> <!-- greek small letter sigma,
|
||||
U+03C3 ISOgrk3 -->
|
||||
<!ENTITY tau "τ"> <!-- greek small letter tau, U+03C4 ISOgrk3 -->
|
||||
<!ENTITY upsilon "υ"> <!-- greek small letter upsilon,
|
||||
U+03C5 ISOgrk3 -->
|
||||
<!ENTITY phi "φ"> <!-- greek small letter phi, U+03C6 ISOgrk3 -->
|
||||
<!ENTITY chi "χ"> <!-- greek small letter chi, U+03C7 ISOgrk3 -->
|
||||
<!ENTITY psi "ψ"> <!-- greek small letter psi, U+03C8 ISOgrk3 -->
|
||||
<!ENTITY omega "ω"> <!-- greek small letter omega,
|
||||
U+03C9 ISOgrk3 -->
|
||||
<!ENTITY thetasym "ϑ"> <!-- greek theta symbol,
|
||||
U+03D1 NEW -->
|
||||
<!ENTITY upsih "ϒ"> <!-- greek upsilon with hook symbol,
|
||||
U+03D2 NEW -->
|
||||
<!ENTITY piv "ϖ"> <!-- greek pi symbol, U+03D6 ISOgrk3 -->
|
||||
|
||||
<!-- General Punctuation -->
|
||||
<!ENTITY bull "•"> <!-- bullet = black small circle,
|
||||
U+2022 ISOpub -->
|
||||
<!-- bullet is NOT the same as bullet operator, U+2219 -->
|
||||
<!ENTITY hellip "…"> <!-- horizontal ellipsis = three dot leader,
|
||||
U+2026 ISOpub -->
|
||||
<!ENTITY prime "′"> <!-- prime = minutes = feet, U+2032 ISOtech -->
|
||||
<!ENTITY Prime "″"> <!-- double prime = seconds = inches,
|
||||
U+2033 ISOtech -->
|
||||
<!ENTITY oline "‾"> <!-- overline = spacing overscore,
|
||||
U+203E NEW -->
|
||||
<!ENTITY frasl "⁄"> <!-- fraction slash, U+2044 NEW -->
|
||||
|
||||
<!-- Letterlike Symbols -->
|
||||
<!ENTITY weierp "℘"> <!-- script capital P = power set
|
||||
= Weierstrass p, U+2118 ISOamso -->
|
||||
<!ENTITY image "ℑ"> <!-- black-letter capital I = imaginary part,
|
||||
U+2111 ISOamso -->
|
||||
<!ENTITY real "ℜ"> <!-- black-letter capital R = real part symbol,
|
||||
U+211C ISOamso -->
|
||||
<!ENTITY trade "™"> <!-- trade mark sign, U+2122 ISOnum -->
|
||||
<!ENTITY alefsym "ℵ"> <!-- alef symbol = first transfinite cardinal,
|
||||
U+2135 NEW -->
|
||||
<!-- alef symbol is NOT the same as hebrew letter alef,
|
||||
U+05D0 although the same glyph could be used to depict both characters -->
|
||||
|
||||
<!-- Arrows -->
|
||||
<!ENTITY larr "←"> <!-- leftwards arrow, U+2190 ISOnum -->
|
||||
<!ENTITY uarr "↑"> <!-- upwards arrow, U+2191 ISOnum-->
|
||||
<!ENTITY rarr "→"> <!-- rightwards arrow, U+2192 ISOnum -->
|
||||
<!ENTITY darr "↓"> <!-- downwards arrow, U+2193 ISOnum -->
|
||||
<!ENTITY harr "↔"> <!-- left right arrow, U+2194 ISOamsa -->
|
||||
<!ENTITY crarr "↵"> <!-- downwards arrow with corner leftwards
|
||||
= carriage return, U+21B5 NEW -->
|
||||
<!ENTITY lArr "⇐"> <!-- leftwards double arrow, U+21D0 ISOtech -->
|
||||
<!-- Unicode does not say that lArr is the same as the 'is implied by' arrow
|
||||
but also does not have any other character for that function. So lArr can
|
||||
be used for 'is implied by' as ISOtech suggests -->
|
||||
<!ENTITY uArr "⇑"> <!-- upwards double arrow, U+21D1 ISOamsa -->
|
||||
<!ENTITY rArr "⇒"> <!-- rightwards double arrow,
|
||||
U+21D2 ISOtech -->
|
||||
<!-- Unicode does not say this is the 'implies' character but does not have
|
||||
another character with this function so rArr can be used for 'implies'
|
||||
as ISOtech suggests -->
|
||||
<!ENTITY dArr "⇓"> <!-- downwards double arrow, U+21D3 ISOamsa -->
|
||||
<!ENTITY hArr "⇔"> <!-- left right double arrow,
|
||||
U+21D4 ISOamsa -->
|
||||
|
||||
<!-- Mathematical Operators -->
|
||||
<!ENTITY forall "∀"> <!-- for all, U+2200 ISOtech -->
|
||||
<!ENTITY part "∂"> <!-- partial differential, U+2202 ISOtech -->
|
||||
<!ENTITY exist "∃"> <!-- there exists, U+2203 ISOtech -->
|
||||
<!ENTITY empty "∅"> <!-- empty set = null set, U+2205 ISOamso -->
|
||||
<!ENTITY nabla "∇"> <!-- nabla = backward difference,
|
||||
U+2207 ISOtech -->
|
||||
<!ENTITY isin "∈"> <!-- element of, U+2208 ISOtech -->
|
||||
<!ENTITY notin "∉"> <!-- not an element of, U+2209 ISOtech -->
|
||||
<!ENTITY ni "∋"> <!-- contains as member, U+220B ISOtech -->
|
||||
<!ENTITY prod "∏"> <!-- n-ary product = product sign,
|
||||
U+220F ISOamsb -->
|
||||
<!-- prod is NOT the same character as U+03A0 'greek capital letter pi' though
|
||||
the same glyph might be used for both -->
|
||||
<!ENTITY sum "∑"> <!-- n-ary summation, U+2211 ISOamsb -->
|
||||
<!-- sum is NOT the same character as U+03A3 'greek capital letter sigma'
|
||||
though the same glyph might be used for both -->
|
||||
<!ENTITY minus "−"> <!-- minus sign, U+2212 ISOtech -->
|
||||
<!ENTITY lowast "∗"> <!-- asterisk operator, U+2217 ISOtech -->
|
||||
<!ENTITY radic "√"> <!-- square root = radical sign,
|
||||
U+221A ISOtech -->
|
||||
<!ENTITY prop "∝"> <!-- proportional to, U+221D ISOtech -->
|
||||
<!ENTITY infin "∞"> <!-- infinity, U+221E ISOtech -->
|
||||
<!ENTITY ang "∠"> <!-- angle, U+2220 ISOamso -->
|
||||
<!ENTITY and "∧"> <!-- logical and = wedge, U+2227 ISOtech -->
|
||||
<!ENTITY or "∨"> <!-- logical or = vee, U+2228 ISOtech -->
|
||||
<!ENTITY cap "∩"> <!-- intersection = cap, U+2229 ISOtech -->
|
||||
<!ENTITY cup "∪"> <!-- union = cup, U+222A ISOtech -->
|
||||
<!ENTITY int "∫"> <!-- integral, U+222B ISOtech -->
|
||||
<!ENTITY there4 "∴"> <!-- therefore, U+2234 ISOtech -->
|
||||
<!ENTITY sim "∼"> <!-- tilde operator = varies with = similar to,
|
||||
U+223C ISOtech -->
|
||||
<!-- tilde operator is NOT the same character as the tilde, U+007E,
|
||||
although the same glyph might be used to represent both -->
|
||||
<!ENTITY cong "≅"> <!-- approximately equal to, U+2245 ISOtech -->
|
||||
<!ENTITY asymp "≈"> <!-- almost equal to = asymptotic to,
|
||||
U+2248 ISOamsr -->
|
||||
<!ENTITY ne "≠"> <!-- not equal to, U+2260 ISOtech -->
|
||||
<!ENTITY equiv "≡"> <!-- identical to, U+2261 ISOtech -->
|
||||
<!ENTITY le "≤"> <!-- less-than or equal to, U+2264 ISOtech -->
|
||||
<!ENTITY ge "≥"> <!-- greater-than or equal to,
|
||||
U+2265 ISOtech -->
|
||||
<!ENTITY sub "⊂"> <!-- subset of, U+2282 ISOtech -->
|
||||
<!ENTITY sup "⊃"> <!-- superset of, U+2283 ISOtech -->
|
||||
<!ENTITY nsub "⊄"> <!-- not a subset of, U+2284 ISOamsn -->
|
||||
<!ENTITY sube "⊆"> <!-- subset of or equal to, U+2286 ISOtech -->
|
||||
<!ENTITY supe "⊇"> <!-- superset of or equal to,
|
||||
U+2287 ISOtech -->
|
||||
<!ENTITY oplus "⊕"> <!-- circled plus = direct sum,
|
||||
U+2295 ISOamsb -->
|
||||
<!ENTITY otimes "⊗"> <!-- circled times = vector product,
|
||||
U+2297 ISOamsb -->
|
||||
<!ENTITY perp "⊥"> <!-- up tack = orthogonal to = perpendicular,
|
||||
U+22A5 ISOtech -->
|
||||
<!ENTITY sdot "⋅"> <!-- dot operator, U+22C5 ISOamsb -->
|
||||
<!-- dot operator is NOT the same character as U+00B7 middle dot -->
|
||||
|
||||
<!-- Miscellaneous Technical -->
|
||||
<!ENTITY lceil "⌈"> <!-- left ceiling = APL upstile,
|
||||
U+2308 ISOamsc -->
|
||||
<!ENTITY rceil "⌉"> <!-- right ceiling, U+2309 ISOamsc -->
|
||||
<!ENTITY lfloor "⌊"> <!-- left floor = APL downstile,
|
||||
U+230A ISOamsc -->
|
||||
<!ENTITY rfloor "⌋"> <!-- right floor, U+230B ISOamsc -->
|
||||
<!ENTITY lang "〈"> <!-- left-pointing angle bracket = bra,
|
||||
U+2329 ISOtech -->
|
||||
<!-- lang is NOT the same character as U+003C 'less than sign'
|
||||
or U+2039 'single left-pointing angle quotation mark' -->
|
||||
<!ENTITY rang "〉"> <!-- right-pointing angle bracket = ket,
|
||||
U+232A ISOtech -->
|
||||
<!-- rang is NOT the same character as U+003E 'greater than sign'
|
||||
or U+203A 'single right-pointing angle quotation mark' -->
|
||||
|
||||
<!-- Geometric Shapes -->
|
||||
<!ENTITY loz "◊"> <!-- lozenge, U+25CA ISOpub -->
|
||||
|
||||
<!-- Miscellaneous Symbols -->
|
||||
<!ENTITY spades "♠"> <!-- black spade suit, U+2660 ISOpub -->
|
||||
<!-- black here seems to mean filled as opposed to hollow -->
|
||||
<!ENTITY clubs "♣"> <!-- black club suit = shamrock,
|
||||
U+2663 ISOpub -->
|
||||
<!ENTITY hearts "♥"> <!-- black heart suit = valentine,
|
||||
U+2665 ISOpub -->
|
||||
<!ENTITY diams "♦"> <!-- black diamond suit, U+2666 ISOpub -->
|
|
@ -2,7 +2,7 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
IN: xml.tests
|
||||
USING: kernel xml tools.test io namespaces make sequences
|
||||
xml.errors xml.entities parser strings xml.data io.files
|
||||
xml.errors xml.entities.html parser strings xml.data io.files
|
||||
xml.writer xml.utilities state-parser continuations assocs
|
||||
sequences.deep accessors io.streams.string ;
|
||||
|
||||
|
@ -62,3 +62,6 @@ SYMBOL: xml-file
|
|||
! A DOCTYPE with a single-quoted SYSTEM literal parses to a doctype-decl holding a system-id.
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk first ] unit-test
|
||||
! Same declaration with a double-quoted literal and a blank before the closing '>'.
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM \"blah.dtd\" >" string>xml-chunk first ] unit-test
|
||||
! Parsing a PUBLIC doctype and writing it back reproduces the input string.
[ t ] [ "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' >" dup string>xml-chunk [ write-xml-chunk ] with-string-writer = ] unit-test
|
||||
! An entity declared with <!ENTITY ...> is expanded where it is referenced.
[ "foo" ] [ "<!ENTITY bar 'foo'><x>&bar;</x>" string>xml children>string ] unit-test
|
||||
! Text without any markup parses to a chunk containing that one string.
[ V{ "hello" } ] [ "hello" string>xml-chunk ] unit-test
|
||||
! Under with-html-entities the named reference &xi; must expand to U+03BE
! (decimal 958). The input has to be the entity reference itself: a literal
! xi character would yield 958 without exercising entity lookup at all.
[ 958 ] [ [ "&xi;" string>xml-chunk ] with-html-entities first first ] unit-test
|
|
@ -4,7 +4,7 @@ USING: accessors arrays ascii assocs combinators
|
|||
combinators.short-circuit fry io.encodings io.encodings.iana
|
||||
io.encodings.string io.encodings.utf16 io.encodings.utf8 kernel make
|
||||
math math.parser namespaces sequences sets splitting state-parser
|
||||
strings xml.char-classes xml.data xml.entities xml.errors ;
|
||||
strings xml.char-classes xml.data xml.entities xml.errors hashtables ;
|
||||
IN: xml.tokenize
|
||||
|
||||
! XML namespace processing: ns = namespace
|
||||
|
@ -74,20 +74,17 @@ SYMBOL: ns-stack
|
|||
|
||||
! -- Parsing strings
|
||||
|
||||
! Appends the expansion of a named entity to the sequence being built by
! `make`: the `entities` table is consulted first, then the table held in
! the `extra-entities` variable; `no-entity` is called when neither has it.
! A numeric expansion is appended as one element (`,`), any other expansion
! is spliced in as a sequence (`%`).
! NOTE(review): this is a rendered diff hunk, so the removed header
! `(parse-entity)` and the removed standalone check appear next to the
! added lines.
: (parse-entity) ( string -- )
|
||||
: parse-named-entity ( string -- )
|
||||
dup entities at [ , ] [
|
||||
prolog-data get standalone>>
|
||||
[ no-entity ] [
|
||||
dup extra-entities get at
|
||||
[ , ] [ no-entity ] ?if
|
||||
] if
|
||||
dup extra-entities get at
|
||||
[ dup number? [ , ] [ % ] if ] [ no-entity ] ?if ! Make less hackish
|
||||
] ?if ;
|
||||
|
||||
! Reads an entity reference up to ';'. A name starting with "#" is a numeric
! reference: the rest is converted with base 16 when it starts with "x",
! base 10 otherwise, and the resulting number is appended with `,`.
! Any other name is handed to the named-entity word.
! NOTE(review): the two final lines are the removed and added versions of
! the same line in the diff.
: parse-entity ( -- )
|
||||
next CHAR: ; take-char next
|
||||
"#" ?head [
|
||||
"x" ?head 16 10 ? base> ,
|
||||
] [ (parse-entity) ] if ;
|
||||
] [ parse-named-entity ] if ;
|
||||
|
||||
: (parse-char) ( ch -- )
|
||||
get-char {
|
||||
|
@ -100,10 +97,6 @@ SYMBOL: ns-stack
|
|||
! Runs (parse-char) on ch inside `make`, collecting everything it emits
! into a string.
: parse-char ( ch -- string )
|
||||
[ (parse-char) ] "" make ;
|
||||
|
||||
! Parses with parse-char using ch, then calls unclosed-quote when get-char
! yields f (presumably end of input -- confirm against state-parser).
: parse-quot ( ch -- string )
|
||||
parse-char get-char
|
||||
[ unclosed-quote ] unless ;
|
||||
|
||||
! Parses character data by calling parse-char with '<' (presumably reading
! up to the start of the next tag -- see (parse-char)).
: parse-text ( -- string )
|
||||
CHAR: < parse-char ;
|
||||
|
||||
|
@ -114,14 +107,18 @@ SYMBOL: ns-stack
|
|||
get-char CHAR: / = dup [ next ] when
|
||||
parse-name swap ;
|
||||
|
||||
! Parses an attribute value: when the current character is ' or " it is
! skipped and passed to parse-quot as the delimiter; otherwise
! quoteless-attr is called with that character.
: parse-attr-value ( -- seq )
|
||||
get-char dup "'\"" member?
|
||||
[ next parse-quot ] [ quoteless-attr ] if ;
|
||||
! Parses with parse-char using the quote character ch, then calls
! unclosed-quote when get-char yields f (presumably end of input -- confirm
! against state-parser).
: (parse-quote) ( ch -- string )
|
||||
parse-char get-char
|
||||
[ unclosed-quote ] unless ;
|
||||
|
||||
! Skips blanks, then parses a quoted value: when the current character is
! ' or " it is skipped and used as the delimiter for (parse-quote);
! otherwise quoteless-attr is called with that character.
: parse-quote ( -- seq )
|
||||
pass-blank get-char dup "'\"" member?
|
||||
[ next (parse-quote) ] [ quoteless-attr ] if ;
|
||||
|
||||
! Parses one `name = value` attribute and appends the { name value } pair
! to the sequence being built by `make`.
! NOTE(review): rendered diff hunk -- the three `with-scope` / parse-attr-value
! lines are the removed version, the parse-name / parse-quote lines the
! added one.
: parse-attr ( -- )
|
||||
[ parse-name ] with-scope
|
||||
pass-blank CHAR: = expect pass-blank
|
||||
[ parse-attr-value ] with-scope
|
||||
parse-name
|
||||
pass-blank CHAR: = expect
|
||||
parse-quote
|
||||
2array , ;
|
||||
|
||||
: (middle-tag) ( -- )
|
||||
|
@ -157,7 +154,7 @@ SYMBOL: ns-stack
|
|||
! Calls bad-doctype-decl unless every element of str satisfies blank?.
: only-blanks ( str -- )
|
||||
[ blank? ] all? [ bad-doctype-decl ] unless ;
|
||||
|
||||
: take-system-literal ( -- str )
|
||||
: take-system-literal ( -- str ) ! replace with parse-quote?
|
||||
pass-blank get-char next {
|
||||
{ CHAR: ' [ "'" take-string ] }
|
||||
{ CHAR: " [ "\"" take-string ] }
|
||||
|
@ -211,15 +208,18 @@ DEFER: direct
|
|||
|
||||
! Reads the entity name (everything up to the first space), skips blanks,
! then reads the definition: a quoted literal when the next character is
! ' or ", otherwise an external id via take-external-id.
! NOTE(review): rendered diff hunk -- the take-system-literal branches are
! the removed lines, the parse-quote branches the added ones.
: take-entity-def ( -- entity-name entity-def )
|
||||
" " take-string pass-blank get-char {
|
||||
{ CHAR: ' [ take-system-literal ] }
|
||||
{ CHAR: " [ take-system-literal ] }
|
||||
{ CHAR: ' [ parse-quote ] }
|
||||
{ CHAR: " [ parse-quote ] }
|
||||
[ drop take-external-id ]
|
||||
} case ;
|
||||
|
||||
! Stores entity-def under the key entity-name in the table held by the
! extra-entities variable, replacing the variable's value with the result
! of ?set-at.
: associate-entity ( entity-name entity-def -- )
|
||||
swap extra-entities [ ?set-at ] change ;
|
||||
|
||||
! Parses the body of an entity declaration. When the first non-blank
! character is '%', it is skipped and the definition is read; otherwise
! the definition is read and (in the added line) also registered with
! associate-entity. Whatever is left before '>' must be blank (only-blanks),
! and the name/definition pair is wrapped with <entity-decl>.
! NOTE(review): rendered diff hunk -- the two default branches are the
! removed and added versions of the same line.
: take-entity-decl ( -- entity-decl )
|
||||
pass-blank get-char {
|
||||
{ CHAR: % [ next pass-blank take-entity-def ] }
|
||||
[ drop take-entity-def ]
|
||||
[ drop take-entity-def 2dup associate-entity ]
|
||||
} case
|
||||
">" take-string only-blanks <entity-decl> ;
|
||||
|
||||
|
@ -257,14 +257,22 @@ DEFER: direct
|
|||
! Passes the version string through when it is "1.0" or "1.1"; otherwise
! calls bad-version.
: good-version ( version -- version )
|
||||
dup { "1.0" "1.1" } member? [ bad-version ] unless ;
|
||||
|
||||
! NOTE(review): rendered diff hunk. The first `prolog-attrs` below and the
! bare `[ yes/no>bool ] [ f ] if*` / `<prolog> ;` lines further down are the
! removed monolithic definition; the prolog-version / prolog-encoding /
! prolog-standalone words and the final `prolog-attrs` are the added ones.
: prolog-attrs ( alist -- prolog )
|
||||
[ T{ name f "" "version" f } swap at
|
||||
[ good-version ] [ versionless-prolog ] if* ] keep
|
||||
[ T{ name f "" "encoding" f } swap at
|
||||
"UTF-8" or ] keep
|
||||
! Looks up the "version" attribute; validates it with good-version, or
! calls versionless-prolog when the attribute is absent.
: prolog-version ( alist -- version )
|
||||
T{ name f "" "version" f } swap at
|
||||
[ good-version ] [ versionless-prolog ] if* ;
|
||||
|
||||
! Looks up the "encoding" attribute, defaulting to "UTF-8" when absent.
: prolog-encoding ( alist -- encoding )
|
||||
T{ name f "" "encoding" f } swap at "UTF-8" or ;
|
||||
|
||||
! Looks up the "standalone" attribute and converts it with yes/no>bool;
! yields f when the attribute is absent. The output is a flag, not a
! version, so the stack effect names it `?`.
: prolog-standalone ( alist -- ? )
|
||||
T{ name f "" "standalone" f } swap at
|
||||
[ yes/no>bool ] [ f ] if*
|
||||
<prolog> ;
|
||||
[ yes/no>bool ] [ f ] if* ;
|
||||
|
||||
! Builds a prolog from the attribute alist: version, encoding and
! standalone flag are extracted by the three words above and passed to
! <prolog> in that order.
: prolog-attrs ( alist -- prolog )
|
||||
[ prolog-version ]
|
||||
[ prolog-encoding ]
|
||||
[ prolog-standalone ]
|
||||
tri <prolog> ;
|
||||
|
||||
SYMBOL: string-input?
|
||||
: decode-input-if ( encoding -- )
|
||||
|
@ -288,7 +296,7 @@ SYMBOL: string-input?
|
|||
: make-tag ( -- tag )
|
||||
{
|
||||
{ [ get-char dup CHAR: ! = ] [ drop next direct ] }
|
||||
{ [ CHAR: ? = ] [ next instruct ] }
|
||||
{ [ CHAR: ? = ] [ next instruct ] }
|
||||
[
|
||||
start-tag [ dup add-ns pop-ns <closer> ]
|
||||
[ middle-tag end-tag ] if
|
||||
|
@ -331,19 +339,17 @@ SYMBOL: string-input?
|
|||
"\u0000bb\u0000bf" expect utf8 decode-input
|
||||
CHAR: < expect make-tag ;
|
||||
|
||||
! Applies decode-input-if to encoding, advances one character with `next`,
! requires `string` to come next in the input (expect-string), then parses
! a tag with make-tag.
: decode-expecting ( encoding string -- tag )
|
||||
[ decode-input-if next ] [ expect-string ] bi* make-tag ;
|
||||
|
||||
! Starts a document as utf16be: switches decoding, advances one character,
! expects '<' and parses the first tag.
! NOTE(review): rendered diff hunk -- the two-line body is the removed
! version, the decode-expecting line the added one.
: start-utf16be ( -- tag )
|
||||
utf16be decode-input-if
|
||||
next CHAR: < expect make-tag ;
|
||||
utf16be "<" decode-expecting ;
|
||||
|
||||
! Starts a document as utf16le: switches decoding, advances one character,
! expects HEX: FE followed by '<', and parses the first tag.
! NOTE(review): rendered diff hunk -- the three-line body is the removed
! version, the decode-expecting line the added one.
: skip-utf16le-bom ( -- tag )
|
||||
utf16le decode-input-if
|
||||
next HEX: FE expect
|
||||
CHAR: < expect make-tag ;
|
||||
utf16le "\u0000fe<" decode-expecting ;
|
||||
|
||||
! Starts a document as utf16be: switches decoding, advances one character,
! expects HEX: FF followed by '<', and parses the first tag.
! NOTE(review): rendered diff hunk -- the three-line body is the removed
! version, the decode-expecting line the added one.
: skip-utf16be-bom ( -- tag )
|
||||
utf16be decode-input-if
|
||||
next HEX: FF expect
|
||||
CHAR: < expect make-tag ;
|
||||
utf16be "\u0000ff<" decode-expecting ;
|
||||
|
||||
: start-document ( -- tag )
|
||||
get-char {
|
||||
|
@ -353,8 +359,6 @@ SYMBOL: string-input?
|
|||
{ HEX: FF [ skip-utf16le-bom ] }
|
||||
{ HEX: FE [ skip-utf16be-bom ] }
|
||||
{ f [ "" ] }
|
||||
[ dup blank?
|
||||
[ drop pass-blank utf8 decode-input-if CHAR: < expect make-tag ]
|
||||
[ 1string ] if ! Replace with proper error?
|
||||
]
|
||||
[ drop utf8 decode-input-if f ]
|
||||
! Same problem as with <e`>, in the case of XML chunks?
|
||||
} case ;
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: help.markup help.syntax kernel xml.data xml.errors
|
||||
xml.writer state-parser xml.tokenize xml.utilities xml.entities
|
||||
strings sequences io ;
|
||||
strings sequences io xml.entities.html ;
|
||||
IN: xml
|
||||
|
||||
HELP: string>xml
|
||||
|
|
|
@ -133,12 +133,12 @@ TUPLE: pull-xml scope ;
|
|||
! Drives quot over the XML elements read from stream, inside state-parse:
! resets the prolog and the namespace stack, hands the result of
! start-document to call-under (in the added line, only when it is not f),
! then continues with sax-loop.
! NOTE(review): rendered diff hunk -- `start-document call-under` is the
! removed line, the `when*` form the added one.
: sax ( stream quot: ( xml-elem -- ) -- )
|
||||
swap [
|
||||
reset-prolog init-ns-stack
|
||||
start-document call-under
|
||||
start-document [ call-under ] when*
|
||||
sax-loop
|
||||
] state-parse ; inline recursive
|
||||
|
||||
! Processes the result of start-document (in the added line, only when it
! is not f), then runs sax-loop with `process` as the element handler.
! NOTE(review): rendered diff hunk -- `start-document process` is the
! removed line, the `when*` form the added one.
: (read-xml) ( -- )
|
||||
start-document process
|
||||
start-document [ process ] when*
|
||||
[ process ] sax-loop ; inline
|
||||
|
||||
: (read-xml-chunk) ( stream -- prolog seq )
|
||||
|
|
Loading…
Reference in New Issue