Merge branch 'master' of git://factorcode.org/git/factor
commit
b919e1848c
|
@ -0,0 +1 @@
|
|||
Doug Coleman
|
|
@ -0,0 +1,18 @@
|
|||
! Copyright (C) 2009 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: editors io.launcher kernel io.directories.search.windows
|
||||
math.parser namespaces sequences io.files arrays windows.shell32
|
||||
io.directories.search ;
|
||||
IN: editors.notepad
|
||||
|
||||
: notepad-path ( -- path )
|
||||
\ notepad-path get [
|
||||
windows-directory t
|
||||
[ "notepad.exe" tail? ] find-file
|
||||
] unless* ;
|
||||
|
||||
: notepad ( file line -- )
|
||||
drop notepad-path swap 2array run-detached drop ;
|
||||
|
||||
[ notepad ] edit-hook set-global
|
||||
|
|
@ -9,6 +9,11 @@ IN: editors.scite
|
|||
\ scite-path get-global [
|
||||
"Scintilla Text Editor" t
|
||||
[ >lower "scite.exe" tail? ] find-in-program-files
|
||||
|
||||
[
|
||||
"SciTE Source Code Editor" t
|
||||
[ >lower "scite.exe" tail? ] find-in-program-files
|
||||
] unless*
|
||||
] unless* ;
|
||||
|
||||
: scite-command ( file line -- cmd )
|
||||
|
|
|
@ -32,21 +32,21 @@ HELP: find-file
|
|||
HELP: find-in-directories
|
||||
{ $values
|
||||
{ "directories" "a sequence of pathnames" } { "bfs?" "a boolean, breadth-first or depth-first" } { "quot" quotation }
|
||||
{ "path'" "a pathname string" }
|
||||
{ "path'/f" "a pathname string or f" }
|
||||
}
|
||||
{ $description "Finds the first file in the input directories matching the predicate quotation in a breadth-first or depth-first traversal." } ;
|
||||
|
||||
HELP: find-all-files
|
||||
{ $values
|
||||
{ "path" "a pathname string" } { "bfs?" "a boolean, breadth-first or depth-first" } { "quot" quotation }
|
||||
{ "paths" "a sequence of pathname strings" }
|
||||
{ "paths/f" "a sequence of pathname strings or f" }
|
||||
}
|
||||
{ $description "Finds all files in the input directory matching the predicate quotation in a breadth-first or depth-first traversal." } ;
|
||||
|
||||
HELP: find-all-in-directories
|
||||
{ $values
|
||||
{ "directories" "a sequence of directory paths" } { "bfs?" "a boolean, breadth-first or depth-first" } { "quot" quotation }
|
||||
{ "paths" "a sequence of pathname strings" }
|
||||
{ "paths/f" "a sequence of pathname strings or f" }
|
||||
}
|
||||
{ $description "Finds all files in the input directories matching the predicate quotation in a breadth-first or depth-first traversal." } ;
|
||||
|
||||
|
|
|
@ -46,17 +46,21 @@ PRIVATE>
|
|||
[ ] accumulator [ each-file ] dip ;
|
||||
|
||||
: find-file ( path bfs? quot: ( obj -- ? ) -- path/f )
|
||||
[ <directory-iterator> ] dip
|
||||
[ keep and ] curry iterate-directory ; inline
|
||||
'[
|
||||
_ _ _ [ <directory-iterator> ] dip
|
||||
[ keep and ] curry iterate-directory
|
||||
] [ drop f ] recover ; inline
|
||||
|
||||
: find-all-files ( path bfs? quot: ( obj -- ? ) -- paths )
|
||||
[ <directory-iterator> ] dip
|
||||
pusher [ [ f ] compose iterate-directory drop ] dip ; inline
|
||||
: find-all-files ( path bfs? quot: ( obj -- ? ) -- paths/f )
|
||||
'[
|
||||
_ _ _ [ <directory-iterator> ] dip
|
||||
pusher [ [ f ] compose iterate-directory drop ] dip
|
||||
] [ drop f ] recover ; inline
|
||||
|
||||
: find-in-directories ( directories bfs? quot: ( obj -- ? ) -- path' )
|
||||
: find-in-directories ( directories bfs? quot: ( obj -- ? ) -- path'/f )
|
||||
'[ _ _ find-file ] attempt-all ;
|
||||
|
||||
: find-all-in-directories ( directories bfs? quot: ( obj -- ? ) -- paths )
|
||||
: find-all-in-directories ( directories bfs? quot: ( obj -- ? ) -- paths/f )
|
||||
'[ _ _ find-all-files ] map concat ;
|
||||
|
||||
os windows? [ "io.directories.search.windows" require ] when
|
||||
|
|
|
@ -5,7 +5,7 @@ io.directories.search ;
|
|||
IN: io.directories.search.windows
|
||||
|
||||
: program-files-directories ( -- array )
|
||||
program-files program-files-x86 2array ; inline
|
||||
program-files program-files-x86 2array harvest ; inline
|
||||
|
||||
: find-in-program-files ( base-directory bfs? quot -- path )
|
||||
[
|
||||
|
|
|
@ -46,3 +46,10 @@ pack strings tools.test ;
|
|||
[ f ] [ "" [ read-c-string ] with-string-reader ] unit-test
|
||||
[ 5 ] [ "FRAM\0\u000005\0\0\0\0\0\0\0" [ read-c-string drop read-u64 ] with-string-reader ] unit-test
|
||||
|
||||
[ 9 ] [ "iic" packed-length ] unit-test
|
||||
[ "iii" read-packed-le ] must-infer
|
||||
[ "iii" read-packed-be ] must-infer
|
||||
[ "iii" read-packed-native ] must-infer
|
||||
[ "iii" unpack-le ] must-infer
|
||||
[ "iii" unpack-be ] must-infer
|
||||
[ "iii" unpack-native ] must-infer
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
! Copyright (C) 2009 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: alien alien.c-types arrays assocs byte-arrays io
|
||||
io.binary io.streams.string kernel math math.parser namespaces
|
||||
make parser prettyprint quotations sequences strings vectors
|
||||
words macros math.functions math.bitwise ;
|
||||
words macros math.functions math.bitwise fry ;
|
||||
IN: pack
|
||||
|
||||
SYMBOL: big-endian
|
||||
|
@ -9,6 +11,13 @@ SYMBOL: big-endian
|
|||
: big-endian? ( -- ? )
|
||||
1 <int> *char zero? ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: set-big-endian ( -- )
|
||||
big-endian? big-endian set ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: >endian ( obj n -- str )
|
||||
big-endian get [ >be ] [ >le ] if ; inline
|
||||
|
||||
|
@ -39,6 +48,8 @@ M: string b, ( n string -- ) heap-size b, ;
|
|||
: double, ( n -- ) double>bits 8 b, ;
|
||||
: c-string, ( str -- ) % 0 u8, ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: (>128-ber) ( n -- )
|
||||
dup 0 > [
|
||||
[ HEX: 7f bitand HEX: 80 bitor , ] keep -7 shift
|
||||
|
@ -47,6 +58,8 @@ M: string b, ( n string -- ) heap-size b, ;
|
|||
drop
|
||||
] if ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: >128-ber ( n -- str )
|
||||
[
|
||||
[ HEX: 7f bitand , ] keep -7 shift
|
||||
|
@ -70,7 +83,7 @@ M: string b, ( n string -- ) heap-size b, ;
|
|||
: read-s32 ( -- n ) 4 read-signed ;
|
||||
: read-u32 ( -- n ) 4 read-unsigned ;
|
||||
: read-s64 ( -- n ) 8 read-signed ;
|
||||
: read-u64 ( -- n ) 8 read-signed ;
|
||||
: read-u64 ( -- n ) 8 read-unsigned ;
|
||||
: read-s128 ( -- n ) 16 read-signed ;
|
||||
: read-u128 ( -- n ) 16 read-unsigned ;
|
||||
|
||||
|
@ -81,7 +94,7 @@ M: string b, ( n string -- ) heap-size b, ;
|
|||
8 read endian> bits>double ;
|
||||
|
||||
: read-c-string ( -- str/f )
|
||||
"\0" read-until [ drop f ] unless ;
|
||||
"\0" read-until swap and ;
|
||||
|
||||
: read-c-string* ( n -- str/f )
|
||||
read [ zero? ] trim-right [ f ] when-empty ;
|
||||
|
@ -94,7 +107,9 @@ M: string b, ( n string -- ) heap-size b, ;
|
|||
: read-128-ber ( -- n )
|
||||
0 (read-128-ber) ;
|
||||
|
||||
: pack-table ( -- hash )
|
||||
<PRIVATE
|
||||
|
||||
CONSTANT: pack-table
|
||||
H{
|
||||
{ CHAR: c s8, }
|
||||
{ CHAR: C u8, }
|
||||
|
@ -110,9 +125,9 @@ M: string b, ( n string -- ) heap-size b, ;
|
|||
{ CHAR: F float, }
|
||||
{ CHAR: d double, }
|
||||
{ CHAR: D double, }
|
||||
} ;
|
||||
}
|
||||
|
||||
: unpack-table ( -- hash )
|
||||
CONSTANT: unpack-table
|
||||
H{
|
||||
{ CHAR: c read-s8 }
|
||||
{ CHAR: C read-u8 }
|
||||
|
@ -128,47 +143,79 @@ M: string b, ( n string -- ) heap-size b, ;
|
|||
{ CHAR: F read-float }
|
||||
{ CHAR: d read-double }
|
||||
{ CHAR: D read-double }
|
||||
} ;
|
||||
}
|
||||
|
||||
MACRO: (pack) ( seq str -- quot )
|
||||
[
|
||||
[
|
||||
[
|
||||
swap , pack-table at ,
|
||||
] 2each
|
||||
] [ ] make 1quotation %
|
||||
[ B{ } make ] %
|
||||
] [ ] make ;
|
||||
CONSTANT: packed-length-table
|
||||
H{
|
||||
{ CHAR: c 1 }
|
||||
{ CHAR: C 1 }
|
||||
{ CHAR: s 2 }
|
||||
{ CHAR: S 2 }
|
||||
{ CHAR: t 3 }
|
||||
{ CHAR: T 3 }
|
||||
{ CHAR: i 4 }
|
||||
{ CHAR: I 4 }
|
||||
{ CHAR: q 8 }
|
||||
{ CHAR: Q 8 }
|
||||
{ CHAR: f 4 }
|
||||
{ CHAR: F 4 }
|
||||
{ CHAR: d 8 }
|
||||
{ CHAR: D 8 }
|
||||
}
|
||||
|
||||
MACRO: pack ( seq str -- quot )
|
||||
[ pack-table at 1quotation '[ _ @ ] ] [ ] 2map-as concat
|
||||
'[ _ B{ } make ] ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: pack-native ( seq str -- seq )
|
||||
[
|
||||
big-endian? big-endian set (pack)
|
||||
] with-scope ;
|
||||
[ set-big-endian pack ] with-scope ; inline
|
||||
|
||||
: pack-be ( seq str -- seq )
|
||||
[ big-endian on (pack) ] with-scope ;
|
||||
[ big-endian on pack ] with-scope ; inline
|
||||
|
||||
: pack-le ( seq str -- seq )
|
||||
[ big-endian off (pack) ] with-scope ;
|
||||
[ big-endian off pack ] with-scope ; inline
|
||||
|
||||
<PRIVATE
|
||||
|
||||
MACRO: (unpack) ( str -- quot )
|
||||
[
|
||||
[
|
||||
[ unpack-table at , \ , , ] each
|
||||
] [ ] make
|
||||
1quotation [ { } make ] append
|
||||
1quotation %
|
||||
\ with-string-reader ,
|
||||
] [ ] make ;
|
||||
MACRO: unpack ( str -- quot )
|
||||
[ unpack-table at 1quotation '[ @ , ] ] { } map-as concat
|
||||
'[ [ _ { } make ] with-string-reader ] ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: unpack-native ( seq str -- seq )
|
||||
[
|
||||
big-endian? big-endian set (unpack)
|
||||
] with-scope ;
|
||||
[ set-big-endian unpack ] with-scope ; inline
|
||||
|
||||
: unpack-be ( seq str -- seq )
|
||||
[ big-endian on (unpack) ] with-scope ;
|
||||
[ big-endian on unpack ] with-scope ; inline
|
||||
|
||||
: unpack-le ( seq str -- seq )
|
||||
[ big-endian off (unpack) ] with-scope ;
|
||||
[ big-endian off unpack ] with-scope ; inline
|
||||
|
||||
: packed-length ( str -- n )
|
||||
[ packed-length-table at ] sigma ;
|
||||
|
||||
ERROR: packed-read-fail str bytes ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: read-packed-bytes ( str -- bytes )
|
||||
dup packed-length [ read dup length ] keep =
|
||||
[ nip ] [ packed-read-fail ] if ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: read-packed ( str quot -- seq )
|
||||
[ read-packed-bytes ] swap bi ; inline
|
||||
|
||||
: read-packed-le ( str -- seq )
|
||||
[ unpack-le ] read-packed ; inline
|
||||
|
||||
: read-packed-be ( str -- seq )
|
||||
[ unpack-be ] read-packed ; inline
|
||||
|
||||
: read-packed-native ( str -- seq )
|
||||
[ unpack-native ] read-packed ; inline
|
||||
|
|
|
@ -39,16 +39,14 @@ ERROR: roman-range-error n ;
|
|||
PRIVATE>
|
||||
|
||||
: >roman ( n -- str )
|
||||
dup roman-range-check [
|
||||
(>roman)
|
||||
] "" make ;
|
||||
dup roman-range-check
|
||||
[ (>roman) ] "" make ;
|
||||
|
||||
: >ROMAN ( n -- str ) >roman >upper ;
|
||||
|
||||
: roman> ( str -- n )
|
||||
>lower [ roman<= ] monotonic-split [
|
||||
(roman>)
|
||||
] map sum ;
|
||||
>lower [ roman<= ] monotonic-split
|
||||
[ (roman>) ] sigma ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: xml.utilities kernel assocs xml.generator math.order
|
||||
strings sequences xml.data xml.writer
|
||||
io.streams.string combinators xml xml.entities io.files io
|
||||
io.streams.string combinators xml xml.entities.html io.files io
|
||||
http.client namespaces make xml.generator hashtables
|
||||
calendar.format accessors continuations urls present ;
|
||||
IN: syndication
|
||||
|
|
|
@ -61,7 +61,7 @@ SYMBOL: table
|
|||
: eval-seq ( seq -- seq ) [ dup word? [ execute ] when ] map ;
|
||||
|
||||
: (set-table) ( class1 class2 val -- )
|
||||
-rot table get nth [ swap or ] change-nth ;
|
||||
[ table get nth ] dip '[ _ or ] change-nth ;
|
||||
|
||||
: set-table ( classes1 classes2 val -- )
|
||||
[ [ eval-seq ] bi@ ] dip
|
||||
|
@ -199,8 +199,8 @@ to: word-table
|
|||
: walk-down ( str i -- j )
|
||||
dupd (walk-down) [ 1- (walk-down) ] [ drop f ] if* ;
|
||||
|
||||
: word-break? ( table-entry i str -- ? )
|
||||
spin {
|
||||
: word-break? ( str i table-entry -- ? )
|
||||
{
|
||||
{ t [ 2drop f ] }
|
||||
{ f [ 2drop t ] }
|
||||
{ check-letter-after
|
||||
|
@ -214,10 +214,10 @@ to: word-table
|
|||
} case ;
|
||||
|
||||
:: word-break-next ( old-class new-char i str -- next-class ? )
|
||||
new-char dup format/extended?
|
||||
[ drop old-class dup { 1 2 3 } member? ] [
|
||||
word-break-prop old-class over word-table-nth
|
||||
i str word-break?
|
||||
new-char format/extended?
|
||||
[ old-class dup { 1 2 3 } member? ] [
|
||||
new-char word-break-prop old-class over word-table-nth
|
||||
[ str i ] dip word-break?
|
||||
] if ;
|
||||
|
||||
PRIVATE>
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: alien alien.c-types alien.strings alien.syntax
|
||||
combinators io.encodings.utf16n io.files io.pathnames kernel
|
||||
windows windows.com windows.com.syntax windows.ole32
|
||||
windows.user32 ;
|
||||
windows windows.com windows.com.syntax windows.user32
|
||||
windows.ole32 ;
|
||||
IN: windows.shell32
|
||||
|
||||
CONSTANT: CSIDL_DESKTOP HEX: 00
|
||||
|
@ -88,13 +88,10 @@ ALIAS: ShellExecute ShellExecuteW
|
|||
: open-in-explorer ( dir -- )
|
||||
f "open" rot (normalize-path) f f SW_SHOWNORMAL ShellExecute drop ;
|
||||
|
||||
: shell32-error ( n -- )
|
||||
ole32-error ; inline
|
||||
|
||||
: shell32-directory ( n -- str )
|
||||
f swap f SHGFP_TYPE_DEFAULT
|
||||
MAX_UNICODE_PATH "ushort" <c-array>
|
||||
[ SHGetFolderPath shell32-error ] keep utf16n alien>string ;
|
||||
[ SHGetFolderPath drop ] keep utf16n alien>string ;
|
||||
|
||||
: desktop ( -- str )
|
||||
CSIDL_DESKTOPDIRECTORY shell32-directory ;
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
! Copyright (C) 2005, 2006 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: namespaces make kernel assocs sequences fry ;
|
||||
USING: namespaces make kernel assocs sequences fry values
|
||||
io.files io.encodings.binary ;
|
||||
IN: xml.entities
|
||||
|
||||
: entities-out
|
||||
|
@ -36,265 +37,7 @@ IN: xml.entities
|
|||
{ "quot" CHAR: " }
|
||||
} ;
|
||||
|
||||
: html-entities
|
||||
#! generated from:
|
||||
#! http://www.w3.org/TR/REC-html40/sgml/entities.html
|
||||
H{
|
||||
{ "nbsp" 160 }
|
||||
{ "iexcl" 161 }
|
||||
{ "cent" 162 }
|
||||
{ "pound" 163 }
|
||||
{ "curren" 164 }
|
||||
{ "yen" 165 }
|
||||
{ "brvbar" 166 }
|
||||
{ "sect" 167 }
|
||||
{ "uml" 168 }
|
||||
{ "copy" 169 }
|
||||
{ "ordf" 170 }
|
||||
{ "laquo" 171 }
|
||||
{ "not" 172 }
|
||||
{ "shy" 173 }
|
||||
{ "reg" 174 }
|
||||
{ "macr" 175 }
|
||||
{ "deg" 176 }
|
||||
{ "plusmn" 177 }
|
||||
{ "sup2" 178 }
|
||||
{ "sup3" 179 }
|
||||
{ "acute" 180 }
|
||||
{ "micro" 181 }
|
||||
{ "para" 182 }
|
||||
{ "middot" 183 }
|
||||
{ "cedil" 184 }
|
||||
{ "sup1" 185 }
|
||||
{ "ordm" 186 }
|
||||
{ "raquo" 187 }
|
||||
{ "frac14" 188 }
|
||||
{ "frac12" 189 }
|
||||
{ "frac34" 190 }
|
||||
{ "iquest" 191 }
|
||||
{ "Agrave" 192 }
|
||||
{ "Aacute" 193 }
|
||||
{ "Acirc" 194 }
|
||||
{ "Atilde" 195 }
|
||||
{ "Auml" 196 }
|
||||
{ "Aring" 197 }
|
||||
{ "AElig" 198 }
|
||||
{ "Ccedil" 199 }
|
||||
{ "Egrave" 200 }
|
||||
{ "Eacute" 201 }
|
||||
{ "Ecirc" 202 }
|
||||
{ "Euml" 203 }
|
||||
{ "Igrave" 204 }
|
||||
{ "Iacute" 205 }
|
||||
{ "Icirc" 206 }
|
||||
{ "Iuml" 207 }
|
||||
{ "ETH" 208 }
|
||||
{ "Ntilde" 209 }
|
||||
{ "Ograve" 210 }
|
||||
{ "Oacute" 211 }
|
||||
{ "Ocirc" 212 }
|
||||
{ "Otilde" 213 }
|
||||
{ "Ouml" 214 }
|
||||
{ "times" 215 }
|
||||
{ "Oslash" 216 }
|
||||
{ "Ugrave" 217 }
|
||||
{ "Uacute" 218 }
|
||||
{ "Ucirc" 219 }
|
||||
{ "Uuml" 220 }
|
||||
{ "Yacute" 221 }
|
||||
{ "THORN" 222 }
|
||||
{ "szlig" 223 }
|
||||
{ "agrave" 224 }
|
||||
{ "aacute" 225 }
|
||||
{ "acirc" 226 }
|
||||
{ "atilde" 227 }
|
||||
{ "auml" 228 }
|
||||
{ "aring" 229 }
|
||||
{ "aelig" 230 }
|
||||
{ "ccedil" 231 }
|
||||
{ "egrave" 232 }
|
||||
{ "eacute" 233 }
|
||||
{ "ecirc" 234 }
|
||||
{ "euml" 235 }
|
||||
{ "igrave" 236 }
|
||||
{ "iacute" 237 }
|
||||
{ "icirc" 238 }
|
||||
{ "iuml" 239 }
|
||||
{ "eth" 240 }
|
||||
{ "ntilde" 241 }
|
||||
{ "ograve" 242 }
|
||||
{ "oacute" 243 }
|
||||
{ "ocirc" 244 }
|
||||
{ "otilde" 245 }
|
||||
{ "ouml" 246 }
|
||||
{ "divide" 247 }
|
||||
{ "oslash" 248 }
|
||||
{ "ugrave" 249 }
|
||||
{ "uacute" 250 }
|
||||
{ "ucirc" 251 }
|
||||
{ "uuml" 252 }
|
||||
{ "yacute" 253 }
|
||||
{ "thorn" 254 }
|
||||
{ "yuml" 255 }
|
||||
{ "fnof" 402 }
|
||||
{ "Alpha" 913 }
|
||||
{ "Beta" 914 }
|
||||
{ "Gamma" 915 }
|
||||
{ "Delta" 916 }
|
||||
{ "Epsilon" 917 }
|
||||
{ "Zeta" 918 }
|
||||
{ "Eta" 919 }
|
||||
{ "Theta" 920 }
|
||||
{ "Iota" 921 }
|
||||
{ "Kappa" 922 }
|
||||
{ "Lambda" 923 }
|
||||
{ "Mu" 924 }
|
||||
{ "Nu" 925 }
|
||||
{ "Xi" 926 }
|
||||
{ "Omicron" 927 }
|
||||
{ "Pi" 928 }
|
||||
{ "Rho" 929 }
|
||||
{ "Sigma" 931 }
|
||||
{ "Tau" 932 }
|
||||
{ "Upsilon" 933 }
|
||||
{ "Phi" 934 }
|
||||
{ "Chi" 935 }
|
||||
{ "Psi" 936 }
|
||||
{ "Omega" 937 }
|
||||
{ "alpha" 945 }
|
||||
{ "beta" 946 }
|
||||
{ "gamma" 947 }
|
||||
{ "delta" 948 }
|
||||
{ "epsilon" 949 }
|
||||
{ "zeta" 950 }
|
||||
{ "eta" 951 }
|
||||
{ "theta" 952 }
|
||||
{ "iota" 953 }
|
||||
{ "kappa" 954 }
|
||||
{ "lambda" 955 }
|
||||
{ "mu" 956 }
|
||||
{ "nu" 957 }
|
||||
{ "xi" 958 }
|
||||
{ "omicron" 959 }
|
||||
{ "pi" 960 }
|
||||
{ "rho" 961 }
|
||||
{ "sigmaf" 962 }
|
||||
{ "sigma" 963 }
|
||||
{ "tau" 964 }
|
||||
{ "upsilon" 965 }
|
||||
{ "phi" 966 }
|
||||
{ "chi" 967 }
|
||||
{ "psi" 968 }
|
||||
{ "omega" 969 }
|
||||
{ "thetasym" 977 }
|
||||
{ "upsih" 978 }
|
||||
{ "piv" 982 }
|
||||
{ "bull" 8226 }
|
||||
{ "hellip" 8230 }
|
||||
{ "prime" 8242 }
|
||||
{ "Prime" 8243 }
|
||||
{ "oline" 8254 }
|
||||
{ "frasl" 8260 }
|
||||
{ "weierp" 8472 }
|
||||
{ "image" 8465 }
|
||||
{ "real" 8476 }
|
||||
{ "trade" 8482 }
|
||||
{ "alefsym" 8501 }
|
||||
{ "larr" 8592 }
|
||||
{ "uarr" 8593 }
|
||||
{ "rarr" 8594 }
|
||||
{ "darr" 8595 }
|
||||
{ "harr" 8596 }
|
||||
{ "crarr" 8629 }
|
||||
{ "lArr" 8656 }
|
||||
{ "uArr" 8657 }
|
||||
{ "rArr" 8658 }
|
||||
{ "dArr" 8659 }
|
||||
{ "hArr" 8660 }
|
||||
{ "forall" 8704 }
|
||||
{ "part" 8706 }
|
||||
{ "exist" 8707 }
|
||||
{ "empty" 8709 }
|
||||
{ "nabla" 8711 }
|
||||
{ "isin" 8712 }
|
||||
{ "notin" 8713 }
|
||||
{ "ni" 8715 }
|
||||
{ "prod" 8719 }
|
||||
{ "sum" 8721 }
|
||||
{ "minus" 8722 }
|
||||
{ "lowast" 8727 }
|
||||
{ "radic" 8730 }
|
||||
{ "prop" 8733 }
|
||||
{ "infin" 8734 }
|
||||
{ "ang" 8736 }
|
||||
{ "and" 8743 }
|
||||
{ "or" 8744 }
|
||||
{ "cap" 8745 }
|
||||
{ "cup" 8746 }
|
||||
{ "int" 8747 }
|
||||
{ "there4" 8756 }
|
||||
{ "sim" 8764 }
|
||||
{ "cong" 8773 }
|
||||
{ "asymp" 8776 }
|
||||
{ "ne" 8800 }
|
||||
{ "equiv" 8801 }
|
||||
{ "le" 8804 }
|
||||
{ "ge" 8805 }
|
||||
{ "sub" 8834 }
|
||||
{ "sup" 8835 }
|
||||
{ "nsub" 8836 }
|
||||
{ "sube" 8838 }
|
||||
{ "supe" 8839 }
|
||||
{ "oplus" 8853 }
|
||||
{ "otimes" 8855 }
|
||||
{ "perp" 8869 }
|
||||
{ "sdot" 8901 }
|
||||
{ "lceil" 8968 }
|
||||
{ "rceil" 8969 }
|
||||
{ "lfloor" 8970 }
|
||||
{ "rfloor" 8971 }
|
||||
{ "lang" 9001 }
|
||||
{ "rang" 9002 }
|
||||
{ "loz" 9674 }
|
||||
{ "spades" 9824 }
|
||||
{ "clubs" 9827 }
|
||||
{ "hearts" 9829 }
|
||||
{ "diams" 9830 }
|
||||
{ "OElig" 338 }
|
||||
{ "oelig" 339 }
|
||||
{ "Scaron" 352 }
|
||||
{ "scaron" 353 }
|
||||
{ "Yuml" 376 }
|
||||
{ "circ" 710 }
|
||||
{ "tilde" 732 }
|
||||
{ "ensp" 8194 }
|
||||
{ "emsp" 8195 }
|
||||
{ "thinsp" 8201 }
|
||||
{ "zwnj" 8204 }
|
||||
{ "zwj" 8205 }
|
||||
{ "lrm" 8206 }
|
||||
{ "rlm" 8207 }
|
||||
{ "ndash" 8211 }
|
||||
{ "mdash" 8212 }
|
||||
{ "lsquo" 8216 }
|
||||
{ "rsquo" 8217 }
|
||||
{ "sbquo" 8218 }
|
||||
{ "ldquo" 8220 }
|
||||
{ "rdquo" 8221 }
|
||||
{ "bdquo" 8222 }
|
||||
{ "dagger" 8224 }
|
||||
{ "Dagger" 8225 }
|
||||
{ "permil" 8240 }
|
||||
{ "lsaquo" 8249 }
|
||||
{ "rsaquo" 8250 }
|
||||
{ "euro" 8364 }
|
||||
} ;
|
||||
|
||||
SYMBOL: extra-entities
|
||||
f extra-entities set-global
|
||||
|
||||
: with-entities ( entities quot -- )
|
||||
[ swap extra-entities set call ] with-scope ; inline
|
||||
|
||||
: with-html-entities ( quot -- )
|
||||
html-entities swap with-entities ; inline
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Daniel Ehrenberg
|
|
@ -0,0 +1,4 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: tools.test xml.entities.html ;
|
||||
IN: xml.entities.html.tests
|
|
@ -0,0 +1,22 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: assocs io.encodings.binary io.files kernel namespaces sequences
|
||||
values xml xml.entities ;
|
||||
IN: xml.entities.html
|
||||
|
||||
VALUE: html-entities
|
||||
|
||||
: read-entities-file ( file -- table )
|
||||
f swap binary <file-reader>
|
||||
[ 2drop extra-entities get ] sax ;
|
||||
|
||||
: get-html ( -- table )
|
||||
{ "lat1" "special" "symbol" } [
|
||||
"resource:basis/xml/entities/html/xhtml-"
|
||||
swap ".ent" 3append read-entities-file
|
||||
] map first3 assoc-union assoc-union ;
|
||||
|
||||
get-html to: html-entities
|
||||
|
||||
: with-html-entities ( quot -- )
|
||||
html-entities swap with-entities ; inline
|
|
@ -0,0 +1,196 @@
|
|||
<!-- Portions (C) International Organization for Standardization 1986
|
||||
Permission to copy in any form is granted for use with
|
||||
conforming SGML systems and applications as defined in
|
||||
ISO 8879, provided this notice is included in all copies.
|
||||
-->
|
||||
<!-- Character entity set. Typical invocation:
|
||||
<!ENTITY % HTMLlat1 PUBLIC
|
||||
"-//W3C//ENTITIES Latin 1 for XHTML//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent">
|
||||
%HTMLlat1;
|
||||
-->
|
||||
|
||||
<!ENTITY nbsp " "> <!-- no-break space = non-breaking space,
|
||||
U+00A0 ISOnum -->
|
||||
<!ENTITY iexcl "¡"> <!-- inverted exclamation mark, U+00A1 ISOnum -->
|
||||
<!ENTITY cent "¢"> <!-- cent sign, U+00A2 ISOnum -->
|
||||
<!ENTITY pound "£"> <!-- pound sign, U+00A3 ISOnum -->
|
||||
<!ENTITY curren "¤"> <!-- currency sign, U+00A4 ISOnum -->
|
||||
<!ENTITY yen "¥"> <!-- yen sign = yuan sign, U+00A5 ISOnum -->
|
||||
<!ENTITY brvbar "¦"> <!-- broken bar = broken vertical bar,
|
||||
U+00A6 ISOnum -->
|
||||
<!ENTITY sect "§"> <!-- section sign, U+00A7 ISOnum -->
|
||||
<!ENTITY uml "¨"> <!-- diaeresis = spacing diaeresis,
|
||||
U+00A8 ISOdia -->
|
||||
<!ENTITY copy "©"> <!-- copyright sign, U+00A9 ISOnum -->
|
||||
<!ENTITY ordf "ª"> <!-- feminine ordinal indicator, U+00AA ISOnum -->
|
||||
<!ENTITY laquo "«"> <!-- left-pointing double angle quotation mark
|
||||
= left pointing guillemet, U+00AB ISOnum -->
|
||||
<!ENTITY not "¬"> <!-- not sign = angled dash,
|
||||
U+00AC ISOnum -->
|
||||
<!ENTITY shy "­"> <!-- soft hyphen = discretionary hyphen,
|
||||
U+00AD ISOnum -->
|
||||
<!ENTITY reg "®"> <!-- registered sign = registered trade mark sign,
|
||||
U+00AE ISOnum -->
|
||||
<!ENTITY macr "¯"> <!-- macron = spacing macron = overline
|
||||
= APL overbar, U+00AF ISOdia -->
|
||||
<!ENTITY deg "°"> <!-- degree sign, U+00B0 ISOnum -->
|
||||
<!ENTITY plusmn "±"> <!-- plus-minus sign = plus-or-minus sign,
|
||||
U+00B1 ISOnum -->
|
||||
<!ENTITY sup2 "²"> <!-- superscript two = superscript digit two
|
||||
= squared, U+00B2 ISOnum -->
|
||||
<!ENTITY sup3 "³"> <!-- superscript three = superscript digit three
|
||||
= cubed, U+00B3 ISOnum -->
|
||||
<!ENTITY acute "´"> <!-- acute accent = spacing acute,
|
||||
U+00B4 ISOdia -->
|
||||
<!ENTITY micro "µ"> <!-- micro sign, U+00B5 ISOnum -->
|
||||
<!ENTITY para "¶"> <!-- pilcrow sign = paragraph sign,
|
||||
U+00B6 ISOnum -->
|
||||
<!ENTITY middot "·"> <!-- middle dot = Georgian comma
|
||||
= Greek middle dot, U+00B7 ISOnum -->
|
||||
<!ENTITY cedil "¸"> <!-- cedilla = spacing cedilla, U+00B8 ISOdia -->
|
||||
<!ENTITY sup1 "¹"> <!-- superscript one = superscript digit one,
|
||||
U+00B9 ISOnum -->
|
||||
<!ENTITY ordm "º"> <!-- masculine ordinal indicator,
|
||||
U+00BA ISOnum -->
|
||||
<!ENTITY raquo "»"> <!-- right-pointing double angle quotation mark
|
||||
= right pointing guillemet, U+00BB ISOnum -->
|
||||
<!ENTITY frac14 "¼"> <!-- vulgar fraction one quarter
|
||||
= fraction one quarter, U+00BC ISOnum -->
|
||||
<!ENTITY frac12 "½"> <!-- vulgar fraction one half
|
||||
= fraction one half, U+00BD ISOnum -->
|
||||
<!ENTITY frac34 "¾"> <!-- vulgar fraction three quarters
|
||||
= fraction three quarters, U+00BE ISOnum -->
|
||||
<!ENTITY iquest "¿"> <!-- inverted question mark
|
||||
= turned question mark, U+00BF ISOnum -->
|
||||
<!ENTITY Agrave "À"> <!-- latin capital letter A with grave
|
||||
= latin capital letter A grave,
|
||||
U+00C0 ISOlat1 -->
|
||||
<!ENTITY Aacute "Á"> <!-- latin capital letter A with acute,
|
||||
U+00C1 ISOlat1 -->
|
||||
<!ENTITY Acirc "Â"> <!-- latin capital letter A with circumflex,
|
||||
U+00C2 ISOlat1 -->
|
||||
<!ENTITY Atilde "Ã"> <!-- latin capital letter A with tilde,
|
||||
U+00C3 ISOlat1 -->
|
||||
<!ENTITY Auml "Ä"> <!-- latin capital letter A with diaeresis,
|
||||
U+00C4 ISOlat1 -->
|
||||
<!ENTITY Aring "Å"> <!-- latin capital letter A with ring above
|
||||
= latin capital letter A ring,
|
||||
U+00C5 ISOlat1 -->
|
||||
<!ENTITY AElig "Æ"> <!-- latin capital letter AE
|
||||
= latin capital ligature AE,
|
||||
U+00C6 ISOlat1 -->
|
||||
<!ENTITY Ccedil "Ç"> <!-- latin capital letter C with cedilla,
|
||||
U+00C7 ISOlat1 -->
|
||||
<!ENTITY Egrave "È"> <!-- latin capital letter E with grave,
|
||||
U+00C8 ISOlat1 -->
|
||||
<!ENTITY Eacute "É"> <!-- latin capital letter E with acute,
|
||||
U+00C9 ISOlat1 -->
|
||||
<!ENTITY Ecirc "Ê"> <!-- latin capital letter E with circumflex,
|
||||
U+00CA ISOlat1 -->
|
||||
<!ENTITY Euml "Ë"> <!-- latin capital letter E with diaeresis,
|
||||
U+00CB ISOlat1 -->
|
||||
<!ENTITY Igrave "Ì"> <!-- latin capital letter I with grave,
|
||||
U+00CC ISOlat1 -->
|
||||
<!ENTITY Iacute "Í"> <!-- latin capital letter I with acute,
|
||||
U+00CD ISOlat1 -->
|
||||
<!ENTITY Icirc "Î"> <!-- latin capital letter I with circumflex,
|
||||
U+00CE ISOlat1 -->
|
||||
<!ENTITY Iuml "Ï"> <!-- latin capital letter I with diaeresis,
|
||||
U+00CF ISOlat1 -->
|
||||
<!ENTITY ETH "Ð"> <!-- latin capital letter ETH, U+00D0 ISOlat1 -->
|
||||
<!ENTITY Ntilde "Ñ"> <!-- latin capital letter N with tilde,
|
||||
U+00D1 ISOlat1 -->
|
||||
<!ENTITY Ograve "Ò"> <!-- latin capital letter O with grave,
|
||||
U+00D2 ISOlat1 -->
|
||||
<!ENTITY Oacute "Ó"> <!-- latin capital letter O with acute,
|
||||
U+00D3 ISOlat1 -->
|
||||
<!ENTITY Ocirc "Ô"> <!-- latin capital letter O with circumflex,
|
||||
U+00D4 ISOlat1 -->
|
||||
<!ENTITY Otilde "Õ"> <!-- latin capital letter O with tilde,
|
||||
U+00D5 ISOlat1 -->
|
||||
<!ENTITY Ouml "Ö"> <!-- latin capital letter O with diaeresis,
|
||||
U+00D6 ISOlat1 -->
|
||||
<!ENTITY times "×"> <!-- multiplication sign, U+00D7 ISOnum -->
|
||||
<!ENTITY Oslash "Ø"> <!-- latin capital letter O with stroke
|
||||
= latin capital letter O slash,
|
||||
U+00D8 ISOlat1 -->
|
||||
<!ENTITY Ugrave "Ù"> <!-- latin capital letter U with grave,
|
||||
U+00D9 ISOlat1 -->
|
||||
<!ENTITY Uacute "Ú"> <!-- latin capital letter U with acute,
|
||||
U+00DA ISOlat1 -->
|
||||
<!ENTITY Ucirc "Û"> <!-- latin capital letter U with circumflex,
|
||||
U+00DB ISOlat1 -->
|
||||
<!ENTITY Uuml "Ü"> <!-- latin capital letter U with diaeresis,
|
||||
U+00DC ISOlat1 -->
|
||||
<!ENTITY Yacute "Ý"> <!-- latin capital letter Y with acute,
|
||||
U+00DD ISOlat1 -->
|
||||
<!ENTITY THORN "Þ"> <!-- latin capital letter THORN,
|
||||
U+00DE ISOlat1 -->
|
||||
<!ENTITY szlig "ß"> <!-- latin small letter sharp s = ess-zed,
|
||||
U+00DF ISOlat1 -->
|
||||
<!ENTITY agrave "à"> <!-- latin small letter a with grave
|
||||
= latin small letter a grave,
|
||||
U+00E0 ISOlat1 -->
|
||||
<!ENTITY aacute "á"> <!-- latin small letter a with acute,
|
||||
U+00E1 ISOlat1 -->
|
||||
<!ENTITY acirc "â"> <!-- latin small letter a with circumflex,
|
||||
U+00E2 ISOlat1 -->
|
||||
<!ENTITY atilde "ã"> <!-- latin small letter a with tilde,
|
||||
U+00E3 ISOlat1 -->
|
||||
<!ENTITY auml "ä"> <!-- latin small letter a with diaeresis,
|
||||
U+00E4 ISOlat1 -->
|
||||
<!ENTITY aring "å"> <!-- latin small letter a with ring above
|
||||
= latin small letter a ring,
|
||||
U+00E5 ISOlat1 -->
|
||||
<!ENTITY aelig "æ"> <!-- latin small letter ae
|
||||
= latin small ligature ae, U+00E6 ISOlat1 -->
|
||||
<!ENTITY ccedil "ç"> <!-- latin small letter c with cedilla,
|
||||
U+00E7 ISOlat1 -->
|
||||
<!ENTITY egrave "è"> <!-- latin small letter e with grave,
|
||||
U+00E8 ISOlat1 -->
|
||||
<!ENTITY eacute "é"> <!-- latin small letter e with acute,
|
||||
U+00E9 ISOlat1 -->
|
||||
<!ENTITY ecirc "ê"> <!-- latin small letter e with circumflex,
|
||||
U+00EA ISOlat1 -->
|
||||
<!ENTITY euml "ë"> <!-- latin small letter e with diaeresis,
|
||||
U+00EB ISOlat1 -->
|
||||
<!ENTITY igrave "ì"> <!-- latin small letter i with grave,
|
||||
U+00EC ISOlat1 -->
|
||||
<!ENTITY iacute "í"> <!-- latin small letter i with acute,
|
||||
U+00ED ISOlat1 -->
|
||||
<!ENTITY icirc "î"> <!-- latin small letter i with circumflex,
|
||||
U+00EE ISOlat1 -->
|
||||
<!ENTITY iuml "ï"> <!-- latin small letter i with diaeresis,
|
||||
U+00EF ISOlat1 -->
|
||||
<!ENTITY eth "ð"> <!-- latin small letter eth, U+00F0 ISOlat1 -->
|
||||
<!ENTITY ntilde "ñ"> <!-- latin small letter n with tilde,
|
||||
U+00F1 ISOlat1 -->
|
||||
<!ENTITY ograve "ò"> <!-- latin small letter o with grave,
|
||||
U+00F2 ISOlat1 -->
|
||||
<!ENTITY oacute "ó"> <!-- latin small letter o with acute,
|
||||
U+00F3 ISOlat1 -->
|
||||
<!ENTITY ocirc "ô"> <!-- latin small letter o with circumflex,
|
||||
U+00F4 ISOlat1 -->
|
||||
<!ENTITY otilde "õ"> <!-- latin small letter o with tilde,
|
||||
U+00F5 ISOlat1 -->
|
||||
<!ENTITY ouml "ö"> <!-- latin small letter o with diaeresis,
|
||||
U+00F6 ISOlat1 -->
|
||||
<!ENTITY divide "÷"> <!-- division sign, U+00F7 ISOnum -->
|
||||
<!ENTITY oslash "ø"> <!-- latin small letter o with stroke,
|
||||
= latin small letter o slash,
|
||||
U+00F8 ISOlat1 -->
|
||||
<!ENTITY ugrave "ù"> <!-- latin small letter u with grave,
|
||||
U+00F9 ISOlat1 -->
|
||||
<!ENTITY uacute "ú"> <!-- latin small letter u with acute,
|
||||
U+00FA ISOlat1 -->
|
||||
<!ENTITY ucirc "û"> <!-- latin small letter u with circumflex,
|
||||
U+00FB ISOlat1 -->
|
||||
<!ENTITY uuml "ü"> <!-- latin small letter u with diaeresis,
|
||||
U+00FC ISOlat1 -->
|
||||
<!ENTITY yacute "ý"> <!-- latin small letter y with acute,
|
||||
U+00FD ISOlat1 -->
|
||||
<!ENTITY thorn "þ"> <!-- latin small letter thorn,
|
||||
U+00FE ISOlat1 -->
|
||||
<!ENTITY yuml "ÿ"> <!-- latin small letter y with diaeresis,
|
||||
U+00FF ISOlat1 -->
|
|
@ -0,0 +1,80 @@
|
|||
<!-- Special characters for XHTML -->
|
||||
|
||||
<!-- Character entity set. Typical invocation:
|
||||
<!ENTITY % HTMLspecial PUBLIC
|
||||
"-//W3C//ENTITIES Special for XHTML//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent">
|
||||
%HTMLspecial;
|
||||
-->
|
||||
|
||||
<!-- Portions (C) International Organization for Standardization 1986:
|
||||
Permission to copy in any form is granted for use with
|
||||
conforming SGML systems and applications as defined in
|
||||
ISO 8879, provided this notice is included in all copies.
|
||||
-->
|
||||
|
||||
<!-- Relevant ISO entity set is given unless names are newly introduced.
|
||||
New names (i.e., not in ISO 8879 list) do not clash with any
|
||||
existing ISO 8879 entity names. ISO 10646 character numbers
|
||||
are given for each character, in hex. values are decimal
|
||||
conversions of the ISO 10646 values and refer to the document
|
||||
character set. Names are Unicode names.
|
||||
-->
|
||||
|
||||
<!-- C0 Controls and Basic Latin -->
|
||||
<!ENTITY quot """> <!-- quotation mark, U+0022 ISOnum -->
|
||||
<!ENTITY amp "&#38;"> <!-- ampersand, U+0026 ISOnum -->
|
||||
<!ENTITY lt "&#60;"> <!-- less-than sign, U+003C ISOnum -->
|
||||
<!ENTITY gt ">"> <!-- greater-than sign, U+003E ISOnum -->
|
||||
<!ENTITY apos "'"> <!-- apostrophe = APL quote, U+0027 ISOnum -->
|
||||
|
||||
<!-- Latin Extended-A -->
|
||||
<!ENTITY OElig "Œ"> <!-- latin capital ligature OE,
|
||||
U+0152 ISOlat2 -->
|
||||
<!ENTITY oelig "œ"> <!-- latin small ligature oe, U+0153 ISOlat2 -->
|
||||
<!-- ligature is a misnomer, this is a separate character in some languages -->
|
||||
<!ENTITY Scaron "Š"> <!-- latin capital letter S with caron,
|
||||
U+0160 ISOlat2 -->
|
||||
<!ENTITY scaron "š"> <!-- latin small letter s with caron,
|
||||
U+0161 ISOlat2 -->
|
||||
<!ENTITY Yuml "Ÿ"> <!-- latin capital letter Y with diaeresis,
|
||||
U+0178 ISOlat2 -->
|
||||
|
||||
<!-- Spacing Modifier Letters -->
|
||||
<!ENTITY circ "ˆ"> <!-- modifier letter circumflex accent,
|
||||
U+02C6 ISOpub -->
|
||||
<!ENTITY tilde "˜"> <!-- small tilde, U+02DC ISOdia -->
|
||||
|
||||
<!-- General Punctuation -->
|
||||
<!ENTITY ensp " "> <!-- en space, U+2002 ISOpub -->
|
||||
<!ENTITY emsp " "> <!-- em space, U+2003 ISOpub -->
|
||||
<!ENTITY thinsp " "> <!-- thin space, U+2009 ISOpub -->
|
||||
<!ENTITY zwnj "‌"> <!-- zero width non-joiner,
|
||||
U+200C NEW RFC 2070 -->
|
||||
<!ENTITY zwj "‍"> <!-- zero width joiner, U+200D NEW RFC 2070 -->
|
||||
<!ENTITY lrm "‎"> <!-- left-to-right mark, U+200E NEW RFC 2070 -->
|
||||
<!ENTITY rlm "‏"> <!-- right-to-left mark, U+200F NEW RFC 2070 -->
|
||||
<!ENTITY ndash "–"> <!-- en dash, U+2013 ISOpub -->
|
||||
<!ENTITY mdash "—"> <!-- em dash, U+2014 ISOpub -->
|
||||
<!ENTITY lsquo "‘"> <!-- left single quotation mark,
|
||||
U+2018 ISOnum -->
|
||||
<!ENTITY rsquo "’"> <!-- right single quotation mark,
|
||||
U+2019 ISOnum -->
|
||||
<!ENTITY sbquo "‚"> <!-- single low-9 quotation mark, U+201A NEW -->
|
||||
<!ENTITY ldquo "“"> <!-- left double quotation mark,
|
||||
U+201C ISOnum -->
|
||||
<!ENTITY rdquo "”"> <!-- right double quotation mark,
|
||||
U+201D ISOnum -->
|
||||
<!ENTITY bdquo "„"> <!-- double low-9 quotation mark, U+201E NEW -->
|
||||
<!ENTITY dagger "†"> <!-- dagger, U+2020 ISOpub -->
|
||||
<!ENTITY Dagger "‡"> <!-- double dagger, U+2021 ISOpub -->
|
||||
<!ENTITY permil "‰"> <!-- per mille sign, U+2030 ISOtech -->
|
||||
<!ENTITY lsaquo "‹"> <!-- single left-pointing angle quotation mark,
|
||||
U+2039 ISO proposed -->
|
||||
<!-- lsaquo is proposed but not yet ISO standardized -->
|
||||
<!ENTITY rsaquo "›"> <!-- single right-pointing angle quotation mark,
|
||||
U+203A ISO proposed -->
|
||||
<!-- rsaquo is proposed but not yet ISO standardized -->
|
||||
|
||||
<!-- Currency Symbols -->
|
||||
<!ENTITY euro "€"> <!-- euro sign, U+20AC NEW -->
|
|
@ -0,0 +1,237 @@
|
|||
<!-- Mathematical, Greek and Symbolic characters for XHTML -->
|
||||
|
||||
<!-- Character entity set. Typical invocation:
|
||||
<!ENTITY % HTMLsymbol PUBLIC
|
||||
"-//W3C//ENTITIES Symbols for XHTML//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent">
|
||||
%HTMLsymbol;
|
||||
-->
|
||||
|
||||
<!-- Portions (C) International Organization for Standardization 1986:
|
||||
Permission to copy in any form is granted for use with
|
||||
conforming SGML systems and applications as defined in
|
||||
ISO 8879, provided this notice is included in all copies.
|
||||
-->
|
||||
|
||||
<!-- Relevant ISO entity set is given unless names are newly introduced.
|
||||
New names (i.e., not in ISO 8879 list) do not clash with any
|
||||
existing ISO 8879 entity names. ISO 10646 character numbers
|
||||
are given for each character, in hex. values are decimal
|
||||
conversions of the ISO 10646 values and refer to the document
|
||||
character set. Names are Unicode names.
|
||||
-->
|
||||
|
||||
<!-- Latin Extended-B -->
|
||||
<!ENTITY fnof "ƒ"> <!-- latin small letter f with hook = function
|
||||
= florin, U+0192 ISOtech -->
|
||||
|
||||
<!-- Greek -->
|
||||
<!ENTITY Alpha "Α"> <!-- greek capital letter alpha, U+0391 -->
|
||||
<!ENTITY Beta "Β"> <!-- greek capital letter beta, U+0392 -->
|
||||
<!ENTITY Gamma "Γ"> <!-- greek capital letter gamma,
|
||||
U+0393 ISOgrk3 -->
|
||||
<!ENTITY Delta "Δ"> <!-- greek capital letter delta,
|
||||
U+0394 ISOgrk3 -->
|
||||
<!ENTITY Epsilon "Ε"> <!-- greek capital letter epsilon, U+0395 -->
|
||||
<!ENTITY Zeta "Ζ"> <!-- greek capital letter zeta, U+0396 -->
|
||||
<!ENTITY Eta "Η"> <!-- greek capital letter eta, U+0397 -->
|
||||
<!ENTITY Theta "Θ"> <!-- greek capital letter theta,
|
||||
U+0398 ISOgrk3 -->
|
||||
<!ENTITY Iota "Ι"> <!-- greek capital letter iota, U+0399 -->
|
||||
<!ENTITY Kappa "Κ"> <!-- greek capital letter kappa, U+039A -->
|
||||
<!ENTITY Lambda "Λ"> <!-- greek capital letter lamda,
|
||||
U+039B ISOgrk3 -->
|
||||
<!ENTITY Mu "Μ"> <!-- greek capital letter mu, U+039C -->
|
||||
<!ENTITY Nu "Ν"> <!-- greek capital letter nu, U+039D -->
|
||||
<!ENTITY Xi "Ξ"> <!-- greek capital letter xi, U+039E ISOgrk3 -->
|
||||
<!ENTITY Omicron "Ο"> <!-- greek capital letter omicron, U+039F -->
|
||||
<!ENTITY Pi "Π"> <!-- greek capital letter pi, U+03A0 ISOgrk3 -->
|
||||
<!ENTITY Rho "Ρ"> <!-- greek capital letter rho, U+03A1 -->
|
||||
<!-- there is no Sigmaf, and no U+03A2 character either -->
|
||||
<!ENTITY Sigma "Σ"> <!-- greek capital letter sigma,
|
||||
U+03A3 ISOgrk3 -->
|
||||
<!ENTITY Tau "Τ"> <!-- greek capital letter tau, U+03A4 -->
|
||||
<!ENTITY Upsilon "Υ"> <!-- greek capital letter upsilon,
|
||||
U+03A5 ISOgrk3 -->
|
||||
<!ENTITY Phi "Φ"> <!-- greek capital letter phi,
|
||||
U+03A6 ISOgrk3 -->
|
||||
<!ENTITY Chi "Χ"> <!-- greek capital letter chi, U+03A7 -->
|
||||
<!ENTITY Psi "Ψ"> <!-- greek capital letter psi,
|
||||
U+03A8 ISOgrk3 -->
|
||||
<!ENTITY Omega "Ω"> <!-- greek capital letter omega,
|
||||
U+03A9 ISOgrk3 -->
|
||||
|
||||
<!ENTITY alpha "α"> <!-- greek small letter alpha,
|
||||
U+03B1 ISOgrk3 -->
|
||||
<!ENTITY beta "β"> <!-- greek small letter beta, U+03B2 ISOgrk3 -->
|
||||
<!ENTITY gamma "γ"> <!-- greek small letter gamma,
|
||||
U+03B3 ISOgrk3 -->
|
||||
<!ENTITY delta "δ"> <!-- greek small letter delta,
|
||||
U+03B4 ISOgrk3 -->
|
||||
<!ENTITY epsilon "ε"> <!-- greek small letter epsilon,
|
||||
U+03B5 ISOgrk3 -->
|
||||
<!ENTITY zeta "ζ"> <!-- greek small letter zeta, U+03B6 ISOgrk3 -->
|
||||
<!ENTITY eta "η"> <!-- greek small letter eta, U+03B7 ISOgrk3 -->
|
||||
<!ENTITY theta "θ"> <!-- greek small letter theta,
|
||||
U+03B8 ISOgrk3 -->
|
||||
<!ENTITY iota "ι"> <!-- greek small letter iota, U+03B9 ISOgrk3 -->
|
||||
<!ENTITY kappa "κ"> <!-- greek small letter kappa,
|
||||
U+03BA ISOgrk3 -->
|
||||
<!ENTITY lambda "λ"> <!-- greek small letter lamda,
|
||||
U+03BB ISOgrk3 -->
|
||||
<!ENTITY mu "μ"> <!-- greek small letter mu, U+03BC ISOgrk3 -->
|
||||
<!ENTITY nu "ν"> <!-- greek small letter nu, U+03BD ISOgrk3 -->
|
||||
<!ENTITY xi "ξ"> <!-- greek small letter xi, U+03BE ISOgrk3 -->
|
||||
<!ENTITY omicron "ο"> <!-- greek small letter omicron, U+03BF NEW -->
|
||||
<!ENTITY pi "π"> <!-- greek small letter pi, U+03C0 ISOgrk3 -->
|
||||
<!ENTITY rho "ρ"> <!-- greek small letter rho, U+03C1 ISOgrk3 -->
|
||||
<!ENTITY sigmaf "ς"> <!-- greek small letter final sigma,
|
||||
U+03C2 ISOgrk3 -->
|
||||
<!ENTITY sigma "σ"> <!-- greek small letter sigma,
|
||||
U+03C3 ISOgrk3 -->
|
||||
<!ENTITY tau "τ"> <!-- greek small letter tau, U+03C4 ISOgrk3 -->
|
||||
<!ENTITY upsilon "υ"> <!-- greek small letter upsilon,
|
||||
U+03C5 ISOgrk3 -->
|
||||
<!ENTITY phi "φ"> <!-- greek small letter phi, U+03C6 ISOgrk3 -->
|
||||
<!ENTITY chi "χ"> <!-- greek small letter chi, U+03C7 ISOgrk3 -->
|
||||
<!ENTITY psi "ψ"> <!-- greek small letter psi, U+03C8 ISOgrk3 -->
|
||||
<!ENTITY omega "ω"> <!-- greek small letter omega,
|
||||
U+03C9 ISOgrk3 -->
|
||||
<!ENTITY thetasym "ϑ"> <!-- greek theta symbol,
|
||||
U+03D1 NEW -->
|
||||
<!ENTITY upsih "ϒ"> <!-- greek upsilon with hook symbol,
|
||||
U+03D2 NEW -->
|
||||
<!ENTITY piv "ϖ"> <!-- greek pi symbol, U+03D6 ISOgrk3 -->
|
||||
|
||||
<!-- General Punctuation -->
|
||||
<!ENTITY bull "•"> <!-- bullet = black small circle,
|
||||
U+2022 ISOpub -->
|
||||
<!-- bullet is NOT the same as bullet operator, U+2219 -->
|
||||
<!ENTITY hellip "…"> <!-- horizontal ellipsis = three dot leader,
|
||||
U+2026 ISOpub -->
|
||||
<!ENTITY prime "′"> <!-- prime = minutes = feet, U+2032 ISOtech -->
|
||||
<!ENTITY Prime "″"> <!-- double prime = seconds = inches,
|
||||
U+2033 ISOtech -->
|
||||
<!ENTITY oline "‾"> <!-- overline = spacing overscore,
|
||||
U+203E NEW -->
|
||||
<!ENTITY frasl "⁄"> <!-- fraction slash, U+2044 NEW -->
|
||||
|
||||
<!-- Letterlike Symbols -->
|
||||
<!ENTITY weierp "℘"> <!-- script capital P = power set
|
||||
= Weierstrass p, U+2118 ISOamso -->
|
||||
<!ENTITY image "ℑ"> <!-- black-letter capital I = imaginary part,
|
||||
U+2111 ISOamso -->
|
||||
<!ENTITY real "ℜ"> <!-- black-letter capital R = real part symbol,
|
||||
U+211C ISOamso -->
|
||||
<!ENTITY trade "™"> <!-- trade mark sign, U+2122 ISOnum -->
|
||||
<!ENTITY alefsym "ℵ"> <!-- alef symbol = first transfinite cardinal,
|
||||
U+2135 NEW -->
|
||||
<!-- alef symbol is NOT the same as hebrew letter alef,
|
||||
U+05D0 although the same glyph could be used to depict both characters -->
|
||||
|
||||
<!-- Arrows -->
|
||||
<!ENTITY larr "←"> <!-- leftwards arrow, U+2190 ISOnum -->
|
||||
<!ENTITY uarr "↑"> <!-- upwards arrow, U+2191 ISOnum-->
|
||||
<!ENTITY rarr "→"> <!-- rightwards arrow, U+2192 ISOnum -->
|
||||
<!ENTITY darr "↓"> <!-- downwards arrow, U+2193 ISOnum -->
|
||||
<!ENTITY harr "↔"> <!-- left right arrow, U+2194 ISOamsa -->
|
||||
<!ENTITY crarr "↵"> <!-- downwards arrow with corner leftwards
|
||||
= carriage return, U+21B5 NEW -->
|
||||
<!ENTITY lArr "⇐"> <!-- leftwards double arrow, U+21D0 ISOtech -->
|
||||
<!-- Unicode does not say that lArr is the same as the 'is implied by' arrow
|
||||
but also does not have any other character for that function. So lArr can
|
||||
be used for 'is implied by' as ISOtech suggests -->
|
||||
<!ENTITY uArr "⇑"> <!-- upwards double arrow, U+21D1 ISOamsa -->
|
||||
<!ENTITY rArr "⇒"> <!-- rightwards double arrow,
|
||||
U+21D2 ISOtech -->
|
||||
<!-- Unicode does not say this is the 'implies' character but does not have
|
||||
another character with this function so rArr can be used for 'implies'
|
||||
as ISOtech suggests -->
|
||||
<!ENTITY dArr "⇓"> <!-- downwards double arrow, U+21D3 ISOamsa -->
|
||||
<!ENTITY hArr "⇔"> <!-- left right double arrow,
|
||||
U+21D4 ISOamsa -->
|
||||
|
||||
<!-- Mathematical Operators -->
|
||||
<!ENTITY forall "∀"> <!-- for all, U+2200 ISOtech -->
|
||||
<!ENTITY part "∂"> <!-- partial differential, U+2202 ISOtech -->
|
||||
<!ENTITY exist "∃"> <!-- there exists, U+2203 ISOtech -->
|
||||
<!ENTITY empty "∅"> <!-- empty set = null set, U+2205 ISOamso -->
|
||||
<!ENTITY nabla "∇"> <!-- nabla = backward difference,
|
||||
U+2207 ISOtech -->
|
||||
<!ENTITY isin "∈"> <!-- element of, U+2208 ISOtech -->
|
||||
<!ENTITY notin "∉"> <!-- not an element of, U+2209 ISOtech -->
|
||||
<!ENTITY ni "∋"> <!-- contains as member, U+220B ISOtech -->
|
||||
<!ENTITY prod "∏"> <!-- n-ary product = product sign,
|
||||
U+220F ISOamsb -->
|
||||
<!-- prod is NOT the same character as U+03A0 'greek capital letter pi' though
|
||||
the same glyph might be used for both -->
|
||||
<!ENTITY sum "∑"> <!-- n-ary summation, U+2211 ISOamsb -->
|
||||
<!-- sum is NOT the same character as U+03A3 'greek capital letter sigma'
|
||||
though the same glyph might be used for both -->
|
||||
<!ENTITY minus "−"> <!-- minus sign, U+2212 ISOtech -->
|
||||
<!ENTITY lowast "∗"> <!-- asterisk operator, U+2217 ISOtech -->
|
||||
<!ENTITY radic "√"> <!-- square root = radical sign,
|
||||
U+221A ISOtech -->
|
||||
<!ENTITY prop "∝"> <!-- proportional to, U+221D ISOtech -->
|
||||
<!ENTITY infin "∞"> <!-- infinity, U+221E ISOtech -->
|
||||
<!ENTITY ang "∠"> <!-- angle, U+2220 ISOamso -->
|
||||
<!ENTITY and "∧"> <!-- logical and = wedge, U+2227 ISOtech -->
|
||||
<!ENTITY or "∨"> <!-- logical or = vee, U+2228 ISOtech -->
|
||||
<!ENTITY cap "∩"> <!-- intersection = cap, U+2229 ISOtech -->
|
||||
<!ENTITY cup "∪"> <!-- union = cup, U+222A ISOtech -->
|
||||
<!ENTITY int "∫"> <!-- integral, U+222B ISOtech -->
|
||||
<!ENTITY there4 "∴"> <!-- therefore, U+2234 ISOtech -->
|
||||
<!ENTITY sim "∼"> <!-- tilde operator = varies with = similar to,
|
||||
U+223C ISOtech -->
|
||||
<!-- tilde operator is NOT the same character as the tilde, U+007E,
|
||||
although the same glyph might be used to represent both -->
|
||||
<!ENTITY cong "≅"> <!-- approximately equal to, U+2245 ISOtech -->
|
||||
<!ENTITY asymp "≈"> <!-- almost equal to = asymptotic to,
|
||||
U+2248 ISOamsr -->
|
||||
<!ENTITY ne "≠"> <!-- not equal to, U+2260 ISOtech -->
|
||||
<!ENTITY equiv "≡"> <!-- identical to, U+2261 ISOtech -->
|
||||
<!ENTITY le "≤"> <!-- less-than or equal to, U+2264 ISOtech -->
|
||||
<!ENTITY ge "≥"> <!-- greater-than or equal to,
|
||||
U+2265 ISOtech -->
|
||||
<!ENTITY sub "⊂"> <!-- subset of, U+2282 ISOtech -->
|
||||
<!ENTITY sup "⊃"> <!-- superset of, U+2283 ISOtech -->
|
||||
<!ENTITY nsub "⊄"> <!-- not a subset of, U+2284 ISOamsn -->
|
||||
<!ENTITY sube "⊆"> <!-- subset of or equal to, U+2286 ISOtech -->
|
||||
<!ENTITY supe "⊇"> <!-- superset of or equal to,
|
||||
U+2287 ISOtech -->
|
||||
<!ENTITY oplus "⊕"> <!-- circled plus = direct sum,
|
||||
U+2295 ISOamsb -->
|
||||
<!ENTITY otimes "⊗"> <!-- circled times = vector product,
|
||||
U+2297 ISOamsb -->
|
||||
<!ENTITY perp "⊥"> <!-- up tack = orthogonal to = perpendicular,
|
||||
U+22A5 ISOtech -->
|
||||
<!ENTITY sdot "⋅"> <!-- dot operator, U+22C5 ISOamsb -->
|
||||
<!-- dot operator is NOT the same character as U+00B7 middle dot -->
|
||||
|
||||
<!-- Miscellaneous Technical -->
|
||||
<!ENTITY lceil "⌈"> <!-- left ceiling = APL upstile,
|
||||
U+2308 ISOamsc -->
|
||||
<!ENTITY rceil "⌉"> <!-- right ceiling, U+2309 ISOamsc -->
|
||||
<!ENTITY lfloor "⌊"> <!-- left floor = APL downstile,
|
||||
U+230A ISOamsc -->
|
||||
<!ENTITY rfloor "⌋"> <!-- right floor, U+230B ISOamsc -->
|
||||
<!ENTITY lang "〈"> <!-- left-pointing angle bracket = bra,
|
||||
U+2329 ISOtech -->
|
||||
<!-- lang is NOT the same character as U+003C 'less than sign'
|
||||
or U+2039 'single left-pointing angle quotation mark' -->
|
||||
<!ENTITY rang "〉"> <!-- right-pointing angle bracket = ket,
|
||||
U+232A ISOtech -->
|
||||
<!-- rang is NOT the same character as U+003E 'greater than sign'
|
||||
or U+203A 'single right-pointing angle quotation mark' -->
|
||||
|
||||
<!-- Geometric Shapes -->
|
||||
<!ENTITY loz "◊"> <!-- lozenge, U+25CA ISOpub -->
|
||||
|
||||
<!-- Miscellaneous Symbols -->
|
||||
<!ENTITY spades "♠"> <!-- black spade suit, U+2660 ISOpub -->
|
||||
<!-- black here seems to mean filled as opposed to hollow -->
|
||||
<!ENTITY clubs "♣"> <!-- black club suit = shamrock,
|
||||
U+2663 ISOpub -->
|
||||
<!ENTITY hearts "♥"> <!-- black heart suit = valentine,
|
||||
U+2665 ISOpub -->
|
||||
<!ENTITY diams "♦"> <!-- black diamond suit, U+2666 ISOpub -->
|
|
@ -6,22 +6,27 @@ IN: xml.errors.tests
|
|||
'[ _ string>xml ] swap '[ _ = ] must-fail-with ;
|
||||
|
||||
T{ no-entity f 1 10 "nbsp" } "<x> </x>" xml-error-test
|
||||
T{ mismatched f 1 8 T{ name f "" "x" "" } T{ name f "" "y" "" }
|
||||
} "<x></y>" xml-error-test
|
||||
T{ mismatched f 1 8 T{ name f "" "x" "" } T{ name f "" "y" "" } }
|
||||
"<x></y>" xml-error-test
|
||||
T{ unclosed f 1 4 V{ T{ name f "" "x" "" } } } "<x>" xml-error-test
|
||||
T{ nonexist-ns f 1 5 "x" } "<x:y/>" xml-error-test
|
||||
T{ unopened f 1 5 } "</x>" xml-error-test
|
||||
T{ not-yes/no f 1 41 "maybe" } "<?xml version='1.0' standalone='maybe'?><x/>" xml-error-test
|
||||
T{ not-yes/no f 1 41 "maybe" }
|
||||
"<?xml version='1.0' standalone='maybe'?><x/>" xml-error-test
|
||||
T{ extra-attrs f 1 32 V{ T{ name f "" "foo" f } }
|
||||
} "<?xml version='1.1' foo='bar'?><x/>" xml-error-test
|
||||
T{ bad-version f 1 28 "5 million" } "<?xml version='5 million'?><x/>" xml-error-test
|
||||
T{ bad-version f 1 28 "5 million" }
|
||||
"<?xml version='5 million'?><x/>" xml-error-test
|
||||
T{ notags f } "" xml-error-test
|
||||
T{ multitags } "<x/><y/>" xml-error-test
|
||||
T{ bad-prolog f 1 26 T{ prolog f "1.0" "UTF-8" f }
|
||||
} "<x/><?xml version='1.0'?>" xml-error-test
|
||||
T{ bad-prolog f 1 26 T{ prolog f "1.0" "UTF-8" f } }
|
||||
"<x/><?xml version='1.0'?>" xml-error-test
|
||||
T{ capitalized-prolog f 1 6 "XmL" } "<?XmL version='1.0'?><x/>"
|
||||
xml-error-test
|
||||
xml-error-test
|
||||
T{ pre/post-content f "x" t } "x<y/>" xml-error-test
|
||||
T{ versionless-prolog f 1 8 } "<?xml?><x/>" xml-error-test
|
||||
T{ bad-instruction f 1 11 T{ instruction f "xsl" }
|
||||
} "<x><?xsl?></x>" xml-error-test
|
||||
T{ bad-instruction f 1 11 T{ instruction f "xsl" } }
|
||||
"<x><?xsl?></x>" xml-error-test
|
||||
T{ unclosed-quote f 1 13 } "<x value='/>" xml-error-test
|
||||
T{ bad-name f 1 3 "-" } "<-/>" xml-error-test
|
||||
T{ quoteless-attr f 1 10 } "<x value=3/>" xml-error-test
|
|
@ -32,17 +32,6 @@ M: no-entity summary ( obj -- str )
|
|||
"Entity does not exist: &" write thing>> write ";" print
|
||||
] with-string-writer ;
|
||||
|
||||
TUPLE: xml-string-error < parsing-error string ; ! this should not exist
|
||||
|
||||
: xml-string-error ( string -- * )
|
||||
\ xml-string-error parsing-error swap >>string throw ;
|
||||
|
||||
M: xml-string-error summary ( obj -- str )
|
||||
[
|
||||
dup call-next-method write
|
||||
string>> print
|
||||
] with-string-writer ;
|
||||
|
||||
TUPLE: mismatched < parsing-error open close ;
|
||||
|
||||
: mismatched ( open close -- * )
|
||||
|
@ -233,7 +222,34 @@ M: misplaced-directive summary ( obj -- str )
|
|||
dir>> write-xml-chunk nl
|
||||
] with-string-writer ;
|
||||
|
||||
TUPLE: bad-name < parsing-error name ;
|
||||
|
||||
: bad-name ( name -- * )
|
||||
\ bad-name parsing-error swap >>name throw ;
|
||||
|
||||
M: bad-name summary ( obj -- str )
|
||||
[ call-next-method ]
|
||||
[ "Invalid name: " swap name>> "\n" 3append ]
|
||||
bi append ;
|
||||
|
||||
TUPLE: unclosed-quote < parsing-error ;
|
||||
|
||||
: unclosed-quote ( -- * )
|
||||
\ unclosed-quote parsing-error throw ;
|
||||
|
||||
M: unclosed-quote summary
|
||||
call-next-method
|
||||
"XML document ends with quote still open\n" append ;
|
||||
|
||||
TUPLE: quoteless-attr < parsing-error ;
|
||||
|
||||
: quoteless-attr ( -- * )
|
||||
\ quoteless-attr parsing-error throw ;
|
||||
|
||||
M: quoteless-attr summary
|
||||
call-next-method "Attribute lacks quotes around value\n" append ;
|
||||
|
||||
UNION: xml-parse-error multitags notags extra-attrs nonexist-ns
|
||||
not-yes/no unclosed mismatched xml-string-error expected no-entity
|
||||
not-yes/no unclosed mismatched expected no-entity
|
||||
bad-prolog versionless-prolog capitalized-prolog bad-instruction
|
||||
bad-directive ;
|
||||
bad-directive bad-name unclosed-quote quoteless-attr ;
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
<?xml version='1.0' encoding='ASCII'?><x>e</x>
|
|
@ -0,0 +1,14 @@
|
|||
USING: xml xml.data xml.utilities tools.test accessors kernel ;
|
||||
|
||||
[ "\u000131" ] [ "resource:basis/xml/tests/latin5.xml" file>xml children>string ] unit-test
|
||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/latin1.xml" file>xml children>string ] unit-test
|
||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/spaces.xml" file>xml children>string ] unit-test
|
||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf8.xml" file>xml children>string ] unit-test
|
||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16.xml" file>xml children>string ] unit-test
|
||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16be.xml" file>xml children>string ] unit-test
|
||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16le.xml" file>xml children>string ] unit-test
|
||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16be-bom.xml" file>xml children>string ] unit-test
|
||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16le-bom.xml" file>xml children>string ] unit-test
|
||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/prologless.xml" file>xml children>string ] unit-test
|
||||
[ "e" ] [ "resource:basis/xml/tests/ascii.xml" file>xml children>string ] unit-test
|
||||
[ "\u0000e9" "x" ] [ "resource:basis/xml/tests/unitag.xml" file>xml [ name>> main>> ] [ children>string ] bi ] unit-test
|
|
@ -0,0 +1 @@
|
|||
<?xml version='1.0' encoding='ISO-8859-1'?><x>é</x>
|
|
@ -0,0 +1 @@
|
|||
<?xml version='1.0' encoding='ISO-8859-9'?><x>ý</x>
|
|
@ -0,0 +1 @@
|
|||
<x>é</x>
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
|
||||
<x>é</x>
|
|
@ -2,7 +2,7 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
IN: xml.tests
|
||||
USING: kernel xml tools.test io namespaces make sequences
|
||||
xml.errors xml.entities parser strings xml.data io.files
|
||||
xml.errors xml.entities.html parser strings xml.data io.files
|
||||
xml.writer xml.utilities state-parser continuations assocs
|
||||
sequences.deep accessors io.streams.string ;
|
||||
|
||||
|
@ -53,12 +53,15 @@ SYMBOL: xml-file
|
|||
[ "<foo> bar </foo>" string>xml pprint-xml>string ] unit-test
|
||||
[ "<!-- B+, B, or B--->" string>xml ] must-fail
|
||||
[ ] [ "<?xml version='1.0'?><!-- declarations for <head> & <body> --><foo/>" string>xml drop ] unit-test
|
||||
[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>xml-chunk second ] unit-test
|
||||
[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>xml-chunk second ] unit-test
|
||||
[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>xml-chunk second ] unit-test
|
||||
[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>xml-chunk second ] unit-test
|
||||
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk second ] unit-test
|
||||
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk second ] unit-test
|
||||
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk second ] unit-test
|
||||
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM \"blah.dtd\" >" string>xml-chunk second ] unit-test
|
||||
[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>xml-chunk first ] unit-test
|
||||
[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>xml-chunk first ] unit-test
|
||||
[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>xml-chunk first ] unit-test
|
||||
[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>xml-chunk first ] unit-test
|
||||
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk first ] unit-test
|
||||
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk first ] unit-test
|
||||
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk first ] unit-test
|
||||
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM \"blah.dtd\" >" string>xml-chunk first ] unit-test
|
||||
[ t ] [ "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' >" dup string>xml-chunk [ write-xml-chunk ] with-string-writer = ] unit-test
|
||||
[ "foo" ] [ "<!ENTITY bar 'foo'><x>&bar;</x>" string>xml children>string ] unit-test
|
||||
[ V{ "hello" } ] [ "hello" string>xml-chunk ] unit-test
|
||||
[ 958 ] [ [ "ξ" string>xml-chunk ] with-html-entities first first ] unit-test
|
|
@ -0,0 +1 @@
|
|||
<é>x</é>
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1 @@
|
|||
<?xml version='1.0' encoding='UTF-8'?><x/>
|
|
@ -0,0 +1 @@
|
|||
<?xml version='1.0' encoding='UTF-8'?><x>é</x>
|
|
@ -1,9 +1,10 @@
|
|||
! Copyright (C) 2005, 2006 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: xml.errors xml.data xml.utilities xml.char-classes sets
|
||||
xml.entities kernel state-parser kernel namespaces make strings
|
||||
math math.parser sequences assocs arrays splitting combinators
|
||||
unicode.case accessors fry ascii ;
|
||||
USING: accessors arrays ascii assocs combinators
|
||||
combinators.short-circuit fry io.encodings io.encodings.iana
|
||||
io.encodings.string io.encodings.utf16 io.encodings.utf8 kernel make
|
||||
math math.parser namespaces sequences sets splitting state-parser
|
||||
strings xml.char-classes xml.data xml.entities xml.errors hashtables ;
|
||||
IN: xml.tokenize
|
||||
|
||||
! XML namespace processing: ns = namespace
|
||||
|
@ -53,34 +54,37 @@ SYMBOL: ns-stack
|
|||
|
||||
! version=1.0? is calculated once and passed around for efficiency
|
||||
|
||||
: (parse-name) ( -- str )
|
||||
version=1.0? dup
|
||||
get-char name-start? [
|
||||
[ dup get-char name-char? not ] take-until nip
|
||||
] [
|
||||
"Malformed name" xml-string-error
|
||||
] if ;
|
||||
: assure-name ( str version=1.0? -- str )
|
||||
over {
|
||||
[ first name-start? ]
|
||||
[ rest-slice [ name-char? ] with all? ]
|
||||
} 2&& [ bad-name ] unless ;
|
||||
|
||||
: (parse-name) ( start -- str )
|
||||
version=1.0?
|
||||
[ [ get-char name-char? not ] curry take-until append ]
|
||||
[ assure-name ] bi ;
|
||||
|
||||
: parse-name-starting ( start -- name )
|
||||
(parse-name) get-char CHAR: : =
|
||||
[ next "" (parse-name) ] [ "" swap ] if f <name> ;
|
||||
|
||||
: parse-name ( -- name )
|
||||
(parse-name) get-char CHAR: : =
|
||||
[ next (parse-name) ] [ "" swap ] if f <name> ;
|
||||
"" parse-name-starting ;
|
||||
|
||||
! -- Parsing strings
|
||||
|
||||
: (parse-entity) ( string -- )
|
||||
: parse-named-entity ( string -- )
|
||||
dup entities at [ , ] [
|
||||
prolog-data get standalone>>
|
||||
[ no-entity ] [
|
||||
dup extra-entities get at
|
||||
[ , ] [ no-entity ] ?if
|
||||
] if
|
||||
dup extra-entities get at
|
||||
[ dup number? [ , ] [ % ] if ] [ no-entity ] ?if ! Make less hackish
|
||||
] ?if ;
|
||||
|
||||
: parse-entity ( -- )
|
||||
next CHAR: ; take-char next
|
||||
"#" ?head [
|
||||
"x" ?head 16 10 ? base> ,
|
||||
] [ (parse-entity) ] if ;
|
||||
] [ parse-named-entity ] if ;
|
||||
|
||||
: (parse-char) ( ch -- )
|
||||
get-char {
|
||||
|
@ -93,13 +97,9 @@ SYMBOL: ns-stack
|
|||
: parse-char ( ch -- string )
|
||||
[ (parse-char) ] "" make ;
|
||||
|
||||
: parse-quot ( ch -- string )
|
||||
parse-char get-char
|
||||
[ "XML file ends in a quote" xml-string-error ] unless ;
|
||||
|
||||
: parse-text ( -- string )
|
||||
CHAR: < parse-char ;
|
||||
|
||||
|
||||
! Parsing tags
|
||||
|
||||
: start-tag ( -- name ? )
|
||||
|
@ -107,17 +107,18 @@ SYMBOL: ns-stack
|
|||
get-char CHAR: / = dup [ next ] when
|
||||
parse-name swap ;
|
||||
|
||||
: parse-attr-value ( -- seq )
|
||||
get-char dup "'\"" member? [
|
||||
next parse-quot
|
||||
] [
|
||||
"Attribute lacks quote" xml-string-error
|
||||
] if ;
|
||||
: (parse-quote) ( ch -- string )
|
||||
parse-char get-char
|
||||
[ unclosed-quote ] unless ;
|
||||
|
||||
: parse-quote ( -- seq )
|
||||
pass-blank get-char dup "'\"" member?
|
||||
[ next (parse-quote) ] [ quoteless-attr ] if ;
|
||||
|
||||
: parse-attr ( -- )
|
||||
[ parse-name ] with-scope
|
||||
pass-blank CHAR: = expect pass-blank
|
||||
[ parse-attr-value ] with-scope
|
||||
parse-name
|
||||
pass-blank CHAR: = expect
|
||||
parse-quote
|
||||
2array , ;
|
||||
|
||||
: (middle-tag) ( -- )
|
||||
|
@ -153,7 +154,7 @@ SYMBOL: ns-stack
|
|||
: only-blanks ( str -- )
|
||||
[ blank? ] all? [ bad-doctype-decl ] unless ;
|
||||
|
||||
: take-system-literal ( -- str )
|
||||
: take-system-literal ( -- str ) ! replace with parse-quote?
|
||||
pass-blank get-char next {
|
||||
{ CHAR: ' [ "'" take-string ] }
|
||||
{ CHAR: " [ "\"" take-string ] }
|
||||
|
@ -207,15 +208,18 @@ DEFER: direct
|
|||
|
||||
: take-entity-def ( -- entity-name entity-def )
|
||||
" " take-string pass-blank get-char {
|
||||
{ CHAR: ' [ take-system-literal ] }
|
||||
{ CHAR: " [ take-system-literal ] }
|
||||
{ CHAR: ' [ parse-quote ] }
|
||||
{ CHAR: " [ parse-quote ] }
|
||||
[ drop take-external-id ]
|
||||
} case ;
|
||||
|
||||
: associate-entity ( entity-name entity-def -- )
|
||||
swap extra-entities [ ?set-at ] change ;
|
||||
|
||||
: take-entity-decl ( -- entity-decl )
|
||||
pass-blank get-char {
|
||||
{ CHAR: % [ next pass-blank take-entity-def ] }
|
||||
[ drop take-entity-def ]
|
||||
[ drop take-entity-def 2dup associate-entity ]
|
||||
} case
|
||||
">" take-string only-blanks <entity-decl> ;
|
||||
|
||||
|
@ -253,22 +257,36 @@ DEFER: direct
|
|||
: good-version ( version -- version )
|
||||
dup { "1.0" "1.1" } member? [ bad-version ] unless ;
|
||||
|
||||
: prolog-attrs ( alist -- prolog )
|
||||
[ T{ name f "" "version" f } swap at
|
||||
[ good-version ] [ versionless-prolog ] if* ] keep
|
||||
[ T{ name f "" "encoding" f } swap at
|
||||
"UTF-8" or ] keep
|
||||
: prolog-version ( alist -- version )
|
||||
T{ name f "" "version" f } swap at
|
||||
[ good-version ] [ versionless-prolog ] if* ;
|
||||
|
||||
: prolog-encoding ( alist -- encoding )
|
||||
T{ name f "" "encoding" f } swap at "UTF-8" or ;
|
||||
|
||||
: prolog-standalone ( alist -- version )
|
||||
T{ name f "" "standalone" f } swap at
|
||||
[ yes/no>bool ] [ f ] if*
|
||||
<prolog> ;
|
||||
[ yes/no>bool ] [ f ] if* ;
|
||||
|
||||
: prolog-attrs ( alist -- prolog )
|
||||
[ prolog-version ]
|
||||
[ prolog-encoding ]
|
||||
[ prolog-standalone ]
|
||||
tri <prolog> ;
|
||||
|
||||
SYMBOL: string-input?
|
||||
: decode-input-if ( encoding -- )
|
||||
string-input? get [ drop ] [ decode-input ] if ;
|
||||
|
||||
: parse-prolog ( -- prolog )
|
||||
pass-blank middle-tag "?>" expect-string
|
||||
dup assure-no-extra prolog-attrs
|
||||
dup encoding>> dup "UTF-16" =
|
||||
[ drop ] [ name>encoding [ decode-input-if ] when* ] if
|
||||
dup prolog-data set ;
|
||||
|
||||
: instruct ( -- instruction )
|
||||
(parse-name) dup "xml" =
|
||||
"" (parse-name) dup "xml" =
|
||||
[ drop parse-prolog ] [
|
||||
dup >lower "xml" =
|
||||
[ capitalized-prolog ]
|
||||
|
@ -278,10 +296,69 @@ DEFER: direct
|
|||
: make-tag ( -- tag )
|
||||
{
|
||||
{ [ get-char dup CHAR: ! = ] [ drop next direct ] }
|
||||
{ [ CHAR: ? = ] [ next instruct ] }
|
||||
{ [ CHAR: ? = ] [ next instruct ] }
|
||||
[
|
||||
start-tag [ dup add-ns pop-ns <closer> ]
|
||||
[ middle-tag end-tag ] if
|
||||
CHAR: > expect
|
||||
]
|
||||
} cond ;
|
||||
|
||||
! Autodetecting encodings
|
||||
|
||||
: continue-make-tag ( str -- tag )
|
||||
parse-name-starting middle-tag end-tag CHAR: > expect ;
|
||||
|
||||
: start-utf16le ( -- tag )
|
||||
utf16le decode-input-if
|
||||
CHAR: ? expect
|
||||
0 expect instruct ;
|
||||
|
||||
: 10xxxxxx? ( ch -- ? )
|
||||
-6 shift 3 bitand 2 = ;
|
||||
|
||||
: start<name ( ch -- tag )
|
||||
ascii?
|
||||
[ utf8 decode-input-if next make-tag ] [
|
||||
next
|
||||
[ get-next 10xxxxxx? not ] take-until
|
||||
get-char suffix utf8 decode
|
||||
utf8 decode-input-if next
|
||||
continue-make-tag
|
||||
] if ;
|
||||
|
||||
: start< ( -- tag )
|
||||
get-next {
|
||||
{ 0 [ next next start-utf16le ] }
|
||||
{ CHAR: ? [ next next instruct ] } ! XML prolog parsing sets the encoding
|
||||
{ CHAR: ! [ utf8 decode-input next next direct ] }
|
||||
[ start<name ]
|
||||
} case ;
|
||||
|
||||
: skip-utf8-bom ( -- tag )
|
||||
"\u0000bb\u0000bf" expect utf8 decode-input
|
||||
CHAR: < expect make-tag ;
|
||||
|
||||
: decode-expecting ( encoding string -- tag )
|
||||
[ decode-input-if next ] [ expect-string ] bi* make-tag ;
|
||||
|
||||
: start-utf16be ( -- tag )
|
||||
utf16be "<" decode-expecting ;
|
||||
|
||||
: skip-utf16le-bom ( -- tag )
|
||||
utf16le "\u0000fe<" decode-expecting ;
|
||||
|
||||
: skip-utf16be-bom ( -- tag )
|
||||
utf16be "\u0000ff<" decode-expecting ;
|
||||
|
||||
: start-document ( -- tag )
|
||||
get-char {
|
||||
{ CHAR: < [ start< ] }
|
||||
{ 0 [ start-utf16be ] }
|
||||
{ HEX: EF [ skip-utf8-bom ] }
|
||||
{ HEX: FF [ skip-utf16le-bom ] }
|
||||
{ HEX: FE [ skip-utf16be-bom ] }
|
||||
{ f [ "" ] }
|
||||
[ drop utf8 decode-input-if f ]
|
||||
! Same problem as with <e`>, in the case of XML chunks?
|
||||
} case ;
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: help.markup help.syntax kernel xml.data xml.errors
|
||||
xml.writer state-parser xml.tokenize xml.utilities xml.entities
|
||||
strings sequences io ;
|
||||
strings sequences io xml.entities.html ;
|
||||
IN: xml
|
||||
|
||||
HELP: string>xml
|
||||
|
@ -295,9 +295,6 @@ HELP: expected
|
|||
HELP: no-entity
|
||||
{ $class-description "XML parsing error describing the use of an undefined entity in a case where standalone is marked yes. Delegates to " { $link parsing-error } ". Contains one slot, thing, containing a string representing the entity." } ;
|
||||
|
||||
HELP: xml-string-error
|
||||
{ $class-description "XML parsing error that delegates to " { $link parsing-error } " and represents an other, unspecified error, which is represented by the slot string, containing a string describing the error." } ;
|
||||
|
||||
HELP: open-tag
|
||||
{ $class-description "represents a tag that does have children, ie is not a contained tag" }
|
||||
{ $notes "the constructor used for this class is simply " { $link <tag> } "." }
|
||||
|
@ -324,6 +321,15 @@ HELP: state-parse
|
|||
HELP: pre/post-content
|
||||
{ $class-description "describes the error where a non-whitespace string is used before or after the main tag in an XML document. Contains two slots: string contains the offending string, and pre? is t if it occured before the main tag and f if it occured after" } ;
|
||||
|
||||
HELP: unclosed-quote
|
||||
{ $class-description "describes the error where a quotation for an attribute value is opened but not closed before the end of the document." } ;
|
||||
|
||||
HELP: bad-name
|
||||
{ $class-description "describes the error where a name is used, for example in an XML tag or attribute key, which is invalid." } ;
|
||||
|
||||
HELP: quoteless-attr
|
||||
{ $class-description "describes the error where an attribute of an XML tag is missing quotes around a value." } ;
|
||||
|
||||
HELP: entities
|
||||
{ $description "a hash table from default XML entity names (like & and <) to the characters they represent. This is automatically included when parsing any XML document." }
|
||||
{ $see-also html-entities } ;
|
||||
|
@ -444,6 +450,9 @@ ARTICLE: { "xml" "errors" } "XML parsing errors"
|
|||
{ $subsection expected }
|
||||
{ $subsection no-entity }
|
||||
{ $subsection pre/post-content }
|
||||
{ $subsection unclosed-quote }
|
||||
{ $subsection bad-name }
|
||||
{ $subsection quoteless-attr }
|
||||
"Additionally, most of these errors delegate to " { $link parsing-error } " in order to provide more information"
|
||||
$nl
|
||||
"Note that, in parsing an XML document, only the first error is reported." ;
|
||||
|
@ -456,7 +465,7 @@ ARTICLE: { "xml" "entities" } "XML entities"
|
|||
{ $subsection with-html-entities } ;
|
||||
|
||||
ARTICLE: "xml" "XML parser"
|
||||
"The " { $vocab-link "xml" } " vocabulary implements the XML 1.1 standard, converting strings of text into XML and vice versa."
|
||||
"The " { $vocab-link "xml" } " vocabulary implements the XML 1.0 and 1.1 standards, converting strings of text into XML and vice versa."
|
||||
{ $subsection { "xml" "reading" } }
|
||||
{ $subsection { "xml" "writing" } }
|
||||
{ $subsection { "xml" "classes" } }
|
||||
|
|
|
@ -1,10 +1,9 @@
|
|||
! Copyright (C) 2005, 2006 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: io io.streams.string io.files kernel math namespaces
|
||||
prettyprint sequences arrays generic strings vectors
|
||||
xml.char-classes xml.data xml.errors xml.tokenize xml.writer
|
||||
xml.utilities state-parser assocs ascii io.encodings.utf8
|
||||
accessors xml.backend ;
|
||||
USING: accessors arrays io io.encodings.binary io.files
|
||||
io.streams.string kernel namespaces sequences state-parser strings
|
||||
xml.backend xml.data xml.errors xml.tokenize ascii
|
||||
xml.writer ;
|
||||
IN: xml
|
||||
|
||||
! -- Overall parser with data tree
|
||||
|
@ -23,7 +22,7 @@ GENERIC: process ( object -- )
|
|||
M: object process add-child ;
|
||||
|
||||
M: prolog process
|
||||
xml-stack get V{ { f V{ "" } } } =
|
||||
xml-stack get V{ { f V{ } } } =
|
||||
[ bad-prolog ] unless drop ;
|
||||
|
||||
M: instruction process
|
||||
|
@ -101,6 +100,7 @@ TUPLE: pull-xml scope ;
|
|||
text-now? on
|
||||
] H{ } make-assoc
|
||||
pull-xml boa ;
|
||||
! pull-xml needs to call start-document somewhere
|
||||
|
||||
: pull-event ( pull -- xml-event/f )
|
||||
scope>> [
|
||||
|
@ -133,11 +133,12 @@ TUPLE: pull-xml scope ;
|
|||
: sax ( stream quot: ( xml-elem -- ) -- )
|
||||
swap [
|
||||
reset-prolog init-ns-stack
|
||||
prolog-data get call-under
|
||||
start-document [ call-under ] when*
|
||||
sax-loop
|
||||
] state-parse ; inline recursive
|
||||
|
||||
: (read-xml) ( -- )
|
||||
start-document [ process ] when*
|
||||
[ process ] sax-loop ; inline
|
||||
|
||||
: (read-xml-chunk) ( stream -- prolog seq )
|
||||
|
@ -159,11 +160,12 @@ TUPLE: pull-xml scope ;
|
|||
<string-reader> read-xml ;
|
||||
|
||||
: string>xml-chunk ( string -- xml )
|
||||
<string-reader> read-xml-chunk ;
|
||||
t string-input?
|
||||
[ <string-reader> read-xml-chunk ] with-variable ;
|
||||
|
||||
: file>xml ( filename -- xml )
|
||||
! Autodetect encoding!
|
||||
utf8 <file-reader> read-xml ;
|
||||
binary <file-reader> read-xml ;
|
||||
|
||||
: xml-reprint ( string -- )
|
||||
string>xml print-xml ;
|
||||
|
|
|
@ -141,7 +141,7 @@ PRIVATE>
|
|||
|
||||
: fuel-get-article ( name -- ) article fuel-eval-set-result ;
|
||||
|
||||
MEMO: fuel-get-article-title ( name -- )
|
||||
: fuel-get-article-title ( name -- )
|
||||
articles get at [ article-title ] [ f ] if* fuel-eval-set-result ;
|
||||
|
||||
: fuel-word-help ( name -- ) (fuel-word-help) fuel-eval-set-result ;
|
||||
|
|
|
@ -18,16 +18,11 @@ beast.
|
|||
|
||||
(load-file "<path/to/factor/installation>/misc/fuel/fu.el")
|
||||
|
||||
or
|
||||
|
||||
(add-to-list load-path "<path/to/factor/installation>/fuel")
|
||||
(require 'fuel)
|
||||
|
||||
If all you want is a major mode for editing Factor code with pretty
|
||||
font colors and indentation, without running the factor listener
|
||||
inside Emacs, you can use instead:
|
||||
|
||||
(add-to-list load-path "<path/to/factor/installation>/fuel")
|
||||
(add-to-list 'load-path "<path/to/factor/installation>/fuel")
|
||||
(setq factor-mode-use-fuel nil)
|
||||
(require 'factor-mode)
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
((listp sexp)
|
||||
(case (car sexp)
|
||||
(:array (factor--seq 'V{ '} (cdr sexp)))
|
||||
(:seq (factor--seq '{ '} (cdr sexp)))
|
||||
(:quote (format "\\ %s" (factor `(:factor ,(cadr sexp)))))
|
||||
(:quotation (factor--seq '\[ '\] (cdr sexp)))
|
||||
(:using (factor `(USING: ,@(cdr sexp) :end)))
|
||||
|
|
|
@ -72,11 +72,21 @@
|
|||
|
||||
;;; Font lock:
|
||||
|
||||
(defun fuel-font-lock--syntactic-face (state)
|
||||
(cond ((nth 3 state) 'factor-font-lock-string)
|
||||
((char-equal (char-after (nth 8 state)) ?\ )
|
||||
(save-excursion
|
||||
(goto-char (nth 8 state))
|
||||
(beginning-of-line)
|
||||
(cond ((looking-at "USING: ") 'factor-font-lock-vocabulary-name)
|
||||
((looking-at "\\(TUPLE\\|SYMBOLS\\|VARS\\): ")
|
||||
'factor-font-lock-symbol)
|
||||
(t 'default))))
|
||||
(t 'factor-font-lock-comment)))
|
||||
|
||||
(defconst fuel-font-lock--font-lock-keywords
|
||||
`((,fuel-syntax--stack-effect-regex . 'factor-font-lock-stack-effect)
|
||||
(,fuel-syntax--parsing-words-regex . 'factor-font-lock-parsing-word)
|
||||
(,fuel-syntax--brace-words-regex 1 'factor-font-lock-parsing-word)
|
||||
("\\(P\\|SBUF\\)\"" 1 'factor-font-lock-parsing-word)
|
||||
(,fuel-syntax--vocab-ref-regexp 2 'factor-font-lock-vocabulary-name)
|
||||
(,fuel-syntax--declaration-words-regex . 'factor-font-lock-declaration)
|
||||
(,fuel-syntax--word-definition-regex 2 'factor-font-lock-word)
|
||||
|
@ -89,24 +99,26 @@
|
|||
(,fuel-syntax--type-definition-regex 2 'factor-font-lock-type-name)
|
||||
(,fuel-syntax--method-definition-regex (1 'factor-font-lock-type-name)
|
||||
(2 'factor-font-lock-word))
|
||||
(,fuel-syntax--parent-type-regex 2 'factor-font-lock-type-name)
|
||||
(,fuel-syntax--tuple-decl-regex 2 'factor-font-lock-type-name)
|
||||
(,fuel-syntax--constructor-regex . 'factor-font-lock-constructor)
|
||||
(,fuel-syntax--setter-regex . 'factor-font-lock-setter-word)
|
||||
(,fuel-syntax--getter-regex . 'factor-font-lock-getter-word)
|
||||
(,fuel-syntax--symbol-definition-regex 2 'factor-font-lock-symbol)
|
||||
(,fuel-syntax--bad-string-regex . 'factor-font-lock-invalid-syntax)))
|
||||
(,fuel-syntax--bad-string-regex . 'factor-font-lock-invalid-syntax)
|
||||
("\\(P\\|SBUF\\)\"" 1 'factor-font-lock-parsing-word)
|
||||
(,fuel-syntax--parsing-words-regex . 'factor-font-lock-parsing-word)))
|
||||
|
||||
(defun fuel-font-lock--font-lock-setup (&optional keywords no-syntax)
|
||||
(set (make-local-variable 'comment-start) "! ")
|
||||
(set (make-local-variable 'parse-sexp-lookup-properties) t)
|
||||
(set (make-local-variable 'font-lock-comment-face) 'factor-font-lock-comment)
|
||||
(set (make-local-variable 'font-lock-string-face) 'factor-font-lock-string)
|
||||
(set (make-local-variable 'font-lock-defaults)
|
||||
`(,(or keywords 'fuel-font-lock--font-lock-keywords)
|
||||
nil nil nil nil
|
||||
,@(if no-syntax nil
|
||||
(list (cons 'font-lock-syntactic-keywords
|
||||
fuel-syntax--syntactic-keywords))))))
|
||||
fuel-syntax--syntactic-keywords)
|
||||
(cons 'font-lock-syntactic-face-function
|
||||
'fuel-font-lock--syntactic-face))))))
|
||||
|
||||
|
||||
;;; Fontify strings as Factor code:
|
||||
|
|
|
@ -137,7 +137,8 @@
|
|||
|
||||
(defun fuel-help--get-article (name label)
|
||||
(message "Retrieving article ...")
|
||||
(let* ((cmd `(:fuel* ((,name fuel-get-article)) "fuel" t))
|
||||
(let* ((name (if (listp name) (cons :seq name) name))
|
||||
(cmd `(:fuel* ((,name fuel-get-article)) "fuel" t))
|
||||
(ret (fuel-eval--send/wait cmd))
|
||||
(res (fuel-eval--retort-result ret)))
|
||||
(if (not res)
|
||||
|
|
|
@ -31,7 +31,12 @@
|
|||
:group 'fuel)
|
||||
|
||||
(defcustom fuel-listener-factor-binary
|
||||
(expand-file-name "factor" fuel-factor-root-dir)
|
||||
(expand-file-name (cond ((eq system-type 'windows-nt)
|
||||
"factor.exe")
|
||||
((eq system-type 'darwin)
|
||||
"Factor.app/Contents/MacOS/factor")
|
||||
(t "factor"))
|
||||
fuel-factor-root-dir)
|
||||
"Full path to the factor executable to use when starting a listener."
|
||||
:type '(file :must-match t)
|
||||
:group 'fuel-listener)
|
||||
|
@ -132,8 +137,7 @@ buffer."
|
|||
|
||||
(defun fuel-listener--setup-completion ()
|
||||
(setq fuel-syntax--current-vocab-function 'fuel-listener--current-vocab)
|
||||
(setq fuel-syntax--usings-function 'fuel-listener--usings)
|
||||
(set-syntax-table fuel-syntax--syntax-table))
|
||||
(setq fuel-syntax--usings-function 'fuel-listener--usings))
|
||||
|
||||
|
||||
;;; Stack mode support
|
||||
|
@ -160,7 +164,6 @@ buffer."
|
|||
(set (make-local-variable 'comint-prompt-regexp) fuel-con--prompt-regex)
|
||||
(set (make-local-variable 'comint-use-prompt-regexp) t)
|
||||
(set (make-local-variable 'comint-prompt-read-only) t)
|
||||
(set-syntax-table fuel-syntax--syntax-table)
|
||||
(fuel-listener--setup-completion)
|
||||
(fuel-listener--setup-stack-mode))
|
||||
|
||||
|
|
|
@ -61,7 +61,7 @@
|
|||
|
||||
(defun fuel-markup--insert-button (label link type)
|
||||
(let ((label (format "%s" label))
|
||||
(link (format "%s" link)))
|
||||
(link (if (listp link) link (format "%s" link))))
|
||||
(insert-text-button label
|
||||
:type 'fuel-markup--button
|
||||
'markup-link link
|
||||
|
@ -70,8 +70,9 @@
|
|||
'help-echo (format "%s (%s)" label type))))
|
||||
|
||||
(defun fuel-markup--article-title (name)
|
||||
(fuel-eval--retort-result
|
||||
(fuel-eval--send/wait `(:fuel* ((,name fuel-get-article-title)) "fuel"))))
|
||||
(let ((name (if (listp name) (cons :seq name) name)))
|
||||
(fuel-eval--retort-result
|
||||
(fuel-eval--send/wait `(:fuel* ((,name fuel-get-article-title)) "fuel")))))
|
||||
|
||||
(defun fuel-markup--link-at-point ()
|
||||
(let ((button (condition-case nil (forward-button 0) (error nil))))
|
||||
|
|
|
@ -79,7 +79,7 @@
|
|||
(regexp-opt fuel-syntax--declaration-words 'words))
|
||||
|
||||
(defsubst fuel-syntax--second-word-regex (prefixes)
|
||||
(format "^%s +\\([^ \r\n]+\\)" (regexp-opt prefixes t)))
|
||||
(format "%s +\\([^ \r\n]+\\)" (regexp-opt prefixes t)))
|
||||
|
||||
(defconst fuel-syntax--method-definition-regex
|
||||
"^M: +\\([^ ]+\\) +\\([^ ]+\\)")
|
||||
|
@ -87,14 +87,22 @@
|
|||
(defconst fuel-syntax--integer-regex
|
||||
"\\_<-?[0-9]+\\_>")
|
||||
|
||||
(defconst fuel-syntax--ratio-regex
|
||||
"\\_<-?\\([0-9]+\\+\\)?[0-9]+/-?[0-9]+\\_>")
|
||||
(defconst fuel-syntax--raw-float-regex
|
||||
"[0-9]*\\.[0-9]*\\([eE][+-]?[0-9]+\\)?")
|
||||
|
||||
(defconst fuel-syntax--float-regex
|
||||
"\\_<-?[0-9]+\\.[0-9]*\\([eE][+-]?[0-9]+\\)?\\_>")
|
||||
(format "\\_<-?%s\\_>" fuel-syntax--raw-float-regex))
|
||||
|
||||
(defconst fuel-syntax--number-regex
|
||||
(format "\\([0-9]+\\|%s\\)" fuel-syntax--raw-float-regex))
|
||||
|
||||
(defconst fuel-syntax--ratio-regex
|
||||
(format "\\_<[+-]?%s/-?%s\\_>"
|
||||
fuel-syntax--number-regex
|
||||
fuel-syntax--number-regex))
|
||||
|
||||
(defconst fuel-syntax--bad-string-regex
|
||||
"\"\\([^\"]\\|\\\\\"\\)*\n")
|
||||
"\\_<\"[^>]\\([^\"\n]\\|\\\\\"\\)*\n")
|
||||
|
||||
(defconst fuel-syntax--word-definition-regex
|
||||
(fuel-syntax--second-word-regex
|
||||
|
@ -114,8 +122,8 @@
|
|||
(defconst fuel-syntax--type-definition-regex
|
||||
(fuel-syntax--second-word-regex '("MIXIN:" "TUPLE:" "SINGLETON:" "UNION:")))
|
||||
|
||||
(defconst fuel-syntax--parent-type-regex
|
||||
"^\\(TUPLE\\|PREDICTE\\): +[^ ]+ +< +\\([^ ]+\\)")
|
||||
(defconst fuel-syntax--tuple-decl-regex
|
||||
"^TUPLE: +\\([^ \n]+\\) +< +\\([^ \n]+\\)\\_>")
|
||||
|
||||
(defconst fuel-syntax--constructor-regex "<[^ >]+>")
|
||||
|
||||
|
@ -125,7 +133,8 @@
|
|||
(defconst fuel-syntax--symbol-definition-regex
|
||||
(fuel-syntax--second-word-regex '("SYMBOL:" "VAR:")))
|
||||
|
||||
(defconst fuel-syntax--stack-effect-regex " ( .* )")
|
||||
(defconst fuel-syntax--stack-effect-regex
|
||||
"\\( ( .* )\\)\\|\\( (( .* ))\\)")
|
||||
|
||||
(defconst fuel-syntax--using-lines-regex "^USING: +\\([^;]+\\);")
|
||||
|
||||
|
@ -163,26 +172,26 @@
|
|||
fuel-syntax--declaration-words-regex))
|
||||
|
||||
(defconst fuel-syntax--single-liner-regex
|
||||
(format "^%s" (regexp-opt '("ABOUT:"
|
||||
"ARTICLE:"
|
||||
"ALIAS:"
|
||||
"CONSTANT:" "C:"
|
||||
"DEFER:"
|
||||
"FORGET:"
|
||||
"GENERIC:" "GENERIC#"
|
||||
"HELP:" "HEX:" "HOOK:"
|
||||
"IN:" "INSTANCE:"
|
||||
"MAIN:" "MATH:" "MIXIN:"
|
||||
"OCT:"
|
||||
"POSTPONE:" "PRIVATE>" "<PRIVATE"
|
||||
"QUALIFIED-WITH:" "QUALIFIED:"
|
||||
"RENAME:"
|
||||
"SINGLETON:" "SLOT:" "SYMBOL:"
|
||||
"USE:"
|
||||
"VAR:"))))
|
||||
(regexp-opt '("ABOUT:"
|
||||
"ARTICLE:"
|
||||
"ALIAS:"
|
||||
"CONSTANT:" "C:"
|
||||
"DEFER:"
|
||||
"FORGET:"
|
||||
"GENERIC:" "GENERIC#"
|
||||
"HELP:" "HEX:" "HOOK:"
|
||||
"IN:" "INSTANCE:"
|
||||
"MAIN:" "MATH:" "MIXIN:"
|
||||
"OCT:"
|
||||
"POSTPONE:" "PRIVATE>" "<PRIVATE"
|
||||
"QUALIFIED-WITH:" "QUALIFIED:"
|
||||
"RENAME:"
|
||||
"SINGLETON:" "SLOT:" "SYMBOL:"
|
||||
"USE:"
|
||||
"VAR:")))
|
||||
|
||||
(defconst fuel-syntax--begin-of-def-regex
|
||||
(format "^USING: \\|\\(%s\\)\\|\\(%s .*\\)"
|
||||
(format "^USING: \\|\\(%s\\)\\|\\(^%s .*\\)"
|
||||
fuel-syntax--definition-start-regex
|
||||
fuel-syntax--single-liner-regex))
|
||||
|
||||
|
@ -190,7 +199,7 @@
|
|||
(format "^.*%s" fuel-syntax--definition-end-regex))
|
||||
|
||||
(defconst fuel-syntax--end-of-def-regex
|
||||
(format "\\(%s\\)\\|\\(%s .*\\)"
|
||||
(format "\\(%s\\)\\|\\(^%s .*\\)"
|
||||
fuel-syntax--end-of-def-line-regex
|
||||
fuel-syntax--single-liner-regex))
|
||||
|
||||
|
@ -220,13 +229,21 @@
|
|||
table))
|
||||
|
||||
(defconst fuel-syntax--syntactic-keywords
|
||||
`(;; Comments:
|
||||
`(;; CHARs:
|
||||
("CHAR: \\(.\\)\\( \\|$\\)" (1 "w"))
|
||||
;; Comments:
|
||||
("\\_<\\(#?!\\) .*\\(\n\\|$\\)" (1 "<") (2 ">"))
|
||||
("\\_<\\(#?!\\)\\(\n\\|$\\)" (1 "<") (2 ">"))
|
||||
;; CHARs:
|
||||
("CHAR: \\(.\\)\\( \\|$\\)" (1 "w"))
|
||||
;; Strings
|
||||
("\\(\"\\)\\([^\n\r\f\\\"]\\|\\\\\"?\\)*\\(\"\\)" (1 "\"") (3 "\""))
|
||||
("\\_<\\(\"\\)\\([^\n\r\f\"]\\|\\\\\"\\)*\\(\"\\)\\_>" (1 "\"") (3 "\""))
|
||||
("\\_<<\\(\"\\)\\_>" (1 "\""))
|
||||
("\\_<\\(\"\\)>\\_>" (1 "\""))
|
||||
;; Multiline constructs
|
||||
("\\_<USING:\\( \\)\\(;\\)" (1 "<b") (2 ">b"))
|
||||
("\\_<USING:\\( \\)" (1 "<b"))
|
||||
("\\_<TUPLE: +\\w+? +< +\\w+? *\\( \\)" (1 "<b"))
|
||||
("\\_<\\(TUPLE\\|SYMBOLS\\|VARS\\): +\\w+? *\\( \\)\\([^<]\\|\\_>\\)" (2 "<b"))
|
||||
("\\(\n\\| \\);\\_>" (1 ">b"))
|
||||
;; Let and lambda:
|
||||
("\\_<\\(!(\\) .* \\()\\)" (1 "<") (2 ">"))
|
||||
("\\(\\[\\)\\(let\\|wlet\\|let\\*\\)\\( \\|$\\)" (1 "(]"))
|
||||
|
|
Loading…
Reference in New Issue