Merge branch 'master' of git://factorcode.org/git/factor
						commit
						b919e1848c
					
				| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
Doug Coleman
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,18 @@
 | 
			
		|||
! Copyright (C) 2009 Doug Coleman.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: editors io.launcher kernel io.directories.search.windows
 | 
			
		||||
math.parser namespaces sequences io.files arrays windows.shell32
 | 
			
		||||
io.directories.search ;
 | 
			
		||||
IN: editors.notepad
 | 
			
		||||
 | 
			
		||||
: notepad-path ( -- path )
 | 
			
		||||
    \ notepad-path get [
 | 
			
		||||
        windows-directory t
 | 
			
		||||
        [ "notepad.exe" tail? ] find-file
 | 
			
		||||
    ] unless* ;
 | 
			
		||||
 | 
			
		||||
: notepad ( file line -- )
 | 
			
		||||
    drop notepad-path swap 2array run-detached drop ;
 | 
			
		||||
 | 
			
		||||
[ notepad ] edit-hook set-global
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -9,6 +9,11 @@ IN: editors.scite
 | 
			
		|||
    \ scite-path get-global [
 | 
			
		||||
        "Scintilla Text Editor" t
 | 
			
		||||
        [ >lower "scite.exe" tail? ] find-in-program-files
 | 
			
		||||
 | 
			
		||||
        [
 | 
			
		||||
            "SciTE Source Code Editor" t
 | 
			
		||||
            [ >lower "scite.exe" tail? ] find-in-program-files
 | 
			
		||||
        ] unless*
 | 
			
		||||
    ] unless* ;
 | 
			
		||||
 | 
			
		||||
: scite-command ( file line -- cmd )
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -32,21 +32,21 @@ HELP: find-file
 | 
			
		|||
HELP: find-in-directories
 | 
			
		||||
{ $values
 | 
			
		||||
     { "directories" "a sequence of pathnames" } { "bfs?" "a boolean, breadth-first or depth-first" } { "quot" quotation }
 | 
			
		||||
     { "path'" "a pathname string" }
 | 
			
		||||
     { "path'/f" "a pathname string or f" }
 | 
			
		||||
}
 | 
			
		||||
{ $description "Finds the first file in the input directories matching the predicate quotation in a breadth-first or depth-first traversal." } ;
 | 
			
		||||
 | 
			
		||||
HELP: find-all-files
 | 
			
		||||
{ $values
 | 
			
		||||
     { "path" "a pathname string" } { "bfs?" "a boolean, breadth-first or depth-first" } { "quot" quotation }
 | 
			
		||||
     { "paths" "a sequence of pathname strings" }
 | 
			
		||||
     { "paths/f" "a sequence of pathname strings or f" }
 | 
			
		||||
}
 | 
			
		||||
{ $description "Finds all files in the input directory matching the predicate quotation in a breadth-first or depth-first traversal." } ;
 | 
			
		||||
 | 
			
		||||
HELP: find-all-in-directories
 | 
			
		||||
{ $values
 | 
			
		||||
     { "directories" "a sequence of directory paths" } { "bfs?" "a boolean, breadth-first or depth-first" } { "quot" quotation }
 | 
			
		||||
     { "paths" "a sequence of pathname strings" }
 | 
			
		||||
     { "paths/f" "a sequence of pathname strings or f" }
 | 
			
		||||
}
 | 
			
		||||
{ $description "Finds all files in the input directories matching the predicate quotation in a breadth-first or depth-first traversal." } ;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -46,17 +46,21 @@ PRIVATE>
 | 
			
		|||
    [ ] accumulator [ each-file ] dip ;
 | 
			
		||||
 | 
			
		||||
: find-file ( path bfs? quot: ( obj -- ? ) -- path/f )
 | 
			
		||||
    [ <directory-iterator> ] dip
 | 
			
		||||
    [ keep and ] curry iterate-directory ; inline
 | 
			
		||||
    '[
 | 
			
		||||
        _ _ _ [ <directory-iterator> ] dip
 | 
			
		||||
        [ keep and ] curry iterate-directory
 | 
			
		||||
    ] [ drop f ] recover ; inline
 | 
			
		||||
 | 
			
		||||
: find-all-files ( path bfs? quot: ( obj -- ? ) -- paths )
 | 
			
		||||
    [ <directory-iterator> ] dip
 | 
			
		||||
    pusher [ [ f ] compose iterate-directory drop ] dip ; inline
 | 
			
		||||
: find-all-files ( path bfs? quot: ( obj -- ? ) -- paths/f )
 | 
			
		||||
    '[
 | 
			
		||||
        _ _ _ [ <directory-iterator> ] dip
 | 
			
		||||
        pusher [ [ f ] compose iterate-directory drop ] dip
 | 
			
		||||
    ] [ drop f ] recover ; inline
 | 
			
		||||
 | 
			
		||||
: find-in-directories ( directories bfs? quot: ( obj -- ? ) -- path' )
 | 
			
		||||
: find-in-directories ( directories bfs? quot: ( obj -- ? ) -- path'/f )
 | 
			
		||||
    '[ _ _ find-file ] attempt-all ;
 | 
			
		||||
 | 
			
		||||
: find-all-in-directories ( directories bfs? quot: ( obj -- ? ) -- paths )
 | 
			
		||||
: find-all-in-directories ( directories bfs? quot: ( obj -- ? ) -- paths/f )
 | 
			
		||||
    '[ _ _ find-all-files ] map concat ;
 | 
			
		||||
 | 
			
		||||
os windows? [ "io.directories.search.windows" require ] when
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -5,7 +5,7 @@ io.directories.search ;
 | 
			
		|||
IN: io.directories.search.windows
 | 
			
		||||
 | 
			
		||||
: program-files-directories ( -- array )
 | 
			
		||||
    program-files program-files-x86 2array ; inline
 | 
			
		||||
    program-files program-files-x86 2array harvest ; inline
 | 
			
		||||
 | 
			
		||||
: find-in-program-files ( base-directory bfs? quot -- path )
 | 
			
		||||
    [
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -46,3 +46,10 @@ pack strings tools.test ;
 | 
			
		|||
[ f ] [ "" [ read-c-string ] with-string-reader ] unit-test
 | 
			
		||||
[ 5 ] [ "FRAM\0\u000005\0\0\0\0\0\0\0" [ read-c-string drop read-u64 ] with-string-reader ] unit-test
 | 
			
		||||
 | 
			
		||||
[ 9 ] [ "iic" packed-length ] unit-test
 | 
			
		||||
[ "iii" read-packed-le ] must-infer
 | 
			
		||||
[ "iii" read-packed-be ] must-infer
 | 
			
		||||
[ "iii" read-packed-native ] must-infer
 | 
			
		||||
[ "iii" unpack-le ] must-infer
 | 
			
		||||
[ "iii" unpack-be ] must-infer
 | 
			
		||||
[ "iii" unpack-native ] must-infer
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,7 +1,9 @@
 | 
			
		|||
! Copyright (C) 2009 Doug Coleman.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: alien alien.c-types arrays assocs byte-arrays io
 | 
			
		||||
io.binary io.streams.string kernel math math.parser namespaces
 | 
			
		||||
make parser prettyprint quotations sequences strings vectors
 | 
			
		||||
words macros math.functions math.bitwise ;
 | 
			
		||||
words macros math.functions math.bitwise fry ;
 | 
			
		||||
IN: pack
 | 
			
		||||
 | 
			
		||||
SYMBOL: big-endian
 | 
			
		||||
| 
						 | 
				
			
			@ -9,6 +11,13 @@ SYMBOL: big-endian
 | 
			
		|||
: big-endian? ( -- ? )
 | 
			
		||||
    1 <int> *char zero? ;
 | 
			
		||||
 | 
			
		||||
<PRIVATE
 | 
			
		||||
 | 
			
		||||
: set-big-endian ( -- )
 | 
			
		||||
    big-endian? big-endian set ; inline
 | 
			
		||||
 | 
			
		||||
PRIVATE>
 | 
			
		||||
 | 
			
		||||
: >endian ( obj n -- str )
 | 
			
		||||
    big-endian get [ >be ] [ >le ] if ; inline
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -39,6 +48,8 @@ M: string b, ( n string -- ) heap-size b, ;
 | 
			
		|||
: double, ( n -- ) double>bits 8 b, ;
 | 
			
		||||
: c-string, ( str -- ) % 0 u8, ;
 | 
			
		||||
 | 
			
		||||
<PRIVATE
 | 
			
		||||
 | 
			
		||||
: (>128-ber) ( n -- )
 | 
			
		||||
    dup 0 > [
 | 
			
		||||
        [ HEX: 7f bitand HEX: 80 bitor , ] keep -7 shift
 | 
			
		||||
| 
						 | 
				
			
			@ -47,6 +58,8 @@ M: string b, ( n string -- ) heap-size b, ;
 | 
			
		|||
        drop
 | 
			
		||||
    ] if ;
 | 
			
		||||
 | 
			
		||||
PRIVATE>
 | 
			
		||||
 | 
			
		||||
: >128-ber ( n -- str )
 | 
			
		||||
    [
 | 
			
		||||
        [ HEX: 7f bitand , ] keep -7 shift
 | 
			
		||||
| 
						 | 
				
			
			@ -70,7 +83,7 @@ M: string b, ( n string -- ) heap-size b, ;
 | 
			
		|||
: read-s32 ( -- n ) 4 read-signed ;
 | 
			
		||||
: read-u32 ( -- n ) 4 read-unsigned ;
 | 
			
		||||
: read-s64 ( -- n ) 8 read-signed ;
 | 
			
		||||
: read-u64 ( -- n ) 8 read-signed ;
 | 
			
		||||
: read-u64 ( -- n ) 8 read-unsigned ;
 | 
			
		||||
: read-s128 ( -- n ) 16 read-signed ;
 | 
			
		||||
: read-u128 ( -- n ) 16 read-unsigned ;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -81,7 +94,7 @@ M: string b, ( n string -- ) heap-size b, ;
 | 
			
		|||
    8 read endian> bits>double ;
 | 
			
		||||
 | 
			
		||||
: read-c-string ( -- str/f )
 | 
			
		||||
    "\0" read-until [ drop f ] unless ;
 | 
			
		||||
    "\0" read-until swap and ;
 | 
			
		||||
 | 
			
		||||
: read-c-string* ( n -- str/f )
 | 
			
		||||
    read [ zero? ] trim-right [ f ] when-empty ;
 | 
			
		||||
| 
						 | 
				
			
			@ -94,7 +107,9 @@ M: string b, ( n string -- ) heap-size b, ;
 | 
			
		|||
: read-128-ber ( -- n )
 | 
			
		||||
    0 (read-128-ber) ;
 | 
			
		||||
 | 
			
		||||
: pack-table ( -- hash )
 | 
			
		||||
<PRIVATE
 | 
			
		||||
 | 
			
		||||
CONSTANT: pack-table
 | 
			
		||||
    H{
 | 
			
		||||
        { CHAR: c s8, }
 | 
			
		||||
        { CHAR: C u8, }
 | 
			
		||||
| 
						 | 
				
			
			@ -110,9 +125,9 @@ M: string b, ( n string -- ) heap-size b, ;
 | 
			
		|||
        { CHAR: F float, }
 | 
			
		||||
        { CHAR: d double, }
 | 
			
		||||
        { CHAR: D double, }
 | 
			
		||||
    } ;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
: unpack-table ( -- hash )
 | 
			
		||||
CONSTANT: unpack-table
 | 
			
		||||
    H{
 | 
			
		||||
        { CHAR: c read-s8 }
 | 
			
		||||
        { CHAR: C read-u8 }
 | 
			
		||||
| 
						 | 
				
			
			@ -128,47 +143,79 @@ M: string b, ( n string -- ) heap-size b, ;
 | 
			
		|||
        { CHAR: F read-float }
 | 
			
		||||
        { CHAR: d read-double }
 | 
			
		||||
        { CHAR: D read-double }
 | 
			
		||||
    } ;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
MACRO: (pack) ( seq str -- quot )
 | 
			
		||||
    [
 | 
			
		||||
        [
 | 
			
		||||
            [
 | 
			
		||||
                swap , pack-table at ,
 | 
			
		||||
            ] 2each
 | 
			
		||||
        ] [ ] make 1quotation %
 | 
			
		||||
       [ B{ } make ] %
 | 
			
		||||
    ] [ ] make ;
 | 
			
		||||
CONSTANT: packed-length-table
 | 
			
		||||
    H{
 | 
			
		||||
        { CHAR: c 1 }
 | 
			
		||||
        { CHAR: C 1 }
 | 
			
		||||
        { CHAR: s 2 }
 | 
			
		||||
        { CHAR: S 2 }
 | 
			
		||||
        { CHAR: t 3 }
 | 
			
		||||
        { CHAR: T 3 }
 | 
			
		||||
        { CHAR: i 4 }
 | 
			
		||||
        { CHAR: I 4 }
 | 
			
		||||
        { CHAR: q 8 }
 | 
			
		||||
        { CHAR: Q 8 }
 | 
			
		||||
        { CHAR: f 4 }
 | 
			
		||||
        { CHAR: F 4 }
 | 
			
		||||
        { CHAR: d 8 }
 | 
			
		||||
        { CHAR: D 8 }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
MACRO: pack ( seq str -- quot )
 | 
			
		||||
    [ pack-table at 1quotation '[ _ @ ] ] [ ] 2map-as concat
 | 
			
		||||
    '[ _ B{ } make ] ;
 | 
			
		||||
 | 
			
		||||
PRIVATE>
 | 
			
		||||
 
 | 
			
		||||
: pack-native ( seq str -- seq )
 | 
			
		||||
    [
 | 
			
		||||
        big-endian? big-endian set (pack)
 | 
			
		||||
    ] with-scope ;
 | 
			
		||||
    [ set-big-endian pack ] with-scope ; inline
 | 
			
		||||
 | 
			
		||||
: pack-be ( seq str -- seq )
 | 
			
		||||
    [ big-endian on (pack) ] with-scope ;
 | 
			
		||||
    [ big-endian on pack ] with-scope ; inline
 | 
			
		||||
 | 
			
		||||
: pack-le ( seq str -- seq )
 | 
			
		||||
    [ big-endian off (pack) ] with-scope ;
 | 
			
		||||
    [ big-endian off pack ] with-scope ; inline
 | 
			
		||||
 | 
			
		||||
<PRIVATE
 | 
			
		||||
 | 
			
		||||
MACRO: (unpack) ( str -- quot )
 | 
			
		||||
    [
 | 
			
		||||
        [
 | 
			
		||||
            [ unpack-table at , \ , , ] each
 | 
			
		||||
        ] [ ] make
 | 
			
		||||
        1quotation [ { } make ] append
 | 
			
		||||
        1quotation %
 | 
			
		||||
        \ with-string-reader ,
 | 
			
		||||
    ] [ ] make ;
 | 
			
		||||
MACRO: unpack ( str -- quot )
 | 
			
		||||
    [ unpack-table at 1quotation '[ @ , ] ] { } map-as concat
 | 
			
		||||
    '[ [ _ { } make ] with-string-reader ] ;
 | 
			
		||||
 | 
			
		||||
PRIVATE>
 | 
			
		||||
 | 
			
		||||
: unpack-native ( seq str -- seq )
 | 
			
		||||
    [
 | 
			
		||||
        big-endian? big-endian set (unpack)
 | 
			
		||||
    ] with-scope ;
 | 
			
		||||
    [ set-big-endian unpack ] with-scope ; inline
 | 
			
		||||
 | 
			
		||||
: unpack-be ( seq str -- seq )
 | 
			
		||||
    [ big-endian on (unpack) ] with-scope ;
 | 
			
		||||
    [ big-endian on unpack ] with-scope ; inline
 | 
			
		||||
 | 
			
		||||
: unpack-le ( seq str -- seq )
 | 
			
		||||
    [ big-endian off (unpack) ] with-scope ;
 | 
			
		||||
    [ big-endian off unpack ] with-scope ; inline
 | 
			
		||||
 | 
			
		||||
: packed-length ( str -- n )
 | 
			
		||||
    [ packed-length-table at ] sigma ;
 | 
			
		||||
 | 
			
		||||
ERROR: packed-read-fail str bytes ;
 | 
			
		||||
 | 
			
		||||
<PRIVATE
 | 
			
		||||
 | 
			
		||||
: read-packed-bytes ( str -- bytes )
 | 
			
		||||
    dup packed-length [ read dup length ] keep =
 | 
			
		||||
    [ nip ] [ packed-read-fail ] if ; inline
 | 
			
		||||
 | 
			
		||||
PRIVATE>
 | 
			
		||||
 | 
			
		||||
: read-packed ( str quot -- seq )
 | 
			
		||||
    [ read-packed-bytes ] swap bi ; inline
 | 
			
		||||
 | 
			
		||||
: read-packed-le ( str -- seq )
 | 
			
		||||
    [ unpack-le ] read-packed ; inline
 | 
			
		||||
 | 
			
		||||
: read-packed-be ( str -- seq )
 | 
			
		||||
    [ unpack-be ] read-packed ; inline
 | 
			
		||||
 | 
			
		||||
: read-packed-native ( str -- seq )
 | 
			
		||||
    [ unpack-native ] read-packed ; inline
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -39,16 +39,14 @@ ERROR: roman-range-error n ;
 | 
			
		|||
PRIVATE>
 | 
			
		||||
 | 
			
		||||
: >roman ( n -- str )
 | 
			
		||||
    dup roman-range-check [
 | 
			
		||||
        (>roman)
 | 
			
		||||
    ] "" make ;
 | 
			
		||||
    dup roman-range-check
 | 
			
		||||
    [ (>roman) ] "" make ;
 | 
			
		||||
 | 
			
		||||
: >ROMAN ( n -- str ) >roman >upper ;
 | 
			
		||||
 | 
			
		||||
: roman> ( str -- n )
 | 
			
		||||
    >lower [ roman<= ] monotonic-split [
 | 
			
		||||
        (roman>)
 | 
			
		||||
    ] map sum ;
 | 
			
		||||
    >lower [ roman<= ] monotonic-split
 | 
			
		||||
    [ (roman>) ] sigma ;
 | 
			
		||||
 | 
			
		||||
<PRIVATE
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3,7 +3,7 @@
 | 
			
		|||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: xml.utilities kernel assocs xml.generator math.order
 | 
			
		||||
    strings sequences xml.data xml.writer
 | 
			
		||||
    io.streams.string combinators xml xml.entities io.files io
 | 
			
		||||
    io.streams.string combinators xml xml.entities.html io.files io
 | 
			
		||||
    http.client namespaces make xml.generator hashtables
 | 
			
		||||
    calendar.format accessors continuations urls present ;
 | 
			
		||||
IN: syndication
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -61,7 +61,7 @@ SYMBOL: table
 | 
			
		|||
: eval-seq ( seq -- seq ) [ dup word? [ execute ] when ] map ;
 | 
			
		||||
 | 
			
		||||
: (set-table) ( class1 class2 val -- )
 | 
			
		||||
    -rot table get nth [ swap or ] change-nth ;
 | 
			
		||||
    [ table get nth ] dip '[ _ or ] change-nth ;
 | 
			
		||||
 | 
			
		||||
: set-table ( classes1 classes2 val -- )
 | 
			
		||||
    [ [ eval-seq ] bi@ ] dip
 | 
			
		||||
| 
						 | 
				
			
			@ -199,8 +199,8 @@ to: word-table
 | 
			
		|||
: walk-down ( str i -- j )
 | 
			
		||||
    dupd (walk-down) [ 1- (walk-down) ] [ drop f ] if* ;
 | 
			
		||||
 | 
			
		||||
: word-break? ( table-entry i str -- ? )
 | 
			
		||||
    spin {
 | 
			
		||||
: word-break? ( str i table-entry -- ? )
 | 
			
		||||
    {
 | 
			
		||||
        { t [ 2drop f ] }
 | 
			
		||||
        { f [ 2drop t ] }
 | 
			
		||||
        { check-letter-after
 | 
			
		||||
| 
						 | 
				
			
			@ -214,10 +214,10 @@ to: word-table
 | 
			
		|||
    } case ;
 | 
			
		||||
 | 
			
		||||
:: word-break-next ( old-class new-char i str -- next-class ? )
 | 
			
		||||
    new-char dup format/extended?
 | 
			
		||||
    [ drop old-class dup { 1 2 3 } member? ] [
 | 
			
		||||
        word-break-prop old-class over word-table-nth
 | 
			
		||||
        i str word-break?
 | 
			
		||||
    new-char format/extended?
 | 
			
		||||
    [ old-class dup { 1 2 3 } member? ] [
 | 
			
		||||
        new-char word-break-prop old-class over word-table-nth
 | 
			
		||||
        [ str i ] dip word-break?
 | 
			
		||||
    ] if ;
 | 
			
		||||
 | 
			
		||||
PRIVATE>
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2,8 +2,8 @@
 | 
			
		|||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: alien alien.c-types alien.strings alien.syntax
 | 
			
		||||
combinators io.encodings.utf16n io.files io.pathnames kernel
 | 
			
		||||
windows windows.com windows.com.syntax windows.ole32
 | 
			
		||||
windows.user32 ;
 | 
			
		||||
windows windows.com windows.com.syntax windows.user32
 | 
			
		||||
windows.ole32 ;
 | 
			
		||||
IN: windows.shell32
 | 
			
		||||
 | 
			
		||||
CONSTANT: CSIDL_DESKTOP HEX: 00
 | 
			
		||||
| 
						 | 
				
			
			@ -88,13 +88,10 @@ ALIAS: ShellExecute ShellExecuteW
 | 
			
		|||
: open-in-explorer ( dir -- )
 | 
			
		||||
    f "open" rot (normalize-path) f f SW_SHOWNORMAL ShellExecute drop ;
 | 
			
		||||
 | 
			
		||||
: shell32-error ( n -- )
 | 
			
		||||
    ole32-error ; inline
 | 
			
		||||
 | 
			
		||||
: shell32-directory ( n -- str )
 | 
			
		||||
    f swap f SHGFP_TYPE_DEFAULT
 | 
			
		||||
    MAX_UNICODE_PATH "ushort" <c-array>
 | 
			
		||||
    [ SHGetFolderPath shell32-error ] keep utf16n alien>string ;
 | 
			
		||||
    [ SHGetFolderPath drop ] keep utf16n alien>string ;
 | 
			
		||||
 | 
			
		||||
: desktop ( -- str )
 | 
			
		||||
    CSIDL_DESKTOPDIRECTORY shell32-directory ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,6 +1,7 @@
 | 
			
		|||
! Copyright (C) 2005, 2006 Daniel Ehrenberg
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: namespaces make kernel assocs sequences fry ;
 | 
			
		||||
USING: namespaces make kernel assocs sequences fry values
 | 
			
		||||
io.files io.encodings.binary ;
 | 
			
		||||
IN: xml.entities
 | 
			
		||||
 | 
			
		||||
: entities-out
 | 
			
		||||
| 
						 | 
				
			
			@ -36,265 +37,7 @@ IN: xml.entities
 | 
			
		|||
        { "quot"  CHAR: "  }
 | 
			
		||||
    } ;
 | 
			
		||||
 | 
			
		||||
: html-entities
 | 
			
		||||
    #! generated from:
 | 
			
		||||
    #! http://www.w3.org/TR/REC-html40/sgml/entities.html
 | 
			
		||||
    H{
 | 
			
		||||
        { "nbsp"   160 }
 | 
			
		||||
        { "iexcl"  161 }
 | 
			
		||||
        { "cent"   162 }
 | 
			
		||||
        { "pound"  163 }
 | 
			
		||||
        { "curren" 164 }
 | 
			
		||||
        { "yen"    165 }
 | 
			
		||||
        { "brvbar" 166 }
 | 
			
		||||
        { "sect"   167 }
 | 
			
		||||
        { "uml"    168 }
 | 
			
		||||
        { "copy"   169 }
 | 
			
		||||
        { "ordf"   170 }
 | 
			
		||||
        { "laquo"  171 }
 | 
			
		||||
        { "not"    172 }
 | 
			
		||||
        { "shy"    173 }
 | 
			
		||||
        { "reg"    174 }
 | 
			
		||||
        { "macr"   175 }
 | 
			
		||||
        { "deg"    176 }
 | 
			
		||||
        { "plusmn" 177 }
 | 
			
		||||
        { "sup2"   178 }
 | 
			
		||||
        { "sup3"   179 }
 | 
			
		||||
        { "acute"  180 }
 | 
			
		||||
        { "micro"  181 }
 | 
			
		||||
        { "para"   182 }
 | 
			
		||||
        { "middot" 183 }
 | 
			
		||||
        { "cedil"  184 }
 | 
			
		||||
        { "sup1"   185 }
 | 
			
		||||
        { "ordm"   186 }
 | 
			
		||||
        { "raquo"  187 }
 | 
			
		||||
        { "frac14" 188 }
 | 
			
		||||
        { "frac12" 189 }
 | 
			
		||||
        { "frac34" 190 }
 | 
			
		||||
        { "iquest" 191 }
 | 
			
		||||
        { "Agrave" 192 }
 | 
			
		||||
        { "Aacute" 193 }
 | 
			
		||||
        { "Acirc"  194 }
 | 
			
		||||
        { "Atilde" 195 }
 | 
			
		||||
        { "Auml"   196 }
 | 
			
		||||
        { "Aring"  197 }
 | 
			
		||||
        { "AElig"  198 }
 | 
			
		||||
        { "Ccedil" 199 }
 | 
			
		||||
        { "Egrave" 200 }
 | 
			
		||||
        { "Eacute" 201 }
 | 
			
		||||
        { "Ecirc"  202 }
 | 
			
		||||
        { "Euml"   203 }
 | 
			
		||||
        { "Igrave" 204 }
 | 
			
		||||
        { "Iacute" 205 }
 | 
			
		||||
        { "Icirc"  206 }
 | 
			
		||||
        { "Iuml"   207 }
 | 
			
		||||
        { "ETH"    208 }
 | 
			
		||||
        { "Ntilde" 209 }
 | 
			
		||||
        { "Ograve" 210 }
 | 
			
		||||
        { "Oacute" 211 }
 | 
			
		||||
        { "Ocirc"  212 }
 | 
			
		||||
        { "Otilde" 213 }
 | 
			
		||||
        { "Ouml"   214 }
 | 
			
		||||
        { "times"  215 }
 | 
			
		||||
        { "Oslash" 216 }
 | 
			
		||||
        { "Ugrave" 217 }
 | 
			
		||||
        { "Uacute" 218 }
 | 
			
		||||
        { "Ucirc"  219 }
 | 
			
		||||
        { "Uuml"   220 }
 | 
			
		||||
        { "Yacute" 221 }
 | 
			
		||||
        { "THORN"  222 }
 | 
			
		||||
        { "szlig"  223 }
 | 
			
		||||
        { "agrave" 224 }
 | 
			
		||||
        { "aacute" 225 }
 | 
			
		||||
        { "acirc"  226 }
 | 
			
		||||
        { "atilde" 227 }
 | 
			
		||||
        { "auml"   228 }
 | 
			
		||||
        { "aring"  229 }
 | 
			
		||||
        { "aelig"  230 }
 | 
			
		||||
        { "ccedil" 231 }
 | 
			
		||||
        { "egrave" 232 }
 | 
			
		||||
        { "eacute" 233 }
 | 
			
		||||
        { "ecirc"  234 }
 | 
			
		||||
        { "euml"   235 }
 | 
			
		||||
        { "igrave" 236 }
 | 
			
		||||
        { "iacute" 237 }
 | 
			
		||||
        { "icirc"  238 }
 | 
			
		||||
        { "iuml"   239 }
 | 
			
		||||
        { "eth"    240 }
 | 
			
		||||
        { "ntilde" 241 }
 | 
			
		||||
        { "ograve" 242 }
 | 
			
		||||
        { "oacute" 243 }
 | 
			
		||||
        { "ocirc"  244 }
 | 
			
		||||
        { "otilde" 245 }
 | 
			
		||||
        { "ouml"   246 }
 | 
			
		||||
        { "divide" 247 }
 | 
			
		||||
        { "oslash" 248 }
 | 
			
		||||
        { "ugrave" 249 }
 | 
			
		||||
        { "uacute" 250 }
 | 
			
		||||
        { "ucirc"  251 }
 | 
			
		||||
        { "uuml"   252 }
 | 
			
		||||
        { "yacute" 253 }
 | 
			
		||||
        { "thorn"  254 }
 | 
			
		||||
        { "yuml"   255 }
 | 
			
		||||
        { "fnof"     402 }
 | 
			
		||||
        { "Alpha"    913 }
 | 
			
		||||
        { "Beta"     914 }
 | 
			
		||||
        { "Gamma"    915 }
 | 
			
		||||
        { "Delta"    916 }
 | 
			
		||||
        { "Epsilon"  917 }
 | 
			
		||||
        { "Zeta"     918 }
 | 
			
		||||
        { "Eta"      919 }
 | 
			
		||||
        { "Theta"    920 }
 | 
			
		||||
        { "Iota"     921 }
 | 
			
		||||
        { "Kappa"    922 }
 | 
			
		||||
        { "Lambda"   923 }
 | 
			
		||||
        { "Mu"       924 }
 | 
			
		||||
        { "Nu"       925 }
 | 
			
		||||
        { "Xi"       926 }
 | 
			
		||||
        { "Omicron"  927 }
 | 
			
		||||
        { "Pi"       928 }
 | 
			
		||||
        { "Rho"      929 }
 | 
			
		||||
        { "Sigma"    931 }
 | 
			
		||||
        { "Tau"      932 }
 | 
			
		||||
        { "Upsilon"  933 }
 | 
			
		||||
        { "Phi"      934 }
 | 
			
		||||
        { "Chi"      935 }
 | 
			
		||||
        { "Psi"      936 }
 | 
			
		||||
        { "Omega"    937 }
 | 
			
		||||
        { "alpha"    945 }
 | 
			
		||||
        { "beta"     946 }
 | 
			
		||||
        { "gamma"    947 }
 | 
			
		||||
        { "delta"    948 }
 | 
			
		||||
        { "epsilon"  949 }
 | 
			
		||||
        { "zeta"     950 }
 | 
			
		||||
        { "eta"      951 }
 | 
			
		||||
        { "theta"    952 }
 | 
			
		||||
        { "iota"     953 }
 | 
			
		||||
        { "kappa"    954 }
 | 
			
		||||
        { "lambda"   955 }
 | 
			
		||||
        { "mu"       956 }
 | 
			
		||||
        { "nu"       957 }
 | 
			
		||||
        { "xi"       958 }
 | 
			
		||||
        { "omicron"  959 }
 | 
			
		||||
        { "pi"       960 }
 | 
			
		||||
        { "rho"      961 }
 | 
			
		||||
        { "sigmaf"   962 }
 | 
			
		||||
        { "sigma"    963 }
 | 
			
		||||
        { "tau"      964 }
 | 
			
		||||
        { "upsilon"  965 }
 | 
			
		||||
        { "phi"      966 }
 | 
			
		||||
        { "chi"      967 }
 | 
			
		||||
        { "psi"      968 }
 | 
			
		||||
        { "omega"    969 }
 | 
			
		||||
        { "thetasym" 977 }
 | 
			
		||||
        { "upsih"    978 }
 | 
			
		||||
        { "piv"      982 }
 | 
			
		||||
        { "bull"     8226 }
 | 
			
		||||
        { "hellip"   8230 }
 | 
			
		||||
        { "prime"    8242 }
 | 
			
		||||
        { "Prime"    8243 }
 | 
			
		||||
        { "oline"    8254 }
 | 
			
		||||
        { "frasl"    8260 }
 | 
			
		||||
        { "weierp"   8472 }
 | 
			
		||||
        { "image"    8465 }
 | 
			
		||||
        { "real"     8476 }
 | 
			
		||||
        { "trade"    8482 }
 | 
			
		||||
        { "alefsym"  8501 }
 | 
			
		||||
        { "larr"     8592 }
 | 
			
		||||
        { "uarr"     8593 }
 | 
			
		||||
        { "rarr"     8594 }
 | 
			
		||||
        { "darr"     8595 }
 | 
			
		||||
        { "harr"     8596 }
 | 
			
		||||
        { "crarr"    8629 }
 | 
			
		||||
        { "lArr"     8656 }
 | 
			
		||||
        { "uArr"     8657 }
 | 
			
		||||
        { "rArr"     8658 }
 | 
			
		||||
        { "dArr"     8659 }
 | 
			
		||||
        { "hArr"     8660 }
 | 
			
		||||
        { "forall"   8704 }
 | 
			
		||||
        { "part"     8706 }
 | 
			
		||||
        { "exist"    8707 }
 | 
			
		||||
        { "empty"    8709 }
 | 
			
		||||
        { "nabla"    8711 }
 | 
			
		||||
        { "isin"     8712 }
 | 
			
		||||
        { "notin"    8713 }
 | 
			
		||||
        { "ni"       8715 }
 | 
			
		||||
        { "prod"     8719 }
 | 
			
		||||
        { "sum"      8721 }
 | 
			
		||||
        { "minus"    8722 }
 | 
			
		||||
        { "lowast"   8727 }
 | 
			
		||||
        { "radic"    8730 }
 | 
			
		||||
        { "prop"     8733 }
 | 
			
		||||
        { "infin"    8734 }
 | 
			
		||||
        { "ang"      8736 }
 | 
			
		||||
        { "and"      8743 }
 | 
			
		||||
        { "or"       8744 }
 | 
			
		||||
        { "cap"      8745 }
 | 
			
		||||
        { "cup"      8746 }
 | 
			
		||||
        { "int"      8747 }
 | 
			
		||||
        { "there4"   8756 }
 | 
			
		||||
        { "sim"      8764 }
 | 
			
		||||
        { "cong"     8773 }
 | 
			
		||||
        { "asymp"    8776 }
 | 
			
		||||
        { "ne"       8800 }
 | 
			
		||||
        { "equiv"    8801 }
 | 
			
		||||
        { "le"       8804 }
 | 
			
		||||
        { "ge"       8805 }
 | 
			
		||||
        { "sub"      8834 }
 | 
			
		||||
        { "sup"      8835 }
 | 
			
		||||
        { "nsub"     8836 }
 | 
			
		||||
        { "sube"     8838 }
 | 
			
		||||
        { "supe"     8839 }
 | 
			
		||||
        { "oplus"    8853 }
 | 
			
		||||
        { "otimes"   8855 }
 | 
			
		||||
        { "perp"     8869 }
 | 
			
		||||
        { "sdot"     8901 }
 | 
			
		||||
        { "lceil"    8968 }
 | 
			
		||||
        { "rceil"    8969 }
 | 
			
		||||
        { "lfloor"   8970 }
 | 
			
		||||
        { "rfloor"   8971 }
 | 
			
		||||
        { "lang"     9001 }
 | 
			
		||||
        { "rang"     9002 }
 | 
			
		||||
        { "loz"      9674 }
 | 
			
		||||
        { "spades"   9824 }
 | 
			
		||||
        { "clubs"    9827 }
 | 
			
		||||
        { "hearts"   9829 }
 | 
			
		||||
        { "diams"    9830 }
 | 
			
		||||
        { "OElig"   338 }
 | 
			
		||||
        { "oelig"   339 }
 | 
			
		||||
        { "Scaron"  352 }
 | 
			
		||||
        { "scaron"  353 }
 | 
			
		||||
        { "Yuml"    376 }
 | 
			
		||||
        { "circ"    710 }
 | 
			
		||||
        { "tilde"   732 }
 | 
			
		||||
        { "ensp"    8194 }
 | 
			
		||||
        { "emsp"    8195 }
 | 
			
		||||
        { "thinsp"  8201 }
 | 
			
		||||
        { "zwnj"    8204 }
 | 
			
		||||
        { "zwj"     8205 }
 | 
			
		||||
        { "lrm"     8206 }
 | 
			
		||||
        { "rlm"     8207 }
 | 
			
		||||
        { "ndash"   8211 }
 | 
			
		||||
        { "mdash"   8212 }
 | 
			
		||||
        { "lsquo"   8216 }
 | 
			
		||||
        { "rsquo"   8217 }
 | 
			
		||||
        { "sbquo"   8218 }
 | 
			
		||||
        { "ldquo"   8220 }
 | 
			
		||||
        { "rdquo"   8221 }
 | 
			
		||||
        { "bdquo"   8222 }
 | 
			
		||||
        { "dagger"  8224 }
 | 
			
		||||
        { "Dagger"  8225 }
 | 
			
		||||
        { "permil"  8240 }
 | 
			
		||||
        { "lsaquo"  8249 }
 | 
			
		||||
        { "rsaquo"  8250 }
 | 
			
		||||
        { "euro"   8364 }
 | 
			
		||||
    } ;
 | 
			
		||||
 | 
			
		||||
SYMBOL: extra-entities
 | 
			
		||||
f extra-entities set-global
 | 
			
		||||
 | 
			
		||||
: with-entities ( entities quot -- )
 | 
			
		||||
    [ swap extra-entities set call ] with-scope ; inline
 | 
			
		||||
 | 
			
		||||
: with-html-entities ( quot -- )
 | 
			
		||||
    html-entities swap with-entities ; inline
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
Daniel Ehrenberg
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,4 @@
 | 
			
		|||
! Copyright (C) 2009 Daniel Ehrenberg.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: tools.test xml.entities.html ;
 | 
			
		||||
IN: xml.entities.html.tests
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,22 @@
 | 
			
		|||
! Copyright (C) 2009 Daniel Ehrenberg.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: assocs io.encodings.binary io.files kernel namespaces sequences
 | 
			
		||||
values xml xml.entities ;
 | 
			
		||||
IN: xml.entities.html
 | 
			
		||||
 | 
			
		||||
VALUE: html-entities
 | 
			
		||||
 | 
			
		||||
: read-entities-file ( file -- table )
 | 
			
		||||
    f swap binary <file-reader>
 | 
			
		||||
    [ 2drop extra-entities get ] sax ;
 | 
			
		||||
 | 
			
		||||
: get-html ( -- table )
 | 
			
		||||
    { "lat1" "special" "symbol" } [
 | 
			
		||||
        "resource:basis/xml/entities/html/xhtml-"
 | 
			
		||||
        swap ".ent" 3append read-entities-file
 | 
			
		||||
    ] map first3 assoc-union assoc-union ;
 | 
			
		||||
 | 
			
		||||
get-html to: html-entities
 | 
			
		||||
 | 
			
		||||
: with-html-entities ( quot -- )
 | 
			
		||||
    html-entities swap with-entities ; inline
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,196 @@
 | 
			
		|||
<!-- Portions (C) International Organization for Standardization 1986
 | 
			
		||||
     Permission to copy in any form is granted for use with
 | 
			
		||||
     conforming SGML systems and applications as defined in
 | 
			
		||||
     ISO 8879, provided this notice is included in all copies.
 | 
			
		||||
-->
 | 
			
		||||
<!-- Character entity set. Typical invocation:
 | 
			
		||||
    <!ENTITY % HTMLlat1 PUBLIC
 | 
			
		||||
       "-//W3C//ENTITIES Latin 1 for XHTML//EN"
 | 
			
		||||
       "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent">
 | 
			
		||||
    %HTMLlat1;
 | 
			
		||||
-->
 | 
			
		||||
 | 
			
		||||
<!ENTITY nbsp   " "> <!-- no-break space = non-breaking space,
 | 
			
		||||
                                  U+00A0 ISOnum -->
 | 
			
		||||
<!ENTITY iexcl  "¡"> <!-- inverted exclamation mark, U+00A1 ISOnum -->
 | 
			
		||||
<!ENTITY cent   "¢"> <!-- cent sign, U+00A2 ISOnum -->
 | 
			
		||||
<!ENTITY pound  "£"> <!-- pound sign, U+00A3 ISOnum -->
 | 
			
		||||
<!ENTITY curren "¤"> <!-- currency sign, U+00A4 ISOnum -->
 | 
			
		||||
<!ENTITY yen    "¥"> <!-- yen sign = yuan sign, U+00A5 ISOnum -->
 | 
			
		||||
<!ENTITY brvbar "¦"> <!-- broken bar = broken vertical bar,
 | 
			
		||||
                                  U+00A6 ISOnum -->
 | 
			
		||||
<!ENTITY sect   "§"> <!-- section sign, U+00A7 ISOnum -->
 | 
			
		||||
<!ENTITY uml    "¨"> <!-- diaeresis = spacing diaeresis,
 | 
			
		||||
                                  U+00A8 ISOdia -->
 | 
			
		||||
<!ENTITY copy   "©"> <!-- copyright sign, U+00A9 ISOnum -->
 | 
			
		||||
<!ENTITY ordf   "ª"> <!-- feminine ordinal indicator, U+00AA ISOnum -->
 | 
			
		||||
<!ENTITY laquo  "«"> <!-- left-pointing double angle quotation mark
 | 
			
		||||
                                  = left pointing guillemet, U+00AB ISOnum -->
 | 
			
		||||
<!ENTITY not    "¬"> <!-- not sign = angled dash,
 | 
			
		||||
                                  U+00AC ISOnum -->
 | 
			
		||||
<!ENTITY shy    "­"> <!-- soft hyphen = discretionary hyphen,
 | 
			
		||||
                                  U+00AD ISOnum -->
 | 
			
		||||
<!ENTITY reg    "®"> <!-- registered sign = registered trade mark sign,
 | 
			
		||||
                                  U+00AE ISOnum -->
 | 
			
		||||
<!ENTITY macr   "¯"> <!-- macron = spacing macron = overline
 | 
			
		||||
                                  = APL overbar, U+00AF ISOdia -->
 | 
			
		||||
<!ENTITY deg    "°"> <!-- degree sign, U+00B0 ISOnum -->
 | 
			
		||||
<!ENTITY plusmn "±"> <!-- plus-minus sign = plus-or-minus sign,
 | 
			
		||||
                                  U+00B1 ISOnum -->
 | 
			
		||||
<!ENTITY sup2   "²"> <!-- superscript two = superscript digit two
 | 
			
		||||
                                  = squared, U+00B2 ISOnum -->
 | 
			
		||||
<!ENTITY sup3   "³"> <!-- superscript three = superscript digit three
 | 
			
		||||
                                  = cubed, U+00B3 ISOnum -->
 | 
			
		||||
<!ENTITY acute  "´"> <!-- acute accent = spacing acute,
 | 
			
		||||
                                  U+00B4 ISOdia -->
 | 
			
		||||
<!ENTITY micro  "µ"> <!-- micro sign, U+00B5 ISOnum -->
 | 
			
		||||
<!ENTITY para   "¶"> <!-- pilcrow sign = paragraph sign,
 | 
			
		||||
                                  U+00B6 ISOnum -->
 | 
			
		||||
<!ENTITY middot "·"> <!-- middle dot = Georgian comma
 | 
			
		||||
                                  = Greek middle dot, U+00B7 ISOnum -->
 | 
			
		||||
<!ENTITY cedil  "¸"> <!-- cedilla = spacing cedilla, U+00B8 ISOdia -->
 | 
			
		||||
<!ENTITY sup1   "¹"> <!-- superscript one = superscript digit one,
 | 
			
		||||
                                  U+00B9 ISOnum -->
 | 
			
		||||
<!ENTITY ordm   "º"> <!-- masculine ordinal indicator,
 | 
			
		||||
                                  U+00BA ISOnum -->
 | 
			
		||||
<!ENTITY raquo  "»"> <!-- right-pointing double angle quotation mark
 | 
			
		||||
                                  = right pointing guillemet, U+00BB ISOnum -->
 | 
			
		||||
<!ENTITY frac14 "¼"> <!-- vulgar fraction one quarter
 | 
			
		||||
                                  = fraction one quarter, U+00BC ISOnum -->
 | 
			
		||||
<!ENTITY frac12 "½"> <!-- vulgar fraction one half
 | 
			
		||||
                                  = fraction one half, U+00BD ISOnum -->
 | 
			
		||||
<!ENTITY frac34 "¾"> <!-- vulgar fraction three quarters
 | 
			
		||||
                                  = fraction three quarters, U+00BE ISOnum -->
 | 
			
		||||
<!ENTITY iquest "¿"> <!-- inverted question mark
 | 
			
		||||
                                  = turned question mark, U+00BF ISOnum -->
 | 
			
		||||
<!ENTITY Agrave "À"> <!-- latin capital letter A with grave
 | 
			
		||||
                                  = latin capital letter A grave,
 | 
			
		||||
                                  U+00C0 ISOlat1 -->
 | 
			
		||||
<!ENTITY Aacute "Á"> <!-- latin capital letter A with acute,
 | 
			
		||||
                                  U+00C1 ISOlat1 -->
 | 
			
		||||
<!ENTITY Acirc  "Â"> <!-- latin capital letter A with circumflex,
 | 
			
		||||
                                  U+00C2 ISOlat1 -->
 | 
			
		||||
<!ENTITY Atilde "Ã"> <!-- latin capital letter A with tilde,
 | 
			
		||||
                                  U+00C3 ISOlat1 -->
 | 
			
		||||
<!ENTITY Auml   "Ä"> <!-- latin capital letter A with diaeresis,
 | 
			
		||||
                                  U+00C4 ISOlat1 -->
 | 
			
		||||
<!ENTITY Aring  "Å"> <!-- latin capital letter A with ring above
 | 
			
		||||
                                  = latin capital letter A ring,
 | 
			
		||||
                                  U+00C5 ISOlat1 -->
 | 
			
		||||
<!ENTITY AElig  "Æ"> <!-- latin capital letter AE
 | 
			
		||||
                                  = latin capital ligature AE,
 | 
			
		||||
                                  U+00C6 ISOlat1 -->
 | 
			
		||||
<!ENTITY Ccedil "Ç"> <!-- latin capital letter C with cedilla,
 | 
			
		||||
                                  U+00C7 ISOlat1 -->
 | 
			
		||||
<!ENTITY Egrave "È"> <!-- latin capital letter E with grave,
 | 
			
		||||
                                  U+00C8 ISOlat1 -->
 | 
			
		||||
<!ENTITY Eacute "É"> <!-- latin capital letter E with acute,
 | 
			
		||||
                                  U+00C9 ISOlat1 -->
 | 
			
		||||
<!ENTITY Ecirc  "Ê"> <!-- latin capital letter E with circumflex,
 | 
			
		||||
                                  U+00CA ISOlat1 -->
 | 
			
		||||
<!ENTITY Euml   "Ë"> <!-- latin capital letter E with diaeresis,
 | 
			
		||||
                                  U+00CB ISOlat1 -->
 | 
			
		||||
<!ENTITY Igrave "Ì"> <!-- latin capital letter I with grave,
 | 
			
		||||
                                  U+00CC ISOlat1 -->
 | 
			
		||||
<!ENTITY Iacute "Í"> <!-- latin capital letter I with acute,
 | 
			
		||||
                                  U+00CD ISOlat1 -->
 | 
			
		||||
<!ENTITY Icirc  "Î"> <!-- latin capital letter I with circumflex,
 | 
			
		||||
                                  U+00CE ISOlat1 -->
 | 
			
		||||
<!ENTITY Iuml   "Ï"> <!-- latin capital letter I with diaeresis,
 | 
			
		||||
                                  U+00CF ISOlat1 -->
 | 
			
		||||
<!ENTITY ETH    "Ð"> <!-- latin capital letter ETH, U+00D0 ISOlat1 -->
 | 
			
		||||
<!ENTITY Ntilde "Ñ"> <!-- latin capital letter N with tilde,
 | 
			
		||||
                                  U+00D1 ISOlat1 -->
 | 
			
		||||
<!ENTITY Ograve "Ò"> <!-- latin capital letter O with grave,
 | 
			
		||||
                                  U+00D2 ISOlat1 -->
 | 
			
		||||
<!ENTITY Oacute "Ó"> <!-- latin capital letter O with acute,
 | 
			
		||||
                                  U+00D3 ISOlat1 -->
 | 
			
		||||
<!ENTITY Ocirc  "Ô"> <!-- latin capital letter O with circumflex,
 | 
			
		||||
                                  U+00D4 ISOlat1 -->
 | 
			
		||||
<!ENTITY Otilde "Õ"> <!-- latin capital letter O with tilde,
 | 
			
		||||
                                  U+00D5 ISOlat1 -->
 | 
			
		||||
<!ENTITY Ouml   "Ö"> <!-- latin capital letter O with diaeresis,
 | 
			
		||||
                                  U+00D6 ISOlat1 -->
 | 
			
		||||
<!ENTITY times  "×"> <!-- multiplication sign, U+00D7 ISOnum -->
 | 
			
		||||
<!ENTITY Oslash "Ø"> <!-- latin capital letter O with stroke
 | 
			
		||||
                                  = latin capital letter O slash,
 | 
			
		||||
                                  U+00D8 ISOlat1 -->
 | 
			
		||||
<!ENTITY Ugrave "Ù"> <!-- latin capital letter U with grave,
 | 
			
		||||
                                  U+00D9 ISOlat1 -->
 | 
			
		||||
<!ENTITY Uacute "Ú"> <!-- latin capital letter U with acute,
 | 
			
		||||
                                  U+00DA ISOlat1 -->
 | 
			
		||||
<!ENTITY Ucirc  "Û"> <!-- latin capital letter U with circumflex,
 | 
			
		||||
                                  U+00DB ISOlat1 -->
 | 
			
		||||
<!ENTITY Uuml   "Ü"> <!-- latin capital letter U with diaeresis,
 | 
			
		||||
                                  U+00DC ISOlat1 -->
 | 
			
		||||
<!ENTITY Yacute "Ý"> <!-- latin capital letter Y with acute,
 | 
			
		||||
                                  U+00DD ISOlat1 -->
 | 
			
		||||
<!ENTITY THORN  "Þ"> <!-- latin capital letter THORN,
 | 
			
		||||
                                  U+00DE ISOlat1 -->
 | 
			
		||||
<!ENTITY szlig  "ß"> <!-- latin small letter sharp s = ess-zed,
 | 
			
		||||
                                  U+00DF ISOlat1 -->
 | 
			
		||||
<!ENTITY agrave "à"> <!-- latin small letter a with grave
 | 
			
		||||
                                  = latin small letter a grave,
 | 
			
		||||
                                  U+00E0 ISOlat1 -->
 | 
			
		||||
<!ENTITY aacute "á"> <!-- latin small letter a with acute,
 | 
			
		||||
                                  U+00E1 ISOlat1 -->
 | 
			
		||||
<!ENTITY acirc  "â"> <!-- latin small letter a with circumflex,
 | 
			
		||||
                                  U+00E2 ISOlat1 -->
 | 
			
		||||
<!ENTITY atilde "ã"> <!-- latin small letter a with tilde,
 | 
			
		||||
                                  U+00E3 ISOlat1 -->
 | 
			
		||||
<!ENTITY auml   "ä"> <!-- latin small letter a with diaeresis,
 | 
			
		||||
                                  U+00E4 ISOlat1 -->
 | 
			
		||||
<!ENTITY aring  "å"> <!-- latin small letter a with ring above
 | 
			
		||||
                                  = latin small letter a ring,
 | 
			
		||||
                                  U+00E5 ISOlat1 -->
 | 
			
		||||
<!ENTITY aelig  "æ"> <!-- latin small letter ae
 | 
			
		||||
                                  = latin small ligature ae, U+00E6 ISOlat1 -->
 | 
			
		||||
<!ENTITY ccedil "ç"> <!-- latin small letter c with cedilla,
 | 
			
		||||
                                  U+00E7 ISOlat1 -->
 | 
			
		||||
<!ENTITY egrave "è"> <!-- latin small letter e with grave,
 | 
			
		||||
                                  U+00E8 ISOlat1 -->
 | 
			
		||||
<!ENTITY eacute "é"> <!-- latin small letter e with acute,
 | 
			
		||||
                                  U+00E9 ISOlat1 -->
 | 
			
		||||
<!ENTITY ecirc  "ê"> <!-- latin small letter e with circumflex,
 | 
			
		||||
                                  U+00EA ISOlat1 -->
 | 
			
		||||
<!ENTITY euml   "ë"> <!-- latin small letter e with diaeresis,
 | 
			
		||||
                                  U+00EB ISOlat1 -->
 | 
			
		||||
<!ENTITY igrave "ì"> <!-- latin small letter i with grave,
 | 
			
		||||
                                  U+00EC ISOlat1 -->
 | 
			
		||||
<!ENTITY iacute "í"> <!-- latin small letter i with acute,
 | 
			
		||||
                                  U+00ED ISOlat1 -->
 | 
			
		||||
<!ENTITY icirc  "î"> <!-- latin small letter i with circumflex,
 | 
			
		||||
                                  U+00EE ISOlat1 -->
 | 
			
		||||
<!ENTITY iuml   "ï"> <!-- latin small letter i with diaeresis,
 | 
			
		||||
                                  U+00EF ISOlat1 -->
 | 
			
		||||
<!ENTITY eth    "ð"> <!-- latin small letter eth, U+00F0 ISOlat1 -->
 | 
			
		||||
<!ENTITY ntilde "ñ"> <!-- latin small letter n with tilde,
 | 
			
		||||
                                  U+00F1 ISOlat1 -->
 | 
			
		||||
<!ENTITY ograve "ò"> <!-- latin small letter o with grave,
 | 
			
		||||
                                  U+00F2 ISOlat1 -->
 | 
			
		||||
<!ENTITY oacute "ó"> <!-- latin small letter o with acute,
 | 
			
		||||
                                  U+00F3 ISOlat1 -->
 | 
			
		||||
<!ENTITY ocirc  "ô"> <!-- latin small letter o with circumflex,
 | 
			
		||||
                                  U+00F4 ISOlat1 -->
 | 
			
		||||
<!ENTITY otilde "õ"> <!-- latin small letter o with tilde,
 | 
			
		||||
                                  U+00F5 ISOlat1 -->
 | 
			
		||||
<!ENTITY ouml   "ö"> <!-- latin small letter o with diaeresis,
 | 
			
		||||
                                  U+00F6 ISOlat1 -->
 | 
			
		||||
<!ENTITY divide "÷"> <!-- division sign, U+00F7 ISOnum -->
 | 
			
		||||
<!ENTITY oslash "ø"> <!-- latin small letter o with stroke,
 | 
			
		||||
                                  = latin small letter o slash,
 | 
			
		||||
                                  U+00F8 ISOlat1 -->
 | 
			
		||||
<!ENTITY ugrave "ù"> <!-- latin small letter u with grave,
 | 
			
		||||
                                  U+00F9 ISOlat1 -->
 | 
			
		||||
<!ENTITY uacute "ú"> <!-- latin small letter u with acute,
 | 
			
		||||
                                  U+00FA ISOlat1 -->
 | 
			
		||||
<!ENTITY ucirc  "û"> <!-- latin small letter u with circumflex,
 | 
			
		||||
                                  U+00FB ISOlat1 -->
 | 
			
		||||
<!ENTITY uuml   "ü"> <!-- latin small letter u with diaeresis,
 | 
			
		||||
                                  U+00FC ISOlat1 -->
 | 
			
		||||
<!ENTITY yacute "ý"> <!-- latin small letter y with acute,
 | 
			
		||||
                                  U+00FD ISOlat1 -->
 | 
			
		||||
<!ENTITY thorn  "þ"> <!-- latin small letter thorn,
 | 
			
		||||
                                  U+00FE ISOlat1 -->
 | 
			
		||||
<!ENTITY yuml   "ÿ"> <!-- latin small letter y with diaeresis,
 | 
			
		||||
                                  U+00FF ISOlat1 -->
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,80 @@
 | 
			
		|||
<!-- Special characters for XHTML -->
 | 
			
		||||
 | 
			
		||||
<!-- Character entity set. Typical invocation:
 | 
			
		||||
     <!ENTITY % HTMLspecial PUBLIC
 | 
			
		||||
        "-//W3C//ENTITIES Special for XHTML//EN"
 | 
			
		||||
        "http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent">
 | 
			
		||||
     %HTMLspecial;
 | 
			
		||||
-->
 | 
			
		||||
 | 
			
		||||
<!-- Portions (C) International Organization for Standardization 1986:
 | 
			
		||||
     Permission to copy in any form is granted for use with
 | 
			
		||||
     conforming SGML systems and applications as defined in
 | 
			
		||||
     ISO 8879, provided this notice is included in all copies.
 | 
			
		||||
-->
 | 
			
		||||
 | 
			
		||||
<!-- Relevant ISO entity set is given unless names are newly introduced.
 | 
			
		||||
     New names (i.e., not in ISO 8879 list) do not clash with any
 | 
			
		||||
     existing ISO 8879 entity names. ISO 10646 character numbers
 | 
			
		||||
     are given for each character, in hex. values are decimal
 | 
			
		||||
     conversions of the ISO 10646 values and refer to the document
 | 
			
		||||
     character set. Names are Unicode names. 
 | 
			
		||||
-->
 | 
			
		||||
 | 
			
		||||
<!-- C0 Controls and Basic Latin -->
 | 
			
		||||
<!ENTITY quot    """> <!--  quotation mark, U+0022 ISOnum -->
 | 
			
		||||
<!ENTITY amp     "&#38;"> <!--  ampersand, U+0026 ISOnum -->
 | 
			
		||||
<!ENTITY lt      "&#60;"> <!--  less-than sign, U+003C ISOnum -->
 | 
			
		||||
<!ENTITY gt      ">"> <!--  greater-than sign, U+003E ISOnum -->
 | 
			
		||||
<!ENTITY apos	 "'"> <!--  apostrophe = APL quote, U+0027 ISOnum -->
 | 
			
		||||
 | 
			
		||||
<!-- Latin Extended-A -->
 | 
			
		||||
<!ENTITY OElig   "Œ"> <!--  latin capital ligature OE,
 | 
			
		||||
                                    U+0152 ISOlat2 -->
 | 
			
		||||
<!ENTITY oelig   "œ"> <!--  latin small ligature oe, U+0153 ISOlat2 -->
 | 
			
		||||
<!-- ligature is a misnomer, this is a separate character in some languages -->
 | 
			
		||||
<!ENTITY Scaron  "Š"> <!--  latin capital letter S with caron,
 | 
			
		||||
                                    U+0160 ISOlat2 -->
 | 
			
		||||
<!ENTITY scaron  "š"> <!--  latin small letter s with caron,
 | 
			
		||||
                                    U+0161 ISOlat2 -->
 | 
			
		||||
<!ENTITY Yuml    "Ÿ"> <!--  latin capital letter Y with diaeresis,
 | 
			
		||||
                                    U+0178 ISOlat2 -->
 | 
			
		||||
 | 
			
		||||
<!-- Spacing Modifier Letters -->
 | 
			
		||||
<!ENTITY circ    "ˆ"> <!--  modifier letter circumflex accent,
 | 
			
		||||
                                    U+02C6 ISOpub -->
 | 
			
		||||
<!ENTITY tilde   "˜"> <!--  small tilde, U+02DC ISOdia -->
 | 
			
		||||
 | 
			
		||||
<!-- General Punctuation -->
 | 
			
		||||
<!ENTITY ensp    " "> <!-- en space, U+2002 ISOpub -->
 | 
			
		||||
<!ENTITY emsp    " "> <!-- em space, U+2003 ISOpub -->
 | 
			
		||||
<!ENTITY thinsp  " "> <!-- thin space, U+2009 ISOpub -->
 | 
			
		||||
<!ENTITY zwnj    "‌"> <!-- zero width non-joiner,
 | 
			
		||||
                                    U+200C NEW RFC 2070 -->
 | 
			
		||||
<!ENTITY zwj     "‍"> <!-- zero width joiner, U+200D NEW RFC 2070 -->
 | 
			
		||||
<!ENTITY lrm     "‎"> <!-- left-to-right mark, U+200E NEW RFC 2070 -->
 | 
			
		||||
<!ENTITY rlm     "‏"> <!-- right-to-left mark, U+200F NEW RFC 2070 -->
 | 
			
		||||
<!ENTITY ndash   "–"> <!-- en dash, U+2013 ISOpub -->
 | 
			
		||||
<!ENTITY mdash   "—"> <!-- em dash, U+2014 ISOpub -->
 | 
			
		||||
<!ENTITY lsquo   "‘"> <!-- left single quotation mark,
 | 
			
		||||
                                    U+2018 ISOnum -->
 | 
			
		||||
<!ENTITY rsquo   "’"> <!-- right single quotation mark,
 | 
			
		||||
                                    U+2019 ISOnum -->
 | 
			
		||||
<!ENTITY sbquo   "‚"> <!-- single low-9 quotation mark, U+201A NEW -->
 | 
			
		||||
<!ENTITY ldquo   "“"> <!-- left double quotation mark,
 | 
			
		||||
                                    U+201C ISOnum -->
 | 
			
		||||
<!ENTITY rdquo   "”"> <!-- right double quotation mark,
 | 
			
		||||
                                    U+201D ISOnum -->
 | 
			
		||||
<!ENTITY bdquo   "„"> <!-- double low-9 quotation mark, U+201E NEW -->
 | 
			
		||||
<!ENTITY dagger  "†"> <!-- dagger, U+2020 ISOpub -->
 | 
			
		||||
<!ENTITY Dagger  "‡"> <!-- double dagger, U+2021 ISOpub -->
 | 
			
		||||
<!ENTITY permil  "‰"> <!-- per mille sign, U+2030 ISOtech -->
 | 
			
		||||
<!ENTITY lsaquo  "‹"> <!-- single left-pointing angle quotation mark,
 | 
			
		||||
                                    U+2039 ISO proposed -->
 | 
			
		||||
<!-- lsaquo is proposed but not yet ISO standardized -->
 | 
			
		||||
<!ENTITY rsaquo  "›"> <!-- single right-pointing angle quotation mark,
 | 
			
		||||
                                    U+203A ISO proposed -->
 | 
			
		||||
<!-- rsaquo is proposed but not yet ISO standardized -->
 | 
			
		||||
 | 
			
		||||
<!-- Currency Symbols -->
 | 
			
		||||
<!ENTITY euro   "€"> <!--  euro sign, U+20AC NEW -->
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,237 @@
 | 
			
		|||
<!-- Mathematical, Greek and Symbolic characters for XHTML -->
 | 
			
		||||
 | 
			
		||||
<!-- Character entity set. Typical invocation:
 | 
			
		||||
     <!ENTITY % HTMLsymbol PUBLIC
 | 
			
		||||
        "-//W3C//ENTITIES Symbols for XHTML//EN"
 | 
			
		||||
        "http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent">
 | 
			
		||||
     %HTMLsymbol;
 | 
			
		||||
-->
 | 
			
		||||
 | 
			
		||||
<!-- Portions (C) International Organization for Standardization 1986:
 | 
			
		||||
     Permission to copy in any form is granted for use with
 | 
			
		||||
     conforming SGML systems and applications as defined in
 | 
			
		||||
     ISO 8879, provided this notice is included in all copies.
 | 
			
		||||
-->
 | 
			
		||||
 | 
			
		||||
<!-- Relevant ISO entity set is given unless names are newly introduced.
 | 
			
		||||
     New names (i.e., not in ISO 8879 list) do not clash with any
 | 
			
		||||
     existing ISO 8879 entity names. ISO 10646 character numbers
 | 
			
		||||
     are given for each character, in hex. values are decimal
 | 
			
		||||
     conversions of the ISO 10646 values and refer to the document
 | 
			
		||||
     character set. Names are Unicode names. 
 | 
			
		||||
-->
 | 
			
		||||
 | 
			
		||||
<!-- Latin Extended-B -->
 | 
			
		||||
<!ENTITY fnof     "ƒ"> <!-- latin small letter f with hook = function
 | 
			
		||||
                                    = florin, U+0192 ISOtech -->
 | 
			
		||||
 | 
			
		||||
<!-- Greek -->
 | 
			
		||||
<!ENTITY Alpha    "Α"> <!-- greek capital letter alpha, U+0391 -->
 | 
			
		||||
<!ENTITY Beta     "Β"> <!-- greek capital letter beta, U+0392 -->
 | 
			
		||||
<!ENTITY Gamma    "Γ"> <!-- greek capital letter gamma,
 | 
			
		||||
                                    U+0393 ISOgrk3 -->
 | 
			
		||||
<!ENTITY Delta    "Δ"> <!-- greek capital letter delta,
 | 
			
		||||
                                    U+0394 ISOgrk3 -->
 | 
			
		||||
<!ENTITY Epsilon  "Ε"> <!-- greek capital letter epsilon, U+0395 -->
 | 
			
		||||
<!ENTITY Zeta     "Ζ"> <!-- greek capital letter zeta, U+0396 -->
 | 
			
		||||
<!ENTITY Eta      "Η"> <!-- greek capital letter eta, U+0397 -->
 | 
			
		||||
<!ENTITY Theta    "Θ"> <!-- greek capital letter theta,
 | 
			
		||||
                                    U+0398 ISOgrk3 -->
 | 
			
		||||
<!ENTITY Iota     "Ι"> <!-- greek capital letter iota, U+0399 -->
 | 
			
		||||
<!ENTITY Kappa    "Κ"> <!-- greek capital letter kappa, U+039A -->
 | 
			
		||||
<!ENTITY Lambda   "Λ"> <!-- greek capital letter lamda,
 | 
			
		||||
                                    U+039B ISOgrk3 -->
 | 
			
		||||
<!ENTITY Mu       "Μ"> <!-- greek capital letter mu, U+039C -->
 | 
			
		||||
<!ENTITY Nu       "Ν"> <!-- greek capital letter nu, U+039D -->
 | 
			
		||||
<!ENTITY Xi       "Ξ"> <!-- greek capital letter xi, U+039E ISOgrk3 -->
 | 
			
		||||
<!ENTITY Omicron  "Ο"> <!-- greek capital letter omicron, U+039F -->
 | 
			
		||||
<!ENTITY Pi       "Π"> <!-- greek capital letter pi, U+03A0 ISOgrk3 -->
 | 
			
		||||
<!ENTITY Rho      "Ρ"> <!-- greek capital letter rho, U+03A1 -->
 | 
			
		||||
<!-- there is no Sigmaf, and no U+03A2 character either -->
 | 
			
		||||
<!ENTITY Sigma    "Σ"> <!-- greek capital letter sigma,
 | 
			
		||||
                                    U+03A3 ISOgrk3 -->
 | 
			
		||||
<!ENTITY Tau      "Τ"> <!-- greek capital letter tau, U+03A4 -->
 | 
			
		||||
<!ENTITY Upsilon  "Υ"> <!-- greek capital letter upsilon,
 | 
			
		||||
                                    U+03A5 ISOgrk3 -->
 | 
			
		||||
<!ENTITY Phi      "Φ"> <!-- greek capital letter phi,
 | 
			
		||||
                                    U+03A6 ISOgrk3 -->
 | 
			
		||||
<!ENTITY Chi      "Χ"> <!-- greek capital letter chi, U+03A7 -->
 | 
			
		||||
<!ENTITY Psi      "Ψ"> <!-- greek capital letter psi,
 | 
			
		||||
                                    U+03A8 ISOgrk3 -->
 | 
			
		||||
<!ENTITY Omega    "Ω"> <!-- greek capital letter omega,
 | 
			
		||||
                                    U+03A9 ISOgrk3 -->
 | 
			
		||||
 | 
			
		||||
<!ENTITY alpha    "α"> <!-- greek small letter alpha,
 | 
			
		||||
                                    U+03B1 ISOgrk3 -->
 | 
			
		||||
<!ENTITY beta     "β"> <!-- greek small letter beta, U+03B2 ISOgrk3 -->
 | 
			
		||||
<!ENTITY gamma    "γ"> <!-- greek small letter gamma,
 | 
			
		||||
                                    U+03B3 ISOgrk3 -->
 | 
			
		||||
<!ENTITY delta    "δ"> <!-- greek small letter delta,
 | 
			
		||||
                                    U+03B4 ISOgrk3 -->
 | 
			
		||||
<!ENTITY epsilon  "ε"> <!-- greek small letter epsilon,
 | 
			
		||||
                                    U+03B5 ISOgrk3 -->
 | 
			
		||||
<!ENTITY zeta     "ζ"> <!-- greek small letter zeta, U+03B6 ISOgrk3 -->
 | 
			
		||||
<!ENTITY eta      "η"> <!-- greek small letter eta, U+03B7 ISOgrk3 -->
 | 
			
		||||
<!ENTITY theta    "θ"> <!-- greek small letter theta,
 | 
			
		||||
                                    U+03B8 ISOgrk3 -->
 | 
			
		||||
<!ENTITY iota     "ι"> <!-- greek small letter iota, U+03B9 ISOgrk3 -->
 | 
			
		||||
<!ENTITY kappa    "κ"> <!-- greek small letter kappa,
 | 
			
		||||
                                    U+03BA ISOgrk3 -->
 | 
			
		||||
<!ENTITY lambda   "λ"> <!-- greek small letter lamda,
 | 
			
		||||
                                    U+03BB ISOgrk3 -->
 | 
			
		||||
<!ENTITY mu       "μ"> <!-- greek small letter mu, U+03BC ISOgrk3 -->
 | 
			
		||||
<!ENTITY nu       "ν"> <!-- greek small letter nu, U+03BD ISOgrk3 -->
 | 
			
		||||
<!ENTITY xi       "ξ"> <!-- greek small letter xi, U+03BE ISOgrk3 -->
 | 
			
		||||
<!ENTITY omicron  "ο"> <!-- greek small letter omicron, U+03BF NEW -->
 | 
			
		||||
<!ENTITY pi       "π"> <!-- greek small letter pi, U+03C0 ISOgrk3 -->
 | 
			
		||||
<!ENTITY rho      "ρ"> <!-- greek small letter rho, U+03C1 ISOgrk3 -->
 | 
			
		||||
<!ENTITY sigmaf   "ς"> <!-- greek small letter final sigma,
 | 
			
		||||
                                    U+03C2 ISOgrk3 -->
 | 
			
		||||
<!ENTITY sigma    "σ"> <!-- greek small letter sigma,
 | 
			
		||||
                                    U+03C3 ISOgrk3 -->
 | 
			
		||||
<!ENTITY tau      "τ"> <!-- greek small letter tau, U+03C4 ISOgrk3 -->
 | 
			
		||||
<!ENTITY upsilon  "υ"> <!-- greek small letter upsilon,
 | 
			
		||||
                                    U+03C5 ISOgrk3 -->
 | 
			
		||||
<!ENTITY phi      "φ"> <!-- greek small letter phi, U+03C6 ISOgrk3 -->
 | 
			
		||||
<!ENTITY chi      "χ"> <!-- greek small letter chi, U+03C7 ISOgrk3 -->
 | 
			
		||||
<!ENTITY psi      "ψ"> <!-- greek small letter psi, U+03C8 ISOgrk3 -->
 | 
			
		||||
<!ENTITY omega    "ω"> <!-- greek small letter omega,
 | 
			
		||||
                                    U+03C9 ISOgrk3 -->
 | 
			
		||||
<!ENTITY thetasym "ϑ"> <!-- greek theta symbol,
 | 
			
		||||
                                    U+03D1 NEW -->
 | 
			
		||||
<!ENTITY upsih    "ϒ"> <!-- greek upsilon with hook symbol,
 | 
			
		||||
                                    U+03D2 NEW -->
 | 
			
		||||
<!ENTITY piv      "ϖ"> <!-- greek pi symbol, U+03D6 ISOgrk3 -->
 | 
			
		||||
 | 
			
		||||
<!-- General Punctuation -->
 | 
			
		||||
<!ENTITY bull     "•"> <!-- bullet = black small circle,
 | 
			
		||||
                                     U+2022 ISOpub  -->
 | 
			
		||||
<!-- bullet is NOT the same as bullet operator, U+2219 -->
 | 
			
		||||
<!ENTITY hellip   "…"> <!-- horizontal ellipsis = three dot leader,
 | 
			
		||||
                                     U+2026 ISOpub  -->
 | 
			
		||||
<!ENTITY prime    "′"> <!-- prime = minutes = feet, U+2032 ISOtech -->
 | 
			
		||||
<!ENTITY Prime    "″"> <!-- double prime = seconds = inches,
 | 
			
		||||
                                     U+2033 ISOtech -->
 | 
			
		||||
<!ENTITY oline    "‾"> <!-- overline = spacing overscore,
 | 
			
		||||
                                     U+203E NEW -->
 | 
			
		||||
<!ENTITY frasl    "⁄"> <!-- fraction slash, U+2044 NEW -->
 | 
			
		||||
 | 
			
		||||
<!-- Letterlike Symbols -->
 | 
			
		||||
<!ENTITY weierp   "℘"> <!-- script capital P = power set
 | 
			
		||||
                                     = Weierstrass p, U+2118 ISOamso -->
 | 
			
		||||
<!ENTITY image    "ℑ"> <!-- black-letter capital I = imaginary part,
 | 
			
		||||
                                     U+2111 ISOamso -->
 | 
			
		||||
<!ENTITY real     "ℜ"> <!-- black-letter capital R = real part symbol,
 | 
			
		||||
                                     U+211C ISOamso -->
 | 
			
		||||
<!ENTITY trade    "™"> <!-- trade mark sign, U+2122 ISOnum -->
 | 
			
		||||
<!ENTITY alefsym  "ℵ"> <!-- alef symbol = first transfinite cardinal,
 | 
			
		||||
                                     U+2135 NEW -->
 | 
			
		||||
<!-- alef symbol is NOT the same as hebrew letter alef,
 | 
			
		||||
     U+05D0 although the same glyph could be used to depict both characters -->
 | 
			
		||||
 | 
			
		||||
<!-- Arrows -->
 | 
			
		||||
<!ENTITY larr     "←"> <!-- leftwards arrow, U+2190 ISOnum -->
 | 
			
		||||
<!ENTITY uarr     "↑"> <!-- upwards arrow, U+2191 ISOnum-->
 | 
			
		||||
<!ENTITY rarr     "→"> <!-- rightwards arrow, U+2192 ISOnum -->
 | 
			
		||||
<!ENTITY darr     "↓"> <!-- downwards arrow, U+2193 ISOnum -->
 | 
			
		||||
<!ENTITY harr     "↔"> <!-- left right arrow, U+2194 ISOamsa -->
 | 
			
		||||
<!ENTITY crarr    "↵"> <!-- downwards arrow with corner leftwards
 | 
			
		||||
                                     = carriage return, U+21B5 NEW -->
 | 
			
		||||
<!ENTITY lArr     "⇐"> <!-- leftwards double arrow, U+21D0 ISOtech -->
 | 
			
		||||
<!-- Unicode does not say that lArr is the same as the 'is implied by' arrow
 | 
			
		||||
    but also does not have any other character for that function. So lArr can
 | 
			
		||||
    be used for 'is implied by' as ISOtech suggests -->
 | 
			
		||||
<!ENTITY uArr     "⇑"> <!-- upwards double arrow, U+21D1 ISOamsa -->
 | 
			
		||||
<!ENTITY rArr     "⇒"> <!-- rightwards double arrow,
 | 
			
		||||
                                     U+21D2 ISOtech -->
 | 
			
		||||
<!-- Unicode does not say this is the 'implies' character but does not have 
 | 
			
		||||
     another character with this function so rArr can be used for 'implies'
 | 
			
		||||
     as ISOtech suggests -->
 | 
			
		||||
<!ENTITY dArr     "⇓"> <!-- downwards double arrow, U+21D3 ISOamsa -->
 | 
			
		||||
<!ENTITY hArr     "⇔"> <!-- left right double arrow,
 | 
			
		||||
                                     U+21D4 ISOamsa -->
 | 
			
		||||
 | 
			
		||||
<!-- Mathematical Operators -->
 | 
			
		||||
<!ENTITY forall   "∀"> <!-- for all, U+2200 ISOtech -->
 | 
			
		||||
<!ENTITY part     "∂"> <!-- partial differential, U+2202 ISOtech  -->
 | 
			
		||||
<!ENTITY exist    "∃"> <!-- there exists, U+2203 ISOtech -->
 | 
			
		||||
<!ENTITY empty    "∅"> <!-- empty set = null set, U+2205 ISOamso -->
 | 
			
		||||
<!ENTITY nabla    "∇"> <!-- nabla = backward difference,
 | 
			
		||||
                                     U+2207 ISOtech -->
 | 
			
		||||
<!ENTITY isin     "∈"> <!-- element of, U+2208 ISOtech -->
 | 
			
		||||
<!ENTITY notin    "∉"> <!-- not an element of, U+2209 ISOtech -->
 | 
			
		||||
<!ENTITY ni       "∋"> <!-- contains as member, U+220B ISOtech -->
 | 
			
		||||
<!ENTITY prod     "∏"> <!-- n-ary product = product sign,
 | 
			
		||||
                                     U+220F ISOamsb -->
 | 
			
		||||
<!-- prod is NOT the same character as U+03A0 'greek capital letter pi' though
 | 
			
		||||
     the same glyph might be used for both -->
 | 
			
		||||
<!ENTITY sum      "∑"> <!-- n-ary summation, U+2211 ISOamsb -->
 | 
			
		||||
<!-- sum is NOT the same character as U+03A3 'greek capital letter sigma'
 | 
			
		||||
     though the same glyph might be used for both -->
 | 
			
		||||
<!ENTITY minus    "−"> <!-- minus sign, U+2212 ISOtech -->
 | 
			
		||||
<!ENTITY lowast   "∗"> <!-- asterisk operator, U+2217 ISOtech -->
 | 
			
		||||
<!ENTITY radic    "√"> <!-- square root = radical sign,
 | 
			
		||||
                                     U+221A ISOtech -->
 | 
			
		||||
<!ENTITY prop     "∝"> <!-- proportional to, U+221D ISOtech -->
 | 
			
		||||
<!ENTITY infin    "∞"> <!-- infinity, U+221E ISOtech -->
 | 
			
		||||
<!ENTITY ang      "∠"> <!-- angle, U+2220 ISOamso -->
 | 
			
		||||
<!ENTITY and      "∧"> <!-- logical and = wedge, U+2227 ISOtech -->
 | 
			
		||||
<!ENTITY or       "∨"> <!-- logical or = vee, U+2228 ISOtech -->
 | 
			
		||||
<!ENTITY cap      "∩"> <!-- intersection = cap, U+2229 ISOtech -->
 | 
			
		||||
<!ENTITY cup      "∪"> <!-- union = cup, U+222A ISOtech -->
 | 
			
		||||
<!ENTITY int      "∫"> <!-- integral, U+222B ISOtech -->
 | 
			
		||||
<!ENTITY there4   "∴"> <!-- therefore, U+2234 ISOtech -->
 | 
			
		||||
<!ENTITY sim      "∼"> <!-- tilde operator = varies with = similar to,
 | 
			
		||||
                                     U+223C ISOtech -->
 | 
			
		||||
<!-- tilde operator is NOT the same character as the tilde, U+007E,
 | 
			
		||||
     although the same glyph might be used to represent both  -->
 | 
			
		||||
<!ENTITY cong     "≅"> <!-- approximately equal to, U+2245 ISOtech -->
 | 
			
		||||
<!ENTITY asymp    "≈"> <!-- almost equal to = asymptotic to,
 | 
			
		||||
                                     U+2248 ISOamsr -->
 | 
			
		||||
<!ENTITY ne       "≠"> <!-- not equal to, U+2260 ISOtech -->
 | 
			
		||||
<!ENTITY equiv    "≡"> <!-- identical to, U+2261 ISOtech -->
 | 
			
		||||
<!ENTITY le       "≤"> <!-- less-than or equal to, U+2264 ISOtech -->
 | 
			
		||||
<!ENTITY ge       "≥"> <!-- greater-than or equal to,
 | 
			
		||||
                                     U+2265 ISOtech -->
 | 
			
		||||
<!ENTITY sub      "⊂"> <!-- subset of, U+2282 ISOtech -->
 | 
			
		||||
<!ENTITY sup      "⊃"> <!-- superset of, U+2283 ISOtech -->
 | 
			
		||||
<!ENTITY nsub     "⊄"> <!-- not a subset of, U+2284 ISOamsn -->
 | 
			
		||||
<!ENTITY sube     "⊆"> <!-- subset of or equal to, U+2286 ISOtech -->
 | 
			
		||||
<!ENTITY supe     "⊇"> <!-- superset of or equal to,
 | 
			
		||||
                                     U+2287 ISOtech -->
 | 
			
		||||
<!ENTITY oplus    "⊕"> <!-- circled plus = direct sum,
 | 
			
		||||
                                     U+2295 ISOamsb -->
 | 
			
		||||
<!ENTITY otimes   "⊗"> <!-- circled times = vector product,
 | 
			
		||||
                                     U+2297 ISOamsb -->
 | 
			
		||||
<!ENTITY perp     "⊥"> <!-- up tack = orthogonal to = perpendicular,
 | 
			
		||||
                                     U+22A5 ISOtech -->
 | 
			
		||||
<!ENTITY sdot     "⋅"> <!-- dot operator, U+22C5 ISOamsb -->
 | 
			
		||||
<!-- dot operator is NOT the same character as U+00B7 middle dot -->
 | 
			
		||||
 | 
			
		||||
<!-- Miscellaneous Technical -->
 | 
			
		||||
<!ENTITY lceil    "⌈"> <!-- left ceiling = APL upstile,
 | 
			
		||||
                                     U+2308 ISOamsc  -->
 | 
			
		||||
<!ENTITY rceil    "⌉"> <!-- right ceiling, U+2309 ISOamsc  -->
 | 
			
		||||
<!ENTITY lfloor   "⌊"> <!-- left floor = APL downstile,
 | 
			
		||||
                                     U+230A ISOamsc  -->
 | 
			
		||||
<!ENTITY rfloor   "⌋"> <!-- right floor, U+230B ISOamsc  -->
 | 
			
		||||
<!ENTITY lang     "〈"> <!-- left-pointing angle bracket = bra,
 | 
			
		||||
                                     U+2329 ISOtech -->
 | 
			
		||||
<!-- lang is NOT the same character as U+003C 'less than sign' 
 | 
			
		||||
     or U+2039 'single left-pointing angle quotation mark' -->
 | 
			
		||||
<!ENTITY rang     "〉"> <!-- right-pointing angle bracket = ket,
 | 
			
		||||
                                     U+232A ISOtech -->
 | 
			
		||||
<!-- rang is NOT the same character as U+003E 'greater than sign' 
 | 
			
		||||
     or U+203A 'single right-pointing angle quotation mark' -->
 | 
			
		||||
 | 
			
		||||
<!-- Geometric Shapes -->
 | 
			
		||||
<!ENTITY loz      "◊"> <!-- lozenge, U+25CA ISOpub -->
 | 
			
		||||
 | 
			
		||||
<!-- Miscellaneous Symbols -->
 | 
			
		||||
<!ENTITY spades   "♠"> <!-- black spade suit, U+2660 ISOpub -->
 | 
			
		||||
<!-- black here seems to mean filled as opposed to hollow -->
 | 
			
		||||
<!ENTITY clubs    "♣"> <!-- black club suit = shamrock,
 | 
			
		||||
                                     U+2663 ISOpub -->
 | 
			
		||||
<!ENTITY hearts   "♥"> <!-- black heart suit = valentine,
 | 
			
		||||
                                     U+2665 ISOpub -->
 | 
			
		||||
<!ENTITY diams    "♦"> <!-- black diamond suit, U+2666 ISOpub -->
 | 
			
		||||
| 
						 | 
				
			
			@ -6,22 +6,27 @@ IN: xml.errors.tests
 | 
			
		|||
    '[ _ string>xml ] swap '[ _ = ] must-fail-with ;
 | 
			
		||||
 | 
			
		||||
T{ no-entity f 1 10 "nbsp" } "<x> </x>" xml-error-test
 | 
			
		||||
T{ mismatched f 1 8 T{ name f "" "x" "" } T{ name f "" "y" "" }
 | 
			
		||||
} "<x></y>" xml-error-test
 | 
			
		||||
T{ mismatched f 1 8 T{ name f "" "x" "" } T{ name f "" "y" "" } }
 | 
			
		||||
    "<x></y>" xml-error-test
 | 
			
		||||
T{ unclosed f 1 4 V{ T{ name f "" "x" "" } } } "<x>" xml-error-test
 | 
			
		||||
T{ nonexist-ns f 1 5 "x" } "<x:y/>" xml-error-test
 | 
			
		||||
T{ unopened f 1 5 } "</x>" xml-error-test
 | 
			
		||||
T{ not-yes/no f 1 41 "maybe" } "<?xml version='1.0' standalone='maybe'?><x/>" xml-error-test
 | 
			
		||||
T{ not-yes/no f 1 41 "maybe" }
 | 
			
		||||
    "<?xml version='1.0' standalone='maybe'?><x/>" xml-error-test
 | 
			
		||||
T{ extra-attrs f 1 32 V{ T{ name f "" "foo" f } }
 | 
			
		||||
} "<?xml version='1.1' foo='bar'?><x/>" xml-error-test
 | 
			
		||||
T{ bad-version f 1 28 "5 million" } "<?xml version='5 million'?><x/>" xml-error-test
 | 
			
		||||
T{ bad-version f 1 28 "5 million" }
 | 
			
		||||
    "<?xml version='5 million'?><x/>" xml-error-test
 | 
			
		||||
T{ notags f } "" xml-error-test
 | 
			
		||||
T{ multitags } "<x/><y/>" xml-error-test
 | 
			
		||||
T{ bad-prolog  f 1 26 T{ prolog f "1.0" "UTF-8" f }
 | 
			
		||||
} "<x/><?xml version='1.0'?>" xml-error-test
 | 
			
		||||
T{ bad-prolog  f 1 26 T{ prolog f "1.0" "UTF-8" f } }
 | 
			
		||||
    "<x/><?xml version='1.0'?>" xml-error-test
 | 
			
		||||
T{ capitalized-prolog f 1 6 "XmL" } "<?XmL version='1.0'?><x/>"
 | 
			
		||||
xml-error-test
 | 
			
		||||
    xml-error-test
 | 
			
		||||
T{ pre/post-content f "x" t } "x<y/>" xml-error-test
 | 
			
		||||
T{ versionless-prolog f 1 8 } "<?xml?><x/>" xml-error-test
 | 
			
		||||
T{ bad-instruction f 1 11 T{ instruction f "xsl" }
 | 
			
		||||
} "<x><?xsl?></x>" xml-error-test
 | 
			
		||||
T{ bad-instruction f 1 11 T{ instruction f "xsl" } }
 | 
			
		||||
    "<x><?xsl?></x>" xml-error-test
 | 
			
		||||
T{ unclosed-quote f 1 13 } "<x value='/>" xml-error-test
 | 
			
		||||
T{ bad-name f 1 3 "-" } "<-/>" xml-error-test
 | 
			
		||||
T{ quoteless-attr f 1 10 } "<x value=3/>" xml-error-test
 | 
			
		||||
| 
						 | 
				
			
			@ -32,17 +32,6 @@ M: no-entity summary ( obj -- str )
 | 
			
		|||
        "Entity does not exist: &" write thing>> write ";" print
 | 
			
		||||
    ] with-string-writer ;
 | 
			
		||||
 | 
			
		||||
TUPLE: xml-string-error < parsing-error string ; ! this should not exist
 | 
			
		||||
 | 
			
		||||
: xml-string-error ( string -- * )
 | 
			
		||||
    \ xml-string-error parsing-error swap >>string throw ;
 | 
			
		||||
 | 
			
		||||
M: xml-string-error summary ( obj -- str )
 | 
			
		||||
    [
 | 
			
		||||
        dup call-next-method write
 | 
			
		||||
        string>> print
 | 
			
		||||
    ] with-string-writer ;
 | 
			
		||||
 | 
			
		||||
TUPLE: mismatched < parsing-error open close ;
 | 
			
		||||
 | 
			
		||||
: mismatched ( open close -- * )
 | 
			
		||||
| 
						 | 
				
			
			@ -233,7 +222,34 @@ M: misplaced-directive summary ( obj -- str )
 | 
			
		|||
        dir>> write-xml-chunk nl
 | 
			
		||||
    ] with-string-writer ;
 | 
			
		||||
 | 
			
		||||
TUPLE: bad-name < parsing-error name ;
 | 
			
		||||
 | 
			
		||||
: bad-name ( name -- * )
 | 
			
		||||
    \ bad-name parsing-error swap >>name throw ;
 | 
			
		||||
 | 
			
		||||
M: bad-name summary ( obj -- str )
 | 
			
		||||
    [ call-next-method ]
 | 
			
		||||
    [ "Invalid name: " swap name>> "\n" 3append ]
 | 
			
		||||
    bi append ;
 | 
			
		||||
 | 
			
		||||
TUPLE: unclosed-quote < parsing-error ;
 | 
			
		||||
 | 
			
		||||
: unclosed-quote ( -- * )
 | 
			
		||||
    \ unclosed-quote parsing-error throw ;
 | 
			
		||||
 | 
			
		||||
M: unclosed-quote summary
 | 
			
		||||
    call-next-method
 | 
			
		||||
    "XML document ends with quote still open\n" append ;
 | 
			
		||||
 | 
			
		||||
TUPLE: quoteless-attr < parsing-error ;
 | 
			
		||||
 | 
			
		||||
: quoteless-attr ( -- * )
 | 
			
		||||
    \ quoteless-attr parsing-error throw ;
 | 
			
		||||
 | 
			
		||||
M: quoteless-attr summary
 | 
			
		||||
    call-next-method "Attribute lacks quotes around value\n" append ;
 | 
			
		||||
 | 
			
		||||
UNION: xml-parse-error multitags notags extra-attrs nonexist-ns
 | 
			
		||||
       not-yes/no unclosed mismatched xml-string-error expected no-entity
 | 
			
		||||
       not-yes/no unclosed mismatched expected no-entity
 | 
			
		||||
       bad-prolog versionless-prolog capitalized-prolog bad-instruction
 | 
			
		||||
       bad-directive ;
 | 
			
		||||
       bad-directive bad-name unclosed-quote quoteless-attr ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
<?xml version='1.0' encoding='ASCII'?><x>e</x>
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,14 @@
 | 
			
		|||
USING: xml xml.data xml.utilities tools.test accessors kernel ;
 | 
			
		||||
 | 
			
		||||
[ "\u000131" ] [ "resource:basis/xml/tests/latin5.xml" file>xml children>string ] unit-test
 | 
			
		||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/latin1.xml" file>xml children>string ] unit-test
 | 
			
		||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/spaces.xml" file>xml children>string ] unit-test
 | 
			
		||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf8.xml" file>xml children>string ] unit-test
 | 
			
		||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16.xml" file>xml children>string ] unit-test
 | 
			
		||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16be.xml" file>xml children>string ] unit-test
 | 
			
		||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16le.xml" file>xml children>string ] unit-test
 | 
			
		||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16be-bom.xml" file>xml children>string ] unit-test
 | 
			
		||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16le-bom.xml" file>xml children>string ] unit-test
 | 
			
		||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/prologless.xml" file>xml children>string ] unit-test
 | 
			
		||||
[ "e" ] [ "resource:basis/xml/tests/ascii.xml" file>xml children>string ] unit-test
 | 
			
		||||
[ "\u0000e9" "x" ] [ "resource:basis/xml/tests/unitag.xml" file>xml [ name>> main>> ] [ children>string ] bi ] unit-test
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
<?xml version='1.0' encoding='ISO-8859-1'?><x>é</x>
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
<?xml version='1.0' encoding='ISO-8859-9'?><x>ý</x>
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
<x>é</x>
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,3 @@
 | 
			
		|||
 | 
			
		||||
 | 
			
		||||
<x>é</x>
 | 
			
		||||
| 
						 | 
				
			
			@ -2,7 +2,7 @@
 | 
			
		|||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
IN: xml.tests
 | 
			
		||||
USING: kernel xml tools.test io namespaces make sequences
 | 
			
		||||
xml.errors xml.entities parser strings xml.data io.files
 | 
			
		||||
xml.errors xml.entities.html parser strings xml.data io.files
 | 
			
		||||
xml.writer xml.utilities state-parser continuations assocs
 | 
			
		||||
sequences.deep accessors io.streams.string ;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -53,12 +53,15 @@ SYMBOL: xml-file
 | 
			
		|||
[ "<foo>         bar            </foo>" string>xml pprint-xml>string ] unit-test
 | 
			
		||||
[ "<!-- B+, B, or B--->" string>xml ] must-fail
 | 
			
		||||
[ ] [ "<?xml version='1.0'?><!-- declarations for <head> & <body> --><foo/>" string>xml drop ] unit-test
 | 
			
		||||
[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>xml-chunk second ] unit-test
 | 
			
		||||
[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>xml-chunk second ] unit-test
 | 
			
		||||
[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>xml-chunk second ] unit-test
 | 
			
		||||
[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>xml-chunk second ] unit-test
 | 
			
		||||
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk second ] unit-test
 | 
			
		||||
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk second ] unit-test
 | 
			
		||||
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk second ] unit-test
 | 
			
		||||
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo   SYSTEM \"blah.dtd\"   >" string>xml-chunk second ] unit-test
 | 
			
		||||
[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>xml-chunk first ] unit-test
 | 
			
		||||
[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>xml-chunk first ] unit-test
 | 
			
		||||
[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>xml-chunk first ] unit-test
 | 
			
		||||
[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>xml-chunk first ] unit-test
 | 
			
		||||
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk first ] unit-test
 | 
			
		||||
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk first ] unit-test
 | 
			
		||||
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk first ] unit-test
 | 
			
		||||
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo   SYSTEM \"blah.dtd\"   >" string>xml-chunk first ] unit-test
 | 
			
		||||
[ t ] [ "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' >" dup string>xml-chunk [ write-xml-chunk ] with-string-writer = ] unit-test
 | 
			
		||||
[ "foo" ] [ "<!ENTITY bar 'foo'><x>&bar;</x>" string>xml children>string ] unit-test
 | 
			
		||||
[ V{ "hello" } ] [ "hello" string>xml-chunk ] unit-test
 | 
			
		||||
[ 958 ] [ [ "ξ" string>xml-chunk ] with-html-entities first first ] unit-test
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
<é>x</é>
 | 
			
		||||
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
<?xml version='1.0' encoding='UTF-8'?><x/>
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
<?xml version='1.0' encoding='UTF-8'?><x>é</x>
 | 
			
		||||
| 
						 | 
				
			
			@ -1,9 +1,10 @@
 | 
			
		|||
! Copyright (C) 2005, 2006 Daniel Ehrenberg
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: xml.errors xml.data xml.utilities xml.char-classes sets
 | 
			
		||||
xml.entities kernel state-parser kernel namespaces make strings
 | 
			
		||||
math math.parser sequences assocs arrays splitting combinators
 | 
			
		||||
unicode.case accessors fry ascii ;
 | 
			
		||||
USING: accessors arrays ascii assocs combinators
 | 
			
		||||
combinators.short-circuit fry io.encodings io.encodings.iana
 | 
			
		||||
io.encodings.string io.encodings.utf16 io.encodings.utf8 kernel make
 | 
			
		||||
math math.parser namespaces sequences sets splitting state-parser
 | 
			
		||||
strings xml.char-classes xml.data xml.entities xml.errors hashtables ;
 | 
			
		||||
IN: xml.tokenize
 | 
			
		||||
 | 
			
		||||
! XML namespace processing: ns = namespace
 | 
			
		||||
| 
						 | 
				
			
			@ -53,34 +54,37 @@ SYMBOL: ns-stack
 | 
			
		|||
 | 
			
		||||
! version=1.0? is calculated once and passed around for efficiency
 | 
			
		||||
 | 
			
		||||
: (parse-name) ( -- str )
 | 
			
		||||
    version=1.0? dup
 | 
			
		||||
    get-char name-start? [
 | 
			
		||||
        [ dup get-char name-char? not ] take-until nip
 | 
			
		||||
    ] [
 | 
			
		||||
        "Malformed name" xml-string-error
 | 
			
		||||
    ] if ;
 | 
			
		||||
: assure-name ( str version=1.0? -- str )
 | 
			
		||||
    over {
 | 
			
		||||
        [ first name-start? ]
 | 
			
		||||
        [ rest-slice [ name-char? ] with all? ]
 | 
			
		||||
    } 2&& [ bad-name ] unless ;
 | 
			
		||||
 | 
			
		||||
: (parse-name) ( start -- str )
 | 
			
		||||
    version=1.0?
 | 
			
		||||
    [ [ get-char name-char? not ] curry take-until append ]
 | 
			
		||||
    [ assure-name ] bi ;
 | 
			
		||||
 | 
			
		||||
: parse-name-starting ( start -- name )
 | 
			
		||||
    (parse-name) get-char CHAR: : =
 | 
			
		||||
    [ next "" (parse-name) ] [ "" swap ] if f <name> ;
 | 
			
		||||
 | 
			
		||||
: parse-name ( -- name )
 | 
			
		||||
    (parse-name) get-char CHAR: : =
 | 
			
		||||
    [ next (parse-name) ] [ "" swap ] if f <name> ;
 | 
			
		||||
    "" parse-name-starting ;
 | 
			
		||||
 | 
			
		||||
!   -- Parsing strings
 | 
			
		||||
 | 
			
		||||
: (parse-entity) ( string -- )
 | 
			
		||||
: parse-named-entity ( string -- )
 | 
			
		||||
    dup entities at [ , ] [ 
 | 
			
		||||
        prolog-data get standalone>>
 | 
			
		||||
        [ no-entity ] [
 | 
			
		||||
            dup extra-entities get at
 | 
			
		||||
            [ , ] [ no-entity ] ?if
 | 
			
		||||
        ] if
 | 
			
		||||
        dup extra-entities get at
 | 
			
		||||
        [ dup number? [ , ] [ % ] if ] [ no-entity ] ?if ! Make less hackish
 | 
			
		||||
    ] ?if ;
 | 
			
		||||
 | 
			
		||||
: parse-entity ( -- )
 | 
			
		||||
    next CHAR: ; take-char next
 | 
			
		||||
    "#" ?head [
 | 
			
		||||
        "x" ?head 16 10 ? base> ,
 | 
			
		||||
    ] [ (parse-entity) ] if ;
 | 
			
		||||
    ] [ parse-named-entity ] if ;
 | 
			
		||||
 | 
			
		||||
: (parse-char) ( ch -- )
 | 
			
		||||
    get-char {
 | 
			
		||||
| 
						 | 
				
			
			@ -93,13 +97,9 @@ SYMBOL: ns-stack
 | 
			
		|||
: parse-char ( ch -- string )
 | 
			
		||||
    [ (parse-char) ] "" make ;
 | 
			
		||||
 | 
			
		||||
: parse-quot ( ch -- string )
 | 
			
		||||
    parse-char get-char
 | 
			
		||||
    [ "XML file ends in a quote" xml-string-error ] unless ;
 | 
			
		||||
 | 
			
		||||
: parse-text ( -- string )
 | 
			
		||||
    CHAR: < parse-char ;
 | 
			
		||||
 | 
			
		||||
                                   
 | 
			
		||||
! Parsing tags
 | 
			
		||||
 | 
			
		||||
: start-tag ( -- name ? )
 | 
			
		||||
| 
						 | 
				
			
			@ -107,17 +107,18 @@ SYMBOL: ns-stack
 | 
			
		|||
    get-char CHAR: / = dup [ next ] when
 | 
			
		||||
    parse-name swap ;
 | 
			
		||||
 | 
			
		||||
: parse-attr-value ( -- seq )
 | 
			
		||||
    get-char dup "'\"" member? [
 | 
			
		||||
        next parse-quot
 | 
			
		||||
    ] [
 | 
			
		||||
        "Attribute lacks quote" xml-string-error
 | 
			
		||||
    ] if ;
 | 
			
		||||
: (parse-quote) ( ch -- string )
 | 
			
		||||
    parse-char get-char
 | 
			
		||||
    [ unclosed-quote ] unless ;
 | 
			
		||||
 | 
			
		||||
: parse-quote ( -- seq )
 | 
			
		||||
    pass-blank get-char dup "'\"" member?
 | 
			
		||||
    [ next (parse-quote) ] [ quoteless-attr ] if ;
 | 
			
		||||
 | 
			
		||||
: parse-attr ( -- )
 | 
			
		||||
    [ parse-name ] with-scope
 | 
			
		||||
    pass-blank CHAR: = expect pass-blank
 | 
			
		||||
    [ parse-attr-value ] with-scope
 | 
			
		||||
    parse-name
 | 
			
		||||
    pass-blank CHAR: = expect
 | 
			
		||||
    parse-quote
 | 
			
		||||
    2array , ;
 | 
			
		||||
 | 
			
		||||
: (middle-tag) ( -- )
 | 
			
		||||
| 
						 | 
				
			
			@ -153,7 +154,7 @@ SYMBOL: ns-stack
 | 
			
		|||
: only-blanks ( str -- )
 | 
			
		||||
    [ blank? ] all? [ bad-doctype-decl ] unless ;
 | 
			
		||||
 | 
			
		||||
: take-system-literal ( -- str )
 | 
			
		||||
: take-system-literal ( -- str ) ! replace with parse-quote?
 | 
			
		||||
    pass-blank get-char next {
 | 
			
		||||
        { CHAR: ' [ "'" take-string ] }
 | 
			
		||||
        { CHAR: " [ "\"" take-string ] }
 | 
			
		||||
| 
						 | 
				
			
			@ -207,15 +208,18 @@ DEFER: direct
 | 
			
		|||
 | 
			
		||||
: take-entity-def ( -- entity-name entity-def )
 | 
			
		||||
    " " take-string pass-blank get-char {
 | 
			
		||||
        { CHAR: ' [ take-system-literal ] }
 | 
			
		||||
        { CHAR: " [ take-system-literal ] }
 | 
			
		||||
        { CHAR: ' [ parse-quote ] }
 | 
			
		||||
        { CHAR: " [ parse-quote ] }
 | 
			
		||||
        [ drop take-external-id ]
 | 
			
		||||
    } case ;
 | 
			
		||||
 | 
			
		||||
: associate-entity ( entity-name entity-def -- )
 | 
			
		||||
    swap extra-entities [ ?set-at ] change ;
 | 
			
		||||
 | 
			
		||||
: take-entity-decl ( -- entity-decl )
 | 
			
		||||
    pass-blank get-char {
 | 
			
		||||
        { CHAR: % [ next pass-blank take-entity-def ] }
 | 
			
		||||
        [ drop take-entity-def ]
 | 
			
		||||
        [ drop take-entity-def 2dup associate-entity ]
 | 
			
		||||
    } case
 | 
			
		||||
    ">" take-string only-blanks <entity-decl> ;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -253,22 +257,36 @@ DEFER: direct
 | 
			
		|||
: good-version ( version -- version )
 | 
			
		||||
    dup { "1.0" "1.1" } member? [ bad-version ] unless ;
 | 
			
		||||
 | 
			
		||||
: prolog-attrs ( alist -- prolog )
 | 
			
		||||
    [ T{ name f "" "version" f } swap at
 | 
			
		||||
      [ good-version ] [ versionless-prolog ] if* ] keep
 | 
			
		||||
    [ T{ name f "" "encoding" f } swap at
 | 
			
		||||
      "UTF-8" or ] keep
 | 
			
		||||
: prolog-version ( alist -- version )
 | 
			
		||||
    T{ name f "" "version" f } swap at
 | 
			
		||||
    [ good-version ] [ versionless-prolog ] if* ;
 | 
			
		||||
 | 
			
		||||
: prolog-encoding ( alist -- encoding )
 | 
			
		||||
    T{ name f "" "encoding" f } swap at "UTF-8" or ;
 | 
			
		||||
 | 
			
		||||
: prolog-standalone ( alist -- version )
 | 
			
		||||
    T{ name f "" "standalone" f } swap at
 | 
			
		||||
    [ yes/no>bool ] [ f ] if*
 | 
			
		||||
    <prolog> ;
 | 
			
		||||
    [ yes/no>bool ] [ f ] if* ;
 | 
			
		||||
 | 
			
		||||
: prolog-attrs ( alist -- prolog )
 | 
			
		||||
    [ prolog-version ]
 | 
			
		||||
    [ prolog-encoding ]
 | 
			
		||||
    [ prolog-standalone ]
 | 
			
		||||
    tri <prolog> ;
 | 
			
		||||
 | 
			
		||||
SYMBOL: string-input?
 | 
			
		||||
: decode-input-if ( encoding -- )
 | 
			
		||||
    string-input? get [ drop ] [ decode-input ] if ;
 | 
			
		||||
 | 
			
		||||
: parse-prolog ( -- prolog )
 | 
			
		||||
    pass-blank middle-tag "?>" expect-string
 | 
			
		||||
    dup assure-no-extra prolog-attrs
 | 
			
		||||
    dup encoding>> dup "UTF-16" =
 | 
			
		||||
    [ drop ] [ name>encoding [ decode-input-if ] when* ] if
 | 
			
		||||
    dup prolog-data set ;
 | 
			
		||||
 | 
			
		||||
: instruct ( -- instruction )
 | 
			
		||||
    (parse-name) dup "xml" =
 | 
			
		||||
    "" (parse-name) dup "xml" =
 | 
			
		||||
    [ drop parse-prolog ] [
 | 
			
		||||
        dup >lower "xml" =
 | 
			
		||||
        [ capitalized-prolog ]
 | 
			
		||||
| 
						 | 
				
			
			@ -278,10 +296,69 @@ DEFER: direct
 | 
			
		|||
: make-tag ( -- tag )
 | 
			
		||||
    {
 | 
			
		||||
        { [ get-char dup CHAR: ! = ] [ drop next direct ] }
 | 
			
		||||
        { [ CHAR: ? = ] [ next instruct ] } 
 | 
			
		||||
        { [ CHAR: ? = ] [ next instruct ] }
 | 
			
		||||
        [
 | 
			
		||||
            start-tag [ dup add-ns pop-ns <closer> ]
 | 
			
		||||
            [ middle-tag end-tag ] if
 | 
			
		||||
            CHAR: > expect
 | 
			
		||||
        ]
 | 
			
		||||
    } cond ;
 | 
			
		||||
 | 
			
		||||
! Autodetecting encodings
 | 
			
		||||
 | 
			
		||||
: continue-make-tag ( str -- tag )
 | 
			
		||||
    parse-name-starting middle-tag end-tag CHAR: > expect ;
 | 
			
		||||
 | 
			
		||||
: start-utf16le ( -- tag )
 | 
			
		||||
    utf16le decode-input-if
 | 
			
		||||
    CHAR: ? expect
 | 
			
		||||
    0 expect instruct ;
 | 
			
		||||
 | 
			
		||||
: 10xxxxxx? ( ch -- ? )
 | 
			
		||||
    -6 shift 3 bitand 2 = ;
 | 
			
		||||
          
 | 
			
		||||
: start<name ( ch -- tag )
 | 
			
		||||
    ascii?
 | 
			
		||||
    [ utf8 decode-input-if next make-tag ] [
 | 
			
		||||
        next
 | 
			
		||||
        [ get-next 10xxxxxx? not ] take-until
 | 
			
		||||
        get-char suffix utf8 decode
 | 
			
		||||
        utf8 decode-input-if next
 | 
			
		||||
        continue-make-tag
 | 
			
		||||
    ] if ;
 | 
			
		||||
          
 | 
			
		||||
: start< ( -- tag )
 | 
			
		||||
    get-next {
 | 
			
		||||
        { 0 [ next next start-utf16le ] }
 | 
			
		||||
        { CHAR: ? [ next next instruct ] } ! XML prolog parsing sets the encoding
 | 
			
		||||
        { CHAR: ! [ utf8 decode-input next next direct ] }
 | 
			
		||||
        [ start<name ]
 | 
			
		||||
    } case ;
 | 
			
		||||
 | 
			
		||||
: skip-utf8-bom ( -- tag )
 | 
			
		||||
    "\u0000bb\u0000bf" expect utf8 decode-input
 | 
			
		||||
    CHAR: < expect make-tag ;
 | 
			
		||||
 | 
			
		||||
: decode-expecting ( encoding string -- tag )
 | 
			
		||||
    [ decode-input-if next ] [ expect-string ] bi* make-tag ;
 | 
			
		||||
 | 
			
		||||
: start-utf16be ( -- tag )
 | 
			
		||||
    utf16be "<" decode-expecting ;
 | 
			
		||||
 | 
			
		||||
: skip-utf16le-bom ( -- tag )
 | 
			
		||||
    utf16le "\u0000fe<" decode-expecting ;
 | 
			
		||||
 | 
			
		||||
: skip-utf16be-bom ( -- tag )
 | 
			
		||||
    utf16be "\u0000ff<" decode-expecting ;
 | 
			
		||||
 | 
			
		||||
: start-document ( -- tag )
 | 
			
		||||
    get-char {
 | 
			
		||||
        { CHAR: < [ start< ] }
 | 
			
		||||
        { 0 [ start-utf16be ] }
 | 
			
		||||
        { HEX: EF [ skip-utf8-bom ] }
 | 
			
		||||
        { HEX: FF [ skip-utf16le-bom ] }
 | 
			
		||||
        { HEX: FE [ skip-utf16be-bom ] }
 | 
			
		||||
        { f [ "" ] }
 | 
			
		||||
        [ drop utf8 decode-input-if f ]
 | 
			
		||||
        ! Same problem as with <e`>, in the case of XML chunks?
 | 
			
		||||
    } case ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2,7 +2,7 @@
 | 
			
		|||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: help.markup help.syntax kernel xml.data xml.errors
 | 
			
		||||
xml.writer state-parser xml.tokenize xml.utilities xml.entities
 | 
			
		||||
strings sequences io ;
 | 
			
		||||
strings sequences io xml.entities.html ;
 | 
			
		||||
IN: xml
 | 
			
		||||
 | 
			
		||||
HELP: string>xml
 | 
			
		||||
| 
						 | 
				
			
			@ -295,9 +295,6 @@ HELP: expected
 | 
			
		|||
HELP: no-entity
 | 
			
		||||
{ $class-description "XML parsing error describing the use of an undefined entity in a case where standalone is marked yes. Delegates to " { $link parsing-error } ". Contains one slot, thing, containing a string representing the entity." } ;
 | 
			
		||||
 | 
			
		||||
HELP: xml-string-error
 | 
			
		||||
{ $class-description "XML parsing error that delegates to " { $link parsing-error } " and represents an other, unspecified error, which is represented by the slot string, containing a string describing the error." } ;
 | 
			
		||||
 | 
			
		||||
HELP: open-tag
 | 
			
		||||
{ $class-description "represents a tag that does have children, ie is not a contained tag" }
 | 
			
		||||
{ $notes "the constructor used for this class is simply " { $link <tag> } "." }
 | 
			
		||||
| 
						 | 
				
			
			@ -324,6 +321,15 @@ HELP: state-parse
 | 
			
		|||
HELP: pre/post-content
 | 
			
		||||
{ $class-description "describes the error where a non-whitespace string is used before or after the main tag in an XML document. Contains two slots: string contains the offending string, and pre? is t if it occured before the main tag and f if it occured after" } ;
 | 
			
		||||
 | 
			
		||||
HELP: unclosed-quote
 | 
			
		||||
{ $class-description "describes the error where a quotation for an attribute value is opened but not closed before the end of the document." } ;
 | 
			
		||||
 | 
			
		||||
HELP: bad-name
 | 
			
		||||
{ $class-description "describes the error where a name is used, for example in an XML tag or attribute key, which is invalid." } ;
 | 
			
		||||
 | 
			
		||||
HELP: quoteless-attr
 | 
			
		||||
{ $class-description "describes the error where an attribute of an XML tag is missing quotes around a value." } ;
 | 
			
		||||
 | 
			
		||||
HELP: entities
 | 
			
		||||
{ $description "a hash table from default XML entity names (like & and <) to the characters they represent. This is automatically included when parsing any XML document." }
 | 
			
		||||
{ $see-also html-entities } ;
 | 
			
		||||
| 
						 | 
				
			
			@ -444,6 +450,9 @@ ARTICLE: { "xml" "errors" } "XML parsing errors"
 | 
			
		|||
    { $subsection expected }
 | 
			
		||||
    { $subsection no-entity }
 | 
			
		||||
    { $subsection pre/post-content }
 | 
			
		||||
    { $subsection unclosed-quote }
 | 
			
		||||
    { $subsection bad-name }
 | 
			
		||||
    { $subsection quoteless-attr }
 | 
			
		||||
    "Additionally, most of these errors delegate to " { $link parsing-error } " in order to provide more information"
 | 
			
		||||
    $nl
 | 
			
		||||
    "Note that, in parsing an XML document, only the first error is reported." ;
 | 
			
		||||
| 
						 | 
				
			
			@ -456,7 +465,7 @@ ARTICLE: { "xml" "entities" } "XML entities"
 | 
			
		|||
    { $subsection with-html-entities } ;
 | 
			
		||||
 | 
			
		||||
ARTICLE: "xml" "XML parser"
 | 
			
		||||
"The " { $vocab-link "xml" } " vocabulary implements the XML 1.1 standard, converting strings of text into XML and vice versa."
 | 
			
		||||
"The " { $vocab-link "xml" } " vocabulary implements the XML 1.0 and 1.1 standards, converting strings of text into XML and vice versa."
 | 
			
		||||
    { $subsection { "xml" "reading" } }
 | 
			
		||||
    { $subsection { "xml" "writing" } }
 | 
			
		||||
    { $subsection { "xml" "classes" } }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,10 +1,9 @@
 | 
			
		|||
! Copyright (C) 2005, 2006 Daniel Ehrenberg
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: io io.streams.string io.files kernel math namespaces
 | 
			
		||||
prettyprint sequences arrays generic strings vectors
 | 
			
		||||
xml.char-classes xml.data xml.errors xml.tokenize xml.writer
 | 
			
		||||
xml.utilities state-parser assocs ascii io.encodings.utf8
 | 
			
		||||
accessors xml.backend ;
 | 
			
		||||
USING: accessors arrays io io.encodings.binary io.files
 | 
			
		||||
io.streams.string kernel namespaces sequences state-parser strings
 | 
			
		||||
xml.backend xml.data xml.errors xml.tokenize ascii
 | 
			
		||||
xml.writer ;
 | 
			
		||||
IN: xml
 | 
			
		||||
 | 
			
		||||
!   -- Overall parser with data tree
 | 
			
		||||
| 
						 | 
				
			
			@ -23,7 +22,7 @@ GENERIC: process ( object -- )
 | 
			
		|||
M: object process add-child ;
 | 
			
		||||
 | 
			
		||||
M: prolog process
 | 
			
		||||
    xml-stack get V{ { f V{ "" } } } =
 | 
			
		||||
    xml-stack get V{ { f V{ } } } =
 | 
			
		||||
    [ bad-prolog ] unless drop ;
 | 
			
		||||
 | 
			
		||||
M: instruction process
 | 
			
		||||
| 
						 | 
				
			
			@ -101,6 +100,7 @@ TUPLE: pull-xml scope ;
 | 
			
		|||
        text-now? on
 | 
			
		||||
    ] H{ } make-assoc
 | 
			
		||||
    pull-xml boa ;
 | 
			
		||||
! pull-xml needs to call start-document somewhere
 | 
			
		||||
 | 
			
		||||
: pull-event ( pull -- xml-event/f )
 | 
			
		||||
    scope>> [
 | 
			
		||||
| 
						 | 
				
			
			@ -133,11 +133,12 @@ TUPLE: pull-xml scope ;
 | 
			
		|||
: sax ( stream quot: ( xml-elem -- ) -- )
 | 
			
		||||
    swap [
 | 
			
		||||
        reset-prolog init-ns-stack
 | 
			
		||||
        prolog-data get call-under
 | 
			
		||||
        start-document [ call-under ] when*
 | 
			
		||||
        sax-loop
 | 
			
		||||
    ] state-parse ; inline recursive
 | 
			
		||||
 | 
			
		||||
: (read-xml) ( -- )
 | 
			
		||||
    start-document [ process ] when*
 | 
			
		||||
    [ process ] sax-loop ; inline
 | 
			
		||||
 | 
			
		||||
: (read-xml-chunk) ( stream -- prolog seq )
 | 
			
		||||
| 
						 | 
				
			
			@ -159,11 +160,12 @@ TUPLE: pull-xml scope ;
 | 
			
		|||
    <string-reader> read-xml ;
 | 
			
		||||
 | 
			
		||||
: string>xml-chunk ( string -- xml )
 | 
			
		||||
    <string-reader> read-xml-chunk ;
 | 
			
		||||
    t string-input?
 | 
			
		||||
    [ <string-reader> read-xml-chunk ] with-variable ;
 | 
			
		||||
 | 
			
		||||
: file>xml ( filename -- xml )
 | 
			
		||||
    ! Autodetect encoding!
 | 
			
		||||
    utf8 <file-reader> read-xml ;
 | 
			
		||||
    binary <file-reader> read-xml ;
 | 
			
		||||
 | 
			
		||||
: xml-reprint ( string -- )
 | 
			
		||||
    string>xml print-xml ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -141,7 +141,7 @@ PRIVATE>
 | 
			
		|||
 | 
			
		||||
: fuel-get-article ( name -- ) article fuel-eval-set-result ;
 | 
			
		||||
 | 
			
		||||
MEMO: fuel-get-article-title ( name -- )
 | 
			
		||||
: fuel-get-article-title ( name -- )
 | 
			
		||||
    articles get at [ article-title ] [ f ] if* fuel-eval-set-result ;
 | 
			
		||||
 | 
			
		||||
: fuel-word-help ( name -- ) (fuel-word-help) fuel-eval-set-result ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -18,16 +18,11 @@ beast.
 | 
			
		|||
 | 
			
		||||
  (load-file "<path/to/factor/installation>/misc/fuel/fu.el")
 | 
			
		||||
 | 
			
		||||
  or
 | 
			
		||||
 | 
			
		||||
  (add-to-list load-path "<path/to/factor/installation>/fuel")
 | 
			
		||||
  (require 'fuel)
 | 
			
		||||
 | 
			
		||||
  If all you want is a major mode for editing Factor code with pretty
 | 
			
		||||
  font colors and indentation, without running the factor listener
 | 
			
		||||
  inside Emacs, you can use instead:
 | 
			
		||||
 | 
			
		||||
  (add-to-list load-path "<path/to/factor/installation>/fuel")
 | 
			
		||||
  (add-to-list 'load-path "<path/to/factor/installation>/fuel")
 | 
			
		||||
  (setq factor-mode-use-fuel nil)
 | 
			
		||||
  (require 'factor-mode)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -31,6 +31,7 @@
 | 
			
		|||
        ((listp sexp)
 | 
			
		||||
         (case (car sexp)
 | 
			
		||||
           (:array (factor--seq 'V{ '} (cdr sexp)))
 | 
			
		||||
           (:seq (factor--seq '{ '} (cdr sexp)))
 | 
			
		||||
           (:quote (format "\\ %s" (factor `(:factor ,(cadr sexp)))))
 | 
			
		||||
           (:quotation (factor--seq '\[ '\] (cdr sexp)))
 | 
			
		||||
           (:using (factor `(USING: ,@(cdr sexp) :end)))
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -72,11 +72,21 @@
 | 
			
		|||
 | 
			
		||||
;;; Font lock:
 | 
			
		||||
 | 
			
		||||
(defun fuel-font-lock--syntactic-face (state)
 | 
			
		||||
  (cond ((nth 3 state) 'factor-font-lock-string)
 | 
			
		||||
        ((char-equal (char-after (nth 8 state)) ?\ )
 | 
			
		||||
         (save-excursion
 | 
			
		||||
           (goto-char (nth 8 state))
 | 
			
		||||
           (beginning-of-line)
 | 
			
		||||
           (cond ((looking-at "USING: ") 'factor-font-lock-vocabulary-name)
 | 
			
		||||
                 ((looking-at "\\(TUPLE\\|SYMBOLS\\|VARS\\): ")
 | 
			
		||||
                  'factor-font-lock-symbol)
 | 
			
		||||
                 (t 'default))))
 | 
			
		||||
        (t 'factor-font-lock-comment)))
 | 
			
		||||
 | 
			
		||||
(defconst fuel-font-lock--font-lock-keywords
 | 
			
		||||
  `((,fuel-syntax--stack-effect-regex . 'factor-font-lock-stack-effect)
 | 
			
		||||
    (,fuel-syntax--parsing-words-regex . 'factor-font-lock-parsing-word)
 | 
			
		||||
    (,fuel-syntax--brace-words-regex 1 'factor-font-lock-parsing-word)
 | 
			
		||||
    ("\\(P\\|SBUF\\)\"" 1 'factor-font-lock-parsing-word)
 | 
			
		||||
    (,fuel-syntax--vocab-ref-regexp  2 'factor-font-lock-vocabulary-name)
 | 
			
		||||
    (,fuel-syntax--declaration-words-regex . 'factor-font-lock-declaration)
 | 
			
		||||
    (,fuel-syntax--word-definition-regex 2 'factor-font-lock-word)
 | 
			
		||||
| 
						 | 
				
			
			@ -89,24 +99,26 @@
 | 
			
		|||
    (,fuel-syntax--type-definition-regex 2 'factor-font-lock-type-name)
 | 
			
		||||
    (,fuel-syntax--method-definition-regex (1 'factor-font-lock-type-name)
 | 
			
		||||
                                           (2 'factor-font-lock-word))
 | 
			
		||||
    (,fuel-syntax--parent-type-regex 2 'factor-font-lock-type-name)
 | 
			
		||||
    (,fuel-syntax--tuple-decl-regex 2 'factor-font-lock-type-name)
 | 
			
		||||
    (,fuel-syntax--constructor-regex . 'factor-font-lock-constructor)
 | 
			
		||||
    (,fuel-syntax--setter-regex . 'factor-font-lock-setter-word)
 | 
			
		||||
    (,fuel-syntax--getter-regex . 'factor-font-lock-getter-word)
 | 
			
		||||
    (,fuel-syntax--symbol-definition-regex 2 'factor-font-lock-symbol)
 | 
			
		||||
    (,fuel-syntax--bad-string-regex . 'factor-font-lock-invalid-syntax)))
 | 
			
		||||
    (,fuel-syntax--bad-string-regex . 'factor-font-lock-invalid-syntax)
 | 
			
		||||
    ("\\(P\\|SBUF\\)\"" 1 'factor-font-lock-parsing-word)
 | 
			
		||||
    (,fuel-syntax--parsing-words-regex . 'factor-font-lock-parsing-word)))
 | 
			
		||||
 | 
			
		||||
(defun fuel-font-lock--font-lock-setup (&optional keywords no-syntax)
 | 
			
		||||
  (set (make-local-variable 'comment-start) "! ")
 | 
			
		||||
  (set (make-local-variable 'parse-sexp-lookup-properties) t)
 | 
			
		||||
  (set (make-local-variable 'font-lock-comment-face) 'factor-font-lock-comment)
 | 
			
		||||
  (set (make-local-variable 'font-lock-string-face) 'factor-font-lock-string)
 | 
			
		||||
  (set (make-local-variable 'font-lock-defaults)
 | 
			
		||||
       `(,(or keywords 'fuel-font-lock--font-lock-keywords)
 | 
			
		||||
         nil nil nil nil
 | 
			
		||||
         ,@(if no-syntax nil
 | 
			
		||||
             (list (cons 'font-lock-syntactic-keywords
 | 
			
		||||
                         fuel-syntax--syntactic-keywords))))))
 | 
			
		||||
                         fuel-syntax--syntactic-keywords)
 | 
			
		||||
                   (cons 'font-lock-syntactic-face-function
 | 
			
		||||
                         'fuel-font-lock--syntactic-face))))))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
;;; Fontify strings as Factor code:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -137,7 +137,8 @@
 | 
			
		|||
 | 
			
		||||
(defun fuel-help--get-article (name label)
 | 
			
		||||
  (message "Retrieving article ...")
 | 
			
		||||
  (let* ((cmd `(:fuel* ((,name fuel-get-article)) "fuel" t))
 | 
			
		||||
  (let* ((name (if (listp name) (cons :seq name) name))
 | 
			
		||||
         (cmd `(:fuel* ((,name fuel-get-article)) "fuel" t))
 | 
			
		||||
         (ret (fuel-eval--send/wait cmd))
 | 
			
		||||
         (res (fuel-eval--retort-result ret)))
 | 
			
		||||
    (if (not res)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -31,7 +31,12 @@
 | 
			
		|||
  :group 'fuel)
 | 
			
		||||
 | 
			
		||||
(defcustom fuel-listener-factor-binary
 | 
			
		||||
  (expand-file-name "factor" fuel-factor-root-dir)
 | 
			
		||||
  (expand-file-name (cond ((eq system-type 'windows-nt)
 | 
			
		||||
                           "factor.exe")
 | 
			
		||||
                          ((eq system-type 'darwin)
 | 
			
		||||
                           "Factor.app/Contents/MacOS/factor")
 | 
			
		||||
                          (t "factor"))
 | 
			
		||||
                    fuel-factor-root-dir)
 | 
			
		||||
  "Full path to the factor executable to use when starting a listener."
 | 
			
		||||
  :type '(file :must-match t)
 | 
			
		||||
  :group 'fuel-listener)
 | 
			
		||||
| 
						 | 
				
			
			@ -132,8 +137,7 @@ buffer."
 | 
			
		|||
 | 
			
		||||
(defun fuel-listener--setup-completion ()
 | 
			
		||||
  (setq fuel-syntax--current-vocab-function 'fuel-listener--current-vocab)
 | 
			
		||||
  (setq fuel-syntax--usings-function 'fuel-listener--usings)
 | 
			
		||||
  (set-syntax-table fuel-syntax--syntax-table))
 | 
			
		||||
  (setq fuel-syntax--usings-function 'fuel-listener--usings))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
;;; Stack mode support
 | 
			
		||||
| 
						 | 
				
			
			@ -160,7 +164,6 @@ buffer."
 | 
			
		|||
  (set (make-local-variable 'comint-prompt-regexp) fuel-con--prompt-regex)
 | 
			
		||||
  (set (make-local-variable 'comint-use-prompt-regexp) t)
 | 
			
		||||
  (set (make-local-variable 'comint-prompt-read-only) t)
 | 
			
		||||
  (set-syntax-table fuel-syntax--syntax-table)
 | 
			
		||||
  (fuel-listener--setup-completion)
 | 
			
		||||
  (fuel-listener--setup-stack-mode))
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -61,7 +61,7 @@
 | 
			
		|||
 | 
			
		||||
(defun fuel-markup--insert-button (label link type)
 | 
			
		||||
  (let ((label (format "%s" label))
 | 
			
		||||
        (link (format "%s" link)))
 | 
			
		||||
        (link (if (listp link) link (format "%s" link))))
 | 
			
		||||
    (insert-text-button label
 | 
			
		||||
                        :type 'fuel-markup--button
 | 
			
		||||
                        'markup-link link
 | 
			
		||||
| 
						 | 
				
			
			@ -70,8 +70,9 @@
 | 
			
		|||
                        'help-echo (format "%s (%s)" label type))))
 | 
			
		||||
 | 
			
		||||
(defun fuel-markup--article-title (name)
 | 
			
		||||
  (fuel-eval--retort-result
 | 
			
		||||
   (fuel-eval--send/wait `(:fuel* ((,name fuel-get-article-title)) "fuel"))))
 | 
			
		||||
  (let ((name (if (listp name) (cons :seq name) name)))
 | 
			
		||||
    (fuel-eval--retort-result
 | 
			
		||||
     (fuel-eval--send/wait `(:fuel* ((,name fuel-get-article-title)) "fuel")))))
 | 
			
		||||
 | 
			
		||||
(defun fuel-markup--link-at-point ()
 | 
			
		||||
  (let ((button (condition-case nil (forward-button 0) (error nil))))
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -79,7 +79,7 @@
 | 
			
		|||
  (regexp-opt fuel-syntax--declaration-words 'words))
 | 
			
		||||
 | 
			
		||||
(defsubst fuel-syntax--second-word-regex (prefixes)
 | 
			
		||||
  (format "^%s +\\([^ \r\n]+\\)" (regexp-opt prefixes t)))
 | 
			
		||||
  (format "%s +\\([^ \r\n]+\\)" (regexp-opt prefixes t)))
 | 
			
		||||
 | 
			
		||||
(defconst fuel-syntax--method-definition-regex
 | 
			
		||||
  "^M: +\\([^ ]+\\) +\\([^ ]+\\)")
 | 
			
		||||
| 
						 | 
				
			
			@ -87,14 +87,22 @@
 | 
			
		|||
(defconst fuel-syntax--integer-regex
 | 
			
		||||
  "\\_<-?[0-9]+\\_>")
 | 
			
		||||
 | 
			
		||||
(defconst fuel-syntax--ratio-regex
 | 
			
		||||
  "\\_<-?\\([0-9]+\\+\\)?[0-9]+/-?[0-9]+\\_>")
 | 
			
		||||
(defconst fuel-syntax--raw-float-regex
 | 
			
		||||
  "[0-9]*\\.[0-9]*\\([eE][+-]?[0-9]+\\)?")
 | 
			
		||||
 | 
			
		||||
(defconst fuel-syntax--float-regex
 | 
			
		||||
  "\\_<-?[0-9]+\\.[0-9]*\\([eE][+-]?[0-9]+\\)?\\_>")
 | 
			
		||||
  (format "\\_<-?%s\\_>" fuel-syntax--raw-float-regex))
 | 
			
		||||
 | 
			
		||||
(defconst fuel-syntax--number-regex
 | 
			
		||||
  (format "\\([0-9]+\\|%s\\)" fuel-syntax--raw-float-regex))
 | 
			
		||||
 | 
			
		||||
(defconst fuel-syntax--ratio-regex
 | 
			
		||||
  (format "\\_<[+-]?%s/-?%s\\_>"
 | 
			
		||||
          fuel-syntax--number-regex
 | 
			
		||||
          fuel-syntax--number-regex))
 | 
			
		||||
 | 
			
		||||
(defconst fuel-syntax--bad-string-regex
 | 
			
		||||
  "\"\\([^\"]\\|\\\\\"\\)*\n")
 | 
			
		||||
  "\\_<\"[^>]\\([^\"\n]\\|\\\\\"\\)*\n")
 | 
			
		||||
 | 
			
		||||
(defconst fuel-syntax--word-definition-regex
 | 
			
		||||
  (fuel-syntax--second-word-regex
 | 
			
		||||
| 
						 | 
				
			
			@ -114,8 +122,8 @@
 | 
			
		|||
(defconst fuel-syntax--type-definition-regex
 | 
			
		||||
  (fuel-syntax--second-word-regex '("MIXIN:" "TUPLE:" "SINGLETON:" "UNION:")))
 | 
			
		||||
 | 
			
		||||
(defconst fuel-syntax--parent-type-regex
 | 
			
		||||
  "^\\(TUPLE\\|PREDICTE\\): +[^ ]+ +< +\\([^ ]+\\)")
 | 
			
		||||
(defconst fuel-syntax--tuple-decl-regex
 | 
			
		||||
  "^TUPLE: +\\([^ \n]+\\) +< +\\([^ \n]+\\)\\_>")
 | 
			
		||||
 | 
			
		||||
(defconst fuel-syntax--constructor-regex "<[^ >]+>")
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -125,7 +133,8 @@
 | 
			
		|||
(defconst fuel-syntax--symbol-definition-regex
 | 
			
		||||
  (fuel-syntax--second-word-regex '("SYMBOL:" "VAR:")))
 | 
			
		||||
 | 
			
		||||
(defconst fuel-syntax--stack-effect-regex " ( .* )")
 | 
			
		||||
(defconst fuel-syntax--stack-effect-regex
 | 
			
		||||
  "\\( ( .* )\\)\\|\\( (( .* ))\\)")
 | 
			
		||||
 | 
			
		||||
(defconst fuel-syntax--using-lines-regex "^USING: +\\([^;]+\\);")
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -163,26 +172,26 @@
 | 
			
		|||
          fuel-syntax--declaration-words-regex))
 | 
			
		||||
 | 
			
		||||
(defconst fuel-syntax--single-liner-regex
 | 
			
		||||
  (format "^%s" (regexp-opt '("ABOUT:"
 | 
			
		||||
                              "ARTICLE:"
 | 
			
		||||
                              "ALIAS:"
 | 
			
		||||
                              "CONSTANT:" "C:"
 | 
			
		||||
                              "DEFER:"
 | 
			
		||||
                              "FORGET:"
 | 
			
		||||
                              "GENERIC:" "GENERIC#"
 | 
			
		||||
                              "HELP:" "HEX:" "HOOK:"
 | 
			
		||||
                              "IN:" "INSTANCE:"
 | 
			
		||||
                              "MAIN:" "MATH:" "MIXIN:"
 | 
			
		||||
                              "OCT:"
 | 
			
		||||
                              "POSTPONE:" "PRIVATE>" "<PRIVATE"
 | 
			
		||||
                              "QUALIFIED-WITH:" "QUALIFIED:"
 | 
			
		||||
                              "RENAME:"
 | 
			
		||||
                              "SINGLETON:" "SLOT:" "SYMBOL:"
 | 
			
		||||
                              "USE:"
 | 
			
		||||
                              "VAR:"))))
 | 
			
		||||
  (regexp-opt '("ABOUT:"
 | 
			
		||||
                "ARTICLE:"
 | 
			
		||||
                "ALIAS:"
 | 
			
		||||
                "CONSTANT:" "C:"
 | 
			
		||||
                "DEFER:"
 | 
			
		||||
                "FORGET:"
 | 
			
		||||
                "GENERIC:" "GENERIC#"
 | 
			
		||||
                "HELP:" "HEX:" "HOOK:"
 | 
			
		||||
                "IN:" "INSTANCE:"
 | 
			
		||||
                "MAIN:" "MATH:" "MIXIN:"
 | 
			
		||||
                "OCT:"
 | 
			
		||||
                "POSTPONE:" "PRIVATE>" "<PRIVATE"
 | 
			
		||||
                "QUALIFIED-WITH:" "QUALIFIED:"
 | 
			
		||||
                "RENAME:"
 | 
			
		||||
                "SINGLETON:" "SLOT:" "SYMBOL:"
 | 
			
		||||
                "USE:"
 | 
			
		||||
                "VAR:")))
 | 
			
		||||
 | 
			
		||||
(defconst fuel-syntax--begin-of-def-regex
 | 
			
		||||
  (format "^USING: \\|\\(%s\\)\\|\\(%s .*\\)"
 | 
			
		||||
  (format "^USING: \\|\\(%s\\)\\|\\(^%s .*\\)"
 | 
			
		||||
          fuel-syntax--definition-start-regex
 | 
			
		||||
          fuel-syntax--single-liner-regex))
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -190,7 +199,7 @@
 | 
			
		|||
  (format "^.*%s" fuel-syntax--definition-end-regex))
 | 
			
		||||
 | 
			
		||||
(defconst fuel-syntax--end-of-def-regex
 | 
			
		||||
  (format "\\(%s\\)\\|\\(%s .*\\)"
 | 
			
		||||
  (format "\\(%s\\)\\|\\(^%s .*\\)"
 | 
			
		||||
          fuel-syntax--end-of-def-line-regex
 | 
			
		||||
          fuel-syntax--single-liner-regex))
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -220,13 +229,21 @@
 | 
			
		|||
    table))
 | 
			
		||||
 | 
			
		||||
(defconst fuel-syntax--syntactic-keywords
 | 
			
		||||
  `(;; Comments:
 | 
			
		||||
  `(;; CHARs:
 | 
			
		||||
    ("CHAR: \\(.\\)\\( \\|$\\)" (1 "w"))
 | 
			
		||||
    ;; Comments:
 | 
			
		||||
    ("\\_<\\(#?!\\) .*\\(\n\\|$\\)" (1 "<") (2 ">"))
 | 
			
		||||
    ("\\_<\\(#?!\\)\\(\n\\|$\\)" (1 "<") (2 ">"))
 | 
			
		||||
    ;; CHARs:
 | 
			
		||||
    ("CHAR: \\(.\\)\\( \\|$\\)" (1 "w"))
 | 
			
		||||
    ;; Strings
 | 
			
		||||
    ("\\(\"\\)\\([^\n\r\f\\\"]\\|\\\\\"?\\)*\\(\"\\)" (1 "\"") (3 "\""))
 | 
			
		||||
    ("\\_<\\(\"\\)\\([^\n\r\f\"]\\|\\\\\"\\)*\\(\"\\)\\_>" (1 "\"") (3 "\""))
 | 
			
		||||
    ("\\_<<\\(\"\\)\\_>" (1 "\""))
 | 
			
		||||
    ("\\_<\\(\"\\)>\\_>" (1 "\""))
 | 
			
		||||
    ;; Multiline constructs
 | 
			
		||||
    ("\\_<USING:\\( \\)\\(;\\)" (1 "<b") (2 ">b"))
 | 
			
		||||
    ("\\_<USING:\\( \\)" (1 "<b"))
 | 
			
		||||
    ("\\_<TUPLE: +\\w+? +< +\\w+? *\\( \\)" (1 "<b"))
 | 
			
		||||
    ("\\_<\\(TUPLE\\|SYMBOLS\\|VARS\\): +\\w+? *\\( \\)\\([^<]\\|\\_>\\)" (2 "<b"))
 | 
			
		||||
    ("\\(\n\\| \\);\\_>" (1 ">b"))
 | 
			
		||||
    ;; Let and lambda:
 | 
			
		||||
    ("\\_<\\(!(\\) .* \\()\\)" (1 "<") (2 ">"))
 | 
			
		||||
    ("\\(\\[\\)\\(let\\|wlet\\|let\\*\\)\\( \\|$\\)" (1 "(]"))
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue