Merge branch 'master' of git://factorcode.org/git/factor into unicode
commit
b6d292fb23
|
@ -659,13 +659,40 @@ M: ppc %callback-value ( ctype -- )
|
|||
|
||||
M: ppc small-enough? ( n -- ? ) -32768 32767 between? ;
|
||||
|
||||
M: ppc return-struct-in-registers? ( c-type -- ? ) drop f ;
|
||||
M: ppc return-struct-in-registers? ( c-type -- ? )
|
||||
c-type return-in-registers?>> ;
|
||||
|
||||
M: ppc %box-small-struct
|
||||
drop "No small structs" throw ;
|
||||
M: ppc %box-small-struct ( c-type -- )
|
||||
#! Box a <= 16-byte struct returned in r3:r4:r5:r6
|
||||
heap-size 7 LI
|
||||
"box_medium_struct" f %alien-invoke ;
|
||||
|
||||
M: ppc %unbox-small-struct
|
||||
drop "No small structs" throw ;
|
||||
: %unbox-struct-1 ( -- )
|
||||
! Alien must be in r3.
|
||||
"alien_offset" f %alien-invoke
|
||||
3 3 0 LWZ ;
|
||||
|
||||
: %unbox-struct-2 ( -- )
|
||||
! Alien must be in r3.
|
||||
"alien_offset" f %alien-invoke
|
||||
4 3 4 LWZ
|
||||
3 3 0 LWZ ;
|
||||
|
||||
: %unbox-struct-4 ( -- )
|
||||
! Alien must be in r3.
|
||||
"alien_offset" f %alien-invoke
|
||||
6 3 12 LWZ
|
||||
5 3 8 LWZ
|
||||
4 3 4 LWZ
|
||||
3 3 0 LWZ ;
|
||||
|
||||
M: ppc %unbox-small-struct ( size -- )
|
||||
#! Alien must be in EAX.
|
||||
heap-size cell align cell /i {
|
||||
{ 1 [ %unbox-struct-1 ] }
|
||||
{ 2 [ %unbox-struct-2 ] }
|
||||
{ 4 [ %unbox-struct-4 ] }
|
||||
} case ;
|
||||
|
||||
USE: vocabs.loader
|
||||
|
||||
|
@ -673,3 +700,5 @@ USE: vocabs.loader
|
|||
{ [ os macosx? ] [ "cpu.ppc.macosx" require ] }
|
||||
{ [ os linux? ] [ "cpu.ppc.linux" require ] }
|
||||
} cond
|
||||
|
||||
"complex-double" c-type t >>return-in-registers? drop
|
||||
|
|
|
@ -5,12 +5,13 @@ IN: models
|
|||
HELP: model
|
||||
{ $class-description "A mutable cell holding a single value. When the value is changed, a sequence of connected objects are notified. Models have the following slots:"
|
||||
{ $list
|
||||
{ { $snippet "value" } " - the value of the model. Use " { $link set-model } " to change the value." }
|
||||
{ { $snippet "connections" } " - a sequence of objects implementing the " { $link model-changed } " generic word, to be notified when the model's value changes." }
|
||||
{ { $snippet "dependencies" } " - a sequence of models which should have this model added to their sequence of connections when activated." }
|
||||
{ { $snippet "ref" } " - a reference count tracking the number of models which depend on this one." }
|
||||
{ { $slot "value" } " - the value of the model. Use " { $link set-model } " to change the value." }
|
||||
{ { $slot "connections" } " - a sequence of objects implementing the " { $link model-changed } " generic word, to be notified when the model's value changes." }
|
||||
{ { $slot "dependencies" } " - a sequence of models which should have this model added to their sequence of connections when activated." }
|
||||
{ { $slot "ref" } " - a reference count tracking the number of models which depend on this one." }
|
||||
{ { $slot "locked?" } " - a slot set by " { $link with-locked-model } " to ensure that the model doesn't get changed recursively" }
|
||||
}
|
||||
"Other classes may delegate to " { $link model } "."
|
||||
"Other classes may inherit from " { $link model } "."
|
||||
} ;
|
||||
|
||||
HELP: <model>
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
!
|
||||
USING: kernel tools.test peg peg.ebnf words math math.parser
|
||||
sequences accessors peg.parsers parser namespaces arrays
|
||||
strings eval ;
|
||||
strings eval unicode.data multiline ;
|
||||
IN: peg.ebnf.tests
|
||||
|
||||
{ T{ ebnf-non-terminal f "abc" } } [
|
||||
|
@ -520,3 +520,13 @@ Tok = Spaces (Number | Special )
|
|||
{ "\\" } [
|
||||
"\\" [EBNF foo="\\" EBNF]
|
||||
] unit-test
|
||||
|
||||
[ "USE: peg.ebnf [EBNF EBNF]" eval ] must-fail
|
||||
|
||||
[ <" USE: peg.ebnf [EBNF
|
||||
lol = a
|
||||
lol = b
|
||||
EBNF] "> eval
|
||||
] [
|
||||
error>> [ redefined-rule? ] [ name>> "lol" = ] bi and
|
||||
] must-fail-with
|
||||
|
|
|
@ -5,13 +5,18 @@ sequences quotations vectors namespaces make math assocs
|
|||
continuations peg peg.parsers unicode.categories multiline
|
||||
splitting accessors effects sequences.deep peg.search
|
||||
combinators.short-circuit lexer io.streams.string stack-checker
|
||||
io combinators parser ;
|
||||
io combinators parser summary ;
|
||||
IN: peg.ebnf
|
||||
|
||||
: rule ( name word -- parser )
|
||||
#! Given an EBNF word produced from EBNF: return the EBNF rule
|
||||
"ebnf-parser" word-prop at ;
|
||||
|
||||
ERROR: no-rule rule parser ;
|
||||
|
||||
: lookup-rule ( rule parser -- rule' )
|
||||
2dup rule [ 2nip ] [ no-rule ] if* ;
|
||||
|
||||
TUPLE: tokenizer any one many ;
|
||||
|
||||
: default-tokenizer ( -- tokenizer )
|
||||
|
@ -34,8 +39,13 @@ TUPLE: tokenizer any one many ;
|
|||
: reset-tokenizer ( -- )
|
||||
default-tokenizer \ tokenizer set-global ;
|
||||
|
||||
ERROR: no-tokenizer name ;
|
||||
|
||||
M: no-tokenizer summary
|
||||
drop "Tokenizer not found" ;
|
||||
|
||||
SYNTAX: TOKENIZER:
|
||||
scan search [ "Tokenizer not found" throw ] unless*
|
||||
scan dup search [ nip ] [ no-tokenizer ] if*
|
||||
execute( -- tokenizer ) \ tokenizer set-global ;
|
||||
|
||||
TUPLE: ebnf-non-terminal symbol ;
|
||||
|
@ -258,7 +268,7 @@ DEFER: 'choice'
|
|||
"]]" token ensure-not ,
|
||||
"]?" token ensure-not ,
|
||||
[ drop t ] satisfy ,
|
||||
] seq* [ first ] action repeat0 [ >string ] action ;
|
||||
] seq* repeat0 [ concat >string ] action ;
|
||||
|
||||
: 'ensure-not' ( -- parser )
|
||||
#! Parses the '!' syntax to ensure that
|
||||
|
@ -367,15 +377,16 @@ M: ebnf-tokenizer (transform) ( ast -- parser )
|
|||
(transform)
|
||||
dup parser-tokenizer \ tokenizer set-global
|
||||
] if ;
|
||||
|
||||
ERROR: redefined-rule name ;
|
||||
|
||||
M: redefined-rule summary
|
||||
name>> "Rule '" "' defined more than once" surround ;
|
||||
|
||||
M: ebnf-rule (transform) ( ast -- parser )
|
||||
dup elements>>
|
||||
(transform) [
|
||||
swap symbol>> dup get parser? [
|
||||
"Rule '" over append "' defined more than once" append throw
|
||||
] [
|
||||
set
|
||||
] if
|
||||
swap symbol>> dup get parser? [ redefined-rule ] [ set ] if
|
||||
] keep ;
|
||||
|
||||
M: ebnf-sequence (transform) ( ast -- parser )
|
||||
|
@ -466,14 +477,18 @@ ERROR: bad-effect quot effect ;
|
|||
{ [ dup (( -- b )) effect<= ] [ drop [ drop ] prepose ] }
|
||||
[ bad-effect ]
|
||||
} cond ;
|
||||
|
||||
: ebnf-transform ( ast -- parser quot )
|
||||
[ parser>> (transform) ]
|
||||
[ code>> insert-escapes ]
|
||||
[ parser>> ] tri build-locals
|
||||
[ string-lines parse-lines ] call( string -- quot ) ;
|
||||
|
||||
M: ebnf-action (transform) ( ast -- parser )
|
||||
[ parser>> (transform) ] [ code>> insert-escapes ] [ parser>> ] tri build-locals
|
||||
[ string-lines parse-lines ] call( string -- quot ) check-action-effect action ;
|
||||
ebnf-transform check-action-effect action ;
|
||||
|
||||
M: ebnf-semantic (transform) ( ast -- parser )
|
||||
[ parser>> (transform) ] [ code>> insert-escapes ] [ parser>> ] tri build-locals
|
||||
[ string-lines parse-lines ] call( string -- quot ) semantic ;
|
||||
ebnf-transform semantic ;
|
||||
|
||||
M: ebnf-var (transform) ( ast -- parser )
|
||||
parser>> (transform) ;
|
||||
|
@ -481,19 +496,20 @@ M: ebnf-var (transform) ( ast -- parser )
|
|||
M: ebnf-terminal (transform) ( ast -- parser )
|
||||
symbol>> tokenizer one>> call( symbol -- parser ) ;
|
||||
|
||||
ERROR: ebnf-foreign-not-found name ;
|
||||
|
||||
M: ebnf-foreign-not-found summary
|
||||
name>> "Foreign word '" "' not found" surround ;
|
||||
|
||||
M: ebnf-foreign (transform) ( ast -- parser )
|
||||
dup word>> search
|
||||
[ "Foreign word '" swap word>> append "' not found" append throw ] unless*
|
||||
dup word>> search [ word>> ebnf-foreign-not-found ] unless*
|
||||
swap rule>> [ main ] unless* over rule [
|
||||
nip
|
||||
] [
|
||||
execute( -- parser )
|
||||
] if* ;
|
||||
|
||||
: parser-not-found ( name -- * )
|
||||
[
|
||||
"Parser '" % % "' not found." %
|
||||
] "" make throw ;
|
||||
ERROR: parser-not-found name ;
|
||||
|
||||
M: ebnf-non-terminal (transform) ( ast -- parser )
|
||||
symbol>> [
|
||||
|
@ -504,16 +520,16 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
|
|||
'ebnf' parse transform ;
|
||||
|
||||
: check-parse-result ( result -- result )
|
||||
dup [
|
||||
dup remaining>> [ blank? ] trim empty? [
|
||||
[
|
||||
dup remaining>> [ blank? ] trim [
|
||||
[
|
||||
"Unable to fully parse EBNF. Left to parse was: " %
|
||||
remaining>> %
|
||||
] "" make throw
|
||||
] unless
|
||||
] unless-empty
|
||||
] [
|
||||
"Could not parse EBNF" throw
|
||||
] if ;
|
||||
] if* ;
|
||||
|
||||
: parse-ebnf ( string -- hashtable )
|
||||
'ebnf' (parse) check-parse-result ast>> transform ;
|
||||
|
@ -522,14 +538,18 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
|
|||
parse-ebnf dup dup parser [ main swap at compile ] with-variable
|
||||
[ compiled-parse ] curry [ with-scope ast>> ] curry ;
|
||||
|
||||
SYNTAX: <EBNF "EBNF>" reset-tokenizer parse-multiline-string parse-ebnf main swap at
|
||||
SYNTAX: <EBNF
|
||||
"EBNF>"
|
||||
reset-tokenizer parse-multiline-string parse-ebnf main swap at
|
||||
parsed reset-tokenizer ;
|
||||
|
||||
SYNTAX: [EBNF "EBNF]" reset-tokenizer parse-multiline-string ebnf>quot nip
|
||||
SYNTAX: [EBNF
|
||||
"EBNF]"
|
||||
reset-tokenizer parse-multiline-string ebnf>quot nip
|
||||
parsed \ call parsed reset-tokenizer ;
|
||||
|
||||
SYNTAX: EBNF:
|
||||
reset-tokenizer CREATE-WORD dup ";EBNF" parse-multiline-string
|
||||
ebnf>quot swapd (( input -- ast )) define-declared "ebnf-parser" set-word-prop
|
||||
ebnf>quot swapd
|
||||
(( input -- ast )) define-declared "ebnf-parser" set-word-prop
|
||||
reset-tokenizer ;
|
||||
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
! Copyright (C) 2009 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: tools.test quoting ;
|
||||
IN: quoting.tests
|
||||
|
||||
[ f ] [ "" quoted? ] unit-test
|
||||
[ t ] [ "''" quoted? ] unit-test
|
||||
[ t ] [ "\"\"" quoted? ] unit-test
|
||||
[ t ] [ "\"Circus Maximus\"" quoted? ] unit-test
|
||||
[ t ] [ "'Circus Maximus'" quoted? ] unit-test
|
||||
[ f ] [ "Circus Maximus" quoted? ] unit-test
|
|
@ -84,21 +84,24 @@ C: <box> box
|
|||
{ } assoc-like [ first integer? ] partition
|
||||
[ [ literals>cases ] keep ] dip non-literals>dispatch ;
|
||||
|
||||
:: step ( last-match index str quot final? direction -- last-index/f )
|
||||
: advance ( index backwards? -- index+/-1 )
|
||||
-1 1 ? + >fixnum ; inline
|
||||
|
||||
: check ( index string backwards? -- in-bounds? )
|
||||
[ drop -1 eq? not ] [ length < ] if ; inline
|
||||
|
||||
:: step ( last-match index str quot final? backwards? -- last-index/f )
|
||||
final? index last-match ?
|
||||
index str bounds-check? [
|
||||
index direction + str
|
||||
index str backwards? check [
|
||||
index backwards? advance str
|
||||
index str nth-unsafe
|
||||
quot call
|
||||
] when ; inline
|
||||
|
||||
: direction ( -- n )
|
||||
backwards? get -1 1 ? ;
|
||||
|
||||
: transitions>quot ( transitions final-state? -- quot )
|
||||
dup shortest? get and [ 2drop [ drop nip ] ] [
|
||||
[ split-literals swap case>quot ] dip direction
|
||||
'[ { array-capacity string } declare _ _ _ step ]
|
||||
[ split-literals swap case>quot ] dip backwards? get
|
||||
'[ { fixnum string } declare _ _ _ step ]
|
||||
] if ;
|
||||
|
||||
: word>quot ( word dfa -- quot )
|
||||
|
@ -122,10 +125,13 @@ C: <box> box
|
|||
: dfa>main-word ( dfa -- word )
|
||||
states>words [ states>code ] keep start-state>> ;
|
||||
|
||||
: word-template ( quot -- quot' )
|
||||
'[ drop [ f ] 2dip over array-capacity? _ [ 2drop ] if ] ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: dfa>word ( dfa -- quot )
|
||||
dfa>main-word execution-quot '[ drop [ f ] 2dip @ ]
|
||||
dfa>main-word execution-quot word-template
|
||||
(( start-index string regexp -- i/f )) define-temp ;
|
||||
|
||||
: dfa>shortest-word ( dfa -- word )
|
||||
|
|
|
@ -11,7 +11,7 @@ IN: sorting.human
|
|||
|
||||
: human>=< ( obj1 obj2 -- >=< ) human<=> invert-comparison ; inline
|
||||
|
||||
: human-compare ( obj1 obj2 quot -- <=> ) bi@ human<=> ;
|
||||
: human-compare ( obj1 obj2 quot -- <=> ) bi@ human<=> ; inline
|
||||
|
||||
: human-sort ( seq -- seq' ) [ human<=> ] sort ;
|
||||
|
||||
|
|
|
@ -35,9 +35,9 @@ HELP: download-feed
|
|||
{ $values { "url" url } { "feed" feed } }
|
||||
{ $description "Downloads a feed from a URL using the " { $link "http.client" } "." } ;
|
||||
|
||||
HELP: string>feed
|
||||
{ $values { "string" string } { "feed" feed } }
|
||||
{ $description "Parses a feed in string form." } ;
|
||||
HELP: parse-feed
|
||||
{ $values { "sequence" "a string or a byte array" } { "feed" feed } }
|
||||
{ $description "Parses a feed." } ;
|
||||
|
||||
HELP: xml>feed
|
||||
{ $values { "xml" xml } { "feed" feed } }
|
||||
|
@ -58,7 +58,7 @@ $nl
|
|||
{ $subsection <entry> }
|
||||
"Reading feeds:"
|
||||
{ $subsection download-feed }
|
||||
{ $subsection string>feed }
|
||||
{ $subsection parse-feed }
|
||||
{ $subsection xml>feed }
|
||||
"Writing feeds:"
|
||||
{ $subsection feed>xml }
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
USING: syndication io kernel io.files tools.test io.encodings.utf8
|
||||
USING: syndication io kernel io.files tools.test io.encodings.binary
|
||||
calendar urls xml.writer ;
|
||||
IN: syndication.tests
|
||||
|
||||
|
@ -8,7 +8,7 @@ IN: syndication.tests
|
|||
: load-news-file ( filename -- feed )
|
||||
#! Load an news syndication file and process it, returning
|
||||
#! it as an feed tuple.
|
||||
utf8 file-contents string>feed ;
|
||||
binary file-contents parse-feed ;
|
||||
|
||||
[ T{
|
||||
feed
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
! Copyright (C) 2006 Chris Double, Daniel Ehrenberg.
|
||||
! Portions copyright (C) 2008 Slava Pestov.
|
||||
! Portions copyright (C) 2008, 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: xml.traversal kernel assocs math.order
|
||||
strings sequences xml.data xml.writer
|
||||
io.streams.string combinators xml xml.entities.html io.files io
|
||||
http.client namespaces make xml.syntax hashtables
|
||||
calendar.format accessors continuations urls present ;
|
||||
USING: xml.traversal kernel assocs math.order strings sequences
|
||||
xml.data xml.writer io.streams.string combinators xml
|
||||
xml.entities.html io.files io http.client namespaces make
|
||||
xml.syntax hashtables calendar.format accessors continuations
|
||||
urls present byte-arrays ;
|
||||
IN: syndication
|
||||
|
||||
: any-tag-named ( tag names -- tag-inside )
|
||||
|
@ -106,12 +106,15 @@ TUPLE: entry title url description date ;
|
|||
{ "feed" [ atom1.0 ] }
|
||||
} case ;
|
||||
|
||||
: string>feed ( string -- feed )
|
||||
[ string>xml xml>feed ] with-html-entities ;
|
||||
GENERIC: parse-feed ( sequence -- feed )
|
||||
|
||||
M: string parse-feed [ string>xml xml>feed ] with-html-entities ;
|
||||
|
||||
M: byte-array parse-feed [ bytes>xml xml>feed ] with-html-entities ;
|
||||
|
||||
: download-feed ( url -- feed )
|
||||
#! Retrieve an news syndication file, return as a feed tuple.
|
||||
http-get nip string>feed ;
|
||||
http-get nip parse-feed ;
|
||||
|
||||
! Atom generation
|
||||
|
||||
|
|
|
@ -59,11 +59,11 @@ C: <transaction> transaction
|
|||
[ dup [ over [ swap call ] dip ] dip 1 days time+ ] dip each-day
|
||||
] [
|
||||
3drop
|
||||
] if ;
|
||||
] if ; inline recursive
|
||||
|
||||
: process-to-date ( account date -- account )
|
||||
over interest-last-paid>> 1 days time+
|
||||
[ dupd process-day ] spin each-day ;
|
||||
[ dupd process-day ] spin each-day ; inline
|
||||
|
||||
: inserting-transactions ( account transactions -- account )
|
||||
[ [ date>> process-to-date ] keep >>transaction ] each ;
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays html.parser.utils hashtables io kernel
|
||||
namespaces make prettyprint quotations sequences splitting
|
||||
html.parser.state strings unicode.categories unicode.case ;
|
||||
USING: accessors arrays hashtables html.parser.state
|
||||
html.parser.utils kernel make namespaces sequences
|
||||
unicode.case unicode.categories combinators.short-circuit
|
||||
quoting ;
|
||||
IN: html.parser
|
||||
|
||||
|
||||
TUPLE: tag name attributes text closing? ;
|
||||
|
||||
SINGLETON: text
|
||||
|
@ -28,113 +30,100 @@ SYMBOL: tagstack
|
|||
: make-tag ( string attribs -- tag )
|
||||
[ [ closing-tag? ] keep "/" trim1 ] dip rot <tag> ;
|
||||
|
||||
: make-text-tag ( string -- tag )
|
||||
: new-tag ( string type -- tag )
|
||||
tag new
|
||||
text >>name
|
||||
swap >>text ;
|
||||
swap >>name
|
||||
swap >>text ; inline
|
||||
|
||||
: make-comment-tag ( string -- tag )
|
||||
tag new
|
||||
comment >>name
|
||||
swap >>text ;
|
||||
: make-text-tag ( string -- tag ) text new-tag ; inline
|
||||
|
||||
: make-dtd-tag ( string -- tag )
|
||||
tag new
|
||||
dtd >>name
|
||||
swap >>text ;
|
||||
: make-comment-tag ( string -- tag ) comment new-tag ; inline
|
||||
|
||||
: read-whitespace ( -- string )
|
||||
[ get-char blank? not ] take-until ;
|
||||
: make-dtd-tag ( string -- tag ) dtd new-tag ; inline
|
||||
|
||||
: read-whitespace* ( -- ) read-whitespace drop ;
|
||||
: read-single-quote ( state-parser -- string )
|
||||
[ [ CHAR: ' = ] take-until ] [ next drop ] bi ;
|
||||
|
||||
: read-token ( -- string )
|
||||
read-whitespace*
|
||||
[ get-char blank? ] take-until ;
|
||||
: read-double-quote ( state-parser -- string )
|
||||
[ [ CHAR: " = ] take-until ] [ next drop ] bi ;
|
||||
|
||||
: read-single-quote ( -- string )
|
||||
[ get-char CHAR: ' = ] take-until ;
|
||||
: read-quote ( state-parser -- string )
|
||||
dup get+increment CHAR: ' =
|
||||
[ read-single-quote ] [ read-double-quote ] if ;
|
||||
|
||||
: read-double-quote ( -- string )
|
||||
[ get-char CHAR: " = ] take-until ;
|
||||
: read-key ( state-parser -- string )
|
||||
skip-whitespace
|
||||
[ { [ CHAR: = = ] [ blank? ] } 1|| ] take-until ;
|
||||
|
||||
: read-quote ( -- string )
|
||||
get-char next CHAR: ' =
|
||||
[ read-single-quote ] [ read-double-quote ] if next ;
|
||||
: read-= ( state-parser -- )
|
||||
skip-whitespace
|
||||
[ [ CHAR: = = ] take-until drop ] [ next drop ] bi ;
|
||||
|
||||
: read-key ( -- string )
|
||||
read-whitespace*
|
||||
[ get-char [ CHAR: = = ] [ blank? ] bi or ] take-until ;
|
||||
: read-token ( state-parser -- string )
|
||||
[ blank? ] take-until ;
|
||||
|
||||
: read-= ( -- )
|
||||
read-whitespace*
|
||||
[ get-char CHAR: = = ] take-until drop next ;
|
||||
|
||||
: read-value ( -- string )
|
||||
read-whitespace*
|
||||
get-char quote? [ read-quote ] [ read-token ] if
|
||||
: read-value ( state-parser -- string )
|
||||
skip-whitespace
|
||||
dup get-char quote? [ read-quote ] [ read-token ] if
|
||||
[ blank? ] trim ;
|
||||
|
||||
: read-comment ( -- )
|
||||
"-->" take-string make-comment-tag push-tag ;
|
||||
: read-comment ( state-parser -- )
|
||||
"-->" take-until-string make-comment-tag push-tag ;
|
||||
|
||||
: read-dtd ( -- )
|
||||
">" take-string make-dtd-tag push-tag ;
|
||||
: read-dtd ( state-parser -- )
|
||||
">" take-until-string make-dtd-tag push-tag ;
|
||||
|
||||
: read-bang ( -- )
|
||||
next get-char CHAR: - = get-next CHAR: - = and [
|
||||
: read-bang ( state-parser -- )
|
||||
next dup { [ get-char CHAR: - = ] [ get-next CHAR: - = ] } 1&& [
|
||||
next next
|
||||
read-comment
|
||||
] [
|
||||
read-dtd
|
||||
] if ;
|
||||
|
||||
: read-tag ( -- string )
|
||||
[ get-char CHAR: > = get-char CHAR: < = or ] take-until
|
||||
get-char CHAR: < = [ next ] unless ;
|
||||
: read-tag ( state-parser -- string )
|
||||
[ [ "><" member? ] take-until ]
|
||||
[ dup get-char CHAR: < = [ next ] unless drop ] bi ;
|
||||
|
||||
: read-< ( -- string )
|
||||
next get-char CHAR: ! = [
|
||||
read-bang f
|
||||
: read-until-< ( state-parser -- string )
|
||||
[ CHAR: < = ] take-until ;
|
||||
|
||||
: parse-text ( state-parser -- )
|
||||
read-until-< [ make-text-tag push-tag ] unless-empty ;
|
||||
|
||||
: (parse-attributes) ( state-parser -- )
|
||||
skip-whitespace
|
||||
dup string-parse-end? [
|
||||
drop
|
||||
] [
|
||||
read-tag
|
||||
[
|
||||
[ read-key >lower ] [ read-= ] [ read-value ] tri
|
||||
2array ,
|
||||
] keep (parse-attributes)
|
||||
] if ;
|
||||
|
||||
: read-until-< ( -- string )
|
||||
[ get-char CHAR: < = ] take-until ;
|
||||
|
||||
: parse-text ( -- )
|
||||
read-until-< [
|
||||
make-text-tag push-tag
|
||||
] unless-empty ;
|
||||
|
||||
: (parse-attributes) ( -- )
|
||||
read-whitespace*
|
||||
string-parse-end? [
|
||||
read-key >lower read-= read-value
|
||||
2array , (parse-attributes)
|
||||
] unless ;
|
||||
|
||||
: parse-attributes ( -- hashtable )
|
||||
: parse-attributes ( state-parser -- hashtable )
|
||||
[ (parse-attributes) ] { } make >hashtable ;
|
||||
|
||||
: (parse-tag) ( string -- string' hashtable )
|
||||
[
|
||||
read-token >lower
|
||||
parse-attributes
|
||||
[ read-token >lower ] [ parse-attributes ] bi
|
||||
] string-parse ;
|
||||
|
||||
: parse-tag ( -- )
|
||||
read-< [
|
||||
(parse-tag) make-tag push-tag
|
||||
] unless-empty ;
|
||||
: read-< ( state-parser -- string/f )
|
||||
next dup get-char [
|
||||
CHAR: ! = [ read-bang f ] [ read-tag ] if
|
||||
] [
|
||||
drop f
|
||||
] if* ;
|
||||
|
||||
: (parse-html) ( -- )
|
||||
get-next [
|
||||
parse-text
|
||||
parse-tag
|
||||
(parse-html)
|
||||
] when ;
|
||||
: parse-tag ( state-parser -- )
|
||||
read-< [ (parse-tag) make-tag push-tag ] unless-empty ;
|
||||
|
||||
: (parse-html) ( state-parser -- )
|
||||
dup get-next [
|
||||
[ parse-text ] [ parse-tag ] [ (parse-html) ] tri
|
||||
] [ drop ] if ;
|
||||
|
||||
: tag-parse ( quot -- vector )
|
||||
V{ } clone tagstack [ string-parse ] with-variable ; inline
|
||||
|
|
|
@ -1,14 +1,30 @@
|
|||
USING: tools.test html.parser.state ascii kernel ;
|
||||
USING: tools.test html.parser.state ascii kernel accessors ;
|
||||
IN: html.parser.state.tests
|
||||
|
||||
: take-rest ( -- string )
|
||||
[ f ] take-until ;
|
||||
[ "hello" ]
|
||||
[ "hello" [ take-rest ] string-parse ] unit-test
|
||||
|
||||
: take-char ( -- string )
|
||||
[ get-char = ] curry take-until ;
|
||||
[ "hi" " how are you?" ]
|
||||
[
|
||||
"hi how are you?"
|
||||
[ [ [ blank? ] take-until ] [ take-rest ] bi ] string-parse
|
||||
] unit-test
|
||||
|
||||
[ "foo" ";bar" ]
|
||||
[
|
||||
"foo;bar" [
|
||||
[ CHAR: ; take-until-char ] [ take-rest ] bi
|
||||
] string-parse
|
||||
] unit-test
|
||||
|
||||
[ "hello" ] [ "hello" [ take-rest ] string-parse ] unit-test
|
||||
[ "hi" " how are you?" ] [ "hi how are you?" [ [ get-char blank? ] take-until take-rest ] string-parse ] unit-test
|
||||
[ "foo" ";bar" ] [ "foo;bar" [ CHAR: ; take-char take-rest ] string-parse ] unit-test
|
||||
[ "foo " " bar" ]
|
||||
[ "foo and bar" [ "and" take-string take-rest ] string-parse ] unit-test
|
||||
[
|
||||
"foo and bar" [
|
||||
[ "and" take-until-string ] [ take-rest ] bi
|
||||
] string-parse
|
||||
] unit-test
|
||||
|
||||
[ 6 ]
|
||||
[
|
||||
" foo " [ skip-whitespace i>> ] string-parse
|
||||
] unit-test
|
||||
|
|
|
@ -1,41 +1,62 @@
|
|||
! Copyright (C) 2005, 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: namespaces math kernel sequences accessors fry circular ;
|
||||
USING: namespaces math kernel sequences accessors fry circular
|
||||
unicode.case unicode.categories locals ;
|
||||
IN: html.parser.state
|
||||
|
||||
TUPLE: state string i ;
|
||||
TUPLE: state-parser string i ;
|
||||
|
||||
: get-i ( -- i ) state get i>> ; inline
|
||||
: <state-parser> ( string -- state-parser )
|
||||
state-parser new
|
||||
swap >>string
|
||||
0 >>i ;
|
||||
|
||||
: get-char ( -- char )
|
||||
state get [ i>> ] [ string>> ] bi ?nth ; inline
|
||||
: (get-char) ( i state -- char/f )
|
||||
string>> ?nth ; inline
|
||||
|
||||
: get-next ( -- char )
|
||||
state get [ i>> 1+ ] [ string>> ] bi ?nth ; inline
|
||||
: get-char ( state -- char/f )
|
||||
[ i>> ] keep (get-char) ; inline
|
||||
|
||||
: next ( -- )
|
||||
state get [ 1+ ] change-i drop ; inline
|
||||
: get-next ( state -- char/f )
|
||||
[ i>> 1+ ] keep (get-char) ; inline
|
||||
|
||||
: next ( state -- state )
|
||||
[ 1+ ] change-i ; inline
|
||||
|
||||
: get+increment ( state -- char/f )
|
||||
[ get-char ] [ next drop ] bi ; inline
|
||||
|
||||
: string-parse ( string quot -- )
|
||||
[ 0 state boa state ] dip with-variable ; inline
|
||||
[ <state-parser> ] dip call ; inline
|
||||
|
||||
: short* ( n seq -- n' seq )
|
||||
over [ nip dup length swap ] unless ; inline
|
||||
:: skip-until ( state quot: ( obj -- ? ) -- )
|
||||
state get-char [
|
||||
quot call [ state next quot skip-until ] unless
|
||||
] when* ; inline recursive
|
||||
|
||||
: skip-until ( quot: ( -- ? ) -- )
|
||||
get-char [
|
||||
[ call ] keep swap
|
||||
[ drop ] [ next skip-until ] if
|
||||
] [ drop ] if ; inline recursive
|
||||
: take-until ( state quot: ( obj -- ? ) -- string )
|
||||
[ drop i>> ]
|
||||
[ skip-until ]
|
||||
[ drop [ i>> ] [ string>> ] bi ] 2tri subseq ; inline
|
||||
|
||||
: take-until ( quot: ( -- ? ) -- )
|
||||
get-i [ skip-until ] dip get-i
|
||||
state get string>> subseq ; inline
|
||||
:: take-until-string ( state-parser string -- string' )
|
||||
string length <growing-circular> :> growing
|
||||
state-parser
|
||||
[
|
||||
growing push-growing-circular
|
||||
string growing sequence=
|
||||
] take-until :> found
|
||||
found dup length
|
||||
growing length 1- - head
|
||||
state-parser next drop ;
|
||||
|
||||
: skip-whitespace ( state -- state )
|
||||
[ [ blank? not ] take-until drop ] keep ;
|
||||
|
||||
: string-matches? ( string circular -- ? )
|
||||
get-char over push-growing-circular sequence= ; inline
|
||||
: take-rest ( state -- string )
|
||||
[ drop f ] take-until ; inline
|
||||
|
||||
: take-string ( match -- string )
|
||||
dup length <growing-circular>
|
||||
[ 2dup string-matches? ] take-until nip
|
||||
dup length rot length 1- - head next ; inline
|
||||
: take-until-char ( state ch -- string )
|
||||
'[ _ = ] take-until ;
|
||||
|
||||
: string-parse-end? ( state -- ? ) get-next not ;
|
||||
|
|
|
@ -1,20 +1,13 @@
|
|||
USING: assocs combinators continuations hashtables
|
||||
hashtables.private io kernel math
|
||||
namespaces prettyprint quotations sequences splitting
|
||||
strings tools.test ;
|
||||
USING: html.parser.utils ;
|
||||
strings tools.test html.parser.utils quoting ;
|
||||
IN: html.parser.utils.tests
|
||||
|
||||
[ "'Rome'" ] [ "Rome" single-quote ] unit-test
|
||||
[ "\"Roma\"" ] [ "Roma" double-quote ] unit-test
|
||||
[ "'Firenze'" ] [ "Firenze" quote ] unit-test
|
||||
[ "\"Caesar's\"" ] [ "Caesar's" quote ] unit-test
|
||||
[ f ] [ "" quoted? ] unit-test
|
||||
[ t ] [ "''" quoted? ] unit-test
|
||||
[ t ] [ "\"\"" quoted? ] unit-test
|
||||
[ t ] [ "\"Circus Maximus\"" quoted? ] unit-test
|
||||
[ t ] [ "'Circus Maximus'" quoted? ] unit-test
|
||||
[ f ] [ "Circus Maximus" quoted? ] unit-test
|
||||
[ "'Italy'" ] [ "Italy" ?quote ] unit-test
|
||||
[ "'Italy'" ] [ "'Italy'" ?quote ] unit-test
|
||||
[ "\"Italy\"" ] [ "\"Italy\"" ?quote ] unit-test
|
||||
|
|
|
@ -3,16 +3,12 @@
|
|||
USING: assocs circular combinators continuations hashtables
|
||||
hashtables.private io kernel math namespaces prettyprint
|
||||
quotations sequences splitting html.parser.state strings
|
||||
combinators.short-circuit ;
|
||||
combinators.short-circuit quoting ;
|
||||
IN: html.parser.utils
|
||||
|
||||
: string-parse-end? ( -- ? ) get-next not ;
|
||||
|
||||
: trim1 ( seq ch -- newseq )
|
||||
[ [ ?head-slice drop ] [ ?tail-slice drop ] bi ] 2keep drop like ;
|
||||
|
||||
: quote? ( ch -- ? ) "'\"" member? ;
|
||||
|
||||
: single-quote ( str -- newstr ) "'" dup surround ;
|
||||
|
||||
: double-quote ( str -- newstr ) "\"" dup surround ;
|
||||
|
@ -21,14 +17,4 @@ IN: html.parser.utils
|
|||
CHAR: ' over member?
|
||||
[ double-quote ] [ single-quote ] if ;
|
||||
|
||||
: quoted? ( str -- ? )
|
||||
{
|
||||
[ length 1 > ]
|
||||
[ first quote? ]
|
||||
[ [ first ] [ peek ] bi = ]
|
||||
} 1&& ;
|
||||
|
||||
: ?quote ( str -- newstr ) dup quoted? [ quote ] unless ;
|
||||
|
||||
: unquote ( str -- newstr )
|
||||
dup quoted? [ but-last-slice rest-slice >string ] when ;
|
||||
|
|
|
@ -165,7 +165,7 @@ M: irc-chat to-chat in-messages>> mailbox-put ;
|
|||
" hostname servername :irc.factor" irc-print ;
|
||||
|
||||
: /CONNECT ( server port -- stream )
|
||||
irc> connect>> call drop ;
|
||||
irc> connect>> call drop ; inline
|
||||
|
||||
: /JOIN ( channel password -- )
|
||||
"JOIN " irc-write
|
||||
|
|
13
vm/alien.c
13
vm/alien.c
|
@ -160,7 +160,7 @@ void box_value_struct(void *src, CELL size)
|
|||
dpush(tag_object(array));
|
||||
}
|
||||
|
||||
/* On OS X, structs <= 8 bytes are returned in registers. */
|
||||
/* On some x86 OSes, structs <= 8 bytes are returned in registers. */
|
||||
void box_small_struct(CELL x, CELL y, CELL size)
|
||||
{
|
||||
CELL data[2];
|
||||
|
@ -169,6 +169,17 @@ void box_small_struct(CELL x, CELL y, CELL size)
|
|||
box_value_struct(data,size);
|
||||
}
|
||||
|
||||
/* On OS X/PPC, complex numbers are returned in registers. */
|
||||
void box_medium_struct(CELL x1, CELL x2, CELL x3, CELL x4, CELL size)
|
||||
{
|
||||
CELL data[4];
|
||||
data[0] = x1;
|
||||
data[1] = x2;
|
||||
data[2] = x3;
|
||||
data[3] = x4;
|
||||
box_value_struct(data,size);
|
||||
}
|
||||
|
||||
/* open a native library and push a handle */
|
||||
void primitive_dlopen(void)
|
||||
{
|
||||
|
|
|
@ -40,6 +40,7 @@ void primitive_set_alien_cell(void);
|
|||
DLLEXPORT void to_value_struct(CELL src, void *dest, CELL size);
|
||||
DLLEXPORT void box_value_struct(void *src, CELL size);
|
||||
DLLEXPORT void box_small_struct(CELL x, CELL y, CELL size);
|
||||
void box_medium_struct(CELL x1, CELL x2, CELL x3, CELL x4, CELL size);
|
||||
|
||||
DEFINE_UNTAG(F_DLL,DLL_TYPE,dll)
|
||||
|
||||
|
|
Loading…
Reference in New Issue