Merge branch 'master' of git://factorcode.org/git/factor into unicode

db4
Daniel Ehrenberg 2009-03-31 21:15:18 -05:00
commit b6d292fb23
19 changed files with 299 additions and 202 deletions

View File

@@ -659,13 +659,40 @@ M: ppc %callback-value ( ctype -- )
 M: ppc small-enough? ( n -- ? ) -32768 32767 between? ;

-M: ppc return-struct-in-registers? ( c-type -- ? ) drop f ;
+M: ppc return-struct-in-registers? ( c-type -- ? )
+    c-type return-in-registers?>> ;

-M: ppc %box-small-struct
-    drop "No small structs" throw ;
+M: ppc %box-small-struct ( c-type -- )
+    #! Box a <= 16-byte struct returned in r3:r4:r5:r6
+    heap-size 7 LI
+    "box_medium_struct" f %alien-invoke ;

-M: ppc %unbox-small-struct
-    drop "No small structs" throw ;
+: %unbox-struct-1 ( -- )
+    ! Alien must be in r3.
+    "alien_offset" f %alien-invoke
+    3 3 0 LWZ ;
+
+: %unbox-struct-2 ( -- )
+    ! Alien must be in r3.
+    "alien_offset" f %alien-invoke
+    4 3 4 LWZ
+    3 3 0 LWZ ;
+
+: %unbox-struct-4 ( -- )
+    ! Alien must be in r3.
+    "alien_offset" f %alien-invoke
+    6 3 12 LWZ
+    5 3 8 LWZ
+    4 3 4 LWZ
+    3 3 0 LWZ ;
+
+M: ppc %unbox-small-struct ( size -- )
+    #! Alien must be in EAX.
+    heap-size cell align cell /i {
+        { 1 [ %unbox-struct-1 ] }
+        { 2 [ %unbox-struct-2 ] }
+        { 4 [ %unbox-struct-4 ] }
+    } case ;

 USE: vocabs.loader
@@ -673,3 +700,5 @@ USE: vocabs.loader
     { [ os macosx? ] [ "cpu.ppc.macosx" require ] }
     { [ os linux? ] [ "cpu.ppc.linux" require ] }
 } cond
+
+"complex-double" c-type t >>return-in-registers? drop

View File

@@ -5,12 +5,13 @@ IN: models
 HELP: model
 { $class-description "A mutable cell holding a single value. When the value is changed, a sequence of connected objects are notified. Models have the following slots:"
     { $list
-        { { $snippet "value" } " - the value of the model. Use " { $link set-model } " to change the value." }
-        { { $snippet "connections" } " - a sequence of objects implementing the " { $link model-changed } " generic word, to be notified when the model's value changes." }
-        { { $snippet "dependencies" } " - a sequence of models which should have this model added to their sequence of connections when activated." }
-        { { $snippet "ref" } " - a reference count tracking the number of models which depend on this one." }
+        { { $slot "value" } " - the value of the model. Use " { $link set-model } " to change the value." }
+        { { $slot "connections" } " - a sequence of objects implementing the " { $link model-changed } " generic word, to be notified when the model's value changes." }
+        { { $slot "dependencies" } " - a sequence of models which should have this model added to their sequence of connections when activated." }
+        { { $slot "ref" } " - a reference count tracking the number of models which depend on this one." }
+        { { $slot "locked?" } " - a slot set by " { $link with-locked-model } " to ensure that the model doesn't get changed recursively" }
     }
-    "Other classes may delegate to " { $link model } "."
+    "Other classes may inherit from " { $link model } "."
 } ;

 HELP: <model>
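
A quick usage sketch of the documented protocol. The echoer observer, the literal values, and the use of add-connection from the models vocabulary of this era are assumptions for illustration, not part of this commit:

    ! Hypothetical observer; model-changed fires whenever set-model runs.
    USING: accessors kernel models prettyprint ;
    TUPLE: echoer ;
    M: echoer model-changed drop value>> . ;
    0 <model>
    echoer new over add-connection
    5 over set-model    ! prints 5
    drop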

View File

@@ -3,7 +3,7 @@
 !
 USING: kernel tools.test peg peg.ebnf words math math.parser
 sequences accessors peg.parsers parser namespaces arrays
-strings eval ;
+strings eval unicode.data multiline ;
 IN: peg.ebnf.tests

 { T{ ebnf-non-terminal f "abc" } } [
@@ -520,3 +520,13 @@ Tok = Spaces (Number | Special )
 { "\\" } [
     "\\" [EBNF foo="\\" EBNF]
 ] unit-test
+
+[ "USE: peg.ebnf [EBNF EBNF]" eval ] must-fail
+
+[ <" USE: peg.ebnf [EBNF
+lol = a
+lol = b
+EBNF] "> eval
+] [
+    error>> [ redefined-rule? ] [ name>> "lol" = ] bi and
+] must-fail-with

View File

@@ -5,13 +5,18 @@ sequences quotations vectors namespaces make math assocs
 continuations peg peg.parsers unicode.categories multiline
 splitting accessors effects sequences.deep peg.search
 combinators.short-circuit lexer io.streams.string stack-checker
-io combinators parser ;
+io combinators parser summary ;
 IN: peg.ebnf

 : rule ( name word -- parser )
     #! Given an EBNF word produced from EBNF: return the EBNF rule
     "ebnf-parser" word-prop at ;

+ERROR: no-rule rule parser ;
+
+: lookup-rule ( rule parser -- rule' )
+    2dup rule [ 2nip ] [ no-rule ] if* ;
+
 TUPLE: tokenizer any one many ;

 : default-tokenizer ( -- tokenizer )
@@ -34,8 +39,13 @@ TUPLE: tokenizer any one many ;
 : reset-tokenizer ( -- )
     default-tokenizer \ tokenizer set-global ;

+ERROR: no-tokenizer name ;
+
+M: no-tokenizer summary
+    drop "Tokenizer not found" ;
+
 SYNTAX: TOKENIZER:
-    scan search [ "Tokenizer not found" throw ] unless*
+    scan dup search [ nip ] [ no-tokenizer ] if*
     execute( -- tokenizer ) \ tokenizer set-global ;

 TUPLE: ebnf-non-terminal symbol ;
@@ -258,7 +268,7 @@ DEFER: 'choice'
         "]]" token ensure-not ,
         "]?" token ensure-not ,
         [ drop t ] satisfy ,
-    ] seq* [ first ] action repeat0 [ >string ] action ;
+    ] seq* repeat0 [ concat >string ] action ;

 : 'ensure-not' ( -- parser )
     #! Parses the '!' syntax to ensure that
@@ -367,15 +377,16 @@ M: ebnf-tokenizer (transform) ( ast -- parser )
         (transform)
         dup parser-tokenizer \ tokenizer set-global
     ] if ;

+ERROR: redefined-rule name ;
+
+M: redefined-rule summary
+    name>> "Rule '" "' defined more than once" surround ;
+
 M: ebnf-rule (transform) ( ast -- parser )
     dup elements>>
     (transform) [
-        swap symbol>> dup get parser? [
-            "Rule '" over append "' defined more than once" append throw
-        ] [
-            set
-        ] if
+        swap symbol>> dup get parser? [ redefined-rule ] [ set ] if
     ] keep ;

 M: ebnf-sequence (transform) ( ast -- parser )
@@ -466,14 +477,18 @@ ERROR: bad-effect quot effect ;
         { [ dup (( -- b )) effect<= ] [ drop [ drop ] prepose ] }
         [ bad-effect ]
     } cond ;

+: ebnf-transform ( ast -- parser quot )
+    [ parser>> (transform) ]
+    [ code>> insert-escapes ]
+    [ parser>> ] tri build-locals
+    [ string-lines parse-lines ] call( string -- quot ) ;
+
 M: ebnf-action (transform) ( ast -- parser )
-    [ parser>> (transform) ] [ code>> insert-escapes ] [ parser>> ] tri build-locals
-    [ string-lines parse-lines ] call( string -- quot ) check-action-effect action ;
+    ebnf-transform check-action-effect action ;

 M: ebnf-semantic (transform) ( ast -- parser )
-    [ parser>> (transform) ] [ code>> insert-escapes ] [ parser>> ] tri build-locals
-    [ string-lines parse-lines ] call( string -- quot ) semantic ;
+    ebnf-transform semantic ;

 M: ebnf-var (transform) ( ast -- parser )
     parser>> (transform) ;
@@ -481,19 +496,20 @@ M: ebnf-var (transform) ( ast -- parser )
 M: ebnf-terminal (transform) ( ast -- parser )
     symbol>> tokenizer one>> call( symbol -- parser ) ;

+ERROR: ebnf-foreign-not-found name ;
+
+M: ebnf-foreign-not-found summary
+    name>> "Foreign word '" "' not found" surround ;
+
 M: ebnf-foreign (transform) ( ast -- parser )
-    dup word>> search
-    [ "Foreign word '" swap word>> append "' not found" append throw ] unless*
+    dup word>> search [ word>> ebnf-foreign-not-found ] unless*
     swap rule>> [ main ] unless* over rule [
         nip
     ] [
         execute( -- parser )
     ] if* ;

-: parser-not-found ( name -- * )
-    [
-        "Parser '" % % "' not found." %
-    ] "" make throw ;
+ERROR: parser-not-found name ;

 M: ebnf-non-terminal (transform) ( ast -- parser )
     symbol>> [
@@ -504,16 +520,16 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
     'ebnf' parse transform ;

 : check-parse-result ( result -- result )
-    dup [
-        dup remaining>> [ blank? ] trim empty? [
+    [
+        dup remaining>> [ blank? ] trim [
             [
                 "Unable to fully parse EBNF. Left to parse was: " %
                 remaining>> %
             ] "" make throw
-        ] unless
+        ] unless-empty
     ] [
         "Could not parse EBNF" throw
-    ] if ;
+    ] if* ;

 : parse-ebnf ( string -- hashtable )
     'ebnf' (parse) check-parse-result ast>> transform ;
@@ -522,14 +538,18 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
     parse-ebnf dup dup parser [ main swap at compile ] with-variable
     [ compiled-parse ] curry [ with-scope ast>> ] curry ;

-SYNTAX: <EBNF "EBNF>" reset-tokenizer parse-multiline-string parse-ebnf main swap at
+SYNTAX: <EBNF
+    "EBNF>"
+    reset-tokenizer parse-multiline-string parse-ebnf main swap at
     parsed reset-tokenizer ;

-SYNTAX: [EBNF "EBNF]" reset-tokenizer parse-multiline-string ebnf>quot nip
+SYNTAX: [EBNF
+    "EBNF]"
+    reset-tokenizer parse-multiline-string ebnf>quot nip
     parsed \ call parsed reset-tokenizer ;

 SYNTAX: EBNF:
     reset-tokenizer CREATE-WORD dup ";EBNF" parse-multiline-string
-    ebnf>quot swapd (( input -- ast )) define-declared "ebnf-parser" set-word-prop
+    ebnf>quot swapd
+    (( input -- ast )) define-declared "ebnf-parser" set-word-prop
     reset-tokenizer ;
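
As a standalone sketch of the EBNF: defining word touched above, here is a one-rule grammar reusing the escaped-backslash terminal exercised by the [EBNF ... EBNF] unit test earlier in this commit (the word name backslash is made up for the example):

    USING: peg.ebnf prettyprint ;

    EBNF: backslash
    foo = "\\"
    ;EBNF

    "\\" backslash .    ! "\\", matching the inline [EBNF ... EBNF] test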

View File

@@ -0,0 +1,11 @@
+! Copyright (C) 2009 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: tools.test quoting ;
+IN: quoting.tests
+
+[ f ] [ "" quoted? ] unit-test
+[ t ] [ "''" quoted? ] unit-test
+[ t ] [ "\"\"" quoted? ] unit-test
+[ t ] [ "\"Circus Maximus\"" quoted? ] unit-test
+[ t ] [ "'Circus Maximus'" quoted? ] unit-test
+[ f ] [ "Circus Maximus" quoted? ] unit-test

View File

@@ -84,21 +84,24 @@ C: <box> box
     { } assoc-like [ first integer? ] partition
     [ [ literals>cases ] keep ] dip non-literals>dispatch ;

-:: step ( last-match index str quot final? direction -- last-index/f )
+: advance ( index backwards? -- index+/-1 )
+    -1 1 ? + >fixnum ; inline
+
+: check ( index string backwards? -- in-bounds? )
+    [ drop -1 eq? not ] [ length < ] if ; inline
+
+:: step ( last-match index str quot final? backwards? -- last-index/f )
     final? index last-match ?
-    index str bounds-check? [
-        index direction + str
+    index str backwards? check [
+        index backwards? advance str
         index str nth-unsafe
         quot call
     ] when ; inline

-: direction ( -- n )
-    backwards? get -1 1 ? ;
-
 : transitions>quot ( transitions final-state? -- quot )
     dup shortest? get and [ 2drop [ drop nip ] ] [
-        [ split-literals swap case>quot ] dip direction
-        '[ { array-capacity string } declare _ _ _ step ]
+        [ split-literals swap case>quot ] dip backwards? get
+        '[ { fixnum string } declare _ _ _ step ]
     ] if ;

 : word>quot ( word dfa -- quot )
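
To make the new direction handling concrete, here is a self-contained sketch that redefines advance and check exactly as in the hunk above (in the real code they are private to the regexp compiler, so this is for illustration only):

    USING: kernel math prettyprint sequences ;

    : advance ( index backwards? -- index+/-1 ) -1 1 ? + >fixnum ; inline

    : check ( index string backwards? -- in-bounds? )
        [ drop -1 eq? not ] [ length < ] if ; inline

    3 f advance .           ! 4 -- a forward scan moves right
    3 t advance .           ! 2 -- a backward scan moves left
    5 "hello" f check .     ! f -- index 5 is past the end going forward
    -1 "hello" t check .    ! f -- index -1 is past the start going backward
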
@@ -122,10 +125,13 @@ C: <box> box
 : dfa>main-word ( dfa -- word )
     states>words [ states>code ] keep start-state>> ;

+: word-template ( quot -- quot' )
+    '[ drop [ f ] 2dip over array-capacity? _ [ 2drop ] if ] ;
+
 PRIVATE>

 : dfa>word ( dfa -- quot )
-    dfa>main-word execution-quot '[ drop [ f ] 2dip @ ]
+    dfa>main-word execution-quot word-template
     (( start-index string regexp -- i/f )) define-temp ;

 : dfa>shortest-word ( dfa -- word )

View File

@@ -11,7 +11,7 @@ IN: sorting.human
 : human>=< ( obj1 obj2 -- >=< ) human<=> invert-comparison ; inline

-: human-compare ( obj1 obj2 quot -- <=> ) bi@ human<=> ;
+: human-compare ( obj1 obj2 quot -- <=> ) bi@ human<=> ; inline

 : human-sort ( seq -- seq' ) [ human<=> ] sort ;

View File

@@ -35,9 +35,9 @@ HELP: download-feed
 { $values { "url" url } { "feed" feed } }
 { $description "Downloads a feed from a URL using the " { $link "http.client" } "." } ;

-HELP: string>feed
-{ $values { "string" string } { "feed" feed } }
-{ $description "Parses a feed in string form." } ;
+HELP: parse-feed
+{ $values { "sequence" "a string or a byte array" } { "feed" feed } }
+{ $description "Parses a feed." } ;

 HELP: xml>feed
 { $values { "xml" xml } { "feed" feed } }
@@ -58,7 +58,7 @@ $nl
 { $subsection <entry> }
 "Reading feeds:"
 { $subsection download-feed }
-{ $subsection string>feed }
+{ $subsection parse-feed }
 { $subsection xml>feed }
 "Writing feeds:"
 { $subsection feed>xml }
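
A usage sketch that mirrors the test change further down in this commit: feed data read as raw bytes now goes straight to parse-feed, which dispatches on byte-array (the filename is hypothetical):

    USING: io.encodings.binary io.files syndication ;

    "news.xml" binary file-contents parse-feed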

View File

@@ -1,4 +1,4 @@
-USING: syndication io kernel io.files tools.test io.encodings.utf8
+USING: syndication io kernel io.files tools.test io.encodings.binary
 calendar urls xml.writer ;
 IN: syndication.tests
@@ -8,7 +8,7 @@ IN: syndication.tests
 : load-news-file ( filename -- feed )
     #! Load an news syndication file and process it, returning
     #! it as an feed tuple.
-    utf8 file-contents string>feed ;
+    binary file-contents parse-feed ;

 [ T{
     feed

View File

@@ -1,11 +1,11 @@
 ! Copyright (C) 2006 Chris Double, Daniel Ehrenberg.
-! Portions copyright (C) 2008 Slava Pestov.
+! Portions copyright (C) 2008, 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: xml.traversal kernel assocs math.order
-strings sequences xml.data xml.writer
-io.streams.string combinators xml xml.entities.html io.files io
-http.client namespaces make xml.syntax hashtables
-calendar.format accessors continuations urls present ;
+USING: xml.traversal kernel assocs math.order strings sequences
+xml.data xml.writer io.streams.string combinators xml
+xml.entities.html io.files io http.client namespaces make
+xml.syntax hashtables calendar.format accessors continuations
+urls present byte-arrays ;
 IN: syndication

 : any-tag-named ( tag names -- tag-inside )
@@ -106,12 +106,15 @@ TUPLE: entry title url description date ;
         { "feed" [ atom1.0 ] }
     } case ;

-: string>feed ( string -- feed )
-    [ string>xml xml>feed ] with-html-entities ;
+GENERIC: parse-feed ( sequence -- feed )
+
+M: string parse-feed [ string>xml xml>feed ] with-html-entities ;
+
+M: byte-array parse-feed [ bytes>xml xml>feed ] with-html-entities ;

 : download-feed ( url -- feed )
     #! Retrieve an news syndication file, return as a feed tuple.
-    http-get nip string>feed ;
+    http-get nip parse-feed ;

 ! Atom generation

View File

@@ -59,11 +59,11 @@ C: <transaction> transaction
         [ dup [ over [ swap call ] dip ] dip 1 days time+ ] dip each-day
     ] [
         3drop
-    ] if ;
+    ] if ; inline recursive

 : process-to-date ( account date -- account )
     over interest-last-paid>> 1 days time+
-    [ dupd process-day ] spin each-day ;
+    [ dupd process-day ] spin each-day ; inline

 : inserting-transactions ( account transactions -- account )
     [ [ date>> process-to-date ] keep >>transaction ] each ;

View File

@@ -1,10 +1,12 @@
 ! Copyright (C) 2008 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays html.parser.utils hashtables io kernel
-namespaces make prettyprint quotations sequences splitting
-html.parser.state strings unicode.categories unicode.case ;
+USING: accessors arrays hashtables html.parser.state
+html.parser.utils kernel make namespaces sequences
+unicode.case unicode.categories combinators.short-circuit
+quoting ;
 IN: html.parser

 TUPLE: tag name attributes text closing? ;

 SINGLETON: text
@@ -28,113 +30,100 @@ SYMBOL: tagstack
 : make-tag ( string attribs -- tag )
     [ [ closing-tag? ] keep "/" trim1 ] dip rot <tag> ;

-: make-text-tag ( string -- tag )
-    tag new
-        text >>name
-        swap >>text ;
-
-: make-comment-tag ( string -- tag )
-    tag new
-        comment >>name
-        swap >>text ;
-
-: make-dtd-tag ( string -- tag )
-    tag new
-        dtd >>name
-        swap >>text ;
-
-: read-whitespace ( -- string )
-    [ get-char blank? not ] take-until ;
-
-: read-whitespace* ( -- ) read-whitespace drop ;
-
-: read-token ( -- string )
-    read-whitespace*
-    [ get-char blank? ] take-until ;
-
-: read-single-quote ( -- string )
-    [ get-char CHAR: ' = ] take-until ;
-
-: read-double-quote ( -- string )
-    [ get-char CHAR: " = ] take-until ;
-
-: read-quote ( -- string )
-    get-char next CHAR: ' =
-    [ read-single-quote ] [ read-double-quote ] if next ;
-
-: read-key ( -- string )
-    read-whitespace*
-    [ get-char [ CHAR: = = ] [ blank? ] bi or ] take-until ;
-
-: read-= ( -- )
-    read-whitespace*
-    [ get-char CHAR: = = ] take-until drop next ;
-
-: read-value ( -- string )
-    read-whitespace*
-    get-char quote? [ read-quote ] [ read-token ] if
+: new-tag ( string type -- tag )
+    tag new
+        swap >>name
+        swap >>text ; inline
+
+: make-text-tag ( string -- tag ) text new-tag ; inline
+
+: make-comment-tag ( string -- tag ) comment new-tag ; inline
+
+: make-dtd-tag ( string -- tag ) dtd new-tag ; inline
+
+: read-single-quote ( state-parser -- string )
+    [ [ CHAR: ' = ] take-until ] [ next drop ] bi ;
+
+: read-double-quote ( state-parser -- string )
+    [ [ CHAR: " = ] take-until ] [ next drop ] bi ;
+
+: read-quote ( state-parser -- string )
+    dup get+increment CHAR: ' =
+    [ read-single-quote ] [ read-double-quote ] if ;
+
+: read-key ( state-parser -- string )
+    skip-whitespace
+    [ { [ CHAR: = = ] [ blank? ] } 1|| ] take-until ;
+
+: read-= ( state-parser -- )
+    skip-whitespace
+    [ [ CHAR: = = ] take-until drop ] [ next drop ] bi ;
+
+: read-token ( state-parser -- string )
+    [ blank? ] take-until ;
+
+: read-value ( state-parser -- string )
+    skip-whitespace
+    dup get-char quote? [ read-quote ] [ read-token ] if
     [ blank? ] trim ;
-: read-comment ( -- )
-    "-->" take-string make-comment-tag push-tag ;
+: read-comment ( state-parser -- )
+    "-->" take-until-string make-comment-tag push-tag ;

-: read-dtd ( -- )
-    ">" take-string make-dtd-tag push-tag ;
+: read-dtd ( state-parser -- )
+    ">" take-until-string make-dtd-tag push-tag ;

-: read-bang ( -- )
-    next get-char CHAR: - = get-next CHAR: - = and [
+: read-bang ( state-parser -- )
+    next dup { [ get-char CHAR: - = ] [ get-next CHAR: - = ] } 1&& [
         next next
         read-comment
     ] [
         read-dtd
     ] if ;

-: read-tag ( -- string )
-    [ get-char CHAR: > = get-char CHAR: < = or ] take-until
-    get-char CHAR: < = [ next ] unless ;
+: read-tag ( state-parser -- string )
+    [ [ "><" member? ] take-until ]
+    [ dup get-char CHAR: < = [ next ] unless drop ] bi ;

-: read-< ( -- string )
-    next get-char CHAR: ! = [
-        read-bang f
+: read-until-< ( state-parser -- string )
+    [ CHAR: < = ] take-until ;
+
+: parse-text ( state-parser -- )
+    read-until-< [ make-text-tag push-tag ] unless-empty ;
+
+: (parse-attributes) ( state-parser -- )
+    skip-whitespace
+    dup string-parse-end? [
+        drop
     ] [
-        read-tag
+        [
+            [ read-key >lower ] [ read-= ] [ read-value ] tri
+            2array ,
+        ] keep (parse-attributes)
     ] if ;

-: read-until-< ( -- string )
-    [ get-char CHAR: < = ] take-until ;
-
-: parse-text ( -- )
-    read-until-< [
-        make-text-tag push-tag
-    ] unless-empty ;
-
-: (parse-attributes) ( -- )
-    read-whitespace*
-    string-parse-end? [
-        read-key >lower read-= read-value
-        2array , (parse-attributes)
-    ] unless ;
-
-: parse-attributes ( -- hashtable )
+: parse-attributes ( state-parser -- hashtable )
     [ (parse-attributes) ] { } make >hashtable ;

 : (parse-tag) ( string -- string' hashtable )
     [
-        read-token >lower
-        parse-attributes
+        [ read-token >lower ] [ parse-attributes ] bi
     ] string-parse ;

-: parse-tag ( -- )
-    read-< [
-        (parse-tag) make-tag push-tag
-    ] unless-empty ;
+: read-< ( state-parser -- string/f )
+    next dup get-char [
+        CHAR: ! = [ read-bang f ] [ read-tag ] if
+    ] [
+        drop f
+    ] if* ;

-: (parse-html) ( -- )
-    get-next [
-        parse-text
-        parse-tag
-        (parse-html)
-    ] when ;
+: parse-tag ( state-parser -- )
+    read-< [ (parse-tag) make-tag push-tag ] unless-empty ;
+
+: (parse-html) ( state-parser -- )
+    dup get-next [
+        [ parse-text ] [ parse-tag ] [ (parse-html) ] tri
+    ] [ drop ] if ;

 : tag-parse ( quot -- vector )
     V{ } clone tagstack [ string-parse ] with-variable ; inline

View File

@@ -1,14 +1,30 @@
-USING: tools.test html.parser.state ascii kernel ;
+USING: tools.test html.parser.state ascii kernel accessors ;
 IN: html.parser.state.tests

-: take-rest ( -- string )
-    [ f ] take-until ;
-
-: take-char ( -- string )
-    [ get-char = ] curry take-until ;
-
-[ "hello" ] [ "hello" [ take-rest ] string-parse ] unit-test
-[ "hi" " how are you?" ] [ "hi how are you?" [ [ get-char blank? ] take-until take-rest ] string-parse ] unit-test
-[ "foo" ";bar" ] [ "foo;bar" [ CHAR: ; take-char take-rest ] string-parse ] unit-test
+[ "hello" ]
+[ "hello" [ take-rest ] string-parse ] unit-test
+
+[ "hi" " how are you?" ]
+[
+    "hi how are you?"
+    [ [ [ blank? ] take-until ] [ take-rest ] bi ] string-parse
+] unit-test
+
+[ "foo" ";bar" ]
+[
+    "foo;bar" [
+        [ CHAR: ; take-until-char ] [ take-rest ] bi
+    ] string-parse
+] unit-test

 [ "foo " " bar" ]
-[ "foo and bar" [ "and" take-string take-rest ] string-parse ] unit-test
+[
+    "foo and bar" [
+        [ "and" take-until-string ] [ take-rest ] bi
+    ] string-parse
+] unit-test
+
+[ 6 ]
+[
+    " foo " [ skip-whitespace i>> ] string-parse
+] unit-test

View File

@@ -1,41 +1,62 @@
 ! Copyright (C) 2005, 2009 Daniel Ehrenberg
 ! See http://factorcode.org/license.txt for BSD license.
-USING: namespaces math kernel sequences accessors fry circular ;
+USING: namespaces math kernel sequences accessors fry circular
+unicode.case unicode.categories locals ;
 IN: html.parser.state

-TUPLE: state string i ;
+TUPLE: state-parser string i ;

-: get-i ( -- i ) state get i>> ; inline
+: <state-parser> ( string -- state-parser )
+    state-parser new
+        swap >>string
+        0 >>i ;

-: get-char ( -- char )
-    state get [ i>> ] [ string>> ] bi ?nth ; inline
+: (get-char) ( i state -- char/f )
+    string>> ?nth ; inline

-: get-next ( -- char )
-    state get [ i>> 1+ ] [ string>> ] bi ?nth ; inline
+: get-char ( state -- char/f )
+    [ i>> ] keep (get-char) ; inline

-: next ( -- )
-    state get [ 1+ ] change-i drop ; inline
+: get-next ( state -- char/f )
+    [ i>> 1+ ] keep (get-char) ; inline
+
+: next ( state -- state )
+    [ 1+ ] change-i ; inline
+
+: get+increment ( state -- char/f )
+    [ get-char ] [ next drop ] bi ; inline

 : string-parse ( string quot -- )
-    [ 0 state boa state ] dip with-variable ; inline
+    [ <state-parser> ] dip call ; inline

-: short* ( n seq -- n' seq )
-    over [ nip dup length swap ] unless ; inline
+:: skip-until ( state quot: ( obj -- ? ) -- )
+    state get-char [
+        quot call [ state next quot skip-until ] unless
+    ] when* ; inline recursive

-: skip-until ( quot: ( -- ? ) -- )
-    get-char [
-        [ call ] keep swap
-        [ drop ] [ next skip-until ] if
-    ] [ drop ] if ; inline recursive
+: take-until ( state quot: ( obj -- ? ) -- string )
+    [ drop i>> ]
+    [ skip-until ]
+    [ drop [ i>> ] [ string>> ] bi ] 2tri subseq ; inline

-: take-until ( quot: ( -- ? ) -- )
-    get-i [ skip-until ] dip get-i
-    state get string>> subseq ; inline
+:: take-until-string ( state-parser string -- string' )
+    string length <growing-circular> :> growing
+    state-parser
+    [
+        growing push-growing-circular
+        string growing sequence=
+    ] take-until :> found
+    found dup length
+    growing length 1- - head
+    state-parser next drop ;
+
+: skip-whitespace ( state -- state )
+    [ [ blank? not ] take-until drop ] keep ;

-: string-matches? ( string circular -- ? )
-    get-char over push-growing-circular sequence= ; inline
+: take-rest ( state -- string )
+    [ drop f ] take-until ; inline

-: take-string ( match -- string )
-    dup length <growing-circular>
-    [ 2dup string-matches? ] take-until nip
-    dup length rot length 1- - head next ; inline
+: take-until-char ( state ch -- string )
+    '[ _ = ] take-until ;
+
+: string-parse-end? ( state -- ? ) get-next not ;

View File

@@ -1,20 +1,13 @@
 USING: assocs combinators continuations hashtables
 hashtables.private io kernel math
 namespaces prettyprint quotations sequences splitting
-strings tools.test ;
-USING: html.parser.utils ;
+strings tools.test html.parser.utils quoting ;
 IN: html.parser.utils.tests

 [ "'Rome'" ] [ "Rome" single-quote ] unit-test
 [ "\"Roma\"" ] [ "Roma" double-quote ] unit-test
 [ "'Firenze'" ] [ "Firenze" quote ] unit-test
 [ "\"Caesar's\"" ] [ "Caesar's" quote ] unit-test
-[ f ] [ "" quoted? ] unit-test
-[ t ] [ "''" quoted? ] unit-test
-[ t ] [ "\"\"" quoted? ] unit-test
-[ t ] [ "\"Circus Maximus\"" quoted? ] unit-test
-[ t ] [ "'Circus Maximus'" quoted? ] unit-test
-[ f ] [ "Circus Maximus" quoted? ] unit-test
 [ "'Italy'" ] [ "Italy" ?quote ] unit-test
 [ "'Italy'" ] [ "'Italy'" ?quote ] unit-test
 [ "\"Italy\"" ] [ "\"Italy\"" ?quote ] unit-test

View File

@@ -3,16 +3,12 @@
 USING: assocs circular combinators continuations hashtables
 hashtables.private io kernel math namespaces prettyprint
 quotations sequences splitting html.parser.state strings
-combinators.short-circuit ;
+combinators.short-circuit quoting ;
 IN: html.parser.utils

-: string-parse-end? ( -- ? ) get-next not ;
-
 : trim1 ( seq ch -- newseq )
     [ [ ?head-slice drop ] [ ?tail-slice drop ] bi ] 2keep drop like ;

-: quote? ( ch -- ? ) "'\"" member? ;
-
 : single-quote ( str -- newstr ) "'" dup surround ;

 : double-quote ( str -- newstr ) "\"" dup surround ;
@@ -21,14 +17,4 @@ IN: html.parser.utils
     CHAR: ' over member?
     [ double-quote ] [ single-quote ] if ;

-: quoted? ( str -- ? )
-    {
-        [ length 1 > ]
-        [ first quote? ]
-        [ [ first ] [ peek ] bi = ]
-    } 1&& ;
-
 : ?quote ( str -- newstr ) dup quoted? [ quote ] unless ;
-
-: unquote ( str -- newstr )
-    dup quoted? [ but-last-slice rest-slice >string ] when ;

View File

@@ -165,7 +165,7 @@ M: irc-chat to-chat in-messages>> mailbox-put ;
     " hostname servername :irc.factor" irc-print ;

 : /CONNECT ( server port -- stream )
-    irc> connect>> call drop ;
+    irc> connect>> call drop ; inline

 : /JOIN ( channel password -- )
     "JOIN " irc-write

View File

@@ -160,7 +160,7 @@ void box_value_struct(void *src, CELL size)
     dpush(tag_object(array));
 }

-/* On OS X, structs <= 8 bytes are returned in registers. */
+/* On some x86 OSes, structs <= 8 bytes are returned in registers. */
 void box_small_struct(CELL x, CELL y, CELL size)
 {
     CELL data[2];
@@ -169,6 +169,17 @@ void box_small_struct(CELL x, CELL y, CELL size)
     box_value_struct(data,size);
 }

+/* On OS X/PPC, complex numbers are returned in registers. */
+void box_medium_struct(CELL x1, CELL x2, CELL x3, CELL x4, CELL size)
+{
+    CELL data[4];
+    data[0] = x1;
+    data[1] = x2;
+    data[2] = x3;
+    data[3] = x4;
+    box_value_struct(data,size);
+}
+
 /* open a native library and push a handle */
 void primitive_dlopen(void)
 {

View File

@@ -40,6 +40,7 @@ void primitive_set_alien_cell(void);
 DLLEXPORT void to_value_struct(CELL src, void *dest, CELL size);
 DLLEXPORT void box_value_struct(void *src, CELL size);
 DLLEXPORT void box_small_struct(CELL x, CELL y, CELL size);
+void box_medium_struct(CELL x1, CELL x2, CELL x3, CELL x4, CELL size);

 DEFINE_UNTAG(F_DLL,DLL_TYPE,dll)