Merge branch 'master' of git://factorcode.org/git/factor

db4
John Benediktsson 2008-10-01 20:05:14 -07:00
commit 9cd9a98ce9
11 changed files with 100 additions and 108 deletions

View File

@ -29,10 +29,8 @@ $nl
{ $subsection heap-pop* } { $subsection heap-pop* }
{ $subsection heap-pop } { $subsection heap-pop }
{ $subsection heap-delete } { $subsection heap-delete }
$nl
"Processing heaps:" "Processing heaps:"
{ $subsection slurp-heap } { $subsection slurp-heap } ;
{ $subsection slurp-heap-when } ;
ABOUT: "heaps" ABOUT: "heaps"
@ -91,8 +89,3 @@ HELP: slurp-heap
{ $values { $values
{ "heap" "a heap" } { "quot" quotation } } { "heap" "a heap" } { "quot" quotation } }
{ $description "Removes values from a heap and processes them with the quotation until the heap is empty." } ; { $description "Removes values from a heap and processes them with the quotation until the heap is empty." } ;
HELP: slurp-heap-when
{ $values
{ "heap" "a heap" } { "quot1" quotation } { "quot2" quotation } }
{ $description "Removes values from a heap that match the predicate quotation " { $snippet "quot1" } " and processes them with " { $snippet "quot2" } " until the predicate quotation no longer matches." } ;

View File

@ -195,9 +195,3 @@ M: heap heap-pop ( heap -- value key )
over heap-empty? [ 2drop ] [ over heap-empty? [ 2drop ] [
[ [ heap-pop drop ] dip call ] [ slurp-heap ] 2bi [ [ heap-pop drop ] dip call ] [ slurp-heap ] 2bi
] if ; inline recursive ] if ; inline recursive
: slurp-heap-when ( heap quot1 quot2: ( value key -- ) -- )
pick heap-empty? [ 3drop ] [
[ [ heap-pop dup ] 2dip slip [ t ] compose [ 2drop f ] if ]
[ roll [ slurp-heap-when ] [ 3drop ] if ] 3bi
] if ; inline recursive

View File

@ -1,32 +0,0 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: help.markup help.syntax io.streams.string ;
IN: assoc-deques
HELP: <assoc-deque>
{ $description "Constructs a new " { $link assoc-deque } " from two existing data structures." } ;
HELP: <unique-max-heap>
{ $values
{ "unique-heap" assoc-deque } }
{ $description "Creates a new " { $link assoc-deque } " where the assoc is a hashtable and the deque is a max-heap." } ;
HELP: <unique-min-heap>
{ $values
{ "unique-heap" assoc-deque } }
{ $description "Creates a new " { $link assoc-deque } " where the assoc is a hashtable and the deque is a min-heap." } ;
HELP: assoc-deque
{ $description "A data structure containing an assoc and a deque to get certain properties with better time constraints at the expense of more space and complexity. For instance, a hashtable and a heap can be combined into one assoc-deque to get a sorted data structure with O(1) lookup. Operations on assoc-deques should update both the assoc and the deque." } ;
ARTICLE: "assoc-deques" "Associative deques"
"The " { $vocab-link "assoc-deques" } " vocabulary combines exists to synthesize data structures with better time properties than either of the two component data structures alone." $nl
"Associative deque constructor:"
{ $subsection <assoc-deque> }
"Unique heaps:"
{ $subsection <unique-min-heap> }
{ $subsection <unique-max-heap> } ;
ABOUT: "assoc-deques"

View File

@ -1,31 +0,0 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors assocs deques hashtables heaps kernel ;
IN: assoc-deques
TUPLE: assoc-deque assoc deque ;
C: <assoc-deque> assoc-deque
: <unique-min-heap> ( -- unique-heap )
H{ } clone <min-heap> <assoc-deque> ;
: <unique-max-heap> ( -- unique-heap )
H{ } clone <max-heap> <assoc-deque> ;
M: assoc-deque heap-push* ( value key assoc-deque -- entry )
pick over assoc>> key? [
3drop f
] [
[ assoc>> swapd set-at ] [ deque>> heap-push* ] 3bi
] if ;
M: assoc-deque heap-pop ( assoc-deque -- value key )
[ deque>> heap-pop ] keep
[ over ] dip assoc>> delete-at ;
M: assoc-deque heap-peek ( assoc-deque -- value key )
deque>> heap-peek ;
M: assoc-deque heap-empty? ( assoc-deque -- value key )
deque>> heap-empty? ;

View File

@ -0,0 +1,33 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: help.markup help.syntax io.streams.string ;
IN: assoc-heaps
HELP: <assoc-heap>
{ $description "Constructs a new " { $link assoc-heap } " from two existing data structures." } ;
HELP: <unique-max-heap>
{ $values
{ "unique-heap" assoc-heap } }
{ $description "Creates a new " { $link assoc-heap } " where the assoc is a hashtable and the heap is a max-heap. Popping an element from the heap leaves this element in the hashtable to ensure that the element will not be processed again." } ;
HELP: <unique-min-heap>
{ $values
{ "unique-heap" assoc-heap } }
{ $description "Creates a new " { $link assoc-heap } " where the assoc is a hashtable and the heap is a min-heap. Popping an element from the heap leaves this element in the hashtable to ensure that the element will not be processed again." } ;
{ <unique-max-heap> <unique-min-heap> } related-words
HELP: assoc-heap
{ $description "A data structure containing an assoc and a heap to get certain properties with better time constraints at the expense of more space and complexity. For instance, a hashtable and a heap can be combined into one assoc-heap to get a sorted data structure with O(1) lookup. Operations on assoc-heap may update both the assoc and the heap or leave them out of sync if it's advantageous." } ;
ARTICLE: "assoc-heaps" "Associative heaps"
"The " { $vocab-link "assoc-heaps" } " vocabulary combines exists to synthesize data structures with better time properties than either of the two component data structures alone." $nl
"Associative heap constructor:"
{ $subsection <assoc-heap> }
"Unique heaps:"
{ $subsection <unique-min-heap> }
{ $subsection <unique-max-heap> } ;
ABOUT: "assoc-heaps"

View File

@ -1,4 +1,4 @@
! Copyright (C) 2008 Doug Coleman. ! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: tools.test assoc-deques ; USING: tools.test assoc-heaps ;
IN: assoc-deques.tests IN: assoc-heaps.tests

View File

@ -0,0 +1,30 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors assocs hashtables heaps kernel ;
IN: assoc-heaps
TUPLE: assoc-heap assoc heap ;
C: <assoc-heap> assoc-heap
: <unique-min-heap> ( -- unique-heap )
H{ } clone <min-heap> <assoc-heap> ;
: <unique-max-heap> ( -- unique-heap )
H{ } clone <max-heap> <assoc-heap> ;
M: assoc-heap heap-push* ( value key assoc-heap -- entry )
pick over assoc>> key? [
3drop f
] [
[ assoc>> swapd set-at ] [ heap>> heap-push* ] 3bi
] if ;
M: assoc-heap heap-pop ( assoc-heap -- value key )
heap>> heap-pop ;
M: assoc-heap heap-peek ( assoc-heap -- value key )
heap>> heap-peek ;
M: assoc-heap heap-empty? ( assoc-heap -- value key )
heap>> heap-empty? ;

View File

@ -1,7 +1,7 @@
! Copyright (C) 2008 Doug Coleman. ! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: help.markup help.syntax io.streams.string urls USING: help.markup help.syntax io.streams.string urls
multiline ; multiline spider.private quotations ;
IN: spider IN: spider
HELP: <spider> HELP: <spider>
@ -16,11 +16,10 @@ HELP: run-spider
{ "spider" spider } } { "spider" spider } }
{ $description "Runs a spider until completion. See the " { $subsection "spider-tutorial" } " for a complete description of the tuple slots that affect how thet spider works." } ; { $description "Runs a spider until completion. See the " { $subsection "spider-tutorial" } " for a complete description of the tuple slots that affect how thet spider works." } ;
HELP: spider HELP: slurp-heap-when
{ $description "" } ; { $values
{ "heap" "a heap" } { "quot1" quotation } { "quot2" quotation } }
HELP: spider-result { $description "Removes values from a heap that match the predicate quotation " { $snippet "quot1" } " and processes them with " { $snippet "quot2" } " until the predicate quotation no longer matches." } ;
{ $description "" } ;
ARTICLE: "spider-tutorial" "Spider tutorial" ARTICLE: "spider-tutorial" "Spider tutorial"
"To create a new spider, call the " { $link <spider> } " word with a link to the site you wish to spider." "To create a new spider, call the " { $link <spider> } " word with a link to the site you wish to spider."

View File

@ -3,12 +3,12 @@
USING: accessors fry html.parser html.parser.analyzer USING: accessors fry html.parser html.parser.analyzer
http.client kernel tools.time sets assocs sequences http.client kernel tools.time sets assocs sequences
concurrency.combinators io threads namespaces math multiline concurrency.combinators io threads namespaces math multiline
heaps math.parser inspector urls assoc-deques logging heaps math.parser inspector urls assoc-heaps logging
combinators.short-circuit continuations calendar prettyprint ; combinators.short-circuit continuations calendar prettyprint ;
IN: spider IN: spider
TUPLE: spider base count max-count sleep max-depth initial-links TUPLE: spider base count max-count sleep max-depth initial-links
filters spidered todo nonmatching ; filters spidered todo nonmatching quiet ;
! secure? agent page-timeout data-timeout overall-timeout ! secure? agent page-timeout data-timeout overall-timeout
TUPLE: spider-result url depth headers fetch-time parsed-html TUPLE: spider-result url depth headers fetch-time parsed-html
@ -27,19 +27,19 @@ links processing-time timestamp ;
<PRIVATE <PRIVATE
: relative-url? ( url -- ? ) protocol>> not ;
: apply-filters ( links spider -- links' ) : apply-filters ( links spider -- links' )
filters>> [ '[ _ 1&& ] filter ] when* ; filters>> [ '[ _ 1&& ] filter ] when* ;
: push-links ( links level assoc-heap -- )
'[ _ _ heap-push ] each ;
: add-todo ( links level spider -- ) : add-todo ( links level spider -- )
tuck [ apply-filters ] 2dip todo>> push-links ;
tuck
[ spidered>> keys diff ]
[ todo>> ] 2bi* '[ _ _ heap-push ] each ;
: add-nonmatching ( links level spider -- ) : add-nonmatching ( links level spider -- )
nonmatching>> '[ _ _ heap-push ] each ; nonmatching>> push-links ;
: relative-url? ( url -- ? ) protocol>> not ;
: filter-base ( spider spider-result -- base-links nonmatching-links ) : filter-base ( spider spider-result -- base-links nonmatching-links )
[ base>> host>> ] [ links>> prune ] bi* [ base>> host>> ] [ links>> prune ] bi*
@ -51,26 +51,27 @@ links processing-time timestamp ;
[ filter-base ] 2keep [ filter-base ] 2keep
depth>> 1+ swap depth>> 1+ swap
[ add-nonmatching ] [ add-nonmatching ]
[ add-todo ] 2bi ; [ tuck [ apply-filters ] 2dip add-todo ] 2bi ;
: print-spidering ( url depth -- )
"depth: " write number>string write
", spidering: " write . yield ;
: normalize-hrefs ( links -- links' ) : normalize-hrefs ( links -- links' )
[ >url ] map [ >url ] map
spider get base>> swap [ derive-url ] with map ; spider get base>> swap [ derive-url ] with map ;
: print-spidering ( url depth -- )
"depth: " write number>string write
", spidering: " write . yield ;
: (spider-page) ( url depth -- spider-result ) : (spider-page) ( url depth -- spider-result )
2dup print-spidering
f pick spider get spidered>> set-at f pick spider get spidered>> set-at
over '[ _ http-get ] benchmark swap over '[ _ http-get ] benchmark swap
[ parse-html dup find-hrefs normalize-hrefs ] benchmark [ parse-html dup find-hrefs normalize-hrefs ] benchmark
now spider-result boa now spider-result boa ;
dup describe ;
: spider-page ( url depth -- ) : spider-page ( url depth -- )
(spider-page) spider get swap add-spidered ; spider get quiet>> [ 2dup print-spidering ] unless
(spider-page)
spider get [ quiet>> [ dup describe ] unless ]
[ swap add-spidered ] bi ;
\ spider-page ERROR add-error-logging \ spider-page ERROR add-error-logging
@ -81,6 +82,12 @@ links processing-time timestamp ;
[ initial-links>> normalize-hrefs 0 ] keep [ initial-links>> normalize-hrefs 0 ] keep
[ add-todo ] keep ; [ add-todo ] keep ;
: slurp-heap-when ( heap quot1 quot2: ( value key -- ) -- )
pick heap-empty? [ 3drop ] [
[ [ heap-pop dup ] 2dip slip [ t ] compose [ 2drop f ] if ]
[ roll [ slurp-heap-when ] [ 3drop ] if ] 3bi
] if ; inline recursive
PRIVATE> PRIVATE>
: run-spider ( spider -- spider ) : run-spider ( spider -- spider )

View File

@ -22,10 +22,9 @@ IN: suffix-arrays
: <funky-slice> ( from/f to/f seq -- slice ) : <funky-slice> ( from/f to/f seq -- slice )
[ [
tuck tuck
[ drop [ 0 ] unless* ] [ drop 0 or ] [ length or ] 2bi*
[ dupd length ? ] 2bi*
[ min ] keep [ min ] keep
] keep <slice> ; ] keep <slice> ; inline
PRIVATE> PRIVATE>
@ -35,6 +34,6 @@ PRIVATE>
: SA{ \ } [ >suffix-array ] parse-literal ; parsing : SA{ \ } [ >suffix-array ] parse-literal ; parsing
: query ( begin suffix-array -- matches ) : query ( begin suffix-array -- matches )
2dup find-index 2dup find-index dup
[ -rot [ from-to ] keep <funky-slice> [ seq>> ] map prune ] [ -rot [ from-to ] keep <funky-slice> [ seq>> ] map prune ]
[ 2drop { } ] if* ; [ 3drop { } ] if ;