From 32b3fab3a90cfbe2afde161f7a7eac94674fade9 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Wed, 1 Oct 2008 20:48:53 -0500 Subject: [PATCH 1/6] assoc-deques -> assoc-heaps --- extra/assoc-deques/assoc-deques-docs.factor | 32 ------------------- extra/assoc-deques/assoc-deques.factor | 31 ------------------ extra/assoc-heaps/assoc-heaps-docs.factor | 32 +++++++++++++++++++ .../assoc-heaps-tests.factor} | 4 +-- extra/assoc-heaps/assoc-heaps.factor | 31 ++++++++++++++++++ .../{assoc-deques => assoc-heaps}/authors.txt | 0 extra/spider/spider.factor | 2 +- 7 files changed, 66 insertions(+), 66 deletions(-) delete mode 100644 extra/assoc-deques/assoc-deques-docs.factor delete mode 100644 extra/assoc-deques/assoc-deques.factor create mode 100644 extra/assoc-heaps/assoc-heaps-docs.factor rename extra/{assoc-deques/assoc-deques-tests.factor => assoc-heaps/assoc-heaps-tests.factor} (62%) create mode 100644 extra/assoc-heaps/assoc-heaps.factor rename extra/{assoc-deques => assoc-heaps}/authors.txt (100%) diff --git a/extra/assoc-deques/assoc-deques-docs.factor b/extra/assoc-deques/assoc-deques-docs.factor deleted file mode 100644 index d8f305d51d..0000000000 --- a/extra/assoc-deques/assoc-deques-docs.factor +++ /dev/null @@ -1,32 +0,0 @@ -! Copyright (C) 2008 Doug Coleman. -! See http://factorcode.org/license.txt for BSD license. -USING: help.markup help.syntax io.streams.string ; -IN: assoc-deques - -HELP: -{ $description "Constructs a new " { $link assoc-deque } " from two existing data structures." } ; - -HELP: -{ $values - - { "unique-heap" assoc-deque } } -{ $description "Creates a new " { $link assoc-deque } " where the assoc is a hashtable and the deque is a max-heap." } ; - -HELP: -{ $values - - { "unique-heap" assoc-deque } } -{ $description "Creates a new " { $link assoc-deque } " where the assoc is a hashtable and the deque is a min-heap." } ; - -HELP: assoc-deque -{ $description "A data structure containing an assoc and a deque to get certain properties with better time constraints at the expense of more space and complexity. For instance, a hashtable and a heap can be combined into one assoc-deque to get a sorted data structure with O(1) lookup. Operations on assoc-deques should update both the assoc and the deque." } ; - -ARTICLE: "assoc-deques" "Associative deques" -"The " { $vocab-link "assoc-deques" } " vocabulary combines exists to synthesize data structures with better time properties than either of the two component data structures alone." $nl -"Associative deque constructor:" -{ $subsection } -"Unique heaps:" -{ $subsection } -{ $subsection } ; - -ABOUT: "assoc-deques" diff --git a/extra/assoc-deques/assoc-deques.factor b/extra/assoc-deques/assoc-deques.factor deleted file mode 100644 index a23e632b8b..0000000000 --- a/extra/assoc-deques/assoc-deques.factor +++ /dev/null @@ -1,31 +0,0 @@ -! Copyright (C) 2008 Doug Coleman. -! See http://factorcode.org/license.txt for BSD license. -USING: accessors assocs deques hashtables heaps kernel ; -IN: assoc-deques - -TUPLE: assoc-deque assoc deque ; - -C: assoc-deque - -: ( -- unique-heap ) - H{ } clone ; - -: ( -- unique-heap ) - H{ } clone ; - -M: assoc-deque heap-push* ( value key assoc-deque -- entry ) - pick over assoc>> key? [ - 3drop f - ] [ - [ assoc>> swapd set-at ] [ deque>> heap-push* ] 3bi - ] if ; - -M: assoc-deque heap-pop ( assoc-deque -- value key ) - [ deque>> heap-pop ] keep - [ over ] dip assoc>> delete-at ; - -M: assoc-deque heap-peek ( assoc-deque -- value key ) - deque>> heap-peek ; - -M: assoc-deque heap-empty? ( assoc-deque -- value key ) - deque>> heap-empty? ; diff --git a/extra/assoc-heaps/assoc-heaps-docs.factor b/extra/assoc-heaps/assoc-heaps-docs.factor new file mode 100644 index 0000000000..6a80bcc6c6 --- /dev/null +++ b/extra/assoc-heaps/assoc-heaps-docs.factor @@ -0,0 +1,32 @@ +! Copyright (C) 2008 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: help.markup help.syntax io.streams.string ; +IN: assoc-heaps + +HELP: +{ $description "Constructs a new " { $link assoc-heap } " from two existing data structures." } ; + +HELP: +{ $values + + { "unique-heap" assoc-heap } } +{ $description "Creates a new " { $link assoc-heap } " where the assoc is a hashtable and the heap is a max-heap." } ; + +HELP: +{ $values + + { "unique-heap" assoc-heap } } +{ $description "Creates a new " { $link assoc-heap } " where the assoc is a hashtable and the heap is a min-heap." } ; + +HELP: assoc-heap +{ $description "A data structure containing an assoc and a heap to get certain properties with better time constraints at the expense of more space and complexity. For instance, a hashtable and a heap can be combined into one assoc-heap to get a sorted data structure with O(1) lookup. Operations on assoc-heap should update both the assoc and the heap." } ; + +ARTICLE: "assoc-heaps" "Associative heaps" +"The " { $vocab-link "assoc-heaps" } " vocabulary combines exists to synthesize data structures with better time properties than either of the two component data structures alone." $nl +"Associative heap constructor:" +{ $subsection } +"Unique heaps:" +{ $subsection } +{ $subsection } ; + +ABOUT: "assoc-heaps" diff --git a/extra/assoc-deques/assoc-deques-tests.factor b/extra/assoc-heaps/assoc-heaps-tests.factor similarity index 62% rename from extra/assoc-deques/assoc-deques-tests.factor rename to extra/assoc-heaps/assoc-heaps-tests.factor index fe9d8840bc..6ea3fe14a4 100644 --- a/extra/assoc-deques/assoc-deques-tests.factor +++ b/extra/assoc-heaps/assoc-heaps-tests.factor @@ -1,4 +1,4 @@ ! Copyright (C) 2008 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: tools.test assoc-deques ; -IN: assoc-deques.tests +USING: tools.test assoc-heaps ; +IN: assoc-heaps.tests diff --git a/extra/assoc-heaps/assoc-heaps.factor b/extra/assoc-heaps/assoc-heaps.factor new file mode 100644 index 0000000000..d2154002b9 --- /dev/null +++ b/extra/assoc-heaps/assoc-heaps.factor @@ -0,0 +1,31 @@ +! Copyright (C) 2008 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors assocs hashtables heaps kernel ; +IN: assoc-heaps + +TUPLE: assoc-heap assoc heap ; + +C: assoc-heap + +: ( -- unique-heap ) + H{ } clone ; + +: ( -- unique-heap ) + H{ } clone ; + +M: assoc-heap heap-push* ( value key assoc-heap -- entry ) + pick over assoc>> key? [ + 3drop f + ] [ + [ assoc>> swapd set-at ] [ heap>> heap-push* ] 3bi + ] if ; + +M: assoc-heap heap-pop ( assoc-heap -- value key ) + [ heap>> heap-pop ] keep + [ over ] dip assoc>> delete-at ; + +M: assoc-heap heap-peek ( assoc-heap -- value key ) + heap>> heap-peek ; + +M: assoc-heap heap-empty? ( assoc-heap -- value key ) + heap>> heap-empty? ; diff --git a/extra/assoc-deques/authors.txt b/extra/assoc-heaps/authors.txt similarity index 100% rename from extra/assoc-deques/authors.txt rename to extra/assoc-heaps/authors.txt diff --git a/extra/spider/spider.factor b/extra/spider/spider.factor index 6f5261f158..90326d21cd 100644 --- a/extra/spider/spider.factor +++ b/extra/spider/spider.factor @@ -3,7 +3,7 @@ USING: accessors fry html.parser html.parser.analyzer http.client kernel tools.time sets assocs sequences concurrency.combinators io threads namespaces math multiline -heaps math.parser inspector urls assoc-deques logging +heaps math.parser inspector urls assoc-heaps logging combinators.short-circuit continuations calendar prettyprint ; IN: spider From 4ce980b9acbf3416d04b35bb03d791d619abb2d1 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 1 Oct 2008 20:54:58 -0500 Subject: [PATCH 2/6] Move heap-slurp-when to spider --- basis/heaps/heaps-docs.factor | 5 ----- basis/heaps/heaps.factor | 6 ------ extra/spider/spider-docs.factor | 5 +++++ extra/spider/spider.factor | 6 ++++++ 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/basis/heaps/heaps-docs.factor b/basis/heaps/heaps-docs.factor index 90298c6edf..77537cbfb1 100755 --- a/basis/heaps/heaps-docs.factor +++ b/basis/heaps/heaps-docs.factor @@ -91,8 +91,3 @@ HELP: slurp-heap { $values { "heap" "a heap" } { "quot" quotation } } { $description "Removes values from a heap and processes them with the quotation until the heap is empty." } ; - -HELP: slurp-heap-when -{ $values - { "heap" "a heap" } { "quot1" quotation } { "quot2" quotation } } -{ $description "Removes values from a heap that match the predicate quotation " { $snippet "quot1" } " and processes them with " { $snippet "quot2" } " until the predicate quotation no longer matches." } ; diff --git a/basis/heaps/heaps.factor b/basis/heaps/heaps.factor index 50aad826f5..6c387632ed 100755 --- a/basis/heaps/heaps.factor +++ b/basis/heaps/heaps.factor @@ -195,9 +195,3 @@ M: heap heap-pop ( heap -- value key ) over heap-empty? [ 2drop ] [ [ [ heap-pop drop ] dip call ] [ slurp-heap ] 2bi ] if ; inline recursive - -: slurp-heap-when ( heap quot1 quot2: ( value key -- ) -- ) - pick heap-empty? [ 3drop ] [ - [ [ heap-pop dup ] 2dip slip [ t ] compose [ 2drop f ] if ] - [ roll [ slurp-heap-when ] [ 3drop ] if ] 3bi - ] if ; inline recursive diff --git a/extra/spider/spider-docs.factor b/extra/spider/spider-docs.factor index 27238e4f19..458c1d14d8 100644 --- a/extra/spider/spider-docs.factor +++ b/extra/spider/spider-docs.factor @@ -22,6 +22,11 @@ HELP: spider HELP: spider-result { $description "" } ; +HELP: slurp-heap-when +{ $values + { "heap" "a heap" } { "quot1" quotation } { "quot2" quotation } } +{ $description "Removes values from a heap that match the predicate quotation " { $snippet "quot1" } " and processes them with " { $snippet "quot2" } " until the predicate quotation no longer matches." } ; + ARTICLE: "spider-tutorial" "Spider tutorial" "To create a new spider, call the " { $link } " word with a link to the site you wish to spider." { $code <" "http://concatentative.org" "> } diff --git a/extra/spider/spider.factor b/extra/spider/spider.factor index 6f5261f158..8f60a0d521 100644 --- a/extra/spider/spider.factor +++ b/extra/spider/spider.factor @@ -81,6 +81,12 @@ links processing-time timestamp ; [ initial-links>> normalize-hrefs 0 ] keep [ add-todo ] keep ; +: slurp-heap-when ( heap quot1 quot2: ( value key -- ) -- ) + pick heap-empty? [ 3drop ] [ + [ [ heap-pop dup ] 2dip slip [ t ] compose [ 2drop f ] if ] + [ roll [ slurp-heap-when ] [ 3drop ] if ] 3bi + ] if ; inline recursive + PRIVATE> : run-spider ( spider -- spider ) From f240e1ff37eca88ddb77dba530b7d7d459925b5b Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 1 Oct 2008 20:55:24 -0500 Subject: [PATCH 3/6] Small cleanup --- extra/suffix-arrays/suffix-arrays.factor | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/extra/suffix-arrays/suffix-arrays.factor b/extra/suffix-arrays/suffix-arrays.factor index 719496243c..b181ba9d60 100755 --- a/extra/suffix-arrays/suffix-arrays.factor +++ b/extra/suffix-arrays/suffix-arrays.factor @@ -22,10 +22,9 @@ IN: suffix-arrays : ( from/f to/f seq -- slice ) [ tuck - [ drop [ 0 ] unless* ] - [ dupd length ? ] 2bi* + [ drop 0 or ] [ length or ] 2bi* [ min ] keep - ] keep ; + ] keep ; inline PRIVATE> @@ -35,6 +34,6 @@ PRIVATE> : SA{ \ } [ >suffix-array ] parse-literal ; parsing : query ( begin suffix-array -- matches ) - 2dup find-index + 2dup find-index dup [ -rot [ from-to ] keep [ seq>> ] map prune ] - [ 2drop { } ] if* ; + [ 3drop { } ] if ; From 37219fa139aae8ec3239ec69955c1d187fb088e8 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 1 Oct 2008 21:05:27 -0500 Subject: [PATCH 4/6] Fix help --- basis/heaps/heaps-docs.factor | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/basis/heaps/heaps-docs.factor b/basis/heaps/heaps-docs.factor index 77537cbfb1..3c1c61faec 100755 --- a/basis/heaps/heaps-docs.factor +++ b/basis/heaps/heaps-docs.factor @@ -29,10 +29,8 @@ $nl { $subsection heap-pop* } { $subsection heap-pop } { $subsection heap-delete } -$nl "Processing heaps:" -{ $subsection slurp-heap } -{ $subsection slurp-heap-when } ; +{ $subsection slurp-heap } ; ABOUT: "heaps" From a10c3f425c570a6c6726fce1a03704e1b82b0df9 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Wed, 1 Oct 2008 21:25:37 -0500 Subject: [PATCH 5/6] let the unique heap retain hashtable entries after elements are popped to ensure uniqueness forever --- extra/assoc-heaps/assoc-heaps-docs.factor | 9 ++++--- extra/assoc-heaps/assoc-heaps.factor | 3 +-- extra/spider/spider.factor | 33 ++++++++++++----------- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/extra/assoc-heaps/assoc-heaps-docs.factor b/extra/assoc-heaps/assoc-heaps-docs.factor index 6a80bcc6c6..8beaf9c4b1 100644 --- a/extra/assoc-heaps/assoc-heaps-docs.factor +++ b/extra/assoc-heaps/assoc-heaps-docs.factor @@ -10,16 +10,17 @@ HELP: { $values { "unique-heap" assoc-heap } } -{ $description "Creates a new " { $link assoc-heap } " where the assoc is a hashtable and the heap is a max-heap." } ; +{ $description "Creates a new " { $link assoc-heap } " where the assoc is a hashtable and the heap is a max-heap. Popping an element from the heap leaves this element in the hashtable to ensure that the element will not be processed again." } ; HELP: { $values - { "unique-heap" assoc-heap } } -{ $description "Creates a new " { $link assoc-heap } " where the assoc is a hashtable and the heap is a min-heap." } ; +{ $description "Creates a new " { $link assoc-heap } " where the assoc is a hashtable and the heap is a min-heap. Popping an element from the heap leaves this element in the hashtable to ensure that the element will not be processed again." } ; + +{ } related-words HELP: assoc-heap -{ $description "A data structure containing an assoc and a heap to get certain properties with better time constraints at the expense of more space and complexity. For instance, a hashtable and a heap can be combined into one assoc-heap to get a sorted data structure with O(1) lookup. Operations on assoc-heap should update both the assoc and the heap." } ; +{ $description "A data structure containing an assoc and a heap to get certain properties with better time constraints at the expense of more space and complexity. For instance, a hashtable and a heap can be combined into one assoc-heap to get a sorted data structure with O(1) lookup. Operations on assoc-heap may update both the assoc and the heap or leave them out of sync if it's advantageous." } ; ARTICLE: "assoc-heaps" "Associative heaps" "The " { $vocab-link "assoc-heaps" } " vocabulary combines exists to synthesize data structures with better time properties than either of the two component data structures alone." $nl diff --git a/extra/assoc-heaps/assoc-heaps.factor b/extra/assoc-heaps/assoc-heaps.factor index d2154002b9..a495aed626 100644 --- a/extra/assoc-heaps/assoc-heaps.factor +++ b/extra/assoc-heaps/assoc-heaps.factor @@ -21,8 +21,7 @@ M: assoc-heap heap-push* ( value key assoc-heap -- entry ) ] if ; M: assoc-heap heap-pop ( assoc-heap -- value key ) - [ heap>> heap-pop ] keep - [ over ] dip assoc>> delete-at ; + heap>> heap-pop ; M: assoc-heap heap-peek ( assoc-heap -- value key ) heap>> heap-peek ; diff --git a/extra/spider/spider.factor b/extra/spider/spider.factor index 90326d21cd..fb7757d448 100644 --- a/extra/spider/spider.factor +++ b/extra/spider/spider.factor @@ -8,7 +8,7 @@ combinators.short-circuit continuations calendar prettyprint ; IN: spider TUPLE: spider base count max-count sleep max-depth initial-links -filters spidered todo nonmatching ; +filters spidered todo nonmatching quiet ; ! secure? agent page-timeout data-timeout overall-timeout TUPLE: spider-result url depth headers fetch-time parsed-html @@ -27,19 +27,19 @@ links processing-time timestamp ; > not ; + : apply-filters ( links spider -- links' ) filters>> [ '[ _ 1&& ] filter ] when* ; +: push-links ( links level assoc-heap -- ) + '[ _ _ heap-push ] each ; + : add-todo ( links level spider -- ) - tuck [ apply-filters ] 2dip - tuck - [ spidered>> keys diff ] - [ todo>> ] 2bi* '[ _ _ heap-push ] each ; + todo>> push-links ; : add-nonmatching ( links level spider -- ) - nonmatching>> '[ _ _ heap-push ] each ; - -: relative-url? ( url -- ? ) protocol>> not ; + nonmatching>> push-links ; : filter-base ( spider spider-result -- base-links nonmatching-links ) [ base>> host>> ] [ links>> prune ] bi* @@ -51,26 +51,27 @@ links processing-time timestamp ; [ filter-base ] 2keep depth>> 1+ swap [ add-nonmatching ] - [ add-todo ] 2bi ; - -: print-spidering ( url depth -- ) - "depth: " write number>string write - ", spidering: " write . yield ; + [ tuck [ apply-filters ] 2dip add-todo ] 2bi ; : normalize-hrefs ( links -- links' ) [ >url ] map spider get base>> swap [ derive-url ] with map ; +: print-spidering ( url depth -- ) + "depth: " write number>string write + ", spidering: " write . yield ; + : (spider-page) ( url depth -- spider-result ) - 2dup print-spidering f pick spider get spidered>> set-at over '[ _ http-get ] benchmark swap [ parse-html dup find-hrefs normalize-hrefs ] benchmark now spider-result boa - dup describe ; : spider-page ( url depth -- ) - (spider-page) spider get swap add-spidered ; + spider get quiet>> [ 2dup print-spidering ] unless + (spider-page) + spider get [ quiet>> [ dup describe ] unless ] + [ swap add-spidered ] bi ; \ spider-page ERROR add-error-logging From 7308f2fb52c9fdd34ed6fac7c0793b2fc7399546 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Wed, 1 Oct 2008 21:30:12 -0500 Subject: [PATCH 6/6] fix shiz --- extra/spider/spider-docs.factor | 8 +------- extra/spider/spider.factor | 2 +- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/extra/spider/spider-docs.factor b/extra/spider/spider-docs.factor index 458c1d14d8..5f820ca368 100644 --- a/extra/spider/spider-docs.factor +++ b/extra/spider/spider-docs.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2008 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. USING: help.markup help.syntax io.streams.string urls -multiline ; +multiline spider.private quotations ; IN: spider HELP: @@ -16,12 +16,6 @@ HELP: run-spider { "spider" spider } } { $description "Runs a spider until completion. See the " { $subsection "spider-tutorial" } " for a complete description of the tuple slots that affect how thet spider works." } ; -HELP: spider -{ $description "" } ; - -HELP: spider-result -{ $description "" } ; - HELP: slurp-heap-when { $values { "heap" "a heap" } { "quot1" quotation } { "quot2" quotation } } diff --git a/extra/spider/spider.factor b/extra/spider/spider.factor index fa1cd2ac0f..64ee081ecc 100644 --- a/extra/spider/spider.factor +++ b/extra/spider/spider.factor @@ -65,7 +65,7 @@ links processing-time timestamp ; f pick spider get spidered>> set-at over '[ _ http-get ] benchmark swap [ parse-html dup find-hrefs normalize-hrefs ] benchmark - now spider-result boa + now spider-result boa ; : spider-page ( url depth -- ) spider get quiet>> [ 2dup print-spidering ] unless