remove some dead code, make spider use count and max-count again

db4
Doug Coleman 2009-03-31 23:04:59 -05:00
parent 19aa85ef49
commit 19d8a6a552
1 changed file with 2 additions and 5 deletions

View File

@@ -8,7 +8,7 @@ continuations calendar prettyprint dlists deques locals ;
IN: spider IN: spider
TUPLE: spider base count max-count sleep max-depth initial-links TUPLE: spider base count max-count sleep max-depth initial-links
filters spidered todo nonmatching filtered quiet ; filters spidered todo nonmatching quiet ;
TUPLE: spider-result url depth headers fetch-time parsed-html TUPLE: spider-result url depth headers fetch-time parsed-html
links processing-time timestamp ; links processing-time timestamp ;
@@ -40,7 +40,6 @@ TUPLE: unique-deque assoc deque ;
over >>base over >>base
swap 0 <unique-deque> [ push-url ] keep >>todo swap 0 <unique-deque> [ push-url ] keep >>todo
<unique-deque> >>nonmatching <unique-deque> >>nonmatching
<unique-deque> >>filtered
0 >>max-depth 0 >>max-depth
0 >>count 0 >>count
1/0. >>max-count 1/0. >>max-count
@@ -60,9 +59,6 @@ TUPLE: unique-deque assoc deque ;
: add-nonmatching ( links level spider -- ) : add-nonmatching ( links level spider -- )
nonmatching>> push-links ; nonmatching>> push-links ;
: add-filtered ( links level spider -- )
filtered>> push-links ;
: filter-base-links ( spider spider-result -- base-links nonmatching-links ) : filter-base-links ( spider spider-result -- base-links nonmatching-links )
[ base>> host>> ] [ links>> prune ] bi* [ base>> host>> ] [ links>> prune ] bi*
[ host>> = ] with partition ; [ host>> = ] with partition ;
@@ -110,6 +106,7 @@ TUPLE: unique-deque assoc deque ;
{ {
[ todo>> deque>> deque-empty? not ] [ todo>> deque>> deque-empty? not ]
[ [ todo>> peek-url depth>> ] [ max-depth>> ] bi < ] [ [ todo>> peek-url depth>> ] [ max-depth>> ] bi < ]
[ [ count>> ] [ max-count>> ] bi < ]
} 1&& ; } 1&& ;
: setup-next-url ( spider -- spider url depth ) : setup-next-url ( spider -- spider url depth )