Merge branch 'master' of git://factorcode.org/git/factor

db4
John Benediktsson 2008-10-01 17:38:20 -07:00
commit cdf7091ae8
16 changed files with 276 additions and 20 deletions

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license.
USING: accessors kernel math namespaces make sequences random
strings math.parser math.intervals combinators math.bitwise
nmake db db.tuples db.types db.sql classes words shuffle arrays
nmake db db.tuples db.types classes words shuffle arrays
destructors continuations db.tuples.private prettyprint ;
IN: db.queries
@ -186,18 +186,6 @@ M: db query>statement ( query -- tuple )
! select ID, NAME, SCORE from EXAM limit 1 offset 3
! Builds and runs a statement of the form
!   select COL1, COL2, ... from TABLE
! for every column of the tuple's class (see example comment above).
: select-tuples* ( tuple -- statement )
dup
[
! Emit SQL text into sequence 0; column metadata also goes to sequence 2.
select 0,
! Comma-separate the column names of the tuple class's db columns.
dup class db-columns [ ", " 0, ]
[ dup column-name>> 0, 2, ] interleave
from 0,
! The table name is the tuple class's name.
class name>> 0,
] { { } { } { } } nmake
! Stash two nmake results on the retain stack, parse the SQL, restore.
! NOTE(review): uses the old retain-stack words >r/r> ; exactly what
! parse-sql consumes and 4drop discards is not visible here — confirm.
>r >r parse-sql 4drop r> r>
<simple-statement> maybe-make-retryable do-select ;
M: db <count-statement> ( query -- statement )
[ tuple>> dup class ] keep
[ [ "select count(*) from " 0% 0% where-clause ] query-make ]

View File

@ -1,5 +1,5 @@
USING: heaps.private help.markup help.syntax kernel math assocs
math.order ;
math.order quotations ;
IN: heaps
ARTICLE: "heaps" "Heaps"
@ -28,7 +28,11 @@ $nl
"Removal:"
{ $subsection heap-pop* }
{ $subsection heap-pop }
{ $subsection heap-delete } ;
{ $subsection heap-delete }
$nl
"Processing heaps:"
{ $subsection slurp-heap }
{ $subsection slurp-heap-when } ;
ABOUT: "heaps"
@ -82,3 +86,13 @@ HELP: heap-delete
{ $description "Remove the specified entry from the heap." }
{ $errors "Throws an error if the entry is from another heap or if it has already been deleted." }
{ $side-effects "heap" } ;
HELP: slurp-heap
{ $values
{ "heap" "a heap" } { "quot" quotation } }
{ $description "Removes values from a heap and processes them with the quotation until the heap is empty." } ;
HELP: slurp-heap-when
{ $values
{ "heap" "a heap" } { "quot1" quotation } { "quot2" quotation } }
{ $description "Removes values from a heap that match the predicate quotation " { $snippet "quot1" } " and processes them with " { $snippet "quot2" } " until the predicate quotation no longer matches." } ;

View File

@ -195,3 +195,9 @@ M: heap heap-pop ( heap -- value key )
over heap-empty? [ 2drop ] [
[ [ heap-pop drop ] dip call ] [ slurp-heap ] 2bi
] if ; inline recursive
! Pop entries off the heap while quot1, applied to the popped key,
! yields true; each matching value/key pair is handed to quot2, then
! slurping recurses. The first entry whose key fails quot1 is dropped
! (not processed) and iteration stops; an empty heap also stops.
: slurp-heap-when ( heap quot1 quot2: ( value key -- ) -- )
pick heap-empty? [ 3drop ] [
! Pop value/key, test a copy of the key with quot1; on success run
! quot2 (composed with [ t ]), on failure discard the pair ([ 2drop f ]).
[ [ heap-pop dup ] 2dip slip [ t ] compose [ 2drop f ] if ]
! Recurse on t, otherwise drop heap and both quotations.
[ roll [ slurp-heap-when ] [ 3drop ] if ] 3bi
] if ; inline recursive

View File

@ -0,0 +1 @@
Doug Coleman

View File

@ -0,0 +1,4 @@
! Copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: tools.test interpolate ;
IN: interpolate.tests

View File

@ -0,0 +1,21 @@
! Copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: io kernel macros make multiline namespaces parser
peg.ebnf present sequences strings ;
IN: interpolate
! Expands at compile time into code that writes the string to the
! current output stream, replacing each ${name} with the printed
! (present) value of the dynamic variable of that name (read with get).
MACRO: interpolate ( string -- )
[EBNF
var = "${" [^}]+ "}" => [[ second >string [ get present write ] curry ]]
text = [^$]+ => [[ >string [ write ] curry ]]
interpolate = (var|text)* => [[ [ ] join ]]
EBNF] ;
! Parse-time variant used by I[ : each ${name} is resolved immediately
! with search, so the generated quotation executes the named word and
! writes its present-ation; literal text is written verbatim.
EBNF: interpolate-locals
var = "${" [^}]+ "}" => [[ [ second >string search , [ present write ] % ] [ ] make ]]
text = [^$]+ => [[ [ >string , [ write ] % ] [ ] make ]]
interpolate = (var|text)* => [[ [ ] join ]]
;EBNF
! Parsing word: I[ ... ]I reads the raw text up to ]I as a multiline
! string and compiles it with interpolate-locals, appending \ call so
! the generated quotation runs at the definition's runtime.
! NOTE(review): ${name} is resolved via search at parse time — the
! value comes from executing the resolved word; confirm intended scope.
: I[ "]I" parse-multiline-string
interpolate-locals parsed \ call parsed ; parsing

View File

@ -13,8 +13,7 @@ HELP: help.
{ $description "Prints out scaffold help markup for a given word." } ;
HELP: scaffold-help
{ $values
{ "vocab-root" "a vocabulary root string" } { "string" string } }
{ $values { "string" string } }
{ $description "Takes an existing vocabulary and creates a help file with scaffolded help for each word. This word only works if no help file yet exists." } ;
HELP: scaffold-undocumented

View File

@ -217,9 +217,9 @@ PRIVATE>
: help. ( word -- )
[ (help.) ] [ nl vocabulary>> link-vocab ] bi ;
: scaffold-help ( vocab-root string -- )
: scaffold-help ( string -- )
[
check-vocab
[ find-vocab-root ] [ check-vocab ] bi
prepare-scaffold
[ "-docs.factor" scaffold-path ] dip
swap [ set-scaffold-help-file ] [ 2drop ] if

View File

@ -0,0 +1,32 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: help.markup help.syntax io.streams.string ;
IN: assoc-deques
HELP: <assoc-deque>
{ $description "Constructs a new " { $link assoc-deque } " from two existing data structures." } ;
HELP: <unique-max-heap>
{ $values
{ "unique-heap" assoc-deque } }
{ $description "Creates a new " { $link assoc-deque } " where the assoc is a hashtable and the deque is a max-heap." } ;
HELP: <unique-min-heap>
{ $values
{ "unique-heap" assoc-deque } }
{ $description "Creates a new " { $link assoc-deque } " where the assoc is a hashtable and the deque is a min-heap." } ;
HELP: assoc-deque
{ $description "A data structure containing an assoc and a deque to get certain properties with better time constraints at the expense of more space and complexity. For instance, a hashtable and a heap can be combined into one assoc-deque to get a sorted data structure with O(1) lookup. Operations on assoc-deques should update both the assoc and the deque." } ;
ARTICLE: "assoc-deques" "Associative deques"
"The " { $vocab-link "assoc-deques" } " vocabulary exists to synthesize data structures with better time properties than either of the two component data structures alone." $nl
"Associative deque constructor:"
{ $subsection <assoc-deque> }
"Unique heaps:"
{ $subsection <unique-min-heap> }
{ $subsection <unique-max-heap> } ;
ABOUT: "assoc-deques"

View File

@ -0,0 +1,4 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: tools.test assoc-deques ;
IN: assoc-deques.tests

View File

@ -0,0 +1,31 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors assocs deques hashtables heaps kernel ;
IN: assoc-deques
! Pairs an assoc with a deque; the assoc provides O(1) membership tests
! for the values currently held in the deque (see heap-push*), at the
! cost of keeping both structures in sync.
TUPLE: assoc-deque assoc deque ;
! Constructor taking an existing assoc and an existing deque.
C: <assoc-deque> assoc-deque
! A min-heap that rejects pushes of values it currently contains.
: <unique-min-heap> ( -- unique-heap )
H{ } clone <min-heap> <assoc-deque> ;
! A max-heap that rejects pushes of values it currently contains.
: <unique-max-heap> ( -- unique-heap )
H{ } clone <max-heap> <assoc-deque> ;
! Push value/key onto the underlying heap unless the value is already
! present; returns the new heap entry, or f for a duplicate. The assoc
! maps each stored value to its key, giving O(1) duplicate detection.
M: assoc-deque heap-push* ( value key assoc-deque -- entry )
pick over assoc>> key? [
! Value already present: discard everything and signal with f.
3drop f
] [
! Record assoc[value] = key, then push onto the underlying heap.
[ assoc>> swapd set-at ] [ deque>> heap-push* ] 3bi
] if ;
! Pop the next entry from the underlying heap and remove its value from
! the assoc, so the same value may be pushed again later.
M: assoc-deque heap-pop ( assoc-deque -- value key )
[ deque>> heap-pop ] keep
[ over ] dip assoc>> delete-at ;
! Peeking removes nothing, so only the underlying heap is consulted.
M: assoc-deque heap-peek ( assoc-deque -- value key )
deque>> heap-peek ;
! Emptiness tracks the underlying heap; the assoc mirrors its contents,
! so checking the heap alone suffices.
! Fixed stack effect: heap-empty? yields a boolean, not value/key
! (the original declaration was a copy-paste of heap-pop's effect).
M: assoc-deque heap-empty? ( assoc-deque -- ? )
deque>> heap-empty? ;

View File

@ -0,0 +1 @@
Doug Coleman

View File

@ -3,7 +3,7 @@
USING: assocs html.parser kernel math sequences strings ascii
arrays generalizations shuffle unicode.case namespaces make
splitting http accessors io combinators http.client urls
urls.encoding fry sequences.lib ;
urls.encoding fry ;
IN: html.parser.analyzer
TUPLE: link attributes clickable ;

1
extra/spider/authors.txt Normal file
View File

@ -0,0 +1 @@
Doug Coleman

View File

@ -0,0 +1,57 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: help.markup help.syntax io.streams.string urls
multiline ;
IN: spider
HELP: <spider>
{ $values
{ "base" "a string or url" }
{ "spider" spider } }
{ $description "Creates a new web spider with a given base url." } ;
HELP: run-spider
{ $values
{ "spider" spider }
{ "spider" spider } }
{ $description "Runs a spider until completion. See the " { $link "spider-tutorial" } " for a complete description of the tuple slots that affect how the spider works." } ;
HELP: spider
{ $description "The state of a web spider. Slots include the " { $snippet "base" } " url, the running " { $snippet "count" } " of pages spidered, the " { $snippet "max-count" } " and " { $snippet "max-depth" } " limits, an optional " { $snippet "sleep" } " delay between fetches, the " { $snippet "initial-links" } " to seed with, the link " { $snippet "filters" } ", the " { $snippet "spidered" } " assoc of visited urls, and the " { $snippet "todo" } " and " { $snippet "nonmatching" } " link heaps." } ;
HELP: spider-result
{ $description "The outcome of spidering a single page. Slots record the " { $snippet "url" } ", its link " { $snippet "depth" } ", the response " { $snippet "headers" } ", the " { $snippet "fetch-time" } ", the " { $snippet "parsed-html" } ", the outgoing " { $snippet "links" } ", the " { $snippet "processing-time" } ", and a completion " { $snippet "timestamp" } "." } ;
ARTICLE: "spider-tutorial" "Spider tutorial"
"To create a new spider, call the " { $link <spider> } " word with a link to the site you wish to spider."
{ $code <" "http://concatenative.org" <spider> "> }
"The max-depth is initialized to 0, which retrieves just the initial page. Let's initialize it to something more fun:"
{ $code <" 1 >>max-depth "> }
"Now the spider will retrieve the first page and all the pages it links to in the same domain." $nl
"But suppose the front page contains thousands of links. To avoid grabbing them all, we can set " { $slot "max-count" } " to a reasonable limit."
{ $code <" 10 >>max-count "> }
"A timeout might keep the spider from hitting the server too hard:"
{ $code <" USE: calendar 1.5 seconds >>sleep "> }
"Since we happen to know that not all pages of a wiki are suitable for spidering, we will spider only the wiki view pages, not the edit or revisions pages. To do this, we add a filter through which new links are tested; links that pass the filter are added to the todo queue, while links that do not are discarded. You can add several filters to the filter array, but we'll just add a single one for now."
{ $code <" { [ path>> "/wiki/view" head? ] } >>filters "> }
"Finally, to start the spider, call the " { $link run-spider } " word."
{ $code "run-spider" }
"The full code from the tutorial."
{ $code <" USING: spider calendar sequences accessors ;
: spider-concatenative ( -- spider )
"http://concatenative.org" <spider>
1 >>max-depth
10 >>max-count
1.5 seconds >>sleep
{ [ path>> "/wiki/view" head? ] } >>filters
run-spider ;"> } ;
ARTICLE: "spider" "Spider"
"The " { $vocab-link "spider" } " vocabulary implements a simple web spider for retrieving sets of webpages."
{ $subsection "spider-tutorial" }
"Creating a new spider:"
{ $subsection <spider> }
"Running the spider:"
{ $subsection run-spider } ;
ABOUT: "spider"

View File

@ -0,0 +1,97 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors fry html.parser html.parser.analyzer
http.client kernel tools.time sets assocs sequences
concurrency.combinators io threads namespaces math multiline
heaps math.parser inspector urls assoc-deques logging
combinators.short-circuit continuations calendar prettyprint ;
IN: spider
! Spidering state: base url, pages-fetched count, limits (max-count,
! max-depth), optional sleep between fetches, seed links, link filters,
! the spidered assoc (url -> spider-result, or f while in flight), and
! the todo / nonmatching unique heaps keyed by link depth.
TUPLE: spider base count max-count sleep max-depth initial-links
filters spidered todo nonmatching ;
! secure? agent page-timeout data-timeout overall-timeout
! What was fetched for one page and how long each phase took; slot
! order here is the order (spider-page) builds its boa call.
TUPLE: spider-result url depth headers fetch-time parsed-html
links processing-time timestamp ;
! Creates a spider rooted at the given base (a string or url).
! Defaults: depth limit 0, count 0, no page limit.
: <spider> ( base -- spider )
>url
spider new
over >>base
! Seed the todo unique-min-heap with the base url at depth 0.
swap 0 <unique-min-heap> [ heap-push ] keep >>todo
<unique-min-heap> >>nonmatching
0 >>max-depth
0 >>count
! 1/0. is floating-point infinity: spider any number of pages.
1/0. >>max-count
H{ } clone >>spidered ;
<PRIVATE
! When the spider has filters, keep only links satisfying all of them
! (1&& combines the filter quotations with short-circuit and); when
! filters is f, the links pass through untouched.
: apply-filters ( links spider -- links' )
filters>> [ '[ _ 1&& ] filter ] when* ;
! Filter the links, drop any already spidered, then push each survivor
! onto the todo heap keyed by its depth (level).
: add-todo ( links level spider -- )
tuck [ apply-filters ] 2dip
tuck
! Already-visited urls are the keys of the spidered assoc.
[ spidered>> keys diff ]
[ todo>> ] 2bi* '[ _ _ heap-push ] each ;
! Record off-site links on the nonmatching heap at the given level;
! they are kept but never fetched.
: add-nonmatching ( links level spider -- )
nonmatching>> '[ _ _ heap-push ] each ;
: relative-url? ( url -- ? ) protocol>> not ;
! Split the result's pruned (deduplicated) links into those on the
! spider's base host and those pointing to other hosts.
: filter-base ( spider spider-result -- base-links nonmatching-links )
[ base>> host>> ] [ links>> prune ] bi*
[ host>> = ] with partition ;
! Merge one finished spider-result into the spider: bump the page
! count, file the result under its url in the spidered assoc, then
! queue same-host links (at depth+1) on todo and the rest on
! nonmatching.
: add-spidered ( spider spider-result -- )
[ [ 1+ ] change-count ] dip
2dup [ spidered>> ] [ dup url>> ] bi* rot set-at
[ filter-base ] 2keep
depth>> 1+ swap
[ add-nonmatching ]
[ add-todo ] 2bi ;
! Progress line, e.g. "depth: 1, spidering: URL", then yield so other
! threads can run.
: print-spidering ( url depth -- )
"depth: " write number>string write
", spidering: " write . yield ;
! Parse raw hrefs into url objects and resolve each against the base
! of the spider held in the spider dynamic variable.
: normalize-hrefs ( links -- links' )
[ >url ] map
spider get base>> swap [ derive-url ] with map ;
! Fetch and parse one page, timing both phases, and build a
! spider-result (then describe it for the log/console).
: (spider-page) ( url depth -- spider-result )
2dup print-spidering
! Mark the url visited (value f) before fetching so it cannot be
! queued again while in flight.
f pick spider get spidered>> set-at
! Time the fetch; NOTE(review): http-get presumably leaves headers
! and body here, matching the spider-result slot order — confirm.
over '[ _ http-get ] benchmark swap
! Time parsing and link extraction/normalization.
[ parse-html dup find-hrefs normalize-hrefs ] benchmark
! boa order: url depth headers fetch-time parsed-html links
! processing-time timestamp.
now spider-result boa
dup describe ;
! Fetch and process one page, then merge the result into the spider.
: spider-page ( url depth -- )
(spider-page) spider get swap add-spidered ;
! Log (rather than propagate) errors raised while spidering a page.
\ spider-page ERROR add-error-logging
! Pause between requests when the spider's sleep slot is set;
! a f slot means no delay.
: spider-sleep ( -- )
spider get sleep>> [ sleep ] when* ;
! Seed the todo heap with the spider's initial-links at depth 0.
! NOTE(review): normalize-hrefs reads the spider dynamic variable, so
! this must run inside run-spider's with-variable scope.
: queue-initial-links ( spider -- spider )
[ initial-links>> normalize-hrefs 0 ] keep
[ add-todo ] keep ;
PRIVATE>
! Drain the todo heap under "spider" logging, with the spider bound to
! the spider dynamic variable; returns the (mutated) spider so its
! results can be inspected.
: run-spider ( spider -- spider )
"spider" [
dup spider [
queue-initial-links
[ todo>> ] [ max-depth>> ] bi
'[
! Keep going while the entry's depth is within max-depth and
! the page count is still below max-count.
_ <= spider get
[ count>> ] [ max-count>> ] bi < and
] [ spider-page spider-sleep ] slurp-heap-when
spider get
] with-variable
] with-logging ;