From adc764efa3238b8a75b1f3715a631bdbea8e8baa Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 6 Apr 2009 13:26:07 -0500 Subject: [PATCH 1/6] Cleanup --- core/definitions/definitions.factor | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/core/definitions/definitions.factor b/core/definitions/definitions.factor index c95c5816ac..636067e04b 100644 --- a/core/definitions/definitions.factor +++ b/core/definitions/definitions.factor @@ -3,8 +3,6 @@ USING: kernel sequences namespaces assocs graphs math math.order ; IN: definitions -MIXIN: definition - ERROR: no-compilation-unit definition ; SYMBOLS: inlined-dependency flushed-dependency called-dependency ; @@ -42,7 +40,7 @@ GENERIC: set-where ( loc defspec -- ) GENERIC: forget* ( defspec -- ) -M: object forget* drop ; +M: f forget* drop ; SYMBOL: forgotten-definitions @@ -53,8 +51,6 @@ SYMBOL: forgotten-definitions : forget-all ( definitions -- ) [ forget ] each ; -GENERIC: synopsis* ( defspec -- ) - GENERIC: definer ( defspec -- start end ) GENERIC: definition ( defspec -- seq ) From 1d563edcd90495e5bbb174a9396fb79c5c449852 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 6 Apr 2009 13:34:38 -0500 Subject: [PATCH 2/6] Oops --- core/definitions/definitions.factor | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/definitions/definitions.factor b/core/definitions/definitions.factor index 636067e04b..7463a863e5 100644 --- a/core/definitions/definitions.factor +++ b/core/definitions/definitions.factor @@ -3,6 +3,8 @@ USING: kernel sequences namespaces assocs graphs math math.order ; IN: definitions +MIXIN: definition + ERROR: no-compilation-unit definition ; SYMBOLS: inlined-dependency flushed-dependency called-dependency ; From 37e5d502f1869d18ccb8ea06a925d1f88bfebedd Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 6 Apr 2009 13:39:55 -0500 Subject: [PATCH 3/6] Fix shift-drag losing focus in listener --- basis/ui/gadgets/editors/editors.factor | 1 + 1 file changed, 1 insertion(+) diff --git a/basis/ui/gadgets/editors/editors.factor b/basis/ui/gadgets/editors/editors.factor index 3eb40a5135..9461b2348f 100755 --- a/basis/ui/gadgets/editors/editors.factor +++ b/basis/ui/gadgets/editors/editors.factor @@ -452,6 +452,7 @@ editor "caret-motion" f { editor "selection" f { { T{ button-down f { S+ } 1 } extend-selection } + { T{ button-up f { S+ } 1 } com-copy-selection } { T{ drag } drag-selection } { gain-focus focus-editor } { lose-focus unfocus-editor } From 77f99eb70cda805feb463f1e77c0eb34fb25cc7a Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Mon, 6 Apr 2009 17:32:20 -0500 Subject: [PATCH 4/6] use map-reduce instead of unclip reduce, "Why do we need this?" --> we don't --- basis/regexp/ast/ast.factor | 4 ++-- basis/regexp/transition-tables/transition-tables.factor | 6 ------ 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/basis/regexp/ast/ast.factor b/basis/regexp/ast/ast.factor index be657227e5..2916ef7c32 100644 --- a/basis/regexp/ast/ast.factor +++ b/basis/regexp/ast/ast.factor @@ -21,12 +21,12 @@ CONSTANT: epsilon T{ tagged-epsilon { tag t } } TUPLE: concatenation first second ; : ( seq -- concatenation ) - [ epsilon ] [ unclip [ concatenation boa ] reduce ] if-empty ; + [ epsilon ] [ [ ] [ concatenation boa ] map-reduce ] if-empty ; TUPLE: alternation first second ; : ( seq -- alternation ) - unclip [ alternation boa ] reduce ; + [ ] [ alternation boa ] map-reduce ; TUPLE: star term ; C: star diff --git a/basis/regexp/transition-tables/transition-tables.factor b/basis/regexp/transition-tables/transition-tables.factor index 3c33ae8846..f452e3d24a 100644 --- a/basis/regexp/transition-tables/transition-tables.factor +++ b/basis/regexp/transition-tables/transition-tables.factor @@ -11,12 +11,7 @@ TUPLE: transition-table transitions start-state final-states ; H{ } clone >>transitions H{ } clone >>final-states ; -: maybe-initialize-key ( key hashtable -- ) - ! Why do we have to do this? - 2dup key? [ 2drop ] [ [ H{ } clone ] 2dip set-at ] if ; - :: (set-transition) ( from to obj hash -- ) - to condition? [ to hash maybe-initialize-key ] unless from hash at [ [ to obj ] dip set-at ] [ to obj associate from hash set-at ] if* ; @@ -25,7 +20,6 @@ TUPLE: transition-table transitions start-state final-states ; transitions>> (set-transition) ; :: (add-transition) ( from to obj hash -- ) - to hash maybe-initialize-key from hash at [ [ to obj ] dip push-at ] [ to 1vector obj associate from hash set-at ] if* ; From b6064813c92acb6ad64985258995dd2479356248 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Mon, 6 Apr 2009 17:39:44 -0500 Subject: [PATCH 5/6] construct a glob-matching quote for robots.txt files --- extra/robots/robots.factor | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/extra/robots/robots.factor b/extra/robots/robots.factor index 1b2422f06e..242aa1dea2 100644 --- a/extra/robots/robots.factor +++ b/extra/robots/robots.factor @@ -3,11 +3,21 @@ USING: accessors http.client kernel unicode.categories sequences urls splitting combinators splitting.monotonic combinators.short-circuit assocs unicode.case arrays -math.parser calendar.format make ; +math.parser calendar.format make fry present globs +multiline regexp.combinators regexp ; IN: robots ! visit-time is GMT, request-rate is pages/second ! crawl-rate is seconds + +TUPLE: robots site sitemap rules rules-quot ; + +: ( site sitemap rules -- robots ) + \ robots new + swap >>rules + swap >>sitemap + swap >>site ; + TUPLE: rules user-agents allows disallows visit-time request-rate crawl-delay unknowns ; @@ -40,8 +50,8 @@ visit-time request-rate crawl-delay unknowns ; H{ } clone >>unknowns ; : add-user-agent ( rules agent -- rules ) over user-agents>> push ; -: add-allow ( rules allow -- rules ) over allows>> push ; -: add-disallow ( rules disallow -- rules ) over disallows>> push ; +: add-allow ( rules allow -- rules ) >url over allows>> push ; +: add-disallow ( rules disallow -- rules ) >url over disallows>> push ; : parse-robots.txt-line ( rules seq -- rules ) first2 swap { @@ -57,12 +67,26 @@ visit-time request-rate crawl-delay unknowns ; [ pick unknowns>> push-at ] } case ; +: derive-urls ( url seq -- seq' ) + [ derive-url present ] with { } map-as ; + +: robot-rules-quot ( robots -- quot ) + [ + [ site>> ] [ rules>> allows>> ] bi + derive-urls [ ] map + + ] [ + [ site>> ] [ rules>> disallows>> ] bi + derive-urls [ ] map + ] bi 2array '[ _ matches? ] ; + PRIVATE> : parse-robots.txt ( string -- sitemaps rules-seq ) normalize-robots.txt [ [ dup ] dip [ parse-robots.txt-line drop ] with each - ] map ; + ] map first ; -: robots ( url -- sitemaps rules-seq ) - get-robots.txt nip parse-robots.txt ; +: robots ( url -- robots ) + >url + dup get-robots.txt nip parse-robots.txt ; From e9b9907ef9a1217dd58d9be430fd9054ea74e24a Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 6 Apr 2009 17:49:34 -0500 Subject: [PATCH 6/6] Factor out site-watcher.email, start work on site-watcher.spider --- extra/benchmark/fib6/deploy.factor | 15 +++++++++++++++ extra/site-watcher/db/db.factor | 23 +++++++++++++++++++---- extra/site-watcher/email/authors.txt | 1 + extra/site-watcher/email/email.factor | 14 ++++++++++++++ extra/site-watcher/site-watcher.factor | 23 ++++++++--------------- extra/site-watcher/spider/authors.txt | 1 + extra/site-watcher/spider/spider.factor | 22 ++++++++++++++++++++++ extra/spider/report/report.factor | 24 +++++++++++++++++------- 8 files changed, 97 insertions(+), 26 deletions(-) create mode 100644 extra/benchmark/fib6/deploy.factor create mode 100644 extra/site-watcher/email/authors.txt create mode 100644 extra/site-watcher/email/email.factor create mode 100644 extra/site-watcher/spider/authors.txt create mode 100644 extra/site-watcher/spider/spider.factor diff --git a/extra/benchmark/fib6/deploy.factor b/extra/benchmark/fib6/deploy.factor new file mode 100644 index 0000000000..3a367dcd51 --- /dev/null +++ b/extra/benchmark/fib6/deploy.factor @@ -0,0 +1,15 @@ +USING: tools.deploy.config ; +H{ + { deploy-name "benchmark.fib6" } + { deploy-threads? f } + { deploy-math? f } + { deploy-word-props? f } + { deploy-ui? f } + { deploy-io 1 } + { deploy-compiler? t } + { deploy-reflection 1 } + { "stop-after-last-window?" t } + { deploy-unicode? f } + { deploy-word-defs? f } + { deploy-c-types? f } +} diff --git a/extra/site-watcher/db/db.factor b/extra/site-watcher/db/db.factor index 148e5b96f9..26d05441f3 100644 --- a/extra/site-watcher/db/db.factor +++ b/extra/site-watcher/db/db.factor @@ -47,9 +47,26 @@ watching-site "WATCHING_SITE" { { "site-id" "SITE_ID" INTEGER +user-assigned-id+ } } define-persistent -TUPLE: reporting-site email url up? changed? last-up? error last-error ; +TUPLE: spidering-site < watching-site max-depth max-count ; -> + site-id>> site new swap >>site-id select-tuple ; + +SLOT: account + +M: watching-site account>> + account-name>> account new swap >>account-name select-tuple ; + +spidering-site "SPIDERING_SITE" { + { "account-name" "ACCOUNT_NAME" VARCHAR +user-assigned-id+ } + { "site-id" "SITE_ID" INTEGER +user-assigned-id+ } + { "max-depth" "MAX_DEPTH" INTEGER } + { "max-count" "MAX_COUNT" INTEGER } +} define-persistent + +TUPLE: reporting-site site-id email url up? changed? last-up? error last-error ; : set-notify-site-watchers ( site new-up? -- site ) [ over up?>> = [ t >>changed? ] unless ] keep >>up? ; @@ -82,8 +99,6 @@ TUPLE: reporting-site email url up? changed? last-up? error last-error ; : select-account/site ( username url -- account site ) insert-site site-id>> ; -PRIVATE> - : watch-site ( username url -- ) select-account/site insert-tuple ; diff --git a/extra/site-watcher/email/authors.txt b/extra/site-watcher/email/authors.txt new file mode 100644 index 0000000000..d4f5d6b3ae --- /dev/null +++ b/extra/site-watcher/email/authors.txt @@ -0,0 +1 @@ +Slava Pestov \ No newline at end of file diff --git a/extra/site-watcher/email/email.factor b/extra/site-watcher/email/email.factor new file mode 100644 index 0000000000..d028788e26 --- /dev/null +++ b/extra/site-watcher/email/email.factor @@ -0,0 +1,14 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: smtp namespaces accessors kernel arrays ; +IN: site-watcher.email + +SYMBOL: site-watcher-from +site-watcher-from [ "factor-site-watcher@gmail.com" ] initialize + +: send-site-email ( watching-site body subject -- ) + [ account>> email>> ] 2dip + pick [ + [ site-watcher-from get >>from ] 3dip + [ 1array >>to ] [ >>body ] [ >>subject ] tri* send-email + ] [ 3drop ] if ; \ No newline at end of file diff --git a/extra/site-watcher/site-watcher.factor b/extra/site-watcher/site-watcher.factor index 114cdf3259..c2ec2ada79 100644 --- a/extra/site-watcher/site-watcher.factor +++ b/extra/site-watcher/site-watcher.factor @@ -3,13 +3,9 @@ USING: accessors alarms arrays calendar combinators combinators.smart continuations debugger http.client fry init io.streams.string kernel locals math math.parser db -namespaces sequences site-watcher.db site-watcher.db.private -smtp ; +namespaces sequences site-watcher.db site-watcher.email ; IN: site-watcher -SYMBOL: site-watcher-from -"factor-site-watcher@gmail.com" site-watcher-from set-global - SYMBOL: site-watcher-frequency 5 minutes site-watcher-frequency set-global @@ -23,22 +19,19 @@ SYMBOL: running-site-watcher [ dup url>> http-get 2drop site-good ] [ site-bad ] recover ] each ; -: site-up-email ( email site -- email ) +: site-up-email ( site -- body ) last-up>> now swap time- duration>minutes 60 /mod [ >integer number>string ] bi@ [ " hours, " append ] [ " minutes" append ] bi* append - "Site was down for (at least): " prepend >>body ; + "Site was down for (at least): " prepend ; -: site-down-email ( email site -- email ) error>> >>body ; +: site-down-email ( site -- body ) error>> ; : send-report ( site -- ) - [ ] dip - { - [ email>> 1array >>to ] - [ drop site-watcher-from get "factor.site.watcher@gmail.com" or >>from ] - [ dup up?>> [ site-up-email ] [ site-down-email ] if ] - [ [ url>> ] [ up?>> "up" "down" ? ] bi " is " glue >>subject ] - } cleave send-email ; + [ ] + [ dup up?>> [ site-up-email ] [ site-down-email ] if ] + [ [ url>> ] [ up?>> "up" "down" ? ] bi " is " glue ] tri + send-site-email ; : send-reports ( seq -- ) [ ] [ [ send-report ] each ] if-empty ; diff --git a/extra/site-watcher/spider/authors.txt b/extra/site-watcher/spider/authors.txt new file mode 100644 index 0000000000..d4f5d6b3ae --- /dev/null +++ b/extra/site-watcher/spider/authors.txt @@ -0,0 +1 @@ +Slava Pestov \ No newline at end of file diff --git a/extra/site-watcher/spider/spider.factor b/extra/site-watcher/spider/spider.factor new file mode 100644 index 0000000000..1b3a96a018 --- /dev/null +++ b/extra/site-watcher/spider/spider.factor @@ -0,0 +1,22 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: site-watcher.db site-watcher.email +spider spider.report +accessors kernel sequences +xml.writer ; +IN: site-watcher.spider + +: ( spidering-site -- spider ) + [ max-depth>> ] + [ max-count>> ] + [ site>> url>> ] + tri + + swap >>max-count + swap >>max-depth ; + +: spider-and-email ( spidering-site -- ) + [ ] + [ run-spider spider-report xml>string ] + [ site>> url>> "Spidered " prefix ] tri + send-site-email ; diff --git a/extra/spider/report/report.factor b/extra/spider/report/report.factor index 43952701d5..48620cac55 100644 --- a/extra/spider/report/report.factor +++ b/extra/spider/report/report.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors arrays assocs combinators kernel math math.statistics namespaces sequences sorting xml.syntax -spider ; +spider urls html ; IN: spider.report SYMBOL: network-failures @@ -87,27 +87,37 @@ SYMBOL: time-std slowest-pages-table timing-summary-table [XML -

Slowest pages

+

Slowest pages

<-> -

Summary

+

Summary

<-> XML] ; : generate-report ( -- html ) + url get dup report-broken-pages report-network-failures report-timings [XML -

Broken pages

+

Spider report

+ URL: ><-> + +

Broken pages

<-> -

Network failures

+

Network failures

<-> -

Load times

+

Load times

<-> XML] ; : spider-report ( spider -- html ) - [ spidered>> process-results generate-report ] with-scope ; + [ "Spider report" f ] dip + [ + [ base>> url set ] + [ spidered>> process-results ] bi + generate-report + ] with-scope + simple-page ;