Merge branch 'master' of git://factorcode.org/git/factor

db4
John Benediktsson 2009-04-06 15:52:53 -07:00
commit db280b6323
13 changed files with 131 additions and 43 deletions

View File

@ -21,12 +21,12 @@ CONSTANT: epsilon T{ tagged-epsilon { tag t } }
TUPLE: concatenation first second ; TUPLE: concatenation first second ;
: <concatenation> ( seq -- concatenation ) : <concatenation> ( seq -- concatenation )
[ epsilon ] [ unclip [ concatenation boa ] reduce ] if-empty ; [ epsilon ] [ [ ] [ concatenation boa ] map-reduce ] if-empty ;
TUPLE: alternation first second ; TUPLE: alternation first second ;
: <alternation> ( seq -- alternation ) : <alternation> ( seq -- alternation )
unclip [ alternation boa ] reduce ; [ ] [ alternation boa ] map-reduce ;
TUPLE: star term ; TUPLE: star term ;
C: <star> star C: <star> star

View File

@ -11,12 +11,7 @@ TUPLE: transition-table transitions start-state final-states ;
H{ } clone >>transitions H{ } clone >>transitions
H{ } clone >>final-states ; H{ } clone >>final-states ;
: maybe-initialize-key ( key hashtable -- )
! Why do we have to do this?
2dup key? [ 2drop ] [ [ H{ } clone ] 2dip set-at ] if ;
:: (set-transition) ( from to obj hash -- ) :: (set-transition) ( from to obj hash -- )
to condition? [ to hash maybe-initialize-key ] unless
from hash at from hash at
[ [ to obj ] dip set-at ] [ [ to obj ] dip set-at ]
[ to obj associate from hash set-at ] if* ; [ to obj associate from hash set-at ] if* ;
@ -25,7 +20,6 @@ TUPLE: transition-table transitions start-state final-states ;
transitions>> (set-transition) ; transitions>> (set-transition) ;
:: (add-transition) ( from to obj hash -- ) :: (add-transition) ( from to obj hash -- )
to hash maybe-initialize-key
from hash at from hash at
[ [ to obj ] dip push-at ] [ [ to obj ] dip push-at ]
[ to 1vector obj associate from hash set-at ] if* ; [ to 1vector obj associate from hash set-at ] if* ;

View File

@ -452,6 +452,7 @@ editor "caret-motion" f {
editor "selection" f { editor "selection" f {
{ T{ button-down f { S+ } 1 } extend-selection } { T{ button-down f { S+ } 1 } extend-selection }
{ T{ button-up f { S+ } 1 } com-copy-selection }
{ T{ drag } drag-selection } { T{ drag } drag-selection }
{ gain-focus focus-editor } { gain-focus focus-editor }
{ lose-focus unfocus-editor } { lose-focus unfocus-editor }

View File

@ -42,7 +42,7 @@ GENERIC: set-where ( loc defspec -- )
GENERIC: forget* ( defspec -- ) GENERIC: forget* ( defspec -- )
M: object forget* drop ; M: f forget* drop ;
SYMBOL: forgotten-definitions SYMBOL: forgotten-definitions
@ -53,8 +53,6 @@ SYMBOL: forgotten-definitions
: forget-all ( definitions -- ) [ forget ] each ; : forget-all ( definitions -- ) [ forget ] each ;
GENERIC: synopsis* ( defspec -- )
GENERIC: definer ( defspec -- start end ) GENERIC: definer ( defspec -- start end )
GENERIC: definition ( defspec -- seq ) GENERIC: definition ( defspec -- seq )

View File

@ -0,0 +1,15 @@
USING: tools.deploy.config ;
H{
{ deploy-name "benchmark.fib6" }
{ deploy-threads? f }
{ deploy-math? f }
{ deploy-word-props? f }
{ deploy-ui? f }
{ deploy-io 1 }
{ deploy-compiler? t }
{ deploy-reflection 1 }
{ "stop-after-last-window?" t }
{ deploy-unicode? f }
{ deploy-word-defs? f }
{ deploy-c-types? f }
}

View File

@ -3,11 +3,21 @@
USING: accessors http.client kernel unicode.categories USING: accessors http.client kernel unicode.categories
sequences urls splitting combinators splitting.monotonic sequences urls splitting combinators splitting.monotonic
combinators.short-circuit assocs unicode.case arrays combinators.short-circuit assocs unicode.case arrays
math.parser calendar.format make ; math.parser calendar.format make fry present globs
multiline regexp.combinators regexp ;
IN: robots IN: robots
! visit-time is GMT, request-rate is pages/second ! visit-time is GMT, request-rate is pages/second
! crawl-rate is seconds ! crawl-rate is seconds
TUPLE: robots site sitemap rules rules-quot ;
: <robots> ( site sitemap rules -- robots )
\ robots new
swap >>rules
swap >>sitemap
swap >>site ;
TUPLE: rules user-agents allows disallows TUPLE: rules user-agents allows disallows
visit-time request-rate crawl-delay unknowns ; visit-time request-rate crawl-delay unknowns ;
@ -40,8 +50,8 @@ visit-time request-rate crawl-delay unknowns ;
H{ } clone >>unknowns ; H{ } clone >>unknowns ;
: add-user-agent ( rules agent -- rules ) over user-agents>> push ; : add-user-agent ( rules agent -- rules ) over user-agents>> push ;
: add-allow ( rules allow -- rules ) over allows>> push ; : add-allow ( rules allow -- rules ) >url over allows>> push ;
: add-disallow ( rules disallow -- rules ) over disallows>> push ; : add-disallow ( rules disallow -- rules ) >url over disallows>> push ;
: parse-robots.txt-line ( rules seq -- rules ) : parse-robots.txt-line ( rules seq -- rules )
first2 swap { first2 swap {
@ -57,12 +67,26 @@ visit-time request-rate crawl-delay unknowns ;
[ pick unknowns>> push-at ] [ pick unknowns>> push-at ]
} case ; } case ;
: derive-urls ( url seq -- seq' )
[ derive-url present ] with { } map-as ;
: robot-rules-quot ( robots -- quot )
[
[ site>> ] [ rules>> allows>> ] bi
derive-urls [ <glob> ] map
<or>
] [
[ site>> ] [ rules>> disallows>> ] bi
derive-urls [ <glob> ] map <and> <not>
] bi 2array <or> '[ _ matches? ] ;
PRIVATE> PRIVATE>
: parse-robots.txt ( string -- sitemaps rules-seq ) : parse-robots.txt ( string -- sitemaps rules-seq )
normalize-robots.txt [ normalize-robots.txt [
[ <rules> dup ] dip [ parse-robots.txt-line drop ] with each [ <rules> dup ] dip [ parse-robots.txt-line drop ] with each
] map ; ] map first ;
: robots ( url -- sitemaps rules-seq ) : robots ( url -- robots )
get-robots.txt nip parse-robots.txt ; >url
dup get-robots.txt nip parse-robots.txt <robots> ;

View File

@ -47,9 +47,26 @@ watching-site "WATCHING_SITE" {
{ "site-id" "SITE_ID" INTEGER +user-assigned-id+ } { "site-id" "SITE_ID" INTEGER +user-assigned-id+ }
} define-persistent } define-persistent
TUPLE: reporting-site email url up? changed? last-up? error last-error ; TUPLE: spidering-site < watching-site max-depth max-count ;
<PRIVATE SLOT: site
M: watching-site site>>
site-id>> site new swap >>site-id select-tuple ;
SLOT: account
M: watching-site account>>
account-name>> account new swap >>account-name select-tuple ;
spidering-site "SPIDERING_SITE" {
{ "account-name" "ACCOUNT_NAME" VARCHAR +user-assigned-id+ }
{ "site-id" "SITE_ID" INTEGER +user-assigned-id+ }
{ "max-depth" "MAX_DEPTH" INTEGER }
{ "max-count" "MAX_COUNT" INTEGER }
} define-persistent
TUPLE: reporting-site site-id email url up? changed? last-up? error last-error ;
: set-notify-site-watchers ( site new-up? -- site ) : set-notify-site-watchers ( site new-up? -- site )
[ over up?>> = [ t >>changed? ] unless ] keep >>up? ; [ over up?>> = [ t >>changed? ] unless ] keep >>up? ;
@ -82,8 +99,6 @@ TUPLE: reporting-site email url up? changed? last-up? error last-error ;
: select-account/site ( username url -- account site ) : select-account/site ( username url -- account site )
insert-site site-id>> ; insert-site site-id>> ;
PRIVATE>
: watch-site ( username url -- ) : watch-site ( username url -- )
select-account/site <watching-site> insert-tuple ; select-account/site <watching-site> insert-tuple ;

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,14 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: smtp namespaces accessors kernel arrays ;
IN: site-watcher.email
SYMBOL: site-watcher-from
site-watcher-from [ "factor-site-watcher@gmail.com" ] initialize
: send-site-email ( watching-site body subject -- )
[ account>> email>> ] 2dip
pick [
[ <email> site-watcher-from get >>from ] 3dip
[ 1array >>to ] [ >>body ] [ >>subject ] tri* send-email
] [ 3drop ] if ;

View File

@ -3,13 +3,9 @@
USING: accessors alarms arrays calendar combinators USING: accessors alarms arrays calendar combinators
combinators.smart continuations debugger http.client fry combinators.smart continuations debugger http.client fry
init io.streams.string kernel locals math math.parser db init io.streams.string kernel locals math math.parser db
namespaces sequences site-watcher.db site-watcher.db.private namespaces sequences site-watcher.db site-watcher.email ;
smtp ;
IN: site-watcher IN: site-watcher
SYMBOL: site-watcher-from
"factor-site-watcher@gmail.com" site-watcher-from set-global
SYMBOL: site-watcher-frequency SYMBOL: site-watcher-frequency
5 minutes site-watcher-frequency set-global 5 minutes site-watcher-frequency set-global
@ -23,22 +19,19 @@ SYMBOL: running-site-watcher
[ dup url>> http-get 2drop site-good ] [ site-bad ] recover [ dup url>> http-get 2drop site-good ] [ site-bad ] recover
] each ; ] each ;
: site-up-email ( email site -- email ) : site-up-email ( site -- body )
last-up>> now swap time- duration>minutes 60 /mod last-up>> now swap time- duration>minutes 60 /mod
[ >integer number>string ] bi@ [ >integer number>string ] bi@
[ " hours, " append ] [ " minutes" append ] bi* append [ " hours, " append ] [ " minutes" append ] bi* append
"Site was down for (at least): " prepend >>body ; "Site was down for (at least): " prepend ;
: site-down-email ( email site -- email ) error>> >>body ; : site-down-email ( site -- body ) error>> ;
: send-report ( site -- ) : send-report ( site -- )
[ <email> ] dip [ ]
{ [ dup up?>> [ site-up-email ] [ site-down-email ] if ]
[ email>> 1array >>to ] [ [ url>> ] [ up?>> "up" "down" ? ] bi " is " glue ] tri
[ drop site-watcher-from get "factor.site.watcher@gmail.com" or >>from ] send-site-email ;
[ dup up?>> [ site-up-email ] [ site-down-email ] if ]
[ [ url>> ] [ up?>> "up" "down" ? ] bi " is " glue >>subject ]
} cleave send-email ;
: send-reports ( seq -- ) : send-reports ( seq -- )
[ ] [ [ send-report ] each ] if-empty ; [ ] [ [ send-report ] each ] if-empty ;

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,22 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: site-watcher.db site-watcher.email
spider spider.report
accessors kernel sequences
xml.writer ;
IN: site-watcher.spider
: <site-spider> ( spidering-site -- spider )
[ max-depth>> ]
[ max-count>> ]
[ site>> url>> ]
tri
<spider>
swap >>max-count
swap >>max-depth ;
: spider-and-email ( spidering-site -- )
[ ]
[ <site-spider> run-spider spider-report xml>string ]
[ site>> url>> "Spidered " prefix ] tri
send-site-email ;

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs combinators kernel math USING: accessors arrays assocs combinators kernel math
math.statistics namespaces sequences sorting xml.syntax math.statistics namespaces sequences sorting xml.syntax
spider ; spider urls html ;
IN: spider.report IN: spider.report
SYMBOL: network-failures SYMBOL: network-failures
@ -87,27 +87,37 @@ SYMBOL: time-std
slowest-pages-table slowest-pages-table
timing-summary-table timing-summary-table
[XML [XML
<h2>Slowest pages</h2> <h3>Slowest pages</h3>
<-> <->
<h2>Summary</h2> <h3>Summary</h3>
<-> <->
XML] ; XML] ;
: generate-report ( -- html ) : generate-report ( -- html )
url get dup
report-broken-pages report-broken-pages
report-network-failures report-network-failures
report-timings report-timings
[XML [XML
<h1>Broken pages</h1> <h1>Spider report</h1>
URL: <a href=<->><-></a>
<h2>Broken pages</h2>
<-> <->
<h1>Network failures</h1> <h2>Network failures</h2>
<-> <->
<h1>Load times</h1> <h2>Load times</h2>
<-> <->
XML] ; XML] ;
: spider-report ( spider -- html ) : spider-report ( spider -- html )
[ spidered>> process-results generate-report ] with-scope ; [ "Spider report" f ] dip
[
[ base>> url set ]
[ spidered>> process-results ] bi
generate-report
] with-scope
simple-page ;