Merge branch 'master' of git://factorcode.org/git/factor

db4
John Benediktsson 2009-04-06 15:52:53 -07:00
commit db280b6323
13 changed files with 131 additions and 43 deletions

View File

@ -21,12 +21,12 @@ CONSTANT: epsilon T{ tagged-epsilon { tag t } }
TUPLE: concatenation first second ;
: <concatenation> ( seq -- concatenation )
[ epsilon ] [ unclip [ concatenation boa ] reduce ] if-empty ;
[ epsilon ] [ [ ] [ concatenation boa ] map-reduce ] if-empty ;
TUPLE: alternation first second ;
: <alternation> ( seq -- alternation )
unclip [ alternation boa ] reduce ;
[ ] [ alternation boa ] map-reduce ;
TUPLE: star term ;
C: <star> star

View File

@ -11,12 +11,7 @@ TUPLE: transition-table transitions start-state final-states ;
H{ } clone >>transitions
H{ } clone >>final-states ;
: maybe-initialize-key ( key hashtable -- )
! Why do we have to do this?
2dup key? [ 2drop ] [ [ H{ } clone ] 2dip set-at ] if ;
:: (set-transition) ( from to obj hash -- )
to condition? [ to hash maybe-initialize-key ] unless
from hash at
[ [ to obj ] dip set-at ]
[ to obj associate from hash set-at ] if* ;
@ -25,7 +20,6 @@ TUPLE: transition-table transitions start-state final-states ;
transitions>> (set-transition) ;
:: (add-transition) ( from to obj hash -- )
to hash maybe-initialize-key
from hash at
[ [ to obj ] dip push-at ]
[ to 1vector obj associate from hash set-at ] if* ;

View File

@ -452,6 +452,7 @@ editor "caret-motion" f {
editor "selection" f {
{ T{ button-down f { S+ } 1 } extend-selection }
{ T{ button-up f { S+ } 1 } com-copy-selection }
{ T{ drag } drag-selection }
{ gain-focus focus-editor }
{ lose-focus unfocus-editor }

View File

@ -42,7 +42,7 @@ GENERIC: set-where ( loc defspec -- )
GENERIC: forget* ( defspec -- )
M: object forget* drop ;
M: f forget* drop ;
SYMBOL: forgotten-definitions
@ -53,8 +53,6 @@ SYMBOL: forgotten-definitions
: forget-all ( definitions -- ) [ forget ] each ;
GENERIC: synopsis* ( defspec -- )
GENERIC: definer ( defspec -- start end )
GENERIC: definition ( defspec -- seq )

View File

@ -0,0 +1,15 @@
USING: tools.deploy.config ;
H{
{ deploy-name "benchmark.fib6" }
{ deploy-threads? f }
{ deploy-math? f }
{ deploy-word-props? f }
{ deploy-ui? f }
{ deploy-io 1 }
{ deploy-compiler? t }
{ deploy-reflection 1 }
{ "stop-after-last-window?" t }
{ deploy-unicode? f }
{ deploy-word-defs? f }
{ deploy-c-types? f }
}

View File

@ -3,11 +3,21 @@
USING: accessors http.client kernel unicode.categories
sequences urls splitting combinators splitting.monotonic
combinators.short-circuit assocs unicode.case arrays
math.parser calendar.format make ;
math.parser calendar.format make fry present globs
multiline regexp.combinators regexp ;
IN: robots
! visit-time is GMT, request-rate is pages/second
! crawl-rate is seconds
TUPLE: robots site sitemap rules rules-quot ;
: <robots> ( site sitemap rules -- robots )
\ robots new
swap >>rules
swap >>sitemap
swap >>site ;
TUPLE: rules user-agents allows disallows
visit-time request-rate crawl-delay unknowns ;
@ -40,8 +50,8 @@ visit-time request-rate crawl-delay unknowns ;
H{ } clone >>unknowns ;
: add-user-agent ( rules agent -- rules ) over user-agents>> push ;
: add-allow ( rules allow -- rules ) over allows>> push ;
: add-disallow ( rules disallow -- rules ) over disallows>> push ;
: add-allow ( rules allow -- rules ) >url over allows>> push ;
: add-disallow ( rules disallow -- rules ) >url over disallows>> push ;
: parse-robots.txt-line ( rules seq -- rules )
first2 swap {
@ -57,12 +67,26 @@ visit-time request-rate crawl-delay unknowns ;
[ pick unknowns>> push-at ]
} case ;
: derive-urls ( url seq -- seq' )
[ derive-url present ] with { } map-as ;
: robot-rules-quot ( robots -- quot )
[
[ site>> ] [ rules>> allows>> ] bi
derive-urls [ <glob> ] map
<or>
] [
[ site>> ] [ rules>> disallows>> ] bi
derive-urls [ <glob> ] map <and> <not>
] bi 2array <or> '[ _ matches? ] ;
PRIVATE>
: parse-robots.txt ( string -- sitemaps rules-seq )
normalize-robots.txt [
[ <rules> dup ] dip [ parse-robots.txt-line drop ] with each
] map ;
] map first ;
: robots ( url -- sitemaps rules-seq )
get-robots.txt nip parse-robots.txt ;
: robots ( url -- robots )
>url
dup get-robots.txt nip parse-robots.txt <robots> ;

View File

@ -47,9 +47,26 @@ watching-site "WATCHING_SITE" {
{ "site-id" "SITE_ID" INTEGER +user-assigned-id+ }
} define-persistent
TUPLE: reporting-site email url up? changed? last-up? error last-error ;
TUPLE: spidering-site < watching-site max-depth max-count ;
<PRIVATE
SLOT: site
M: watching-site site>>
site-id>> site new swap >>site-id select-tuple ;
SLOT: account
M: watching-site account>>
account-name>> account new swap >>account-name select-tuple ;
spidering-site "SPIDERING_SITE" {
{ "account-name" "ACCOUNT_NAME" VARCHAR +user-assigned-id+ }
{ "site-id" "SITE_ID" INTEGER +user-assigned-id+ }
{ "max-depth" "MAX_DEPTH" INTEGER }
{ "max-count" "MAX_COUNT" INTEGER }
} define-persistent
TUPLE: reporting-site site-id email url up? changed? last-up? error last-error ;
: set-notify-site-watchers ( site new-up? -- site )
[ over up?>> = [ t >>changed? ] unless ] keep >>up? ;
@ -82,8 +99,6 @@ TUPLE: reporting-site email url up? changed? last-up? error last-error ;
: select-account/site ( username url -- account site )
insert-site site-id>> ;
PRIVATE>
: watch-site ( username url -- )
select-account/site <watching-site> insert-tuple ;

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,14 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: smtp namespaces accessors kernel arrays ;
IN: site-watcher.email
SYMBOL: site-watcher-from
site-watcher-from [ "factor-site-watcher@gmail.com" ] initialize
: send-site-email ( watching-site body subject -- )
[ account>> email>> ] 2dip
pick [
[ <email> site-watcher-from get >>from ] 3dip
[ 1array >>to ] [ >>body ] [ >>subject ] tri* send-email
] [ 3drop ] if ;

View File

@ -3,13 +3,9 @@
USING: accessors alarms arrays calendar combinators
combinators.smart continuations debugger http.client fry
init io.streams.string kernel locals math math.parser db
namespaces sequences site-watcher.db site-watcher.db.private
smtp ;
namespaces sequences site-watcher.db site-watcher.email ;
IN: site-watcher
SYMBOL: site-watcher-from
"factor-site-watcher@gmail.com" site-watcher-from set-global
SYMBOL: site-watcher-frequency
5 minutes site-watcher-frequency set-global
@ -23,22 +19,19 @@ SYMBOL: running-site-watcher
[ dup url>> http-get 2drop site-good ] [ site-bad ] recover
] each ;
: site-up-email ( email site -- email )
: site-up-email ( site -- body )
last-up>> now swap time- duration>minutes 60 /mod
[ >integer number>string ] bi@
[ " hours, " append ] [ " minutes" append ] bi* append
"Site was down for (at least): " prepend >>body ;
"Site was down for (at least): " prepend ;
: site-down-email ( email site -- email ) error>> >>body ;
: site-down-email ( site -- body ) error>> ;
: send-report ( site -- )
[ <email> ] dip
{
[ email>> 1array >>to ]
[ drop site-watcher-from get "factor.site.watcher@gmail.com" or >>from ]
[ dup up?>> [ site-up-email ] [ site-down-email ] if ]
[ [ url>> ] [ up?>> "up" "down" ? ] bi " is " glue >>subject ]
} cleave send-email ;
[ ]
[ dup up?>> [ site-up-email ] [ site-down-email ] if ]
[ [ url>> ] [ up?>> "up" "down" ? ] bi " is " glue ] tri
send-site-email ;
: send-reports ( seq -- )
[ ] [ [ send-report ] each ] if-empty ;

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,22 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: site-watcher.db site-watcher.email
spider spider.report
accessors kernel sequences
xml.writer ;
IN: site-watcher.spider
: <site-spider> ( spidering-site -- spider )
[ max-depth>> ]
[ max-count>> ]
[ site>> url>> ]
tri
<spider>
swap >>max-count
swap >>max-depth ;
: spider-and-email ( spidering-site -- )
[ ]
[ <site-spider> run-spider spider-report xml>string ]
[ site>> url>> "Spidered " prefix ] tri
send-site-email ;

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs combinators kernel math
math.statistics namespaces sequences sorting xml.syntax
spider ;
spider urls html ;
IN: spider.report
SYMBOL: network-failures
@ -87,27 +87,37 @@ SYMBOL: time-std
slowest-pages-table
timing-summary-table
[XML
<h2>Slowest pages</h2>
<h3>Slowest pages</h3>
<->
<h2>Summary</h2>
<h3>Summary</h3>
<->
XML] ;
: generate-report ( -- html )
url get dup
report-broken-pages
report-network-failures
report-timings
[XML
<h1>Broken pages</h1>
<h1>Spider report</h1>
URL: <a href=<->><-></a>
<h2>Broken pages</h2>
<->
<h1>Network failures</h1>
<h2>Network failures</h2>
<->
<h1>Load times</h1>
<h2>Load times</h2>
<->
XML] ;
: spider-report ( spider -- html )
[ spidered>> process-results generate-report ] with-scope ;
[ "Spider report" f ] dip
[
[ base>> url set ]
[ spidered>> process-results ] bi
generate-report
] with-scope
simple-page ;