Merge branch 'master' of git://factorcode.org/git/factor
commit db280b6323
@@ -21,12 +21,12 @@ CONSTANT: epsilon T{ tagged-epsilon { tag t } }
TUPLE: concatenation first second ;

: <concatenation> ( seq -- concatenation )
-    [ epsilon ] [ unclip [ concatenation boa ] reduce ] if-empty ;
+    [ epsilon ] [ [ ] [ concatenation boa ] map-reduce ] if-empty ;

TUPLE: alternation first second ;

: <alternation> ( seq -- alternation )
-    unclip [ alternation boa ] reduce ;
+    [ ] [ alternation boa ] map-reduce ;

TUPLE: star term ;
C: <star> star
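The constructors above now fold with map-reduce and an identity map quotation instead of unclip/reduce. A quick listener sketch, not part of the commit, of why the two folds agree on any non-empty sequence, using 2array as a stand-in combiner:

USING: arrays prettyprint sequences ;

{ "a" "b" "c" } unclip [ 2array ] reduce .      ! { { "a" "b" } "c" }
{ "a" "b" "c" } [ ] [ 2array ] map-reduce .     ! { { "a" "b" } "c" }
! Both folds throw on an empty sequence, hence the if-empty guard.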
@@ -11,12 +11,7 @@ TUPLE: transition-table transitions start-state final-states ;
    H{ } clone >>transitions
    H{ } clone >>final-states ;

-: maybe-initialize-key ( key hashtable -- )
-    ! Why do we have to do this?
-    2dup key? [ 2drop ] [ [ H{ } clone ] 2dip set-at ] if ;
-
:: (set-transition) ( from to obj hash -- )
-    to condition? [ to hash maybe-initialize-key ] unless
    from hash at
    [ [ to obj ] dip set-at ]
    [ to obj associate from hash set-at ] if* ;
@@ -25,7 +20,6 @@ TUPLE: transition-table transitions start-state final-states ;
    transitions>> (set-transition) ;

:: (add-transition) ( from to obj hash -- )
-    to hash maybe-initialize-key
    from hash at
    [ [ to obj ] dip push-at ]
    [ to 1vector obj associate from hash set-at ] if* ;
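The simplified (add-transition) above leans on push-at, which creates the target vector itself when the key is missing, so the table no longer needs pre-seeding. A small illustration outside the commit:

USING: assocs kernel prettyprint ;

H{ } clone
1 "a" pick push-at
2 "a" pick push-at
.    ! H{ { "a" V{ 1 2 } } }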
@@ -452,6 +452,7 @@ editor "caret-motion" f {

editor "selection" f {
    { T{ button-down f { S+ } 1 } extend-selection }
    { T{ button-up f { S+ } 1 } com-copy-selection }
    { T{ drag } drag-selection }
    { gain-focus focus-editor }
    { lose-focus unfocus-editor }
@@ -42,7 +42,7 @@ GENERIC: set-where ( loc defspec -- )

GENERIC: forget* ( defspec -- )

-M: object forget* drop ;
+M: f forget* drop ;

SYMBOL: forgotten-definitions
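With the method above now defined on the f class, only f itself gets the silent no-op; other definition specs fall through to a no-method error. A dispatch sketch outside the commit:

USING: kernel prettyprint ;

GENERIC: describe-spec ( defspec -- str )
M: f describe-spec drop "nothing to forget" ;

f describe-spec .    ! "nothing to forget"
! 5 describe-spec would now signal a no-method error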
@@ -53,8 +53,6 @@ SYMBOL: forgotten-definitions

: forget-all ( definitions -- ) [ forget ] each ;

GENERIC: synopsis* ( defspec -- )

GENERIC: definer ( defspec -- start end )

GENERIC: definition ( defspec -- seq )
@@ -0,0 +1,15 @@
+USING: tools.deploy.config ;
+H{
+    { deploy-name "benchmark.fib6" }
+    { deploy-threads? f }
+    { deploy-math? f }
+    { deploy-word-props? f }
+    { deploy-ui? f }
+    { deploy-io 1 }
+    { deploy-compiler? t }
+    { deploy-reflection 1 }
+    { "stop-after-last-window?" t }
+    { deploy-unicode? f }
+    { deploy-word-defs? f }
+    { deploy-c-types? f }
+}
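A hypothetical listener invocation, not in the commit, that consumes a deploy configuration like this one:

USING: tools.deploy ;

"benchmark.fib6" deploy    ! builds the stripped standalone application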
@@ -3,11 +3,21 @@
USING: accessors http.client kernel unicode.categories
sequences urls splitting combinators splitting.monotonic
combinators.short-circuit assocs unicode.case arrays
-math.parser calendar.format make ;
+math.parser calendar.format make fry present globs
+multiline regexp.combinators regexp ;
IN: robots

! visit-time is GMT, request-rate is pages/second
! crawl-rate is seconds

+TUPLE: robots site sitemap rules rules-quot ;
+
+: <robots> ( site sitemap rules -- robots )
+    \ robots new
+        swap >>rules
+        swap >>sitemap
+        swap >>site ;
+
TUPLE: rules user-agents allows disallows
visit-time request-rate crawl-delay unknowns ;
@@ -40,8 +50,8 @@ visit-time request-rate crawl-delay unknowns ;
    H{ } clone >>unknowns ;

: add-user-agent ( rules agent -- rules ) over user-agents>> push ;
-: add-allow ( rules allow -- rules ) over allows>> push ;
-: add-disallow ( rules disallow -- rules ) over disallows>> push ;
+: add-allow ( rules allow -- rules ) >url over allows>> push ;
+: add-disallow ( rules disallow -- rules ) >url over disallows>> push ;

: parse-robots.txt-line ( rules seq -- rules )
    first2 swap {
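The >url added to add-allow and add-disallow parses each stored path into a url tuple rather than keeping the raw string; a one-liner outside the commit:

USING: prettyprint urls ;

"/private/" >url .    ! URL" /private/"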
@@ -57,12 +67,26 @@ visit-time request-rate crawl-delay unknowns ;
        [ pick unknowns>> push-at ]
    } case ;

+: derive-urls ( url seq -- seq' )
+    [ derive-url present ] with { } map-as ;
+
+: robot-rules-quot ( robots -- quot )
+    [
+        [ site>> ] [ rules>> allows>> ] bi
+        derive-urls [ <glob> ] map
+        <or>
+    ] [
+        [ site>> ] [ rules>> disallows>> ] bi
+        derive-urls [ <glob> ] map <and> <not>
+    ] bi 2array <or> '[ _ matches? ] ;
+
PRIVATE>

: parse-robots.txt ( string -- sitemaps rules-seq )
    normalize-robots.txt [
        [ <rules> dup ] dip [ parse-robots.txt-line drop ] with each
-    ] map ;
+    ] map first ;

-: robots ( url -- sitemaps rules-seq )
-    get-robots.txt nip parse-robots.txt ;
+: robots ( url -- robots )
+    >url
+    dup get-robots.txt nip parse-robots.txt <robots> ;
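robot-rules-quot folds one glob per pattern into a single regexp with the regexp.combinators words and fries it into a predicate; the core of that idea, sketched outside the commit:

USING: globs prettyprint regexp regexp.combinators sequences ;

"/tmp/scratch.txt"
{ "/private/*" "/tmp/*" } [ <glob> ] map <or>
matches? .    ! t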
@@ -47,9 +47,26 @@ watching-site "WATCHING_SITE" {
    { "site-id" "SITE_ID" INTEGER +user-assigned-id+ }
} define-persistent

-TUPLE: reporting-site email url up? changed? last-up? error last-error ;
+TUPLE: spidering-site < watching-site max-depth max-count ;

<PRIVATE
+SLOT: site
+
+M: watching-site site>>
+    site-id>> site new swap >>site-id select-tuple ;
+
+SLOT: account
+
+M: watching-site account>>
+    account-name>> account new swap >>account-name select-tuple ;
+
+spidering-site "SPIDERING_SITE" {
+    { "account-name" "ACCOUNT_NAME" VARCHAR +user-assigned-id+ }
+    { "site-id" "SITE_ID" INTEGER +user-assigned-id+ }
+    { "max-depth" "MAX_DEPTH" INTEGER }
+    { "max-count" "MAX_COUNT" INTEGER }
+} define-persistent
+
+TUPLE: reporting-site site-id email url up? changed? last-up? error last-error ;

: set-notify-site-watchers ( site new-up? -- site )
    [ over up?>> = [ t >>changed? ] unless ] keep >>up? ;
@@ -82,8 +99,6 @@ TUPLE: reporting-site email url up? changed? last-up? error last-error ;
: select-account/site ( username url -- account site )
    insert-site site-id>> ;

PRIVATE>

: watch-site ( username url -- )
    select-account/site <watching-site> insert-tuple ;
@@ -0,0 +1 @@
+Slava Pestov
@@ -0,0 +1,14 @@
+! Copyright (C) 2009 Slava Pestov.
+! See http://factorcode.org/license.txt for BSD license.
+USING: smtp namespaces accessors kernel arrays ;
+IN: site-watcher.email
+
+SYMBOL: site-watcher-from
+site-watcher-from [ "factor-site-watcher@gmail.com" ] initialize
+
+: send-site-email ( watching-site body subject -- )
+    [ account>> email>> ] 2dip
+    pick [
+        [ <email> site-watcher-from get >>from ] 3dip
+        [ 1array >>to ] [ >>body ] [ >>subject ] tri* send-email
+    ] [ 3drop ] if ;
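The new vocabulary fills site-watcher-from with initialize, which only assigns when the global is still unset, so a value configured before the vocabulary loads is preserved. A small sketch outside the commit:

USING: namespaces prettyprint ;

SYMBOL: sender
sender [ "default@example.com" ] initialize
sender get .    ! "default@example.com"

"custom@example.com" sender set-global
sender [ "default@example.com" ] initialize
sender get .    ! "custom@example.com"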
@@ -3,13 +3,9 @@
USING: accessors alarms arrays calendar combinators
combinators.smart continuations debugger http.client fry
init io.streams.string kernel locals math math.parser db
-namespaces sequences site-watcher.db site-watcher.db.private
-smtp ;
+namespaces sequences site-watcher.db site-watcher.email ;
IN: site-watcher

-SYMBOL: site-watcher-from
-"factor-site-watcher@gmail.com" site-watcher-from set-global
-
SYMBOL: site-watcher-frequency
5 minutes site-watcher-frequency set-global
@@ -23,22 +19,19 @@ SYMBOL: running-site-watcher
        [ dup url>> http-get 2drop site-good ] [ site-bad ] recover
    ] each ;

-: site-up-email ( email site -- email )
+: site-up-email ( site -- body )
    last-up>> now swap time- duration>minutes 60 /mod
    [ >integer number>string ] bi@
    [ " hours, " append ] [ " minutes" append ] bi* append
-    "Site was down for (at least): " prepend >>body ;
+    "Site was down for (at least): " prepend ;

-: site-down-email ( email site -- email ) error>> >>body ;
+: site-down-email ( site -- body ) error>> ;

: send-report ( site -- )
-    [ <email> ] dip
-    {
-        [ email>> 1array >>to ]
-        [ drop site-watcher-from get "factor.site.watcher@gmail.com" or >>from ]
-        [ dup up?>> [ site-up-email ] [ site-down-email ] if ]
-        [ [ url>> ] [ up?>> "up" "down" ? ] bi " is " glue >>subject ]
-    } cleave send-email ;
+    [ ]
+    [ dup up?>> [ site-up-email ] [ site-down-email ] if ]
+    [ [ url>> ] [ up?>> "up" "down" ? ] bi " is " glue ] tri
+    send-site-email ;

: send-reports ( seq -- )
    [ ] [ [ send-report ] each ] if-empty ;
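send-report now builds the three send-site-email inputs with the [ ] [ ... ] [ ... ] tri idiom, which keeps the object itself as the first output; a minimal sketch outside the commit:

USING: arrays kernel math prettyprint ;

10 [ ] [ 1 + ] [ 2 * ] tri 3array .    ! { 10 11 20 }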
@@ -0,0 +1 @@
+Slava Pestov
@@ -0,0 +1,22 @@
+! Copyright (C) 2009 Slava Pestov.
+! See http://factorcode.org/license.txt for BSD license.
+USING: site-watcher.db site-watcher.email
+spider spider.report
+accessors kernel sequences
+xml.writer ;
+IN: site-watcher.spider
+
+: <site-spider> ( spidering-site -- spider )
+    [ max-depth>> ]
+    [ max-count>> ]
+    [ site>> url>> ]
+    tri
+    <spider>
+    swap >>max-count
+    swap >>max-depth ;
+
+: spider-and-email ( spidering-site -- )
+    [ ]
+    [ <site-spider> run-spider spider-report xml>string ]
+    [ site>> url>> "Spidered " prefix ] tri
+    send-site-email ;
@@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs combinators kernel math
math.statistics namespaces sequences sorting xml.syntax
-spider ;
+spider urls html ;
IN: spider.report

SYMBOL: network-failures
@@ -87,27 +87,37 @@ SYMBOL: time-std
    slowest-pages-table
    timing-summary-table
    [XML
-        <h2>Slowest pages</h2>
+        <h3>Slowest pages</h3>
        <->

-        <h2>Summary</h2>
+        <h3>Summary</h3>
        <->
    XML] ;

: generate-report ( -- html )
+    url get dup
    report-broken-pages
    report-network-failures
    report-timings
    [XML
-        <h1>Broken pages</h1>
+        <h1>Spider report</h1>
+        URL: <a href=<->><-></a>
+
+        <h2>Broken pages</h2>
        <->

-        <h1>Network failures</h1>
+        <h2>Network failures</h2>
        <->

-        <h1>Load times</h1>
+        <h2>Load times</h2>
        <->
    XML] ;

: spider-report ( spider -- html )
-    [ spidered>> process-results generate-report ] with-scope ;
+    [ "Spider report" f ] dip
+    [
+        [ base>> url set ]
+        [ spidered>> process-results ] bi
+        generate-report
+    ] with-scope
+    simple-page ;