Merge branch 'master' of git://factorcode.org/git/factor
commit
db280b6323
|
@ -21,12 +21,12 @@ CONSTANT: epsilon T{ tagged-epsilon { tag t } }
|
||||||
TUPLE: concatenation first second ;
|
TUPLE: concatenation first second ;
|
||||||
|
|
||||||
: <concatenation> ( seq -- concatenation )
|
: <concatenation> ( seq -- concatenation )
|
||||||
[ epsilon ] [ unclip [ concatenation boa ] reduce ] if-empty ;
|
[ epsilon ] [ [ ] [ concatenation boa ] map-reduce ] if-empty ;
|
||||||
|
|
||||||
TUPLE: alternation first second ;
|
TUPLE: alternation first second ;
|
||||||
|
|
||||||
: <alternation> ( seq -- alternation )
|
: <alternation> ( seq -- alternation )
|
||||||
unclip [ alternation boa ] reduce ;
|
[ ] [ alternation boa ] map-reduce ;
|
||||||
|
|
||||||
TUPLE: star term ;
|
TUPLE: star term ;
|
||||||
C: <star> star
|
C: <star> star
|
||||||
|
|
|
@ -11,12 +11,7 @@ TUPLE: transition-table transitions start-state final-states ;
|
||||||
H{ } clone >>transitions
|
H{ } clone >>transitions
|
||||||
H{ } clone >>final-states ;
|
H{ } clone >>final-states ;
|
||||||
|
|
||||||
: maybe-initialize-key ( key hashtable -- )
|
|
||||||
! Why do we have to do this?
|
|
||||||
2dup key? [ 2drop ] [ [ H{ } clone ] 2dip set-at ] if ;
|
|
||||||
|
|
||||||
:: (set-transition) ( from to obj hash -- )
|
:: (set-transition) ( from to obj hash -- )
|
||||||
to condition? [ to hash maybe-initialize-key ] unless
|
|
||||||
from hash at
|
from hash at
|
||||||
[ [ to obj ] dip set-at ]
|
[ [ to obj ] dip set-at ]
|
||||||
[ to obj associate from hash set-at ] if* ;
|
[ to obj associate from hash set-at ] if* ;
|
||||||
|
@ -25,7 +20,6 @@ TUPLE: transition-table transitions start-state final-states ;
|
||||||
transitions>> (set-transition) ;
|
transitions>> (set-transition) ;
|
||||||
|
|
||||||
:: (add-transition) ( from to obj hash -- )
|
:: (add-transition) ( from to obj hash -- )
|
||||||
to hash maybe-initialize-key
|
|
||||||
from hash at
|
from hash at
|
||||||
[ [ to obj ] dip push-at ]
|
[ [ to obj ] dip push-at ]
|
||||||
[ to 1vector obj associate from hash set-at ] if* ;
|
[ to 1vector obj associate from hash set-at ] if* ;
|
||||||
|
|
|
@ -452,6 +452,7 @@ editor "caret-motion" f {
|
||||||
|
|
||||||
editor "selection" f {
|
editor "selection" f {
|
||||||
{ T{ button-down f { S+ } 1 } extend-selection }
|
{ T{ button-down f { S+ } 1 } extend-selection }
|
||||||
|
{ T{ button-up f { S+ } 1 } com-copy-selection }
|
||||||
{ T{ drag } drag-selection }
|
{ T{ drag } drag-selection }
|
||||||
{ gain-focus focus-editor }
|
{ gain-focus focus-editor }
|
||||||
{ lose-focus unfocus-editor }
|
{ lose-focus unfocus-editor }
|
||||||
|
|
|
@ -42,7 +42,7 @@ GENERIC: set-where ( loc defspec -- )
|
||||||
|
|
||||||
GENERIC: forget* ( defspec -- )
|
GENERIC: forget* ( defspec -- )
|
||||||
|
|
||||||
M: object forget* drop ;
|
M: f forget* drop ;
|
||||||
|
|
||||||
SYMBOL: forgotten-definitions
|
SYMBOL: forgotten-definitions
|
||||||
|
|
||||||
|
@ -53,8 +53,6 @@ SYMBOL: forgotten-definitions
|
||||||
|
|
||||||
: forget-all ( definitions -- ) [ forget ] each ;
|
: forget-all ( definitions -- ) [ forget ] each ;
|
||||||
|
|
||||||
GENERIC: synopsis* ( defspec -- )
|
|
||||||
|
|
||||||
GENERIC: definer ( defspec -- start end )
|
GENERIC: definer ( defspec -- start end )
|
||||||
|
|
||||||
GENERIC: definition ( defspec -- seq )
|
GENERIC: definition ( defspec -- seq )
|
||||||
|
|
|
@ -0,0 +1,15 @@
|
||||||
|
USING: tools.deploy.config ;
|
||||||
|
H{
|
||||||
|
{ deploy-name "benchmark.fib6" }
|
||||||
|
{ deploy-threads? f }
|
||||||
|
{ deploy-math? f }
|
||||||
|
{ deploy-word-props? f }
|
||||||
|
{ deploy-ui? f }
|
||||||
|
{ deploy-io 1 }
|
||||||
|
{ deploy-compiler? t }
|
||||||
|
{ deploy-reflection 1 }
|
||||||
|
{ "stop-after-last-window?" t }
|
||||||
|
{ deploy-unicode? f }
|
||||||
|
{ deploy-word-defs? f }
|
||||||
|
{ deploy-c-types? f }
|
||||||
|
}
|
|
@ -3,11 +3,21 @@
|
||||||
USING: accessors http.client kernel unicode.categories
|
USING: accessors http.client kernel unicode.categories
|
||||||
sequences urls splitting combinators splitting.monotonic
|
sequences urls splitting combinators splitting.monotonic
|
||||||
combinators.short-circuit assocs unicode.case arrays
|
combinators.short-circuit assocs unicode.case arrays
|
||||||
math.parser calendar.format make ;
|
math.parser calendar.format make fry present globs
|
||||||
|
multiline regexp.combinators regexp ;
|
||||||
IN: robots
|
IN: robots
|
||||||
|
|
||||||
! visit-time is GMT, request-rate is pages/second
|
! visit-time is GMT, request-rate is pages/second
|
||||||
! crawl-rate is seconds
|
! crawl-rate is seconds
|
||||||
|
|
||||||
|
TUPLE: robots site sitemap rules rules-quot ;
|
||||||
|
|
||||||
|
: <robots> ( site sitemap rules -- robots )
|
||||||
|
\ robots new
|
||||||
|
swap >>rules
|
||||||
|
swap >>sitemap
|
||||||
|
swap >>site ;
|
||||||
|
|
||||||
TUPLE: rules user-agents allows disallows
|
TUPLE: rules user-agents allows disallows
|
||||||
visit-time request-rate crawl-delay unknowns ;
|
visit-time request-rate crawl-delay unknowns ;
|
||||||
|
|
||||||
|
@ -40,8 +50,8 @@ visit-time request-rate crawl-delay unknowns ;
|
||||||
H{ } clone >>unknowns ;
|
H{ } clone >>unknowns ;
|
||||||
|
|
||||||
: add-user-agent ( rules agent -- rules ) over user-agents>> push ;
|
: add-user-agent ( rules agent -- rules ) over user-agents>> push ;
|
||||||
: add-allow ( rules allow -- rules ) over allows>> push ;
|
: add-allow ( rules allow -- rules ) >url over allows>> push ;
|
||||||
: add-disallow ( rules disallow -- rules ) over disallows>> push ;
|
: add-disallow ( rules disallow -- rules ) >url over disallows>> push ;
|
||||||
|
|
||||||
: parse-robots.txt-line ( rules seq -- rules )
|
: parse-robots.txt-line ( rules seq -- rules )
|
||||||
first2 swap {
|
first2 swap {
|
||||||
|
@ -57,12 +67,26 @@ visit-time request-rate crawl-delay unknowns ;
|
||||||
[ pick unknowns>> push-at ]
|
[ pick unknowns>> push-at ]
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
|
: derive-urls ( url seq -- seq' )
|
||||||
|
[ derive-url present ] with { } map-as ;
|
||||||
|
|
||||||
|
: robot-rules-quot ( robots -- quot )
|
||||||
|
[
|
||||||
|
[ site>> ] [ rules>> allows>> ] bi
|
||||||
|
derive-urls [ <glob> ] map
|
||||||
|
<or>
|
||||||
|
] [
|
||||||
|
[ site>> ] [ rules>> disallows>> ] bi
|
||||||
|
derive-urls [ <glob> ] map <and> <not>
|
||||||
|
] bi 2array <or> '[ _ matches? ] ;
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
: parse-robots.txt ( string -- sitemaps rules-seq )
|
: parse-robots.txt ( string -- sitemaps rules-seq )
|
||||||
normalize-robots.txt [
|
normalize-robots.txt [
|
||||||
[ <rules> dup ] dip [ parse-robots.txt-line drop ] with each
|
[ <rules> dup ] dip [ parse-robots.txt-line drop ] with each
|
||||||
] map ;
|
] map first ;
|
||||||
|
|
||||||
: robots ( url -- sitemaps rules-seq )
|
: robots ( url -- robots )
|
||||||
get-robots.txt nip parse-robots.txt ;
|
>url
|
||||||
|
dup get-robots.txt nip parse-robots.txt <robots> ;
|
||||||
|
|
|
@ -47,9 +47,26 @@ watching-site "WATCHING_SITE" {
|
||||||
{ "site-id" "SITE_ID" INTEGER +user-assigned-id+ }
|
{ "site-id" "SITE_ID" INTEGER +user-assigned-id+ }
|
||||||
} define-persistent
|
} define-persistent
|
||||||
|
|
||||||
TUPLE: reporting-site email url up? changed? last-up? error last-error ;
|
TUPLE: spidering-site < watching-site max-depth max-count ;
|
||||||
|
|
||||||
<PRIVATE
|
SLOT: site
|
||||||
|
|
||||||
|
M: watching-site site>>
|
||||||
|
site-id>> site new swap >>site-id select-tuple ;
|
||||||
|
|
||||||
|
SLOT: account
|
||||||
|
|
||||||
|
M: watching-site account>>
|
||||||
|
account-name>> account new swap >>account-name select-tuple ;
|
||||||
|
|
||||||
|
spidering-site "SPIDERING_SITE" {
|
||||||
|
{ "account-name" "ACCOUNT_NAME" VARCHAR +user-assigned-id+ }
|
||||||
|
{ "site-id" "SITE_ID" INTEGER +user-assigned-id+ }
|
||||||
|
{ "max-depth" "MAX_DEPTH" INTEGER }
|
||||||
|
{ "max-count" "MAX_COUNT" INTEGER }
|
||||||
|
} define-persistent
|
||||||
|
|
||||||
|
TUPLE: reporting-site site-id email url up? changed? last-up? error last-error ;
|
||||||
|
|
||||||
: set-notify-site-watchers ( site new-up? -- site )
|
: set-notify-site-watchers ( site new-up? -- site )
|
||||||
[ over up?>> = [ t >>changed? ] unless ] keep >>up? ;
|
[ over up?>> = [ t >>changed? ] unless ] keep >>up? ;
|
||||||
|
@ -82,8 +99,6 @@ TUPLE: reporting-site email url up? changed? last-up? error last-error ;
|
||||||
: select-account/site ( username url -- account site )
|
: select-account/site ( username url -- account site )
|
||||||
insert-site site-id>> ;
|
insert-site site-id>> ;
|
||||||
|
|
||||||
PRIVATE>
|
|
||||||
|
|
||||||
: watch-site ( username url -- )
|
: watch-site ( username url -- )
|
||||||
select-account/site <watching-site> insert-tuple ;
|
select-account/site <watching-site> insert-tuple ;
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Slava Pestov
|
|
@ -0,0 +1,14 @@
|
||||||
|
! Copyright (C) 2009 Slava Pestov.
|
||||||
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
|
USING: smtp namespaces accessors kernel arrays ;
|
||||||
|
IN: site-watcher.email
|
||||||
|
|
||||||
|
SYMBOL: site-watcher-from
|
||||||
|
site-watcher-from [ "factor-site-watcher@gmail.com" ] initialize
|
||||||
|
|
||||||
|
: send-site-email ( watching-site body subject -- )
|
||||||
|
[ account>> email>> ] 2dip
|
||||||
|
pick [
|
||||||
|
[ <email> site-watcher-from get >>from ] 3dip
|
||||||
|
[ 1array >>to ] [ >>body ] [ >>subject ] tri* send-email
|
||||||
|
] [ 3drop ] if ;
|
|
@ -3,13 +3,9 @@
|
||||||
USING: accessors alarms arrays calendar combinators
|
USING: accessors alarms arrays calendar combinators
|
||||||
combinators.smart continuations debugger http.client fry
|
combinators.smart continuations debugger http.client fry
|
||||||
init io.streams.string kernel locals math math.parser db
|
init io.streams.string kernel locals math math.parser db
|
||||||
namespaces sequences site-watcher.db site-watcher.db.private
|
namespaces sequences site-watcher.db site-watcher.email ;
|
||||||
smtp ;
|
|
||||||
IN: site-watcher
|
IN: site-watcher
|
||||||
|
|
||||||
SYMBOL: site-watcher-from
|
|
||||||
"factor-site-watcher@gmail.com" site-watcher-from set-global
|
|
||||||
|
|
||||||
SYMBOL: site-watcher-frequency
|
SYMBOL: site-watcher-frequency
|
||||||
5 minutes site-watcher-frequency set-global
|
5 minutes site-watcher-frequency set-global
|
||||||
|
|
||||||
|
@ -23,22 +19,19 @@ SYMBOL: running-site-watcher
|
||||||
[ dup url>> http-get 2drop site-good ] [ site-bad ] recover
|
[ dup url>> http-get 2drop site-good ] [ site-bad ] recover
|
||||||
] each ;
|
] each ;
|
||||||
|
|
||||||
: site-up-email ( email site -- email )
|
: site-up-email ( site -- body )
|
||||||
last-up>> now swap time- duration>minutes 60 /mod
|
last-up>> now swap time- duration>minutes 60 /mod
|
||||||
[ >integer number>string ] bi@
|
[ >integer number>string ] bi@
|
||||||
[ " hours, " append ] [ " minutes" append ] bi* append
|
[ " hours, " append ] [ " minutes" append ] bi* append
|
||||||
"Site was down for (at least): " prepend >>body ;
|
"Site was down for (at least): " prepend ;
|
||||||
|
|
||||||
: site-down-email ( email site -- email ) error>> >>body ;
|
: site-down-email ( site -- body ) error>> ;
|
||||||
|
|
||||||
: send-report ( site -- )
|
: send-report ( site -- )
|
||||||
[ <email> ] dip
|
[ ]
|
||||||
{
|
[ dup up?>> [ site-up-email ] [ site-down-email ] if ]
|
||||||
[ email>> 1array >>to ]
|
[ [ url>> ] [ up?>> "up" "down" ? ] bi " is " glue ] tri
|
||||||
[ drop site-watcher-from get "factor.site.watcher@gmail.com" or >>from ]
|
send-site-email ;
|
||||||
[ dup up?>> [ site-up-email ] [ site-down-email ] if ]
|
|
||||||
[ [ url>> ] [ up?>> "up" "down" ? ] bi " is " glue >>subject ]
|
|
||||||
} cleave send-email ;
|
|
||||||
|
|
||||||
: send-reports ( seq -- )
|
: send-reports ( seq -- )
|
||||||
[ ] [ [ send-report ] each ] if-empty ;
|
[ ] [ [ send-report ] each ] if-empty ;
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Slava Pestov
|
|
@ -0,0 +1,22 @@
|
||||||
|
! Copyright (C) 2009 Slava Pestov.
|
||||||
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
|
USING: site-watcher.db site-watcher.email
|
||||||
|
spider spider.report
|
||||||
|
accessors kernel sequences
|
||||||
|
xml.writer ;
|
||||||
|
IN: site-watcher.spider
|
||||||
|
|
||||||
|
: <site-spider> ( spidering-site -- spider )
|
||||||
|
[ max-depth>> ]
|
||||||
|
[ max-count>> ]
|
||||||
|
[ site>> url>> ]
|
||||||
|
tri
|
||||||
|
<spider>
|
||||||
|
swap >>max-count
|
||||||
|
swap >>max-depth ;
|
||||||
|
|
||||||
|
: spider-and-email ( spidering-site -- )
|
||||||
|
[ ]
|
||||||
|
[ <site-spider> run-spider spider-report xml>string ]
|
||||||
|
[ site>> url>> "Spidered " prefix ] tri
|
||||||
|
send-site-email ;
|
|
@ -2,7 +2,7 @@
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors arrays assocs combinators kernel math
|
USING: accessors arrays assocs combinators kernel math
|
||||||
math.statistics namespaces sequences sorting xml.syntax
|
math.statistics namespaces sequences sorting xml.syntax
|
||||||
spider ;
|
spider urls html ;
|
||||||
IN: spider.report
|
IN: spider.report
|
||||||
|
|
||||||
SYMBOL: network-failures
|
SYMBOL: network-failures
|
||||||
|
@ -87,27 +87,37 @@ SYMBOL: time-std
|
||||||
slowest-pages-table
|
slowest-pages-table
|
||||||
timing-summary-table
|
timing-summary-table
|
||||||
[XML
|
[XML
|
||||||
<h2>Slowest pages</h2>
|
<h3>Slowest pages</h3>
|
||||||
<->
|
<->
|
||||||
|
|
||||||
<h2>Summary</h2>
|
<h3>Summary</h3>
|
||||||
<->
|
<->
|
||||||
XML] ;
|
XML] ;
|
||||||
|
|
||||||
: generate-report ( -- html )
|
: generate-report ( -- html )
|
||||||
|
url get dup
|
||||||
report-broken-pages
|
report-broken-pages
|
||||||
report-network-failures
|
report-network-failures
|
||||||
report-timings
|
report-timings
|
||||||
[XML
|
[XML
|
||||||
<h1>Broken pages</h1>
|
<h1>Spider report</h1>
|
||||||
|
URL: <a href=<->><-></a>
|
||||||
|
|
||||||
|
<h2>Broken pages</h2>
|
||||||
<->
|
<->
|
||||||
|
|
||||||
<h1>Network failures</h1>
|
<h2>Network failures</h2>
|
||||||
<->
|
<->
|
||||||
|
|
||||||
<h1>Load times</h1>
|
<h2>Load times</h2>
|
||||||
<->
|
<->
|
||||||
XML] ;
|
XML] ;
|
||||||
|
|
||||||
: spider-report ( spider -- html )
|
: spider-report ( spider -- html )
|
||||||
[ spidered>> process-results generate-report ] with-scope ;
|
[ "Spider report" f ] dip
|
||||||
|
[
|
||||||
|
[ base>> url set ]
|
||||||
|
[ spidered>> process-results ] bi
|
||||||
|
generate-report
|
||||||
|
] with-scope
|
||||||
|
simple-page ;
|
||||||
|
|
Loading…
Reference in New Issue