Check in spider.report

db4
Slava Pestov 2009-04-03 21:16:27 -05:00
parent 48aa95e5b1
commit 95f304bee0
2 changed files with 114 additions and 0 deletions

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,113 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs combinators kernel math
math.statistics namespaces sequences sorting xml.syntax
spider ;
IN: spider.report
! Dynamic variables that accumulate per-URL results while a report is
! being built. process-results binds each one to a fresh vector.
! URLs whose spider result was f.
SYMBOL: network-failures
! Entries built by record-broken-page: { url { code message } }.
SYMBOL: broken-pages
! Entries built by record-page-timings: { url fetch-time }.
SYMBOL: timings
! Record a page that was fetched but is treated as broken (process-result
! calls this when the response code is not 200).
! Appends { url { code message } } to the vector bound to broken-pages,
! taking code and message from the result's headers.
: record-broken-page ( url spider-result -- )
    headers>> [ code>> ] [ message>> ] bi 2array 2array
    ! The symbol only names the variable; `get` fetches the vector to
    ! push onto, as the other record-* words do.
    broken-pages get push ;
! Record how long a page took to fetch.
! Appends { url fetch-time } to the vector bound to timings.
: record-page-timings ( url spider-result -- )
    ! Fetch the destination first, then build the pair underneath it.
    timings get [ fetch-time>> 2array ] dip push ;
! Record a URL for which there is no spider result (process-result calls
! this when the result is f).
! Appends the url to the vector bound to network-failures.
: record-network-failure ( url -- )
network-failures get push ;
! Classify one spidered entry and record it in the matching accumulator:
!   result is f             -> network failure (url only)
!   headers' code is 200    -> page timing
!   any other code          -> broken page
: process-result ( url spider-result -- )
    [
        ! A result exists: if* leaves it on the stack for this branch.
        dup headers>> code>> 200 =
        [ record-page-timings ] [ record-broken-page ] if
    ] [
        ! No result: if* has already dropped the f, only the url remains.
        record-network-failure
    ] if* ;
! Maximum number of entries process-timings keeps in slowest-pages.
CONSTANT: slowest 5
! The variables below are set by process-timings.
! Up to `slowest` entries of timings with the largest values, largest first.
SYMBOL: slowest-pages
! Mean, median and standard deviation of the recorded fetch times, each
! divided by 1000000 (the report labels the results as seconds, so the
! raw fetch times are presumably microseconds -- confirm against spider).
SYMBOL: mean-time
SYMBOL: median-time
SYMBOL: time-std
! Summarise the { url fetch-time } entries collected in timings.
! Sets:
!   slowest-pages - up to `slowest` entries with the largest fetch times,
!                   largest first
!   mean-time, median-time, time-std - statistics over the fetch times,
!                   each divided by 1000000
! When no timings were recorded the three statistics are set to 0.0
! rather than computed: mean and median are not defined for an empty
! sequence and would abort the whole report.
: process-timings ( -- )
    timings get sort-values
    [ slowest short tail* reverse slowest-pages set ]
    [
        values dup empty? [
            drop
            0.0 mean-time set
            0.0 median-time set
            0.0 time-std set
        ] [
            [ mean 1000000 /f mean-time set ]
            [ median 1000000 /f median-time set ]
            [ std 1000000 /f time-std set ] tri
        ] if
    ] bi ;
! Reset the three accumulators to fresh empty vectors, feed every
! url/result entry of the results assoc through process-result, then
! compute the timing summary with process-timings.
: process-results ( results -- )
    ! Same order as before: network-failures, broken-pages, timings.
    { network-failures broken-pages timings }
    [ V{ } clone swap set ] each
    [ process-result ] assoc-each
    process-timings ;
! Render an alist of { url time } pairs as a bordered HTML table.
! Each pair becomes one row: the url as a link to itself, followed by
! the time divided by 1000000 with a " seconds" suffix.
: info-table ( alist -- html )
    [
        ! ( pair -- url url seconds ), matching the three slots below.
        first2 1000000 /f [ dup ] dip
        ! Literal lines are kept flush-left so the XML text is unchanged.
        [XML
<tr><td><a href=<->><-></a></td><td><-> seconds</td></tr>
XML]
    ] map [XML <table border="1"><-></table> XML] ;
! Render the entries in broken-pages as a bordered HTML table with one
! row per page: the url as a link to itself, the response code, and the
! response message.
! These entries have the shape { url { code message } }, so they cannot
! go through info-table, which divides the second element by 1000000.
: report-broken-pages ( -- html )
    broken-pages get [
        ! ( entry -- url url code message ), matching the four slots below.
        first2 dupd first2
        [XML <tr><td><a href=<->><-></a></td><td><-></td><td><-></td></tr> XML]
    ] map [XML <table border="1"><-></table> XML] ;
! Render the URLs in network-failures as an HTML unordered list, each
! item being the url as a link to itself (the url is duplicated to fill
! both the href slot and the link text slot).
: report-network-failures ( -- html )
network-failures get [
dup [XML <li><a href=<->><-></a></li> XML]
] map [XML <ul><-></ul> XML] ;
! Render the entries stored in slowest-pages (set by process-timings)
! as an HTML table via info-table.
: slowest-pages-table ( -- html )
slowest-pages get info-table ;
! Render mean-time, median-time and time-std (set by process-timings)
! as a bordered three-row HTML table, each value labelled in seconds.
: timing-summary-table ( -- html )
    ! Read the three variables in the order the slots appear below.
    mean-time median-time time-std [ get ] tri@
    ! Literal lines are kept flush-left so the XML text is unchanged.
    [XML
<table border="1">
<tr><th>Mean</th><td><-> seconds</td></tr>
<tr><th>Median</th><td><-> seconds</td></tr>
<tr><th>Standard deviation</th><td><-> seconds</td></tr>
</table>
XML] ;
! Build the load-time section of the report: the slowest-pages table
! under a "Slowest pages" heading, then the summary statistics table
! under a "Summary" heading. The two tables are pushed in the order the
! slots appear in the template.
: report-timings ( -- html )
slowest-pages-table
timing-summary-table
[XML
<h2>Slowest pages</h2>
<->
<h2>Summary</h2>
<->
XML] ;
! Assemble the complete report from the three sections, each under its
! own top-level heading: broken pages, network failures, load times.
! Reads the variables populated by process-results, so it must run in
! the same dynamic scope after process-results has completed.
: generate-report ( -- html )
report-broken-pages
report-network-failures
report-timings
[XML
<h1>Broken pages</h1>
<->
<h1>Network failures</h1>
<->
<h1>Load times</h1>
<->
XML] ;
! Entry point: produce the HTML report for a spider.
! Takes the spider's spidered results, processes them and generates the
! report inside a fresh dynamic scope, so the variables set along the way
! do not persist after the word returns.
: spider-report ( spider -- html )
    spidered>> [ process-results generate-report ] with-scope ;