allow robot-identifiers to be set for robots.txt
parent
9c45840b5d
commit
1e14a83ee1
|
@ -1,15 +1,18 @@
|
||||||
! Copyright (C) 2009 Doug Coleman.
|
! Copyright (C) 2009 Doug Coleman.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors http.client kernel unicode.categories
|
USING: accessors arrays assocs calendar.format combinators
|
||||||
sequences urls splitting combinators splitting.monotonic
|
combinators.short-circuit fry globs http.client kernel make
|
||||||
combinators.short-circuit assocs unicode.case arrays
|
math.parser multiline namespaces present regexp
|
||||||
math.parser calendar.format make fry present globs
|
regexp.combinators sequences sets splitting splitting.monotonic
|
||||||
multiline regexp.combinators regexp ;
|
unicode.case unicode.categories urls ;
|
||||||
IN: robots
|
IN: robots
|
||||||
|
|
||||||
! visit-time is GMT, request-rate is pages/second
|
! visit-time is GMT, request-rate is pages/second
|
||||||
! crawl-delay is in seconds
|
! crawl-delay is in seconds
|
||||||
|
|
||||||
|
! Variable holding the sequence of names this crawler identifies as.
! relevant-rules reads it to decide which user-agent rule sets apply.
! The initialize call below supplies { "FactorSpider" } as the default.
SYMBOL: robot-identities
|
||||||
|
robot-identities [ { "FactorSpider" } ] initialize
|
||||||
|
|
||||||
TUPLE: robots site sitemap rules rules-quot ;
|
TUPLE: robots site sitemap rules rules-quot ;
|
||||||
|
|
||||||
: <robots> ( site sitemap rules -- robots )
|
: <robots> ( site sitemap rules -- robots )
|
||||||
|
@ -80,6 +83,13 @@ visit-time request-rate crawl-delay unknowns ;
|
||||||
derive-urls [ <glob> ] map <and> <not>
|
derive-urls [ <glob> ] map <and> <not>
|
||||||
] bi 2array <or> '[ _ matches? ] ;
|
] bi 2array <or> '[ _ matches? ] ;
|
||||||
|
|
||||||
|
! Filters the input sequence, keeping each element for which at least one
! of its user-agents entries, used as a glob, matches at least one of the
! names stored in the robot-identities variable.
! NOTE(review): every element must respond to user-agents>>, so the input
! is presumably a sequence of rules objects rather than a robots tuple,
! despite the name in the stack effect -- confirm against callers.
: relevant-rules ( robots -- rules )
    [
        user-agents>>
        ! Stack inside the inner quotation: user-agent. Each identity is
        ! the input and the user-agent is the glob for glob-matches?.
        [ robot-identities get swap '[ _ glob-matches? ] any? ] any?
    ] filter ;
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
: parse-robots.txt ( string -- sitemaps rules-seq )
|
: parse-robots.txt ( string -- sitemaps rules-seq )
|
||||||
|
|
Loading…
Reference in New Issue