diff --git a/basis/calendar/format/format.factor b/basis/calendar/format/format.factor index 916d3499fe..c2e95f2a9e 100644 --- a/basis/calendar/format/format.factor +++ b/basis/calendar/format/format.factor @@ -46,6 +46,11 @@ IN: calendar.format : read-0000 ( -- n ) 4 read string>number ; +: hhmm>timestamp ( hhmm -- timestamp ) + [ + 0 0 0 read-00 read-00 0 instant + ] with-string-reader ; + GENERIC: day. ( obj -- ) M: integer day. ( n -- ) diff --git a/extra/html/parser/analyzer/analyzer.factor b/extra/html/parser/analyzer/analyzer.factor index abe830c3fa..b344ce160f 100755 --- a/extra/html/parser/analyzer/analyzer.factor +++ b/extra/html/parser/analyzer/analyzer.factor @@ -46,7 +46,7 @@ TUPLE: link attributes clickable ; : find-between-all ( vector quot -- seq ) dupd '[ _ [ closing?>> not ] bi and ] find-all - [ first2 find-between* ] with map ; + [ first2 find-between* ] with map ; inline : remove-blank-text ( vector -- vector' ) [ @@ -113,7 +113,7 @@ TUPLE: link attributes clickable ; [ clickable>> [ bl bl text>> print ] each nl ] bi ; : find-by-text ( seq quot -- tag ) - [ dup name>> text = ] prepose find drop ; + [ dup name>> text = ] prepose find drop ; inline : find-opening-tags-by-name ( name seq -- seq ) [ [ name>> = ] [ closing?>> not ] bi and ] with find-all ; diff --git a/extra/html/parser/parser.factor b/extra/html/parser/parser.factor index c445b708c5..60e5ddbf54 100644 --- a/extra/html/parser/parser.factor +++ b/extra/html/parser/parser.factor @@ -137,7 +137,7 @@ SYMBOL: tagstack ] when ; : tag-parse ( quot -- vector ) - V{ } clone tagstack [ string-parse ] with-variable ; + V{ } clone tagstack [ string-parse ] with-variable ; inline : parse-html ( string -- vector ) [ (parse-html) tagstack get ] tag-parse ; diff --git a/extra/html/parser/state/state.factor b/extra/html/parser/state/state.factor index cda601866e..1b3f188a78 100644 --- a/extra/html/parser/state/state.factor +++ b/extra/html/parser/state/state.factor @@ -5,22 +5,22 @@ IN: html.parser.state TUPLE: state string i ; -: get-i ( -- i ) state get i>> ; +: get-i ( -- i ) state get i>> ; inline : get-char ( -- char ) - state get [ i>> ] [ string>> ] bi ?nth ; + state get [ i>> ] [ string>> ] bi ?nth ; inline : get-next ( -- char ) - state get [ i>> 1+ ] [ string>> ] bi ?nth ; + state get [ i>> 1+ ] [ string>> ] bi ?nth ; inline : next ( -- ) - state get [ 1+ ] change-i drop ; + state get [ 1+ ] change-i drop ; inline : string-parse ( string quot -- ) - [ 0 state boa state ] dip with-variable ; + [ 0 state boa state ] dip with-variable ; inline : short* ( n seq -- n' seq ) - over [ nip dup length swap ] unless ; + over [ nip dup length swap ] unless ; inline : skip-until ( quot: ( -- ? ) -- ) get-char [ @@ -30,12 +30,12 @@ TUPLE: state string i ; : take-until ( quot: ( -- ? ) -- ) get-i [ skip-until ] dip get-i - state get string>> subseq ; + state get string>> subseq ; inline : string-matches? ( string circular -- ? ) - get-char over push-growing-circular sequence= ; + get-char over push-growing-circular sequence= ; inline : take-string ( match -- string ) dup length [ 2dup string-matches? 
] take-until nip - dup length rot length 1- - head next ; + dup length rot length 1- - head next ; inline diff --git a/extra/mason/build/build.factor b/extra/mason/build/build.factor index 706dc12616..90ca1d31ff 100644 --- a/extra/mason/build/build.factor +++ b/extra/mason/build/build.factor @@ -5,6 +5,8 @@ io.files io.launcher mason.child mason.cleanup mason.common mason.help mason.release mason.report namespaces prettyprint ; IN: mason.build +QUALIFIED: continuations + : create-build-dir ( -- ) now datestamp stamp set build-dir make-directory ; @@ -21,10 +23,11 @@ IN: mason.build create-build-dir enter-build-dir clone-builds-factor - record-id - build-child - upload-help - release - cleanup ; + [ + record-id + build-child + upload-help + release + ] [ cleanup ] [ ] continuations:cleanup ; -MAIN: build \ No newline at end of file +MAIN: build diff --git a/extra/robots/authors.txt b/extra/robots/authors.txt new file mode 100644 index 0000000000..b4bd0e7b35 --- /dev/null +++ b/extra/robots/authors.txt @@ -0,0 +1 @@ +Doug Coleman \ No newline at end of file diff --git a/extra/robots/robots-tests.factor b/extra/robots/robots-tests.factor new file mode 100644 index 0000000000..a590d9eee0 --- /dev/null +++ b/extra/robots/robots-tests.factor @@ -0,0 +1,334 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: calendar io.encodings.utf8 io.files robots tools.test ; +IN: robots.tests + +[ +{ "http://www.chiplist.com/sitemap.txt" } +{ + T{ rules + { user-agents V{ "*" } } + { allows V{ } } + { disallows + V{ + "/cgi-bin/" + "/scripts/" + "/ChipList2/scripts/" + "/ChipList2/styles/" + "/ads/" + "/ChipList2/ads/" + "/advertisements/" + "/ChipList2/advertisements/" + "/graphics/" + "/ChipList2/graphics/" + } + } + { visit-time + { + T{ timestamp { hour 2 } } + T{ timestamp { hour 5 } } + } + } + { request-rate 1 } + { crawl-delay 1 } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "UbiCrawler" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "DOC" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "Zao" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "sitecheck.internetseer.com" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "Zealbot" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "MSIECrawler" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "SiteSnagger" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "WebStripper" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "WebCopier" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "Fetch" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "Offline Explorer" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "Teleport" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "TeleportPro" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "WebZIP" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ 
"linko" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "HTTrack" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "Microsoft.URL.Control" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "Xenu" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "larbin" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "libwww" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "ZyBORG" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "Download Ninja" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "wget" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "grub-client" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "k2spider" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "NPBot" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents V{ "WebReaper" } } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } + T{ rules + { user-agents + V{ + "abot" + "ALeadSoftbot" + "BeijingCrawler" + "BilgiBot" + "bot" + "botlist" + "BOTW Spider" + "bumblebee" + "Bumblebee" + "BuzzRankingBot" + "Charlotte" + "Clushbot" + "Crawler" + "CydralSpider" + "DataFountains" + "DiamondBot" + "Dulance bot" + "DYNAMIC" + "EARTHCOM.info" + "EDI" + "envolk" + "Exabot" + "Exabot-Images" + "Exabot-Test" + "exactseek-pagereaper" + "Exalead NG" + "FANGCrawl" + "Feed::Find" + "flatlandbot" + "Gigabot" + "GigabotSiteSearch" + "GurujiBot" + "Hatena Antenna" + "Hatena Bookmark" + "Hatena RSS" + "HatenaScreenshot" + "Helix" + "HiddenMarket" + "HyperEstraier" + "iaskspider" + "IIITBOT" + "InfociousBot" + "iVia" + "iVia Page Fetcher" + "Jetbot" + "Kolinka Forum Search" + "KRetrieve" + "LetsCrawl.com" + "Lincoln State Web Browser" + "Links4US-Crawler" + "LOOQ" + "Lsearch/sondeur" + "MapoftheInternet.com" + "NationalDirectory" + "NetCarta_WebMapper" + "NewsGator" + "NextGenSearchBot" + "ng" + "nicebot" + "NP" + "NPBot" + "Nudelsalat" + "Nutch" + "OmniExplorer_Bot" + "OpenIntelligenceData" + "Oracle Enterprise Search" + "Pajaczek" + "panscient.com" + "PeerFactor 404 crawler" + "PeerFactor Crawler" + "PlantyNet" + "PlantyNet_WebRobot" + "plinki" + "PMAFind" + "Pogodak!" 
+ "QuickFinder Crawler" + "Radiation Retriever" + "Reaper" + "RedCarpet" + "ScorpionBot" + "Scrubby" + "Scumbot" + "searchbot" + "Seeker.lookseek.com" + "SeznamBot" + "ShowXML" + "snap.com" + "snap.com beta crawler" + "Snapbot" + "SnapPreviewBot" + "sohu" + "SpankBot" + "Speedy Spider" + "Speedy_Spider" + "SpeedySpider" + "spider" + "SquigglebotBot" + "SurveyBot" + "SynapticSearch" + "T-H-U-N-D-E-R-S-T-O-N-E" + "Talkro Web-Shot" + "Tarantula" + "TerrawizBot" + "TheInformant" + "TMCrawler" + "TridentSpider" + "Tutorial Crawler" + "Twiceler" + "unwrapbot" + "URI::Fetch" + "VengaBot" + "Vonna.com b o t" + "Vortex" + "Votay bot" + "WebAlta Crawler" + "Webbot" + "Webclipping.com" + "WebCorp" + "Webinator" + "WIRE" + "WISEbot" + "Xerka WebBot" + "XSpider" + "YodaoBot" + "Yoono" + "yoono" + } + } + { allows V{ } } + { disallows V{ "/" } } + { unknowns H{ } } + } +} +] [ "vocab:robots/robots.txt" utf8 file-contents parse-robots.txt ] unit-test diff --git a/extra/robots/robots.factor b/extra/robots/robots.factor new file mode 100644 index 0000000000..1b2422f06e --- /dev/null +++ b/extra/robots/robots.factor @@ -0,0 +1,68 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors http.client kernel unicode.categories +sequences urls splitting combinators splitting.monotonic +combinators.short-circuit assocs unicode.case arrays +math.parser calendar.format make ; +IN: robots + +! visit-time is GMT, request-rate is pages/second +! crawl-rate is seconds +TUPLE: rules user-agents allows disallows +visit-time request-rate crawl-delay unknowns ; + +robots.txt-url ( url -- url' ) + >url URL" robots.txt" derive-url ; + +: get-robots.txt ( url -- headers robots.txt ) + >robots.txt-url http-get ; + +: normalize-robots.txt ( string -- sitemaps seq ) + string-lines + [ [ blank? ] trim ] map + [ "#" head? not ] filter harvest + [ ":" split1 [ [ blank? 
] trim ] bi@ [ >lower ] dip ] { } map>assoc + [ first "sitemap" = ] partition [ values ] dip + [ + { + [ [ first "user-agent" = ] bi@ and ] + [ nip first "user-agent" = not ] + } 2|| + ] monotonic-split ; + +: ( -- rules ) + rules new + V{ } clone >>user-agents + V{ } clone >>allows + V{ } clone >>disallows + H{ } clone >>unknowns ; + +: add-user-agent ( rules agent -- rules ) over user-agents>> push ; +: add-allow ( rules allow -- rules ) over allows>> push ; +: add-disallow ( rules disallow -- rules ) over disallows>> push ; + +: parse-robots.txt-line ( rules seq -- rules ) + first2 swap { + { "user-agent" [ add-user-agent ] } + { "allow" [ add-allow ] } + { "disallow" [ add-disallow ] } + { "crawl-delay" [ string>number >>crawl-delay ] } + { "request-rate" [ string>number >>request-rate ] } + { + "visit-time" [ "-" split1 [ hhmm>timestamp ] bi@ 2array + >>visit-time + ] } + [ pick unknowns>> push-at ] + } case ; + +PRIVATE> + +: parse-robots.txt ( string -- sitemaps rules-seq ) + normalize-robots.txt [ + [ dup ] dip [ parse-robots.txt-line drop ] with each + ] map ; + +: robots ( url -- sitemaps rules-seq ) + get-robots.txt nip parse-robots.txt ; diff --git a/extra/robots/robots.txt b/extra/robots/robots.txt new file mode 100644 index 0000000000..bbaaee69e1 --- /dev/null +++ b/extra/robots/robots.txt @@ -0,0 +1,279 @@ + + +# robots.txt + +Sitemap: http://www.chiplist.com/sitemap.txt + +User-Agent: * + +Disallow: /cgi-bin/ +Disallow: /scripts/ +Disallow: /ChipList2/scripts/ +#Disallow: /styles/ +Disallow: /ChipList2/styles/ + +Disallow: /ads/ +Disallow: /ChipList2/ads/ +Disallow: /advertisements/ +Disallow: /ChipList2/advertisements/ + +Disallow: /graphics/ +Disallow: /ChipList2/graphics/ + +#Disallow: /ChipList1/ + + +# robots.txt for http://www.wikipedia.org/ and friends +# +# Please note: There are a lot of pages on this site, and there are +# some misbehaved spiders out there that go _way_ too fast. If you're +# irresponsible, your access to the site may be blocked. + +# Inktomi's "Slurp" can read a minimum delay between hits; if your +# bot supports such a thing using the 'Crawl-delay' or another +# instruction, please let us know. + +# *at least* 1 second please. preferably more :D +#User-agent: * +Crawl-delay: 1 +Request-rate: 1/1 +Visit-time: 0200-0500 + +# Crawlers that are kind enough to obey, but which we'd rather not have +# unless they're feeding search engines. +User-agent: UbiCrawler +Disallow: / + +User-agent: DOC +Disallow: / + +User-agent: Zao +Disallow: / + +# Some bots are known to be trouble, particularly those designed to copy +# entire sites. Please obey robots.txt. +User-agent: sitecheck.internetseer.com +Disallow: / + +User-agent: Zealbot +Disallow: / + +User-agent: MSIECrawler +Disallow: / + +User-agent: SiteSnagger +Disallow: / + +User-agent: WebStripper +Disallow: / + +User-agent: WebCopier +Disallow: / + +User-agent: Fetch +Disallow: / + +User-agent: Offline Explorer +Disallow: / + +User-agent: Teleport +Disallow: / + +User-agent: TeleportPro +Disallow: / + +User-agent: WebZIP +Disallow: / + +User-agent: linko +Disallow: / + +User-agent: HTTrack +Disallow: / + +User-agent: Microsoft.URL.Control +Disallow: / + +User-agent: Xenu +Disallow: / + +User-agent: larbin +Disallow: / + +User-agent: libwww +Disallow: / + +User-agent: ZyBORG +Disallow: / + +User-agent: Download Ninja +Disallow: / + +# +# Sorry, wget in its recursive mode is a frequent problem. 
+# Please read the man page and use it properly; there is a +# --wait option you can use to set the delay between hits, +# for instance. +# +User-agent: wget +Disallow: / + +# +# The 'grub' distributed client has been *very* poorly behaved. +# +User-agent: grub-client +Disallow: / + +# +# Doesn't follow robots.txt anyway, but... +# +User-agent: k2spider +Disallow: / + +# +# Hits many times per second, not acceptable +# http://www.nameprotect.com/botinfo.html +User-agent: NPBot +Disallow: / + +# A capture bot, downloads gazillions of pages with no public benefit +# http://www.webreaper.net/ +User-agent: WebReaper +Disallow: / + + +# Provided courtesy of http://browsers.garykeith.com. +# Created on February 13, 2008 at 7:39:00 PM GMT. +# +# Place this file in the root public folder of your website. +# It will stop the following bots from indexing your website. +# +User-agent: abot +User-agent: ALeadSoftbot +User-agent: BeijingCrawler +User-agent: BilgiBot +User-agent: bot +User-agent: botlist +User-agent: BOTW Spider +User-agent: bumblebee +User-agent: Bumblebee +User-agent: BuzzRankingBot +User-agent: Charlotte +User-agent: Clushbot +User-agent: Crawler +User-agent: CydralSpider +User-agent: DataFountains +User-agent: DiamondBot +User-agent: Dulance bot +User-agent: DYNAMIC +User-agent: EARTHCOM.info +User-agent: EDI +User-agent: envolk +User-agent: Exabot +User-agent: Exabot-Images +User-agent: Exabot-Test +User-agent: exactseek-pagereaper +User-agent: Exalead NG +User-agent: FANGCrawl +User-agent: Feed::Find +User-agent: flatlandbot +User-agent: Gigabot +User-agent: GigabotSiteSearch +User-agent: GurujiBot +User-agent: Hatena Antenna +User-agent: Hatena Bookmark +User-agent: Hatena RSS +User-agent: HatenaScreenshot +User-agent: Helix +User-agent: HiddenMarket +User-agent: HyperEstraier +User-agent: iaskspider +User-agent: IIITBOT +User-agent: InfociousBot +User-agent: iVia +User-agent: iVia Page Fetcher +User-agent: Jetbot +User-agent: Kolinka Forum Search +User-agent: KRetrieve +User-agent: LetsCrawl.com +User-agent: Lincoln State Web Browser +User-agent: Links4US-Crawler +User-agent: LOOQ +User-agent: Lsearch/sondeur +User-agent: MapoftheInternet.com +User-agent: NationalDirectory +User-agent: NetCarta_WebMapper +User-agent: NewsGator +User-agent: NextGenSearchBot +User-agent: ng +User-agent: nicebot +User-agent: NP +User-agent: NPBot +User-agent: Nudelsalat +User-agent: Nutch +User-agent: OmniExplorer_Bot +User-agent: OpenIntelligenceData +User-agent: Oracle Enterprise Search +User-agent: Pajaczek +User-agent: panscient.com +User-agent: PeerFactor 404 crawler +User-agent: PeerFactor Crawler +User-agent: PlantyNet +User-agent: PlantyNet_WebRobot +User-agent: plinki +User-agent: PMAFind +User-agent: Pogodak! 
+User-agent: QuickFinder Crawler +User-agent: Radiation Retriever +User-agent: Reaper +User-agent: RedCarpet +User-agent: ScorpionBot +User-agent: Scrubby +User-agent: Scumbot +User-agent: searchbot +User-agent: Seeker.lookseek.com +User-agent: SeznamBot +User-agent: ShowXML +User-agent: snap.com +User-agent: snap.com beta crawler +User-agent: Snapbot +User-agent: SnapPreviewBot +User-agent: sohu +User-agent: SpankBot +User-agent: Speedy Spider +User-agent: Speedy_Spider +User-agent: SpeedySpider +User-agent: spider +User-agent: SquigglebotBot +User-agent: SurveyBot +User-agent: SynapticSearch +User-agent: T-H-U-N-D-E-R-S-T-O-N-E +User-agent: Talkro Web-Shot +User-agent: Tarantula +User-agent: TerrawizBot +User-agent: TheInformant +User-agent: TMCrawler +User-agent: TridentSpider +User-agent: Tutorial Crawler +User-agent: Twiceler +User-agent: unwrapbot +User-agent: URI::Fetch +User-agent: VengaBot +User-agent: Vonna.com b o t +User-agent: Vortex +User-agent: Votay bot +User-agent: WebAlta Crawler +User-agent: Webbot +User-agent: Webclipping.com +User-agent: WebCorp +User-agent: Webinator +User-agent: WIRE +User-agent: WISEbot +User-agent: Xerka WebBot +User-agent: XSpider +User-agent: YodaoBot +User-agent: Yoono +User-agent: yoono +Disallow: / + + diff --git a/extra/site-watcher/db/db.factor b/extra/site-watcher/db/db.factor index 0c62c7f791..a1a85f825f 100644 --- a/extra/site-watcher/db/db.factor +++ b/extra/site-watcher/db/db.factor @@ -65,9 +65,9 @@ TUPLE: reporting-site email url up? changed? last-up? error last-error ; update-tuple ; : sites-to-report ( -- seq ) - "select account.email, site.url, site.up, site.changed, site.last_up, site.error, site.last_error from account, site, watching_site where account.account_name = watching_site.account_name and site.site_id = watching_site.site_id and site.changed = '1'" sql-query + "select users.email, site.url, site.up, site.changed, site.last_up, site.error, site.last_error from users, site, watching_site where users.username = watching_site.account_name and site.site_id = watching_site.site_id and site.changed = '1'" sql-query [ [ reporting-site boa ] input dup select-tuple [ ] [ dup t >>up? insert-tuple ] ?if ; @@ -90,3 +90,8 @@ PRIVATE> : watching-sites ( username -- sites ) f select-tuples [ site-id>> site new swap >>site-id select-tuple ] map ; + +: site-watcher-path ( -- path ) "site-watcher.db" temp-file ; inline + +: with-site-watcher-db ( quot -- ) + site-watcher-path swap with-db ; inline diff --git a/extra/site-watcher/site-watcher-tests.factor b/extra/site-watcher/site-watcher-tests.factor index 68a4a440f6..dde5e65e7e 100644 --- a/extra/site-watcher/site-watcher-tests.factor +++ b/extra/site-watcher/site-watcher-tests.factor @@ -5,13 +5,6 @@ site-watcher.private kernel db io.directories io.files.temp continuations db.sqlite site-watcher.db.private ; IN: site-watcher.tests -: site-watcher-path ( -- path ) "site-watcher.db" temp-file ; inline - -[ site-watcher-path delete-file ] ignore-errors - -: with-sqlite-db ( quot -- ) - site-watcher-path swap with-db ; inline - :: fake-sites ( -- seq ) [ account ensure-table diff --git a/extra/site-watcher/site-watcher.factor b/extra/site-watcher/site-watcher.factor index 29a66afb13..114cdf3259 100644 --- a/extra/site-watcher/site-watcher.factor +++ b/extra/site-watcher/site-watcher.factor @@ -1,16 +1,17 @@ ! Copyright (C) 2009 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. 
USING: accessors alarms arrays calendar combinators -combinators.smart continuations debugger http.client -init io.streams.string kernel locals math math.parser -namespaces sequences site-watcher.db site-watcher.db.private smtp ; +combinators.smart continuations debugger http.client fry +init io.streams.string kernel locals math math.parser db +namespaces sequences site-watcher.db site-watcher.db.private +smtp ; IN: site-watcher SYMBOL: site-watcher-from "factor-site-watcher@gmail.com" site-watcher-from set-global SYMBOL: site-watcher-frequency -10 seconds site-watcher-frequency set-global +5 minutes site-watcher-frequency set-global SYMBOL: running-site-watcher [ f running-site-watcher set-global ] "site-watcher" add-init-hook @@ -44,13 +45,13 @@ SYMBOL: running-site-watcher PRIVATE> -: watch-sites ( -- ) - find-sites check-sites sites-to-report send-reports ; +: watch-sites ( db -- ) + [ find-sites check-sites sites-to-report send-reports ] with-db ; -: run-site-watcher ( -- ) - running-site-watcher get [ - [ watch-sites ] site-watcher-frequency get every - running-site-watcher set-global +: run-site-watcher ( db -- ) + [ running-site-watcher get ] dip '[ + [ _ watch-sites ] site-watcher-frequency get every + running-site-watcher set ] unless ; : stop-site-watcher ( -- ) diff --git a/extra/spider/spider-docs.factor b/extra/spider/spider-docs.factor index 41dd13e918..cdbd5e7e09 100644 --- a/extra/spider/spider-docs.factor +++ b/extra/spider/spider-docs.factor @@ -23,7 +23,7 @@ HELP: slurp-heap-while ARTICLE: "spider-tutorial" "Spider tutorial" "To create a new spider, call the " { $link <spider> } " word with a link to the site you wish to spider." -{ $code <" "http://concatentative.org" <spider> "> } +{ $code <" "http://concatenative.org" <spider> "> } "The max-depth is initialized to 0, which retrieves just the initial page. Let's initialize it to something more fun:" { $code <" 1 >>max-depth "> } "Now the spider will retrieve the first page and all the pages it links to in the same domain." $nl diff --git a/extra/webapps/site-watcher/site-watcher.factor b/extra/webapps/site-watcher/site-watcher.factor index af07ccebbb..e220cff1d4 100644 --- a/extra/webapps/site-watcher/site-watcher.factor +++ b/extra/webapps/site-watcher/site-watcher.factor @@ -122,10 +122,12 @@ CONSTANT: site-list-url URL" $site-watcher-app/" site-watcher-db main-responder set-global -: start-site-watcher ( -- ) - start-server ; - : init-db ( -- ) site-watcher-db [ { site account watching-site } [ ensure-table ] each - ] with-db ; \ No newline at end of file + ] with-db ; + +: start-site-watcher ( -- ) + init-db + site-watcher-db run-site-watcher + start-server ;
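---

The new calendar.format word feeds the Visit-time handling in the robots vocabulary: `hhmm>timestamp` reads two two-digit fields (hour, then minute) from the string and leaves the other slots zero, which is why the test fixture expects `T{ timestamp { hour 2 } }` for "0200". A quick sanity check, as a sketch rather than part of the patch; it assumes the quotation finishes by handing its seven values to the `<timestamp>` constructor ( year month day hour minute second gmt-offset -- timestamp ).

! Sketch only: "0200" and "0530" should parse to 02:00 and 05:30.
! Assumes hhmm>timestamp ends with the <timestamp> constructor.
USING: accessors calendar calendar.format tools.test ;

[ 2 ]  [ "0200" hhmm>timestamp hour>> ] unit-test
[ 30 ] [ "0530" hhmm>timestamp minute>> ] unit-test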
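The html.parser.state words form a small scanner over a `state` tuple kept in a dynamic variable, and the added `inline` declarations presumably let the optimizing compiler inline the quotation calls in the words that take quotations (with the tiny accessors inlined for speed). As a sketch, not part of the patch, the scanner can be driven directly on an ordinary string:

! Sketch only: take everything up to the first "<" of a tag.
! Leaves "hello " on the stack, then prints it.
USING: html.parser.state kernel prettyprint ;

"hello <b>world</b>" [ [ get-char CHAR: < = ] take-until ] string-parse .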
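In mason.build, the build steps are now wrapped in `continuations:cleanup` (qualified because mason has its own `cleanup` word), so the build directory is removed even when `build-child`, `upload-help`, or `release` throws, and the error is rethrown afterwards. A minimal sketch of the same shape; `risky-step` and `tidy-up` are made-up stand-ins for the real build steps.

! Sketch only: the cleanup quotation runs whether or not try throws.
USING: continuations io kernel ;

: risky-step ( -- ) "building..." print "boom" throw ;
: tidy-up    ( -- ) "cleaning up build directory" print ;

! tidy-up runs despite the error; cleanup then rethrows it, so we
! recover here just to keep the example self-contained.
[ [ risky-step ] [ tidy-up ] [ ] cleanup ]
[ drop "caught rethrown error" print ] recover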
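The robots vocabulary works in two stages: `normalize-robots.txt` drops comments and blank lines, splits each `field: value` line at the first colon, lowercases the field names, pulls the Sitemap entries out, and groups the remaining lines into per-user-agent runs with `monotonic-split`; `parse-robots.txt-line` then fills one rules tuple per group, each group assumed to be seeded with a fresh tuple from the `<rules>` constructor. A small made-up input, as a sketch:

! Sketch only: a tiny, invented robots.txt.
USING: accessors kernel prettyprint robots sequences ;

{
    "Sitemap: http://example.com/sitemap.xml"
    "User-agent: *"
    "Disallow: /private/"
    "Crawl-delay: 10"
} "\n" join
parse-robots.txt        ! ( string -- sitemaps rules-seq )
[ length . ]            ! 1, one sitemap URL
[
    first
    [ user-agents>> . ]  ! V{ "*" }
    [ disallows>> . ]    ! V{ "/private/" }
    [ crawl-delay>> . ]  ! 10
    tri
] bi*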
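Finally, site-watcher no longer assumes an ambient database connection: `watch-sites` and `run-site-watcher` now take a db object and open it with `with-db`, the poll interval defaults to five minutes, and the webapp hands in its own `site-watcher-db` from `start-site-watcher`. A sketch of driving it by hand against the default SQLite file from site-watcher.db; this assumes the db.sqlite backend's `<sqlite-db>` constructor ( path -- db ) and a schema already created by the webapp's `init-db`.

! Sketch only: assumes db.sqlite and an already-initialized schema.
USING: db.sqlite site-watcher site-watcher.db ;

site-watcher-path <sqlite-db> watch-sites      ! one polling pass
site-watcher-path <sqlite-db> run-site-watcher ! poll every five minutes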