fix unit test in robots

db4
Doug Coleman 2009-04-07 10:08:01 -05:00
parent 63cf5b04e1
commit 65802b6aaa
2 changed files with 324 additions and 323 deletions

View File

@ -1,6 +1,7 @@
! Copyright (C) 2009 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: calendar io.encodings.utf8 io.files robots tools.test ;
USING: calendar io.encodings.utf8 io.files robots tools.test
urls ;
IN: robots.tests
[
@ -11,16 +12,16 @@ IN: robots.tests
{ allows V{ } }
{ disallows
V{
"/cgi-bin/"
"/scripts/"
"/ChipList2/scripts/"
"/ChipList2/styles/"
"/ads/"
"/ChipList2/ads/"
"/advertisements/"
"/ChipList2/advertisements/"
"/graphics/"
"/ChipList2/graphics/"
URL" /cgi-bin/"
URL" /scripts/"
URL" /ChipList2/scripts/"
URL" /ChipList2/styles/"
URL" /ads/"
URL" /ChipList2/ads/"
URL" /advertisements/"
URL" /ChipList2/advertisements/"
URL" /graphics/"
URL" /ChipList2/graphics/"
}
}
{ visit-time
@ -36,163 +37,163 @@ IN: robots.tests
T{ rules
{ user-agents V{ "UbiCrawler" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "DOC" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "Zao" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "sitecheck.internetseer.com" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "Zealbot" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "MSIECrawler" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "SiteSnagger" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "WebStripper" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "WebCopier" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "Fetch" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "Offline Explorer" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "Teleport" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "TeleportPro" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "WebZIP" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "linko" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "HTTrack" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "Microsoft.URL.Control" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "Xenu" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "larbin" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "libwww" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "ZyBORG" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "Download Ninja" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "wget" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "grub-client" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "k2spider" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "NPBot" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
{ user-agents V{ "WebReaper" } }
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
T{ rules
@ -327,7 +328,7 @@ IN: robots.tests
}
}
{ allows V{ } }
{ disallows V{ "/" } }
{ disallows V{ URL" /" } }
{ unknowns H{ } }
}
}

View File

@ -85,7 +85,7 @@ PRIVATE>
: parse-robots.txt ( string -- sitemaps rules-seq )
normalize-robots.txt [
[ <rules> dup ] dip [ parse-robots.txt-line drop ] with each
] map first ;
] map ;
: robots ( url -- robots )
>url