New benchmark

db4
Slava Pestov 2008-09-22 02:37:48 -05:00
parent fe071a8908
commit a3d9f86340
4 changed files with 1754 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,13 @@
agggtaaa|tttaccct 0
[cgt]gggtaaa|tttaccc[acg] 3
a[act]ggtaaa|tttacc[agt]t 9
ag[act]gtaaa|tttac[agt]ct 8
agg[act]taaa|ttta[agt]cct 10
aggg[acg]aaa|ttt[cgt]ccct 3
agggt[cgt]aa|tt[acg]accct 4
agggta[cgt]a|t[acg]taccct 3
agggtaa[cgt]|[acg]ttaccct 5
101745
100000
133640

View File

@ -0,0 +1,10 @@
USING: benchmark.regex-dna io io.files io.encodings.ascii
io.streams.string kernel tools.test ;
IN: benchmark.regex-dna.tests
[ t ] [
"resource:extra/benchmark/regex-dna/regex-dna-test-in.txt"
[ regex-dna ] with-string-writer
"resource:extra/benchmark/regex-dna/regex-dna-test-out.txt"
ascii file-contents =
] unit-test

View File

@ -0,0 +1,60 @@
! Copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors regexp prettyprint io io.encodings.ascii
io.files kernel sequences assocs namespaces ;
IN: benchmark.regex-dna
! Based on http://shootout.alioth.debian.org/gp4/benchmark.php?test=regexdna&lang=ruby&id=1
: strip-line-breaks ( string -- string' )
R/ >.*\n|\n/ "" re-replace ;
: count-patterns ( string -- )
{
R/ agggtaaa|tttaccct/i,
R/ [cgt]gggtaaa|tttaccc[acg]/i,
R/ a[act]ggtaaa|tttacc[agt]t/i,
R/ ag[act]gtaaa|tttac[agt]ct/i,
R/ agg[act]taaa|ttta[agt]cct/i,
R/ aggg[acg]aaa|ttt[cgt]ccct/i,
R/ agggt[cgt]aa|tt[acg]accct/i,
R/ agggta[cgt]a|t[acg]taccct/i,
R/ agggtaa[cgt]|[acg]ttaccct/i
} [
[ raw>> write bl ]
[ count-matches . ]
bi
] with each ;
: do-replacements ( string -- string' )
{
{ R/ B/ "(c|g|t)" }
{ R/ D/ "(a|g|t)" }
{ R/ H/ "(a|c|t)" }
{ R/ K/ "(g|t)" }
{ R/ M/ "(a|c)" }
{ R/ N/ "(a|c|g|t)" }
{ R/ R/ "(a|g)" }
{ R/ S/ "(c|t)" }
{ R/ V/ "(a|c|g)" }
{ R/ W/ "(a|t)" }
{ R/ Y/ "(c|t)" }
} [ re-replace ] assoc-each ;
SYMBOL: ilen
SYMBOL: clen
: regex-dna ( file -- )
ascii file-contents dup length ilen set
strip-line-breaks dup length clen set
dup count-patterns
do-replacements
nl
ilen get .
clen get .
length . ;
: regex-dna-main ( -- )
"resource:extra/benchmark/regex-dna/regex-dna-test-in.txt" regex-dna ;
MAIN: regex-dna-main