New benchmark
parent
fe071a8908
commit
a3d9f86340
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,13 @@
|
|||
agggtaaa|tttaccct 0
|
||||
[cgt]gggtaaa|tttaccc[acg] 3
|
||||
a[act]ggtaaa|tttacc[agt]t 9
|
||||
ag[act]gtaaa|tttac[agt]ct 8
|
||||
agg[act]taaa|ttta[agt]cct 10
|
||||
aggg[acg]aaa|ttt[cgt]ccct 3
|
||||
agggt[cgt]aa|tt[acg]accct 4
|
||||
agggta[cgt]a|t[acg]taccct 3
|
||||
agggtaa[cgt]|[acg]ttaccct 5
|
||||
|
||||
101745
|
||||
100000
|
||||
133640
|
|
@ -0,0 +1,10 @@
|
|||
USING: benchmark.regex-dna io io.files io.encodings.ascii
|
||||
io.streams.string kernel tools.test ;
|
||||
IN: benchmark.regex-dna.tests
|
||||
|
||||
[ t ] [
|
||||
"resource:extra/benchmark/regex-dna/regex-dna-test-in.txt"
|
||||
[ regex-dna ] with-string-writer
|
||||
"resource:extra/benchmark/regex-dna/regex-dna-test-out.txt"
|
||||
ascii file-contents =
|
||||
] unit-test
|
|
@ -0,0 +1,60 @@
|
|||
! Copyright (C) 2008 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors regexp prettyprint io io.encodings.ascii
|
||||
io.files kernel sequences assocs namespaces ;
|
||||
IN: benchmark.regex-dna
|
||||
|
||||
! Based on http://shootout.alioth.debian.org/gp4/benchmark.php?test=regexdna&lang=ruby&id=1
|
||||
|
||||
: strip-line-breaks ( string -- string' )
|
||||
R/ >.*\n|\n/ "" re-replace ;
|
||||
|
||||
: count-patterns ( string -- )
|
||||
{
|
||||
R/ agggtaaa|tttaccct/i,
|
||||
R/ [cgt]gggtaaa|tttaccc[acg]/i,
|
||||
R/ a[act]ggtaaa|tttacc[agt]t/i,
|
||||
R/ ag[act]gtaaa|tttac[agt]ct/i,
|
||||
R/ agg[act]taaa|ttta[agt]cct/i,
|
||||
R/ aggg[acg]aaa|ttt[cgt]ccct/i,
|
||||
R/ agggt[cgt]aa|tt[acg]accct/i,
|
||||
R/ agggta[cgt]a|t[acg]taccct/i,
|
||||
R/ agggtaa[cgt]|[acg]ttaccct/i
|
||||
} [
|
||||
[ raw>> write bl ]
|
||||
[ count-matches . ]
|
||||
bi
|
||||
] with each ;
|
||||
|
||||
: do-replacements ( string -- string' )
|
||||
{
|
||||
{ R/ B/ "(c|g|t)" }
|
||||
{ R/ D/ "(a|g|t)" }
|
||||
{ R/ H/ "(a|c|t)" }
|
||||
{ R/ K/ "(g|t)" }
|
||||
{ R/ M/ "(a|c)" }
|
||||
{ R/ N/ "(a|c|g|t)" }
|
||||
{ R/ R/ "(a|g)" }
|
||||
{ R/ S/ "(c|t)" }
|
||||
{ R/ V/ "(a|c|g)" }
|
||||
{ R/ W/ "(a|t)" }
|
||||
{ R/ Y/ "(c|t)" }
|
||||
} [ re-replace ] assoc-each ;
|
||||
|
||||
SYMBOL: ilen
|
||||
SYMBOL: clen
|
||||
|
||||
: regex-dna ( file -- )
|
||||
ascii file-contents dup length ilen set
|
||||
strip-line-breaks dup length clen set
|
||||
dup count-patterns
|
||||
do-replacements
|
||||
nl
|
||||
ilen get .
|
||||
clen get .
|
||||
length . ;
|
||||
|
||||
: regex-dna-main ( -- )
|
||||
"resource:extra/benchmark/regex-dna/regex-dna-test-in.txt" regex-dna ;
|
||||
|
||||
MAIN: regex-dna-main
|
Loading…
Reference in New Issue