2008-02-12 16:51:34 -05:00
|
|
|
! Based on http://shootout.alioth.debian.org/gp4/benchmark.php?test=fasta&lang=java&id=2
|
2009-09-27 16:11:21 -04:00
|
|
|
USING: alien.c-types math kernel io io.files locals multiline
|
|
|
|
assocs sequences sequences.private benchmark.reverse-complement
|
|
|
|
hints io.encodings.ascii byte-arrays specialized-arrays ;
|
2009-09-09 23:33:34 -04:00
|
|
|
SPECIALIZED-ARRAY: double
|
2008-02-12 16:51:34 -05:00
|
|
|
IN: benchmark.fasta
|
|
|
|
|
2009-02-22 20:08:45 -05:00
|
|
|
CONSTANT: IM 139968
|
|
|
|
CONSTANT: IA 3877
|
|
|
|
CONSTANT: IC 29573
|
|
|
|
CONSTANT: initial-seed 42
|
|
|
|
CONSTANT: line-length 60
|
2008-02-12 16:51:34 -05:00
|
|
|
|
|
|
|
: random ( seed -- n seed )
|
|
|
|
>float IA * IC + IM mod [ IM /f ] keep ; inline
|
|
|
|
|
|
|
|
HINTS: random fixnum ;
|
|
|
|
|
2009-02-22 20:08:45 -05:00
|
|
|
CONSTANT: ALU "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
|
2008-02-12 16:51:34 -05:00
|
|
|
|
2009-02-23 22:40:17 -05:00
|
|
|
CONSTANT: IUB
|
2008-02-12 16:51:34 -05:00
|
|
|
{
|
|
|
|
{ CHAR: a 0.27 }
|
|
|
|
{ CHAR: c 0.12 }
|
|
|
|
{ CHAR: g 0.12 }
|
|
|
|
{ CHAR: t 0.27 }
|
|
|
|
|
|
|
|
{ CHAR: B 0.02 }
|
|
|
|
{ CHAR: D 0.02 }
|
|
|
|
{ CHAR: H 0.02 }
|
|
|
|
{ CHAR: K 0.02 }
|
|
|
|
{ CHAR: M 0.02 }
|
|
|
|
{ CHAR: N 0.02 }
|
|
|
|
{ CHAR: R 0.02 }
|
|
|
|
{ CHAR: S 0.02 }
|
|
|
|
{ CHAR: V 0.02 }
|
|
|
|
{ CHAR: W 0.02 }
|
|
|
|
{ CHAR: Y 0.02 }
|
2009-02-23 22:40:17 -05:00
|
|
|
}
|
2008-02-12 16:51:34 -05:00
|
|
|
|
2009-02-23 22:40:17 -05:00
|
|
|
CONSTANT: homo-sapiens
|
2008-02-12 16:51:34 -05:00
|
|
|
{
|
|
|
|
{ CHAR: a 0.3029549426680 }
|
|
|
|
{ CHAR: c 0.1979883004921 }
|
|
|
|
{ CHAR: g 0.1975473066391 }
|
|
|
|
{ CHAR: t 0.3015094502008 }
|
2009-02-23 22:40:17 -05:00
|
|
|
}
|
2008-02-12 16:51:34 -05:00
|
|
|
|
|
|
|
: make-cumulative ( freq -- chars floats )
|
2009-04-26 01:45:03 -04:00
|
|
|
[ keys >byte-array ]
|
|
|
|
[ values >double-array ] bi unclip [ + ] accumulate swap suffix ;
|
2008-02-12 16:51:34 -05:00
|
|
|
|
2008-03-13 04:38:01 -04:00
|
|
|
:: select-random ( seed chars floats -- seed elt )
|
2008-02-12 16:51:34 -05:00
|
|
|
floats seed random -rot
|
|
|
|
[ >= ] curry find drop
|
|
|
|
chars nth-unsafe ; inline
|
|
|
|
|
|
|
|
: make-random-fasta ( seed len chars floats -- seed )
|
2009-04-26 01:45:03 -04:00
|
|
|
[ rot drop select-random ] 2curry "" map-as print ; inline
|
2008-02-12 16:51:34 -05:00
|
|
|
|
|
|
|
: write-description ( desc id -- )
|
|
|
|
">" write write bl print ; inline
|
|
|
|
|
2008-02-26 19:40:32 -05:00
|
|
|
:: split-lines ( n quot -- )
|
2008-02-12 16:51:34 -05:00
|
|
|
n line-length /mod
|
|
|
|
[ [ line-length quot call ] times ] dip
|
2009-08-11 19:15:53 -04:00
|
|
|
quot unless-zero ; inline
|
2008-02-12 16:51:34 -05:00
|
|
|
|
|
|
|
: write-random-fasta ( seed n chars floats desc id -- seed )
|
|
|
|
write-description
|
|
|
|
[ make-random-fasta ] 2curry split-lines ; inline
|
|
|
|
|
2008-03-13 04:38:01 -04:00
|
|
|
:: make-repeat-fasta ( k len alu -- k' )
|
2009-10-27 22:50:31 -04:00
|
|
|
alu length :> kn
|
|
|
|
len [ k + kn mod alu nth-unsafe ] "" map-as print
|
|
|
|
k len + ; inline
|
2008-02-12 16:51:34 -05:00
|
|
|
|
|
|
|
: write-repeat-fasta ( n alu desc id -- )
|
|
|
|
write-description
|
2009-10-27 22:50:31 -04:00
|
|
|
0 :> k! :> alu
|
|
|
|
[| len | k len alu make-repeat-fasta k! ] split-lines ; inline
|
2008-02-12 16:51:34 -05:00
|
|
|
|
|
|
|
: fasta ( n out -- )
|
|
|
|
homo-sapiens make-cumulative
|
|
|
|
IUB make-cumulative
|
2009-10-27 22:50:31 -04:00
|
|
|
:> homo-sapiens-floats
|
|
|
|
:> homo-sapiens-chars
|
|
|
|
:> IUB-floats
|
|
|
|
:> IUB-chars
|
|
|
|
:> out
|
|
|
|
:> n
|
|
|
|
initial-seed :> seed
|
|
|
|
|
|
|
|
out ascii [
|
|
|
|
n 2 * ALU "Homo sapiens alu" "ONE" write-repeat-fasta
|
|
|
|
|
|
|
|
initial-seed
|
|
|
|
n 3 * homo-sapiens-chars homo-sapiens-floats "IUB ambiguity codes" "TWO" write-random-fasta
|
|
|
|
n 5 * IUB-chars IUB-floats "Homo sapiens frequency" "THREE" write-random-fasta
|
|
|
|
drop
|
|
|
|
] with-file-writer ;
|
2008-02-12 16:51:34 -05:00
|
|
|
|
2008-06-09 03:14:14 -04:00
|
|
|
: run-fasta ( -- ) 2500000 reverse-complement-in fasta ;
|
2008-02-12 16:51:34 -05:00
|
|
|
|
|
|
|
MAIN: run-fasta
|