Reverse complement benchmark from computer language shootout
parent
df5f7cc4df
commit
a8853daf1d
|
@ -0,0 +1,171 @@
|
||||||
|
>ONE Homo sapiens alu
|
||||||
|
GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA
|
||||||
|
TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT
|
||||||
|
AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG
|
||||||
|
GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG
|
||||||
|
CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGT
|
||||||
|
GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA
|
||||||
|
GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA
|
||||||
|
TTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAG
|
||||||
|
AATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCA
|
||||||
|
GCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGT
|
||||||
|
AATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACC
|
||||||
|
AGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTG
|
||||||
|
GTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACC
|
||||||
|
CGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAG
|
||||||
|
AGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTT
|
||||||
|
TGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACA
|
||||||
|
TGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCT
|
||||||
|
GTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGG
|
||||||
|
TTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGT
|
||||||
|
CTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG
|
||||||
|
CGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCG
|
||||||
|
TCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTA
|
||||||
|
CTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCG
|
||||||
|
AGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCG
|
||||||
|
GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACC
|
||||||
|
TGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAA
|
||||||
|
TACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGA
|
||||||
|
GGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACT
|
||||||
|
GCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTC
|
||||||
|
ACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGT
|
||||||
|
TCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGC
|
||||||
|
CGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCG
|
||||||
|
CTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTG
|
||||||
|
GGCGACAGAGCGAGACTCCG
|
||||||
|
>TWO IUB ambiguity codes
|
||||||
|
cttBtatcatatgctaKggNcataaaSatgtaaaDcDRtBggDtctttataattcBgtcg
|
||||||
|
tactDtDagcctatttSVHtHttKtgtHMaSattgWaHKHttttagacatWatgtRgaaa
|
||||||
|
NtactMcSMtYtcMgRtacttctWBacgaaatatagScDtttgaagacacatagtVgYgt
|
||||||
|
cattHWtMMWcStgttaggKtSgaYaaccWStcgBttgcgaMttBYatcWtgacaYcaga
|
||||||
|
gtaBDtRacttttcWatMttDBcatWtatcttactaBgaYtcttgttttttttYaaScYa
|
||||||
|
HgtgttNtSatcMtcVaaaStccRcctDaataataStcYtRDSaMtDttgttSagtRRca
|
||||||
|
tttHatSttMtWgtcgtatSSagactYaaattcaMtWatttaSgYttaRgKaRtccactt
|
||||||
|
tattRggaMcDaWaWagttttgacatgttctacaaaRaatataataaMttcgDacgaSSt
|
||||||
|
acaStYRctVaNMtMgtaggcKatcttttattaaaaagVWaHKYagtttttatttaacct
|
||||||
|
tacgtVtcVaattVMBcttaMtttaStgacttagattWWacVtgWYagWVRctDattBYt
|
||||||
|
gtttaagaagattattgacVatMaacattVctgtBSgaVtgWWggaKHaatKWcBScSWa
|
||||||
|
accRVacacaaactaccScattRatatKVtactatatttHttaagtttSKtRtacaaagt
|
||||||
|
RDttcaaaaWgcacatWaDgtDKacgaacaattacaRNWaatHtttStgttattaaMtgt
|
||||||
|
tgDcgtMgcatBtgcttcgcgaDWgagctgcgaggggVtaaScNatttacttaatgacag
|
||||||
|
cccccacatYScaMgtaggtYaNgttctgaMaacNaMRaacaaacaKctacatagYWctg
|
||||||
|
ttWaaataaaataRattagHacacaagcgKatacBttRttaagtatttccgatctHSaat
|
||||||
|
actcNttMaagtattMtgRtgaMgcataatHcMtaBSaRattagttgatHtMttaaKagg
|
||||||
|
YtaaBataSaVatactWtataVWgKgttaaaacagtgcgRatatacatVtHRtVYataSa
|
||||||
|
KtWaStVcNKHKttactatccctcatgWHatWaRcttactaggatctataDtDHBttata
|
||||||
|
aaaHgtacVtagaYttYaKcctattcttcttaataNDaaggaaaDYgcggctaaWSctBa
|
||||||
|
aNtgctggMBaKctaMVKagBaactaWaDaMaccYVtNtaHtVWtKgRtcaaNtYaNacg
|
||||||
|
gtttNattgVtttctgtBaWgtaattcaagtcaVWtactNggattctttaYtaaagccgc
|
||||||
|
tcttagHVggaYtgtNcDaVagctctctKgacgtatagYcctRYHDtgBattDaaDgccK
|
||||||
|
tcHaaStttMcctagtattgcRgWBaVatHaaaataYtgtttagMDMRtaataaggatMt
|
||||||
|
ttctWgtNtgtgaaaaMaatatRtttMtDgHHtgtcattttcWattRSHcVagaagtacg
|
||||||
|
ggtaKVattKYagactNaatgtttgKMMgYNtcccgSKttctaStatatNVataYHgtNa
|
||||||
|
BKRgNacaactgatttcctttaNcgatttctctataScaHtataRagtcRVttacDSDtt
|
||||||
|
aRtSatacHgtSKacYagttMHtWataggatgactNtatSaNctataVtttRNKtgRacc
|
||||||
|
tttYtatgttactttttcctttaaacatacaHactMacacggtWataMtBVacRaSaatc
|
||||||
|
cgtaBVttccagccBcttaRKtgtgcctttttRtgtcagcRttKtaaacKtaaatctcac
|
||||||
|
aattgcaNtSBaaccgggttattaaBcKatDagttactcttcattVtttHaaggctKKga
|
||||||
|
tacatcBggScagtVcacattttgaHaDSgHatRMaHWggtatatRgccDttcgtatcga
|
||||||
|
aacaHtaagttaRatgaVacttagattVKtaaYttaaatcaNatccRttRRaMScNaaaD
|
||||||
|
gttVHWgtcHaaHgacVaWtgttScactaagSgttatcttagggDtaccagWattWtRtg
|
||||||
|
ttHWHacgattBtgVcaYatcggttgagKcWtKKcaVtgaYgWctgYggVctgtHgaNcV
|
||||||
|
taBtWaaYatcDRaaRtSctgaHaYRttagatMatgcatttNattaDttaattgttctaa
|
||||||
|
ccctcccctagaWBtttHtBccttagaVaatMcBHagaVcWcagBVttcBtaYMccagat
|
||||||
|
gaaaaHctctaacgttagNWRtcggattNatcRaNHttcagtKttttgWatWttcSaNgg
|
||||||
|
gaWtactKKMaacatKatacNattgctWtatctaVgagctatgtRaHtYcWcttagccaa
|
||||||
|
tYttWttaWSSttaHcaaaaagVacVgtaVaRMgattaVcDactttcHHggHRtgNcctt
|
||||||
|
tYatcatKgctcctctatVcaaaaKaaaagtatatctgMtWtaaaacaStttMtcgactt
|
||||||
|
taSatcgDataaactaaacaagtaaVctaggaSccaatMVtaaSKNVattttgHccatca
|
||||||
|
cBVctgcaVatVttRtactgtVcaattHgtaaattaaattttYtatattaaRSgYtgBag
|
||||||
|
aHSBDgtagcacRHtYcBgtcacttacactaYcgctWtattgSHtSatcataaatataHt
|
||||||
|
cgtYaaMNgBaatttaRgaMaatatttBtttaaaHHKaatctgatWatYaacttMctctt
|
||||||
|
ttVctagctDaaagtaVaKaKRtaacBgtatccaaccactHHaagaagaaggaNaaatBW
|
||||||
|
attccgStaMSaMatBttgcatgRSacgttVVtaaDMtcSgVatWcaSatcttttVatag
|
||||||
|
ttactttacgatcaccNtaDVgSRcgVcgtgaacgaNtaNatatagtHtMgtHcMtagaa
|
||||||
|
attBgtataRaaaacaYKgtRccYtatgaagtaataKgtaaMttgaaRVatgcagaKStc
|
||||||
|
tHNaaatctBBtcttaYaBWHgtVtgacagcaRcataWctcaBcYacYgatDgtDHccta
|
||||||
|
>THREE Homo sapiens frequency
|
||||||
|
aacacttcaccaggtatcgtgaaggctcaagattacccagagaacctttgcaatataaga
|
||||||
|
atatgtatgcagcattaccctaagtaattatattctttttctgactcaaagtgacaagcc
|
||||||
|
ctagtgtatattaaatcggtatatttgggaaattcctcaaactatcctaatcaggtagcc
|
||||||
|
atgaaagtgatcaaaaaagttcgtacttataccatacatgaattctggccaagtaaaaaa
|
||||||
|
tagattgcgcaaaattcgtaccttaagtctctcgccaagatattaggatcctattactca
|
||||||
|
tatcgtgtttttctttattgccgccatccccggagtatctcacccatccttctcttaaag
|
||||||
|
gcctaatattacctatgcaaataaacatatattgttgaaaattgagaacctgatcgtgat
|
||||||
|
tcttatgtgtaccatatgtatagtaatcacgcgactatatagtgctttagtatcgcccgt
|
||||||
|
gggtgagtgaatattctgggctagcgtgagatagtttcttgtcctaatatttttcagatc
|
||||||
|
gaatagcttctatttttgtgtttattgacatatgtcgaaactccttactcagtgaaagtc
|
||||||
|
atgaccagatccacgaacaatcttcggaatcagtctcgttttacggcggaatcttgagtc
|
||||||
|
taacttatatcccgtcgcttactttctaacaccccttatgtatttttaaaattacgttta
|
||||||
|
ttcgaacgtacttggcggaagcgttattttttgaagtaagttacattgggcagactcttg
|
||||||
|
acattttcgatacgactttctttcatccatcacaggactcgttcgtattgatatcagaag
|
||||||
|
ctcgtgatgattagttgtcttctttaccaatactttgaggcctattctgcgaaatttttg
|
||||||
|
ttgccctgcgaacttcacataccaaggaacacctcgcaacatgccttcatatccatcgtt
|
||||||
|
cattgtaattcttacacaatgaatcctaagtaattacatccctgcgtaaaagatggtagg
|
||||||
|
ggcactgaggatatattaccaagcatttagttatgagtaatcagcaatgtttcttgtatt
|
||||||
|
aagttctctaaaatagttacatcgtaatgttatctcgggttccgcgaataaacgagatag
|
||||||
|
attcattatatatggccctaagcaaaaacctcctcgtattctgttggtaattagaatcac
|
||||||
|
acaatacgggttgagatattaattatttgtagtacgaagagatataaaaagatgaacaat
|
||||||
|
tactcaagtcaagatgtatacgggatttataataaaaatcgggtagagatctgctttgca
|
||||||
|
attcagacgtgccactaaatcgtaatatgtcgcgttacatcagaaagggtaactattatt
|
||||||
|
aattaataaagggcttaatcactacatattagatcttatccgatagtcttatctattcgt
|
||||||
|
tgtatttttaagcggttctaattcagtcattatatcagtgctccgagttctttattattg
|
||||||
|
ttttaaggatgacaaaatgcctcttgttataacgctgggagaagcagactaagagtcgga
|
||||||
|
gcagttggtagaatgaggctgcaaaagacggtctcgacgaatggacagactttactaaac
|
||||||
|
caatgaaagacagaagtagagcaaagtctgaagtggtatcagcttaattatgacaaccct
|
||||||
|
taatacttccctttcgccgaatactggcgtggaaaggttttaaaagtcgaagtagttaga
|
||||||
|
ggcatctctcgctcataaataggtagactactcgcaatccaatgtgactatgtaatactg
|
||||||
|
ggaacatcagtccgcgatgcagcgtgtttatcaaccgtccccactcgcctggggagacat
|
||||||
|
gagaccacccccgtggggattattagtccgcagtaatcgactcttgacaatccttttcga
|
||||||
|
ttatgtcatagcaatttacgacagttcagcgaagtgactactcggcgaaatggtattact
|
||||||
|
aaagcattcgaacccacatgaatgtgattcttggcaatttctaatccactaaagcttttc
|
||||||
|
cgttgaatctggttgtagatatttatataagttcactaattaagatcacggtagtatatt
|
||||||
|
gatagtgatgtctttgcaagaggttggccgaggaatttacggattctctattgatacaat
|
||||||
|
ttgtctggcttataactcttaaggctgaaccaggcgtttttagacgacttgatcagctgt
|
||||||
|
tagaatggtttggactccctctttcatgtcagtaacatttcagccgttattgttacgata
|
||||||
|
tgcttgaacaatattgatctaccacacacccatagtatattttataggtcatgctgttac
|
||||||
|
ctacgagcatggtattccacttcccattcaatgagtattcaacatcactagcctcagaga
|
||||||
|
tgatgacccacctctaataacgtcacgttgcggccatgtgaaacctgaacttgagtagac
|
||||||
|
gatatcaagcgctttaaattgcatataacatttgagggtaaagctaagcggatgctttat
|
||||||
|
ataatcaatactcaataataagatttgattgcattttagagttatgacacgacatagttc
|
||||||
|
actaacgagttactattcccagatctagactgaagtactgatcgagacgatccttacgtc
|
||||||
|
gatgatcgttagttatcgacttaggtcgggtctctagcggtattggtacttaaccggaca
|
||||||
|
ctatactaataacccatgatcaaagcataacagaatacagacgataatttcgccaacata
|
||||||
|
tatgtacagaccccaagcatgagaagctcattgaaagctatcattgaagtcccgctcaca
|
||||||
|
atgtgtcttttccagacggtttaactggttcccgggagtcctggagtttcgacttacata
|
||||||
|
aatggaaacaatgtattttgctaatttatctatagcgtcatttggaccaatacagaatat
|
||||||
|
tatgttgcctagtaatccactataacccgcaagtgctgatagaaaatttttagacgattt
|
||||||
|
ataaatgccccaagtatccctcccgtgaatcctccgttatactaattagtattcgttcat
|
||||||
|
acgtataccgcgcatatatgaacatttggcgataaggcgcgtgaattgttacgtgacaga
|
||||||
|
gatagcagtttcttgtgatatggttaacagacgtacatgaagggaaactttatatctata
|
||||||
|
gtgatgcttccgtagaaataccgccactggtctgccaatgatgaagtatgtagctttagg
|
||||||
|
tttgtactatgaggctttcgtttgtttgcagagtataacagttgcgagtgaaaaaccgac
|
||||||
|
gaatttatactaatacgctttcactattggctacaaaatagggaagagtttcaatcatga
|
||||||
|
gagggagtatatggatgctttgtagctaaaggtagaacgtatgtatatgctgccgttcat
|
||||||
|
tcttgaaagatacataagcgataagttacgacaattataagcaacatccctaccttcgta
|
||||||
|
acgatttcactgttactgcgcttgaaatacactatggggctattggcggagagaagcaga
|
||||||
|
tcgcgccgagcatatacgagacctataatgttgatgatagagaaggcgtctgaattgata
|
||||||
|
catcgaagtacactttctttcgtagtatctctcgtcctctttctatctccggacacaaga
|
||||||
|
attaagttatatatatagagtcttaccaatcatgttgaatcctgattctcagagttcttt
|
||||||
|
ggcgggccttgtgatgactgagaaacaatgcaatattgctccaaatttcctaagcaaatt
|
||||||
|
ctcggttatgttatgttatcagcaaagcgttacgttatgttatttaaatctggaatgacg
|
||||||
|
gagcgaagttcttatgtcggtgtgggaataattcttttgaagacagcactccttaaataa
|
||||||
|
tatcgctccgtgtttgtatttatcgaatgggtctgtaaccttgcacaagcaaatcggtgg
|
||||||
|
tgtatatatcggataacaattaatacgatgttcatagtgacagtatactgatcgagtcct
|
||||||
|
ctaaagtcaattacctcacttaacaatctcattgatgttgtgtcattcccggtatcgccc
|
||||||
|
gtagtatgtgctctgattgaccgagtgtgaaccaaggaacatctactaatgcctttgtta
|
||||||
|
ggtaagatctctctgaattccttcgtgccaacttaaaacattatcaaaatttcttctact
|
||||||
|
tggattaactacttttacgagcatggcaaattcccctgtggaagacggttcattattatc
|
||||||
|
ggaaaccttatagaaattgcgtgttgactgaaattagatttttattgtaagagttgcatc
|
||||||
|
tttgcgattcctctggtctagcttccaatgaacagtcctcccttctattcgacatcgggt
|
||||||
|
ccttcgtacatgtctttgcgatgtaataattaggttcggagtgtggccttaatgggtgca
|
||||||
|
actaggaatacaacgcaaatttgctgacatgatagcaaatcggtatgccggcaccaaaac
|
||||||
|
gtgctccttgcttagcttgtgaatgagactcagtagttaaataaatccatatctgcaatc
|
||||||
|
gattccacaggtattgtccactatctttgaactactctaagagatacaagcttagctgag
|
||||||
|
accgaggtgtatatgactacgctgatatctgtaaggtaccaatgcaggcaaagtatgcga
|
||||||
|
gaagctaataccggctgtttccagctttataagattaaaatttggctgtcctggcggcct
|
||||||
|
cagaattgttctatcgtaatcagttggttcattaattagctaagtacgaggtacaactta
|
||||||
|
tctgtcccagaacagctccacaagtttttttacagccgaaacccctgtgtgaatcttaat
|
||||||
|
atccaagcgcgttatctgattagagtttacaactcagtattttatcagtacgttttgttt
|
||||||
|
ccaacattacccggtatgacaaaatgacgccacgtgtcgaataatggtctgaccaatgta
|
||||||
|
ggaagtgaaaagataaatat
|
|
@ -0,0 +1,171 @@
|
||||||
|
>ONE Homo sapiens alu
|
||||||
|
CGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAAC
|
||||||
|
CTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACA
|
||||||
|
GGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCAT
|
||||||
|
GTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAA
|
||||||
|
AGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTC
|
||||||
|
TGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGG
|
||||||
|
GTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACC
|
||||||
|
ACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTG
|
||||||
|
GTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTA
|
||||||
|
CAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCT
|
||||||
|
GGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTC
|
||||||
|
TCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAAT
|
||||||
|
TTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCT
|
||||||
|
GACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCA
|
||||||
|
CCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGC
|
||||||
|
GCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCC
|
||||||
|
TCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTA
|
||||||
|
GTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGAT
|
||||||
|
CCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCT
|
||||||
|
TTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTC
|
||||||
|
ACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTG
|
||||||
|
GGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGT
|
||||||
|
TTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGG
|
||||||
|
CCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAG
|
||||||
|
TCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCG
|
||||||
|
CCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGC
|
||||||
|
GCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGG
|
||||||
|
CCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGC
|
||||||
|
TGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCG
|
||||||
|
CCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCA
|
||||||
|
AGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCC
|
||||||
|
CGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTC
|
||||||
|
GAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGC
|
||||||
|
GTGAGCCACCGCGCCCGGCC
|
||||||
|
>TWO IUB ambiguity codes
|
||||||
|
TAGGDHACHATCRGTRGVTGAGWTATGYTGCTGTCABACDWVTRTAAGAVVAGATTTNDA
|
||||||
|
GASMTCTGCATBYTTCAAKTTACMTATTACTTCATARGGYACMRTGTTTTYTATACVAAT
|
||||||
|
TTCTAKGDACKADACTATATNTANTCGTTCACGBCGYSCBHTANGGTGATCGTAAAGTAA
|
||||||
|
CTATBAAAAGATSTGWATBCSGAKHTTABBAACGTSYCATGCAAVATKTSKTASCGGAAT
|
||||||
|
WVATTTNTCCTTCTTCTTDDAGTGGTTGGATACVGTTAYMTMTBTACTTTHAGCTAGBAA
|
||||||
|
AAGAGKAAGTTRATWATCAGATTMDDTTTAAAVAAATATTKTCYTAAATTVCNKTTRACG
|
||||||
|
ADTATATTTATGATSADSCAATAWAGCGRTAGTGTAAGTGACVGRADYGTGCTACHVSDT
|
||||||
|
CTVCARCSYTTAATATARAAAATTTAATTTACDAATTGBACAGTAYAABATBTGCAGBVG
|
||||||
|
TGATGGDCAAAATBNMSTTABKATTGGSTCCTAGBTTACTTGTTTAGTTTATHCGATSTA
|
||||||
|
AAGTCGAKAAASTGTTTTAWAKCAGATATACTTTTMTTTTGBATAGAGGAGCMATGATRA
|
||||||
|
AAGGNCAYDCCDDGAAAGTHGBTAATCKYTBTACBGTBCTTTTTGDTAASSWTAAWAARA
|
||||||
|
TTGGCTAAGWGRADTYACATAGCTCBTAGATAWAGCAATNGTATMATGTTKMMAGTAWTC
|
||||||
|
CCNTSGAAWATWCAAAAMACTGAADNTYGATNAATCCGAYWNCTAACGTTAGAGDTTTTC
|
||||||
|
ATCTGGKRTAVGAABVCTGWGBTCTDVGKATTBTCTAAGGVADAAAVWTCTAGGGGAGGG
|
||||||
|
TTAGAACAATTAAHTAATNAAATGCATKATCTAAYRTDTCAGSAYTTYHGATRTTWAVTA
|
||||||
|
BGNTCDACAGBCCRCAGWCRTCABTGMMAWGMCTCAACCGATRTGBCAVAATCGTDWDAA
|
||||||
|
CAYAWAATWCTGGTAHCCCTAAGATAACSCTTAGTGSAACAWTBGTCDTTDGACWDBAAC
|
||||||
|
HTTTNGSKTYYAAYGGATNTGATTTAARTTAMBAATCTAAGTBTCATYTAACTTADTGTT
|
||||||
|
TCGATACGAAHGGCYATATACCWDTKYATDCSHTDTCAAAATGTGBACTGSCCVGATGTA
|
||||||
|
TCMMAGCCTTDAAABAATGAAGAGTAACTHATMGVTTAATAACCCGGTTVSANTGCAATT
|
||||||
|
GTGAGATTTAMGTTTAMAAYGCTGACAYAAAAAGGCACAMYTAAGVGGCTGGAABVTACG
|
||||||
|
GATTSTYGTBVAKTATWACCGTGTKAGTDTGTATGTTTAAAGGAAAAAGTAACATARAAA
|
||||||
|
GGTYCAMNYAAABTATAGNTSATANAGTCATCCTATWADKAACTRGTMSACDGTATSAYT
|
||||||
|
AAHSHGTAABYGACTYTATADTGSTATAGAGAAATCGNTAAAGGAAATCAGTTGTNCYMV
|
||||||
|
TNACDRTATBNATATASTAGAAMSCGGGANRCKKMCAAACATTNAGTCTRMAATBMTACC
|
||||||
|
CGTACTTCTBGDSYAATWGAAAATGACADDCHAKAAAYATATTKTTTTCACANACWAGAA
|
||||||
|
AKATCCTTATTAYKHKCTAAACARTATTTTDATBTVWCYGCAATACTAGGKAAASTTDGA
|
||||||
|
MGGCHTTHAATVCAHDRYAGGRCTATACGTCMAGAGAGCTBTHGNACARTCCBDCTAAGA
|
||||||
|
GCGGCTTTARTAAAGAATCCNAGTAWBTGACTTGAATTACWTVACAGAAABCAATNAAAC
|
||||||
|
CGTNTRANTTGAYCMAWBADTANABRGGTKTHTWTAGTTVCTMBKTAGMTVKCCAGCANT
|
||||||
|
TVAGSWTTAGCCGCRHTTTCCTTHNTATTAAGAAGAATAGGMTRAARTCTABGTACDTTT
|
||||||
|
TATAAVDHAHTATAGATCCTAGTAAGYTWATDWCATGAGGGATAGTAAMDMNGBASTWAM
|
||||||
|
TSTATRBAYDABATGTATATYCGCACTGTTTTAACMCWBTATAWAGTATBTSTATVTTAR
|
||||||
|
CCTMTTAAKADATCAACTAATYTSVTAKGDATTATGCKTCAYCAKAATACTTKAANGAGT
|
||||||
|
ATTSDAGATCGGAAATACTTAAYAAVGTATMCGCTTGTGTDCTAATYTATTTTATTTWAA
|
||||||
|
CAGWRCTATGTAGMTGTTTGTTYKTNGTTKTCAGAACNTRACCTACKTGSRATGTGGGGG
|
||||||
|
CTGTCATTAAGTAAATNGSTTABCCCCTCGCAGCTCWHTCGCGAAGCAVATGCKACGHCA
|
||||||
|
ACAKTTAATAACASAAADATTWNYTGTAATTGTTCGTMHACHTWATGTGCWTTTTGAAHY
|
||||||
|
ACTTTGTAYAMSAAACTTAADAAATATAGTABMATATYAATGSGGTAGTTTGTGTBYGGT
|
||||||
|
TWSGSVGWMATTDMTCCWWCABTCSVACAGBAATGTTKATBGTCAATAATCTTCTTAAAC
|
||||||
|
ARVAATHAGYBWCTRWCABGTWWAATCTAAGTCASTAAAKTAAGVKBAATTBGABACGTA
|
||||||
|
AGGTTAAATAAAAACTRMDTWBCTTTTTAATAAAAGATMGCCTACKAKNTBAGYRASTGT
|
||||||
|
ASSTCGTHCGAAKTTATTATATTYTTTGTAGAACATGTCAAAACTWTWTHGKTCCYAATA
|
||||||
|
AAGTGGAYTMCYTAARCSTAAATWAKTGAATTTRAGTCTSSATACGACWAKAASATDAAA
|
||||||
|
TGYYACTSAACAAHAKTSHYARGASTATTATTHAGGYGGASTTTBGAKGATSANAACACD
|
||||||
|
TRGSTTRAAAAAAAACAAGARTCVTAGTAAGATAWATGVHAAKATWGAAAAGTYAHVTAC
|
||||||
|
TCTGRTGTCAWGATRVAAKTCGCAAVCGASWGGTTRTCSAMCCTAACASGWKKAWDAATG
|
||||||
|
ACRCBACTATGTGTCTTCAAAHGSCTATATTTCGTVWAGAAGTAYCKGARAKSGKAGTAN
|
||||||
|
TTTCYACATWATGTCTAAAADMDTWCAATSTKDACAMAADADBSAAATAGGCTHAHAGTA
|
||||||
|
CGACVGAATTATAAAGAHCCVAYHGHTTTACATSTTTATGNCCMTAGCATATGATAVAAG
|
||||||
|
>THREE Homo sapiens frequency
|
||||||
|
ATATTTATCTTTTCACTTCCTACATTGGTCAGACCATTATTCGACACGTGGCGTCATTTT
|
||||||
|
GTCATACCGGGTAATGTTGGAAACAAAACGTACTGATAAAATACTGAGTTGTAAACTCTA
|
||||||
|
ATCAGATAACGCGCTTGGATATTAAGATTCACACAGGGGTTTCGGCTGTAAAAAAACTTG
|
||||||
|
TGGAGCTGTTCTGGGACAGATAAGTTGTACCTCGTACTTAGCTAATTAATGAACCAACTG
|
||||||
|
ATTACGATAGAACAATTCTGAGGCCGCCAGGACAGCCAAATTTTAATCTTATAAAGCTGG
|
||||||
|
AAACAGCCGGTATTAGCTTCTCGCATACTTTGCCTGCATTGGTACCTTACAGATATCAGC
|
||||||
|
GTAGTCATATACACCTCGGTCTCAGCTAAGCTTGTATCTCTTAGAGTAGTTCAAAGATAG
|
||||||
|
TGGACAATACCTGTGGAATCGATTGCAGATATGGATTTATTTAACTACTGAGTCTCATTC
|
||||||
|
ACAAGCTAAGCAAGGAGCACGTTTTGGTGCCGGCATACCGATTTGCTATCATGTCAGCAA
|
||||||
|
ATTTGCGTTGTATTCCTAGTTGCACCCATTAAGGCCACACTCCGAACCTAATTATTACAT
|
||||||
|
CGCAAAGACATGTACGAAGGACCCGATGTCGAATAGAAGGGAGGACTGTTCATTGGAAGC
|
||||||
|
TAGACCAGAGGAATCGCAAAGATGCAACTCTTACAATAAAAATCTAATTTCAGTCAACAC
|
||||||
|
GCAATTTCTATAAGGTTTCCGATAATAATGAACCGTCTTCCACAGGGGAATTTGCCATGC
|
||||||
|
TCGTAAAAGTAGTTAATCCAAGTAGAAGAAATTTTGATAATGTTTTAAGTTGGCACGAAG
|
||||||
|
GAATTCAGAGAGATCTTACCTAACAAAGGCATTAGTAGATGTTCCTTGGTTCACACTCGG
|
||||||
|
TCAATCAGAGCACATACTACGGGCGATACCGGGAATGACACAACATCAATGAGATTGTTA
|
||||||
|
AGTGAGGTAATTGACTTTAGAGGACTCGATCAGTATACTGTCACTATGAACATCGTATTA
|
||||||
|
ATTGTTATCCGATATATACACCACCGATTTGCTTGTGCAAGGTTACAGACCCATTCGATA
|
||||||
|
AATACAAACACGGAGCGATATTATTTAAGGAGTGCTGTCTTCAAAAGAATTATTCCCACA
|
||||||
|
CCGACATAAGAACTTCGCTCCGTCATTCCAGATTTAAATAACATAACGTAACGCTTTGCT
|
||||||
|
GATAACATAACATAACCGAGAATTTGCTTAGGAAATTTGGAGCAATATTGCATTGTTTCT
|
||||||
|
CAGTCATCACAAGGCCCGCCAAAGAACTCTGAGAATCAGGATTCAACATGATTGGTAAGA
|
||||||
|
CTCTATATATATAACTTAATTCTTGTGTCCGGAGATAGAAAGAGGACGAGAGATACTACG
|
||||||
|
AAAGAAAGTGTACTTCGATGTATCAATTCAGACGCCTTCTCTATCATCAACATTATAGGT
|
||||||
|
CTCGTATATGCTCGGCGCGATCTGCTTCTCTCCGCCAATAGCCCCATAGTGTATTTCAAG
|
||||||
|
CGCAGTAACAGTGAAATCGTTACGAAGGTAGGGATGTTGCTTATAATTGTCGTAACTTAT
|
||||||
|
CGCTTATGTATCTTTCAAGAATGAACGGCAGCATATACATACGTTCTACCTTTAGCTACA
|
||||||
|
AAGCATCCATATACTCCCTCTCATGATTGAAACTCTTCCCTATTTTGTAGCCAATAGTGA
|
||||||
|
AAGCGTATTAGTATAAATTCGTCGGTTTTTCACTCGCAACTGTTATACTCTGCAAACAAA
|
||||||
|
CGAAAGCCTCATAGTACAAACCTAAAGCTACATACTTCATCATTGGCAGACCAGTGGCGG
|
||||||
|
TATTTCTACGGAAGCATCACTATAGATATAAAGTTTCCCTTCATGTACGTCTGTTAACCA
|
||||||
|
TATCACAAGAAACTGCTATCTCTGTCACGTAACAATTCACGCGCCTTATCGCCAAATGTT
|
||||||
|
CATATATGCGCGGTATACGTATGAACGAATACTAATTAGTATAACGGAGGATTCACGGGA
|
||||||
|
GGGATACTTGGGGCATTTATAAATCGTCTAAAAATTTTCTATCAGCACTTGCGGGTTATA
|
||||||
|
GTGGATTACTAGGCAACATAATATTCTGTATTGGTCCAAATGACGCTATAGATAAATTAG
|
||||||
|
CAAAATACATTGTTTCCATTTATGTAAGTCGAAACTCCAGGACTCCCGGGAACCAGTTAA
|
||||||
|
ACCGTCTGGAAAAGACACATTGTGAGCGGGACTTCAATGATAGCTTTCAATGAGCTTCTC
|
||||||
|
ATGCTTGGGGTCTGTACATATATGTTGGCGAAATTATCGTCTGTATTCTGTTATGCTTTG
|
||||||
|
ATCATGGGTTATTAGTATAGTGTCCGGTTAAGTACCAATACCGCTAGAGACCCGACCTAA
|
||||||
|
GTCGATAACTAACGATCATCGACGTAAGGATCGTCTCGATCAGTACTTCAGTCTAGATCT
|
||||||
|
GGGAATAGTAACTCGTTAGTGAACTATGTCGTGTCATAACTCTAAAATGCAATCAAATCT
|
||||||
|
TATTATTGAGTATTGATTATATAAAGCATCCGCTTAGCTTTACCCTCAAATGTTATATGC
|
||||||
|
AATTTAAAGCGCTTGATATCGTCTACTCAAGTTCAGGTTTCACATGGCCGCAACGTGACG
|
||||||
|
TTATTAGAGGTGGGTCATCATCTCTGAGGCTAGTGATGTTGAATACTCATTGAATGGGAA
|
||||||
|
GTGGAATACCATGCTCGTAGGTAACAGCATGACCTATAAAATATACTATGGGTGTGTGGT
|
||||||
|
AGATCAATATTGTTCAAGCATATCGTAACAATAACGGCTGAAATGTTACTGACATGAAAG
|
||||||
|
AGGGAGTCCAAACCATTCTAACAGCTGATCAAGTCGTCTAAAAACGCCTGGTTCAGCCTT
|
||||||
|
AAGAGTTATAAGCCAGACAAATTGTATCAATAGAGAATCCGTAAATTCCTCGGCCAACCT
|
||||||
|
CTTGCAAAGACATCACTATCAATATACTACCGTGATCTTAATTAGTGAACTTATATAAAT
|
||||||
|
ATCTACAACCAGATTCAACGGAAAAGCTTTAGTGGATTAGAAATTGCCAAGAATCACATT
|
||||||
|
CATGTGGGTTCGAATGCTTTAGTAATACCATTTCGCCGAGTAGTCACTTCGCTGAACTGT
|
||||||
|
CGTAAATTGCTATGACATAATCGAAAAGGATTGTCAAGAGTCGATTACTGCGGACTAATA
|
||||||
|
ATCCCCACGGGGGTGGTCTCATGTCTCCCCAGGCGAGTGGGGACGGTTGATAAACACGCT
|
||||||
|
GCATCGCGGACTGATGTTCCCAGTATTACATAGTCACATTGGATTGCGAGTAGTCTACCT
|
||||||
|
ATTTATGAGCGAGAGATGCCTCTAACTACTTCGACTTTTAAAACCTTTCCACGCCAGTAT
|
||||||
|
TCGGCGAAAGGGAAGTATTAAGGGTTGTCATAATTAAGCTGATACCACTTCAGACTTTGC
|
||||||
|
TCTACTTCTGTCTTTCATTGGTTTAGTAAAGTCTGTCCATTCGTCGAGACCGTCTTTTGC
|
||||||
|
AGCCTCATTCTACCAACTGCTCCGACTCTTAGTCTGCTTCTCCCAGCGTTATAACAAGAG
|
||||||
|
GCATTTTGTCATCCTTAAAACAATAATAAAGAACTCGGAGCACTGATATAATGACTGAAT
|
||||||
|
TAGAACCGCTTAAAAATACAACGAATAGATAAGACTATCGGATAAGATCTAATATGTAGT
|
||||||
|
GATTAAGCCCTTTATTAATTAATAATAGTTACCCTTTCTGATGTAACGCGACATATTACG
|
||||||
|
ATTTAGTGGCACGTCTGAATTGCAAAGCAGATCTCTACCCGATTTTTATTATAAATCCCG
|
||||||
|
TATACATCTTGACTTGAGTAATTGTTCATCTTTTTATATCTCTTCGTACTACAAATAATT
|
||||||
|
AATATCTCAACCCGTATTGTGTGATTCTAATTACCAACAGAATACGAGGAGGTTTTTGCT
|
||||||
|
TAGGGCCATATATAATGAATCTATCTCGTTTATTCGCGGAACCCGAGATAACATTACGAT
|
||||||
|
GTAACTATTTTAGAGAACTTAATACAAGAAACATTGCTGATTACTCATAACTAAATGCTT
|
||||||
|
GGTAATATATCCTCAGTGCCCCTACCATCTTTTACGCAGGGATGTAATTACTTAGGATTC
|
||||||
|
ATTGTGTAAGAATTACAATGAACGATGGATATGAAGGCATGTTGCGAGGTGTTCCTTGGT
|
||||||
|
ATGTGAAGTTCGCAGGGCAACAAAAATTTCGCAGAATAGGCCTCAAAGTATTGGTAAAGA
|
||||||
|
AGACAACTAATCATCACGAGCTTCTGATATCAATACGAACGAGTCCTGTGATGGATGAAA
|
||||||
|
GAAAGTCGTATCGAAAATGTCAAGAGTCTGCCCAATGTAACTTACTTCAAAAAATAACGC
|
||||||
|
TTCCGCCAAGTACGTTCGAATAAACGTAATTTTAAAAATACATAAGGGGTGTTAGAAAGT
|
||||||
|
AAGCGACGGGATATAAGTTAGACTCAAGATTCCGCCGTAAAACGAGACTGATTCCGAAGA
|
||||||
|
TTGTTCGTGGATCTGGTCATGACTTTCACTGAGTAAGGAGTTTCGACATATGTCAATAAA
|
||||||
|
CACAAAAATAGAAGCTATTCGATCTGAAAAATATTAGGACAAGAAACTATCTCACGCTAG
|
||||||
|
CCCAGAATATTCACTCACCCACGGGCGATACTAAAGCACTATATAGTCGCGTGATTACTA
|
||||||
|
TACATATGGTACACATAAGAATCACGATCAGGTTCTCAATTTTCAACAATATATGTTTAT
|
||||||
|
TTGCATAGGTAATATTAGGCCTTTAAGAGAAGGATGGGTGAGATACTCCGGGGATGGCGG
|
||||||
|
CAATAAAGAAAAACACGATATGAGTAATAGGATCCTAATATCTTGGCGAGAGACTTAAGG
|
||||||
|
TACGAATTTTGCGCAATCTATTTTTTACTTGGCCAGAATTCATGTATGGTATAAGTACGA
|
||||||
|
ACTTTTTTGATCACTTTCATGGCTACCTGATTAGGATAGTTTGAGGAATTTCCCAAATAT
|
||||||
|
ACCGATTTAATATACACTAGGGCTTGTCACTTTGAGTCAGAAAAAGAATATAATTACTTA
|
||||||
|
GGGTAATGCTGCATACATATTCTTATATTGCAAAGGTTCTCTGGGTAATCTTGAGCCTTC
|
||||||
|
ACGATACCTGGTGAAGTGTT
|
|
@ -0,0 +1,59 @@
|
||||||
|
IN: temporary
|
||||||
|
USING: compiler hashtables io kernel math math namespaces
|
||||||
|
sequences strings vectors words words ;
|
||||||
|
|
||||||
|
! Complement lookup table, keyed by input character.
! Instead of a variable, we define an inline word which pushes
! the hash on the stack, for performance.
DEFER: trans-hash

[
    ! Defaults: every letter maps to its own upper-case form, so codes
    ! that are their own complement (N, S, W, ...) come out upper case
    ! regardless of the case they were read in. `set` takes the value
    ! first and the key second.
    26 [ CHAR: A + dup set ] each
    26 [ dup CHAR: A + swap CHAR: a + set ] each

    ! Complement pairs: the character at index i of the second string
    ! (key) translates to the character at index i of the first (value).
    "TGCAAKYRMBDHV"
    "ACGTUMRYKVHDB"
    2dup
    ! Upper-case keys ...
    [ set ] 2each
    ! ... and the same pairs again with lower-case keys; the values
    ! stay upper case.
    [ ch>lower set ] 2each
] make-hash

! Wrap the finished hashtable in a one-element quotation and install it
! as the body of trans-hash, then flag the word inline.
\ trans-hash swap unit define-compound
\ trans-hash t "inline" set-word-prop
|
||||||
|
|
||||||
|
! Build the reverse complement of a sequence of lines.
! seq is the collection of input lines in reading order; the result is
! one string buffer holding every character in reverse order, each
! replaced by its entry in trans-hash.
: translate-seq ( seq -- sbuf )
    [
        ! Pre-sized output buffer, bound to `building` so that % appends to it.
        2000000 <sbuf> building set

        ! Last line first, and each line back to front.
        <reversed> [ <reversed> % ] each

        ! Translate every character in place; the extra dup leaves the
        ! buffer on the stack as the result.
        building get
        dup [ trans-hash hash ] inject
    ] with-scope ;
|
||||||
|
|
||||||
|
! Variable naming the output stream. reverse-complement stores the
! value of stdio in it from inside the file writer's with-stream;
! show-seq and do-line print to the stream it holds.
SYMBOL: out
|
||||||
|
|
||||||
|
! Extract output line number n (zero-based) from sbuf as a string:
! the characters from index 60*n up to, but not including, 60*n+60,
! with the end clipped to the length of the buffer.
: seg ( sbuf n -- str )
    60 * swap
    ! Compute the clipped end index while keeping the buffer for <slice>.
    [ length over 60 + min ] keep
    <slice> >string ;
|
||||||
|
|
||||||
|
! Reverse-complement the collected lines in seq and print the result
! to the stream held in `out`, one 60-character segment per
! stream-print call.
: show-seq ( seq -- )
    translate-seq
    ! Number of segments: length divided by 60, rounded up.
    dup length 59 + 60 /i
    ! each-with passes the buffer along with each segment index.
    [ seg out get stream-print ] each-with ;
|
||||||
|
|
||||||
|
! Empty a resizable sequence in place by setting its length to zero.
: clear-seq ( seq -- )
    0 over set-length drop ;
|
||||||
|
|
||||||
|
! Process one input line. seq accumulates the lines of the record that
! is currently being read and is returned for the next call.
!   - An empty line is ignored; without this guard `first` would be
!     applied to an empty string and fail.
!   - A line starting with > or ; is a header: the record collected so
!     far is printed reverse-complemented, the header itself is printed
!     unchanged, and the accumulator is emptied.
!   - Any other line is appended to the accumulator.
: do-line ( seq line -- seq )
    dup empty? [
        drop
    ] [
        dup first ">;" memq? [
            over show-seq out get stream-print dup clear-seq
        ] [
            over push
        ] if
    ] if ;
|
||||||
|
|
||||||
|
! Main read loop. Reads lines until readln yields f, handing each one
! to do-line; when the input is exhausted, the record still held in
! the accumulator is printed.
: (reverse-complement) ( seq -- )
    readln dup [
        do-line (reverse-complement)
    ] [
        ! Discard the f left by readln, then flush the last record.
        drop show-seq
    ] if ;
|
||||||
|
|
||||||
|
! Entry point: read FASTA records from the file named infile and write
! their reverse complements to the file named outfile.
: reverse-complement ( infile outfile -- )
    <file-writer> [
        ! Store the current stdio in `out` before the reader's
        ! with-stream below is entered, so the printing words keep
        ! writing to it.
        stdio get out set
        <file-reader> [
            ! Start with an empty, pre-sized accumulator of lines.
            500000 <vector> (reverse-complement)
        ] with-stream
    ] with-stream ;
|
||||||
|
|
||||||
|
! Compile the three inner-loop words, in the same order as before.
\ translate-seq compile
\ seg compile
\ clear-seq compile
|
Loading…
Reference in New Issue