Reverse complement benchmark from computer language shootout
parent
df5f7cc4df
commit
a8853daf1d
|
@ -0,0 +1,171 @@
|
|||
>ONE Homo sapiens alu
|
||||
GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA
|
||||
TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT
|
||||
AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG
|
||||
GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG
|
||||
CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGT
|
||||
GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA
|
||||
GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA
|
||||
TTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAG
|
||||
AATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCA
|
||||
GCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGT
|
||||
AATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACC
|
||||
AGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTG
|
||||
GTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACC
|
||||
CGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAG
|
||||
AGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTT
|
||||
TGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACA
|
||||
TGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCT
|
||||
GTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGG
|
||||
TTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGT
|
||||
CTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG
|
||||
CGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCG
|
||||
TCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTA
|
||||
CTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCG
|
||||
AGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCG
|
||||
GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACC
|
||||
TGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAA
|
||||
TACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGA
|
||||
GGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACT
|
||||
GCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTC
|
||||
ACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGT
|
||||
TCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGC
|
||||
CGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCG
|
||||
CTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTG
|
||||
GGCGACAGAGCGAGACTCCG
|
||||
>TWO IUB ambiguity codes
|
||||
cttBtatcatatgctaKggNcataaaSatgtaaaDcDRtBggDtctttataattcBgtcg
|
||||
tactDtDagcctatttSVHtHttKtgtHMaSattgWaHKHttttagacatWatgtRgaaa
|
||||
NtactMcSMtYtcMgRtacttctWBacgaaatatagScDtttgaagacacatagtVgYgt
|
||||
cattHWtMMWcStgttaggKtSgaYaaccWStcgBttgcgaMttBYatcWtgacaYcaga
|
||||
gtaBDtRacttttcWatMttDBcatWtatcttactaBgaYtcttgttttttttYaaScYa
|
||||
HgtgttNtSatcMtcVaaaStccRcctDaataataStcYtRDSaMtDttgttSagtRRca
|
||||
tttHatSttMtWgtcgtatSSagactYaaattcaMtWatttaSgYttaRgKaRtccactt
|
||||
tattRggaMcDaWaWagttttgacatgttctacaaaRaatataataaMttcgDacgaSSt
|
||||
acaStYRctVaNMtMgtaggcKatcttttattaaaaagVWaHKYagtttttatttaacct
|
||||
tacgtVtcVaattVMBcttaMtttaStgacttagattWWacVtgWYagWVRctDattBYt
|
||||
gtttaagaagattattgacVatMaacattVctgtBSgaVtgWWggaKHaatKWcBScSWa
|
||||
accRVacacaaactaccScattRatatKVtactatatttHttaagtttSKtRtacaaagt
|
||||
RDttcaaaaWgcacatWaDgtDKacgaacaattacaRNWaatHtttStgttattaaMtgt
|
||||
tgDcgtMgcatBtgcttcgcgaDWgagctgcgaggggVtaaScNatttacttaatgacag
|
||||
cccccacatYScaMgtaggtYaNgttctgaMaacNaMRaacaaacaKctacatagYWctg
|
||||
ttWaaataaaataRattagHacacaagcgKatacBttRttaagtatttccgatctHSaat
|
||||
actcNttMaagtattMtgRtgaMgcataatHcMtaBSaRattagttgatHtMttaaKagg
|
||||
YtaaBataSaVatactWtataVWgKgttaaaacagtgcgRatatacatVtHRtVYataSa
|
||||
KtWaStVcNKHKttactatccctcatgWHatWaRcttactaggatctataDtDHBttata
|
||||
aaaHgtacVtagaYttYaKcctattcttcttaataNDaaggaaaDYgcggctaaWSctBa
|
||||
aNtgctggMBaKctaMVKagBaactaWaDaMaccYVtNtaHtVWtKgRtcaaNtYaNacg
|
||||
gtttNattgVtttctgtBaWgtaattcaagtcaVWtactNggattctttaYtaaagccgc
|
||||
tcttagHVggaYtgtNcDaVagctctctKgacgtatagYcctRYHDtgBattDaaDgccK
|
||||
tcHaaStttMcctagtattgcRgWBaVatHaaaataYtgtttagMDMRtaataaggatMt
|
||||
ttctWgtNtgtgaaaaMaatatRtttMtDgHHtgtcattttcWattRSHcVagaagtacg
|
||||
ggtaKVattKYagactNaatgtttgKMMgYNtcccgSKttctaStatatNVataYHgtNa
|
||||
BKRgNacaactgatttcctttaNcgatttctctataScaHtataRagtcRVttacDSDtt
|
||||
aRtSatacHgtSKacYagttMHtWataggatgactNtatSaNctataVtttRNKtgRacc
|
||||
tttYtatgttactttttcctttaaacatacaHactMacacggtWataMtBVacRaSaatc
|
||||
cgtaBVttccagccBcttaRKtgtgcctttttRtgtcagcRttKtaaacKtaaatctcac
|
||||
aattgcaNtSBaaccgggttattaaBcKatDagttactcttcattVtttHaaggctKKga
|
||||
tacatcBggScagtVcacattttgaHaDSgHatRMaHWggtatatRgccDttcgtatcga
|
||||
aacaHtaagttaRatgaVacttagattVKtaaYttaaatcaNatccRttRRaMScNaaaD
|
||||
gttVHWgtcHaaHgacVaWtgttScactaagSgttatcttagggDtaccagWattWtRtg
|
||||
ttHWHacgattBtgVcaYatcggttgagKcWtKKcaVtgaYgWctgYggVctgtHgaNcV
|
||||
taBtWaaYatcDRaaRtSctgaHaYRttagatMatgcatttNattaDttaattgttctaa
|
||||
ccctcccctagaWBtttHtBccttagaVaatMcBHagaVcWcagBVttcBtaYMccagat
|
||||
gaaaaHctctaacgttagNWRtcggattNatcRaNHttcagtKttttgWatWttcSaNgg
|
||||
gaWtactKKMaacatKatacNattgctWtatctaVgagctatgtRaHtYcWcttagccaa
|
||||
tYttWttaWSSttaHcaaaaagVacVgtaVaRMgattaVcDactttcHHggHRtgNcctt
|
||||
tYatcatKgctcctctatVcaaaaKaaaagtatatctgMtWtaaaacaStttMtcgactt
|
||||
taSatcgDataaactaaacaagtaaVctaggaSccaatMVtaaSKNVattttgHccatca
|
||||
cBVctgcaVatVttRtactgtVcaattHgtaaattaaattttYtatattaaRSgYtgBag
|
||||
aHSBDgtagcacRHtYcBgtcacttacactaYcgctWtattgSHtSatcataaatataHt
|
||||
cgtYaaMNgBaatttaRgaMaatatttBtttaaaHHKaatctgatWatYaacttMctctt
|
||||
ttVctagctDaaagtaVaKaKRtaacBgtatccaaccactHHaagaagaaggaNaaatBW
|
||||
attccgStaMSaMatBttgcatgRSacgttVVtaaDMtcSgVatWcaSatcttttVatag
|
||||
ttactttacgatcaccNtaDVgSRcgVcgtgaacgaNtaNatatagtHtMgtHcMtagaa
|
||||
attBgtataRaaaacaYKgtRccYtatgaagtaataKgtaaMttgaaRVatgcagaKStc
|
||||
tHNaaatctBBtcttaYaBWHgtVtgacagcaRcataWctcaBcYacYgatDgtDHccta
|
||||
>THREE Homo sapiens frequency
|
||||
aacacttcaccaggtatcgtgaaggctcaagattacccagagaacctttgcaatataaga
|
||||
atatgtatgcagcattaccctaagtaattatattctttttctgactcaaagtgacaagcc
|
||||
ctagtgtatattaaatcggtatatttgggaaattcctcaaactatcctaatcaggtagcc
|
||||
atgaaagtgatcaaaaaagttcgtacttataccatacatgaattctggccaagtaaaaaa
|
||||
tagattgcgcaaaattcgtaccttaagtctctcgccaagatattaggatcctattactca
|
||||
tatcgtgtttttctttattgccgccatccccggagtatctcacccatccttctcttaaag
|
||||
gcctaatattacctatgcaaataaacatatattgttgaaaattgagaacctgatcgtgat
|
||||
tcttatgtgtaccatatgtatagtaatcacgcgactatatagtgctttagtatcgcccgt
|
||||
gggtgagtgaatattctgggctagcgtgagatagtttcttgtcctaatatttttcagatc
|
||||
gaatagcttctatttttgtgtttattgacatatgtcgaaactccttactcagtgaaagtc
|
||||
atgaccagatccacgaacaatcttcggaatcagtctcgttttacggcggaatcttgagtc
|
||||
taacttatatcccgtcgcttactttctaacaccccttatgtatttttaaaattacgttta
|
||||
ttcgaacgtacttggcggaagcgttattttttgaagtaagttacattgggcagactcttg
|
||||
acattttcgatacgactttctttcatccatcacaggactcgttcgtattgatatcagaag
|
||||
ctcgtgatgattagttgtcttctttaccaatactttgaggcctattctgcgaaatttttg
|
||||
ttgccctgcgaacttcacataccaaggaacacctcgcaacatgccttcatatccatcgtt
|
||||
cattgtaattcttacacaatgaatcctaagtaattacatccctgcgtaaaagatggtagg
|
||||
ggcactgaggatatattaccaagcatttagttatgagtaatcagcaatgtttcttgtatt
|
||||
aagttctctaaaatagttacatcgtaatgttatctcgggttccgcgaataaacgagatag
|
||||
attcattatatatggccctaagcaaaaacctcctcgtattctgttggtaattagaatcac
|
||||
acaatacgggttgagatattaattatttgtagtacgaagagatataaaaagatgaacaat
|
||||
tactcaagtcaagatgtatacgggatttataataaaaatcgggtagagatctgctttgca
|
||||
attcagacgtgccactaaatcgtaatatgtcgcgttacatcagaaagggtaactattatt
|
||||
aattaataaagggcttaatcactacatattagatcttatccgatagtcttatctattcgt
|
||||
tgtatttttaagcggttctaattcagtcattatatcagtgctccgagttctttattattg
|
||||
ttttaaggatgacaaaatgcctcttgttataacgctgggagaagcagactaagagtcgga
|
||||
gcagttggtagaatgaggctgcaaaagacggtctcgacgaatggacagactttactaaac
|
||||
caatgaaagacagaagtagagcaaagtctgaagtggtatcagcttaattatgacaaccct
|
||||
taatacttccctttcgccgaatactggcgtggaaaggttttaaaagtcgaagtagttaga
|
||||
ggcatctctcgctcataaataggtagactactcgcaatccaatgtgactatgtaatactg
|
||||
ggaacatcagtccgcgatgcagcgtgtttatcaaccgtccccactcgcctggggagacat
|
||||
gagaccacccccgtggggattattagtccgcagtaatcgactcttgacaatccttttcga
|
||||
ttatgtcatagcaatttacgacagttcagcgaagtgactactcggcgaaatggtattact
|
||||
aaagcattcgaacccacatgaatgtgattcttggcaatttctaatccactaaagcttttc
|
||||
cgttgaatctggttgtagatatttatataagttcactaattaagatcacggtagtatatt
|
||||
gatagtgatgtctttgcaagaggttggccgaggaatttacggattctctattgatacaat
|
||||
ttgtctggcttataactcttaaggctgaaccaggcgtttttagacgacttgatcagctgt
|
||||
tagaatggtttggactccctctttcatgtcagtaacatttcagccgttattgttacgata
|
||||
tgcttgaacaatattgatctaccacacacccatagtatattttataggtcatgctgttac
|
||||
ctacgagcatggtattccacttcccattcaatgagtattcaacatcactagcctcagaga
|
||||
tgatgacccacctctaataacgtcacgttgcggccatgtgaaacctgaacttgagtagac
|
||||
gatatcaagcgctttaaattgcatataacatttgagggtaaagctaagcggatgctttat
|
||||
ataatcaatactcaataataagatttgattgcattttagagttatgacacgacatagttc
|
||||
actaacgagttactattcccagatctagactgaagtactgatcgagacgatccttacgtc
|
||||
gatgatcgttagttatcgacttaggtcgggtctctagcggtattggtacttaaccggaca
|
||||
ctatactaataacccatgatcaaagcataacagaatacagacgataatttcgccaacata
|
||||
tatgtacagaccccaagcatgagaagctcattgaaagctatcattgaagtcccgctcaca
|
||||
atgtgtcttttccagacggtttaactggttcccgggagtcctggagtttcgacttacata
|
||||
aatggaaacaatgtattttgctaatttatctatagcgtcatttggaccaatacagaatat
|
||||
tatgttgcctagtaatccactataacccgcaagtgctgatagaaaatttttagacgattt
|
||||
ataaatgccccaagtatccctcccgtgaatcctccgttatactaattagtattcgttcat
|
||||
acgtataccgcgcatatatgaacatttggcgataaggcgcgtgaattgttacgtgacaga
|
||||
gatagcagtttcttgtgatatggttaacagacgtacatgaagggaaactttatatctata
|
||||
gtgatgcttccgtagaaataccgccactggtctgccaatgatgaagtatgtagctttagg
|
||||
tttgtactatgaggctttcgtttgtttgcagagtataacagttgcgagtgaaaaaccgac
|
||||
gaatttatactaatacgctttcactattggctacaaaatagggaagagtttcaatcatga
|
||||
gagggagtatatggatgctttgtagctaaaggtagaacgtatgtatatgctgccgttcat
|
||||
tcttgaaagatacataagcgataagttacgacaattataagcaacatccctaccttcgta
|
||||
acgatttcactgttactgcgcttgaaatacactatggggctattggcggagagaagcaga
|
||||
tcgcgccgagcatatacgagacctataatgttgatgatagagaaggcgtctgaattgata
|
||||
catcgaagtacactttctttcgtagtatctctcgtcctctttctatctccggacacaaga
|
||||
attaagttatatatatagagtcttaccaatcatgttgaatcctgattctcagagttcttt
|
||||
ggcgggccttgtgatgactgagaaacaatgcaatattgctccaaatttcctaagcaaatt
|
||||
ctcggttatgttatgttatcagcaaagcgttacgttatgttatttaaatctggaatgacg
|
||||
gagcgaagttcttatgtcggtgtgggaataattcttttgaagacagcactccttaaataa
|
||||
tatcgctccgtgtttgtatttatcgaatgggtctgtaaccttgcacaagcaaatcggtgg
|
||||
tgtatatatcggataacaattaatacgatgttcatagtgacagtatactgatcgagtcct
|
||||
ctaaagtcaattacctcacttaacaatctcattgatgttgtgtcattcccggtatcgccc
|
||||
gtagtatgtgctctgattgaccgagtgtgaaccaaggaacatctactaatgcctttgtta
|
||||
ggtaagatctctctgaattccttcgtgccaacttaaaacattatcaaaatttcttctact
|
||||
tggattaactacttttacgagcatggcaaattcccctgtggaagacggttcattattatc
|
||||
ggaaaccttatagaaattgcgtgttgactgaaattagatttttattgtaagagttgcatc
|
||||
tttgcgattcctctggtctagcttccaatgaacagtcctcccttctattcgacatcgggt
|
||||
ccttcgtacatgtctttgcgatgtaataattaggttcggagtgtggccttaatgggtgca
|
||||
actaggaatacaacgcaaatttgctgacatgatagcaaatcggtatgccggcaccaaaac
|
||||
gtgctccttgcttagcttgtgaatgagactcagtagttaaataaatccatatctgcaatc
|
||||
gattccacaggtattgtccactatctttgaactactctaagagatacaagcttagctgag
|
||||
accgaggtgtatatgactacgctgatatctgtaaggtaccaatgcaggcaaagtatgcga
|
||||
gaagctaataccggctgtttccagctttataagattaaaatttggctgtcctggcggcct
|
||||
cagaattgttctatcgtaatcagttggttcattaattagctaagtacgaggtacaactta
|
||||
tctgtcccagaacagctccacaagtttttttacagccgaaacccctgtgtgaatcttaat
|
||||
atccaagcgcgttatctgattagagtttacaactcagtattttatcagtacgttttgttt
|
||||
ccaacattacccggtatgacaaaatgacgccacgtgtcgaataatggtctgaccaatgta
|
||||
ggaagtgaaaagataaatat
|
|
@ -0,0 +1,171 @@
|
|||
>ONE Homo sapiens alu
|
||||
CGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAAC
|
||||
CTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACA
|
||||
GGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCAT
|
||||
GTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAA
|
||||
AGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTC
|
||||
TGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGG
|
||||
GTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACC
|
||||
ACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTG
|
||||
GTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTA
|
||||
CAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCT
|
||||
GGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTC
|
||||
TCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAAT
|
||||
TTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCT
|
||||
GACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCA
|
||||
CCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGC
|
||||
GCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCC
|
||||
TCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTA
|
||||
GTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGAT
|
||||
CCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCT
|
||||
TTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTC
|
||||
ACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTG
|
||||
GGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGT
|
||||
TTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGG
|
||||
CCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAG
|
||||
TCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCG
|
||||
CCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGC
|
||||
GCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGG
|
||||
CCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGC
|
||||
TGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCG
|
||||
CCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCA
|
||||
AGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCC
|
||||
CGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTC
|
||||
GAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGC
|
||||
GTGAGCCACCGCGCCCGGCC
|
||||
>TWO IUB ambiguity codes
|
||||
TAGGDHACHATCRGTRGVTGAGWTATGYTGCTGTCABACDWVTRTAAGAVVAGATTTNDA
|
||||
GASMTCTGCATBYTTCAAKTTACMTATTACTTCATARGGYACMRTGTTTTYTATACVAAT
|
||||
TTCTAKGDACKADACTATATNTANTCGTTCACGBCGYSCBHTANGGTGATCGTAAAGTAA
|
||||
CTATBAAAAGATSTGWATBCSGAKHTTABBAACGTSYCATGCAAVATKTSKTASCGGAAT
|
||||
WVATTTNTCCTTCTTCTTDDAGTGGTTGGATACVGTTAYMTMTBTACTTTHAGCTAGBAA
|
||||
AAGAGKAAGTTRATWATCAGATTMDDTTTAAAVAAATATTKTCYTAAATTVCNKTTRACG
|
||||
ADTATATTTATGATSADSCAATAWAGCGRTAGTGTAAGTGACVGRADYGTGCTACHVSDT
|
||||
CTVCARCSYTTAATATARAAAATTTAATTTACDAATTGBACAGTAYAABATBTGCAGBVG
|
||||
TGATGGDCAAAATBNMSTTABKATTGGSTCCTAGBTTACTTGTTTAGTTTATHCGATSTA
|
||||
AAGTCGAKAAASTGTTTTAWAKCAGATATACTTTTMTTTTGBATAGAGGAGCMATGATRA
|
||||
AAGGNCAYDCCDDGAAAGTHGBTAATCKYTBTACBGTBCTTTTTGDTAASSWTAAWAARA
|
||||
TTGGCTAAGWGRADTYACATAGCTCBTAGATAWAGCAATNGTATMATGTTKMMAGTAWTC
|
||||
CCNTSGAAWATWCAAAAMACTGAADNTYGATNAATCCGAYWNCTAACGTTAGAGDTTTTC
|
||||
ATCTGGKRTAVGAABVCTGWGBTCTDVGKATTBTCTAAGGVADAAAVWTCTAGGGGAGGG
|
||||
TTAGAACAATTAAHTAATNAAATGCATKATCTAAYRTDTCAGSAYTTYHGATRTTWAVTA
|
||||
BGNTCDACAGBCCRCAGWCRTCABTGMMAWGMCTCAACCGATRTGBCAVAATCGTDWDAA
|
||||
CAYAWAATWCTGGTAHCCCTAAGATAACSCTTAGTGSAACAWTBGTCDTTDGACWDBAAC
|
||||
HTTTNGSKTYYAAYGGATNTGATTTAARTTAMBAATCTAAGTBTCATYTAACTTADTGTT
|
||||
TCGATACGAAHGGCYATATACCWDTKYATDCSHTDTCAAAATGTGBACTGSCCVGATGTA
|
||||
TCMMAGCCTTDAAABAATGAAGAGTAACTHATMGVTTAATAACCCGGTTVSANTGCAATT
|
||||
GTGAGATTTAMGTTTAMAAYGCTGACAYAAAAAGGCACAMYTAAGVGGCTGGAABVTACG
|
||||
GATTSTYGTBVAKTATWACCGTGTKAGTDTGTATGTTTAAAGGAAAAAGTAACATARAAA
|
||||
GGTYCAMNYAAABTATAGNTSATANAGTCATCCTATWADKAACTRGTMSACDGTATSAYT
|
||||
AAHSHGTAABYGACTYTATADTGSTATAGAGAAATCGNTAAAGGAAATCAGTTGTNCYMV
|
||||
TNACDRTATBNATATASTAGAAMSCGGGANRCKKMCAAACATTNAGTCTRMAATBMTACC
|
||||
CGTACTTCTBGDSYAATWGAAAATGACADDCHAKAAAYATATTKTTTTCACANACWAGAA
|
||||
AKATCCTTATTAYKHKCTAAACARTATTTTDATBTVWCYGCAATACTAGGKAAASTTDGA
|
||||
MGGCHTTHAATVCAHDRYAGGRCTATACGTCMAGAGAGCTBTHGNACARTCCBDCTAAGA
|
||||
GCGGCTTTARTAAAGAATCCNAGTAWBTGACTTGAATTACWTVACAGAAABCAATNAAAC
|
||||
CGTNTRANTTGAYCMAWBADTANABRGGTKTHTWTAGTTVCTMBKTAGMTVKCCAGCANT
|
||||
TVAGSWTTAGCCGCRHTTTCCTTHNTATTAAGAAGAATAGGMTRAARTCTABGTACDTTT
|
||||
TATAAVDHAHTATAGATCCTAGTAAGYTWATDWCATGAGGGATAGTAAMDMNGBASTWAM
|
||||
TSTATRBAYDABATGTATATYCGCACTGTTTTAACMCWBTATAWAGTATBTSTATVTTAR
|
||||
CCTMTTAAKADATCAACTAATYTSVTAKGDATTATGCKTCAYCAKAATACTTKAANGAGT
|
||||
ATTSDAGATCGGAAATACTTAAYAAVGTATMCGCTTGTGTDCTAATYTATTTTATTTWAA
|
||||
CAGWRCTATGTAGMTGTTTGTTYKTNGTTKTCAGAACNTRACCTACKTGSRATGTGGGGG
|
||||
CTGTCATTAAGTAAATNGSTTABCCCCTCGCAGCTCWHTCGCGAAGCAVATGCKACGHCA
|
||||
ACAKTTAATAACASAAADATTWNYTGTAATTGTTCGTMHACHTWATGTGCWTTTTGAAHY
|
||||
ACTTTGTAYAMSAAACTTAADAAATATAGTABMATATYAATGSGGTAGTTTGTGTBYGGT
|
||||
TWSGSVGWMATTDMTCCWWCABTCSVACAGBAATGTTKATBGTCAATAATCTTCTTAAAC
|
||||
ARVAATHAGYBWCTRWCABGTWWAATCTAAGTCASTAAAKTAAGVKBAATTBGABACGTA
|
||||
AGGTTAAATAAAAACTRMDTWBCTTTTTAATAAAAGATMGCCTACKAKNTBAGYRASTGT
|
||||
ASSTCGTHCGAAKTTATTATATTYTTTGTAGAACATGTCAAAACTWTWTHGKTCCYAATA
|
||||
AAGTGGAYTMCYTAARCSTAAATWAKTGAATTTRAGTCTSSATACGACWAKAASATDAAA
|
||||
TGYYACTSAACAAHAKTSHYARGASTATTATTHAGGYGGASTTTBGAKGATSANAACACD
|
||||
TRGSTTRAAAAAAAACAAGARTCVTAGTAAGATAWATGVHAAKATWGAAAAGTYAHVTAC
|
||||
TCTGRTGTCAWGATRVAAKTCGCAAVCGASWGGTTRTCSAMCCTAACASGWKKAWDAATG
|
||||
ACRCBACTATGTGTCTTCAAAHGSCTATATTTCGTVWAGAAGTAYCKGARAKSGKAGTAN
|
||||
TTTCYACATWATGTCTAAAADMDTWCAATSTKDACAMAADADBSAAATAGGCTHAHAGTA
|
||||
CGACVGAATTATAAAGAHCCVAYHGHTTTACATSTTTATGNCCMTAGCATATGATAVAAG
|
||||
>THREE Homo sapiens frequency
|
||||
ATATTTATCTTTTCACTTCCTACATTGGTCAGACCATTATTCGACACGTGGCGTCATTTT
|
||||
GTCATACCGGGTAATGTTGGAAACAAAACGTACTGATAAAATACTGAGTTGTAAACTCTA
|
||||
ATCAGATAACGCGCTTGGATATTAAGATTCACACAGGGGTTTCGGCTGTAAAAAAACTTG
|
||||
TGGAGCTGTTCTGGGACAGATAAGTTGTACCTCGTACTTAGCTAATTAATGAACCAACTG
|
||||
ATTACGATAGAACAATTCTGAGGCCGCCAGGACAGCCAAATTTTAATCTTATAAAGCTGG
|
||||
AAACAGCCGGTATTAGCTTCTCGCATACTTTGCCTGCATTGGTACCTTACAGATATCAGC
|
||||
GTAGTCATATACACCTCGGTCTCAGCTAAGCTTGTATCTCTTAGAGTAGTTCAAAGATAG
|
||||
TGGACAATACCTGTGGAATCGATTGCAGATATGGATTTATTTAACTACTGAGTCTCATTC
|
||||
ACAAGCTAAGCAAGGAGCACGTTTTGGTGCCGGCATACCGATTTGCTATCATGTCAGCAA
|
||||
ATTTGCGTTGTATTCCTAGTTGCACCCATTAAGGCCACACTCCGAACCTAATTATTACAT
|
||||
CGCAAAGACATGTACGAAGGACCCGATGTCGAATAGAAGGGAGGACTGTTCATTGGAAGC
|
||||
TAGACCAGAGGAATCGCAAAGATGCAACTCTTACAATAAAAATCTAATTTCAGTCAACAC
|
||||
GCAATTTCTATAAGGTTTCCGATAATAATGAACCGTCTTCCACAGGGGAATTTGCCATGC
|
||||
TCGTAAAAGTAGTTAATCCAAGTAGAAGAAATTTTGATAATGTTTTAAGTTGGCACGAAG
|
||||
GAATTCAGAGAGATCTTACCTAACAAAGGCATTAGTAGATGTTCCTTGGTTCACACTCGG
|
||||
TCAATCAGAGCACATACTACGGGCGATACCGGGAATGACACAACATCAATGAGATTGTTA
|
||||
AGTGAGGTAATTGACTTTAGAGGACTCGATCAGTATACTGTCACTATGAACATCGTATTA
|
||||
ATTGTTATCCGATATATACACCACCGATTTGCTTGTGCAAGGTTACAGACCCATTCGATA
|
||||
AATACAAACACGGAGCGATATTATTTAAGGAGTGCTGTCTTCAAAAGAATTATTCCCACA
|
||||
CCGACATAAGAACTTCGCTCCGTCATTCCAGATTTAAATAACATAACGTAACGCTTTGCT
|
||||
GATAACATAACATAACCGAGAATTTGCTTAGGAAATTTGGAGCAATATTGCATTGTTTCT
|
||||
CAGTCATCACAAGGCCCGCCAAAGAACTCTGAGAATCAGGATTCAACATGATTGGTAAGA
|
||||
CTCTATATATATAACTTAATTCTTGTGTCCGGAGATAGAAAGAGGACGAGAGATACTACG
|
||||
AAAGAAAGTGTACTTCGATGTATCAATTCAGACGCCTTCTCTATCATCAACATTATAGGT
|
||||
CTCGTATATGCTCGGCGCGATCTGCTTCTCTCCGCCAATAGCCCCATAGTGTATTTCAAG
|
||||
CGCAGTAACAGTGAAATCGTTACGAAGGTAGGGATGTTGCTTATAATTGTCGTAACTTAT
|
||||
CGCTTATGTATCTTTCAAGAATGAACGGCAGCATATACATACGTTCTACCTTTAGCTACA
|
||||
AAGCATCCATATACTCCCTCTCATGATTGAAACTCTTCCCTATTTTGTAGCCAATAGTGA
|
||||
AAGCGTATTAGTATAAATTCGTCGGTTTTTCACTCGCAACTGTTATACTCTGCAAACAAA
|
||||
CGAAAGCCTCATAGTACAAACCTAAAGCTACATACTTCATCATTGGCAGACCAGTGGCGG
|
||||
TATTTCTACGGAAGCATCACTATAGATATAAAGTTTCCCTTCATGTACGTCTGTTAACCA
|
||||
TATCACAAGAAACTGCTATCTCTGTCACGTAACAATTCACGCGCCTTATCGCCAAATGTT
|
||||
CATATATGCGCGGTATACGTATGAACGAATACTAATTAGTATAACGGAGGATTCACGGGA
|
||||
GGGATACTTGGGGCATTTATAAATCGTCTAAAAATTTTCTATCAGCACTTGCGGGTTATA
|
||||
GTGGATTACTAGGCAACATAATATTCTGTATTGGTCCAAATGACGCTATAGATAAATTAG
|
||||
CAAAATACATTGTTTCCATTTATGTAAGTCGAAACTCCAGGACTCCCGGGAACCAGTTAA
|
||||
ACCGTCTGGAAAAGACACATTGTGAGCGGGACTTCAATGATAGCTTTCAATGAGCTTCTC
|
||||
ATGCTTGGGGTCTGTACATATATGTTGGCGAAATTATCGTCTGTATTCTGTTATGCTTTG
|
||||
ATCATGGGTTATTAGTATAGTGTCCGGTTAAGTACCAATACCGCTAGAGACCCGACCTAA
|
||||
GTCGATAACTAACGATCATCGACGTAAGGATCGTCTCGATCAGTACTTCAGTCTAGATCT
|
||||
GGGAATAGTAACTCGTTAGTGAACTATGTCGTGTCATAACTCTAAAATGCAATCAAATCT
|
||||
TATTATTGAGTATTGATTATATAAAGCATCCGCTTAGCTTTACCCTCAAATGTTATATGC
|
||||
AATTTAAAGCGCTTGATATCGTCTACTCAAGTTCAGGTTTCACATGGCCGCAACGTGACG
|
||||
TTATTAGAGGTGGGTCATCATCTCTGAGGCTAGTGATGTTGAATACTCATTGAATGGGAA
|
||||
GTGGAATACCATGCTCGTAGGTAACAGCATGACCTATAAAATATACTATGGGTGTGTGGT
|
||||
AGATCAATATTGTTCAAGCATATCGTAACAATAACGGCTGAAATGTTACTGACATGAAAG
|
||||
AGGGAGTCCAAACCATTCTAACAGCTGATCAAGTCGTCTAAAAACGCCTGGTTCAGCCTT
|
||||
AAGAGTTATAAGCCAGACAAATTGTATCAATAGAGAATCCGTAAATTCCTCGGCCAACCT
|
||||
CTTGCAAAGACATCACTATCAATATACTACCGTGATCTTAATTAGTGAACTTATATAAAT
|
||||
ATCTACAACCAGATTCAACGGAAAAGCTTTAGTGGATTAGAAATTGCCAAGAATCACATT
|
||||
CATGTGGGTTCGAATGCTTTAGTAATACCATTTCGCCGAGTAGTCACTTCGCTGAACTGT
|
||||
CGTAAATTGCTATGACATAATCGAAAAGGATTGTCAAGAGTCGATTACTGCGGACTAATA
|
||||
ATCCCCACGGGGGTGGTCTCATGTCTCCCCAGGCGAGTGGGGACGGTTGATAAACACGCT
|
||||
GCATCGCGGACTGATGTTCCCAGTATTACATAGTCACATTGGATTGCGAGTAGTCTACCT
|
||||
ATTTATGAGCGAGAGATGCCTCTAACTACTTCGACTTTTAAAACCTTTCCACGCCAGTAT
|
||||
TCGGCGAAAGGGAAGTATTAAGGGTTGTCATAATTAAGCTGATACCACTTCAGACTTTGC
|
||||
TCTACTTCTGTCTTTCATTGGTTTAGTAAAGTCTGTCCATTCGTCGAGACCGTCTTTTGC
|
||||
AGCCTCATTCTACCAACTGCTCCGACTCTTAGTCTGCTTCTCCCAGCGTTATAACAAGAG
|
||||
GCATTTTGTCATCCTTAAAACAATAATAAAGAACTCGGAGCACTGATATAATGACTGAAT
|
||||
TAGAACCGCTTAAAAATACAACGAATAGATAAGACTATCGGATAAGATCTAATATGTAGT
|
||||
GATTAAGCCCTTTATTAATTAATAATAGTTACCCTTTCTGATGTAACGCGACATATTACG
|
||||
ATTTAGTGGCACGTCTGAATTGCAAAGCAGATCTCTACCCGATTTTTATTATAAATCCCG
|
||||
TATACATCTTGACTTGAGTAATTGTTCATCTTTTTATATCTCTTCGTACTACAAATAATT
|
||||
AATATCTCAACCCGTATTGTGTGATTCTAATTACCAACAGAATACGAGGAGGTTTTTGCT
|
||||
TAGGGCCATATATAATGAATCTATCTCGTTTATTCGCGGAACCCGAGATAACATTACGAT
|
||||
GTAACTATTTTAGAGAACTTAATACAAGAAACATTGCTGATTACTCATAACTAAATGCTT
|
||||
GGTAATATATCCTCAGTGCCCCTACCATCTTTTACGCAGGGATGTAATTACTTAGGATTC
|
||||
ATTGTGTAAGAATTACAATGAACGATGGATATGAAGGCATGTTGCGAGGTGTTCCTTGGT
|
||||
ATGTGAAGTTCGCAGGGCAACAAAAATTTCGCAGAATAGGCCTCAAAGTATTGGTAAAGA
|
||||
AGACAACTAATCATCACGAGCTTCTGATATCAATACGAACGAGTCCTGTGATGGATGAAA
|
||||
GAAAGTCGTATCGAAAATGTCAAGAGTCTGCCCAATGTAACTTACTTCAAAAAATAACGC
|
||||
TTCCGCCAAGTACGTTCGAATAAACGTAATTTTAAAAATACATAAGGGGTGTTAGAAAGT
|
||||
AAGCGACGGGATATAAGTTAGACTCAAGATTCCGCCGTAAAACGAGACTGATTCCGAAGA
|
||||
TTGTTCGTGGATCTGGTCATGACTTTCACTGAGTAAGGAGTTTCGACATATGTCAATAAA
|
||||
CACAAAAATAGAAGCTATTCGATCTGAAAAATATTAGGACAAGAAACTATCTCACGCTAG
|
||||
CCCAGAATATTCACTCACCCACGGGCGATACTAAAGCACTATATAGTCGCGTGATTACTA
|
||||
TACATATGGTACACATAAGAATCACGATCAGGTTCTCAATTTTCAACAATATATGTTTAT
|
||||
TTGCATAGGTAATATTAGGCCTTTAAGAGAAGGATGGGTGAGATACTCCGGGGATGGCGG
|
||||
CAATAAAGAAAAACACGATATGAGTAATAGGATCCTAATATCTTGGCGAGAGACTTAAGG
|
||||
TACGAATTTTGCGCAATCTATTTTTTACTTGGCCAGAATTCATGTATGGTATAAGTACGA
|
||||
ACTTTTTTGATCACTTTCATGGCTACCTGATTAGGATAGTTTGAGGAATTTCCCAAATAT
|
||||
ACCGATTTAATATACACTAGGGCTTGTCACTTTGAGTCAGAAAAAGAATATAATTACTTA
|
||||
GGGTAATGCTGCATACATATTCTTATATTGCAAAGGTTCTCTGGGTAATCTTGAGCCTTC
|
||||
ACGATACCTGGTGAAGTGTT
|
|
@ -0,0 +1,59 @@
|
|||
IN: temporary
|
||||
USING: compiler hashtables io kernel math math namespaces
|
||||
sequences strings vectors words words ;
|
||||
|
||||
! Instead of a variable, we define an inline word which pushes
|
||||
! the hash on the stack, for performance.
|
||||
DEFER: trans-hash
|
||||
|
||||
[
|
||||
26 [ CHAR: A + dup set ] each
|
||||
26 [ CHAR: a + dup set ] each
|
||||
|
||||
"TGCAAKYRMBDHV"
|
||||
"ACGTUMRYKVHDB"
|
||||
2dup
|
||||
[ set ] 2each
|
||||
[ ch>lower set ] 2each
|
||||
] make-hash
|
||||
|
||||
\ trans-hash swap unit define-compound
|
||||
\ trans-hash t "inline" set-word-prop
|
||||
|
||||
: translate-seq ( seq -- sbuf )
|
||||
[
|
||||
2000000 <sbuf> building set
|
||||
<reversed> [ <reversed> % ] each
|
||||
building get dup [ trans-hash hash ] inject
|
||||
] with-scope ;
|
||||
|
||||
SYMBOL: out
|
||||
|
||||
: seg ( sbuf n -- str )
|
||||
60 * dup 60 + pick length min rot <slice> >string ;
|
||||
|
||||
: show-seq ( seq -- )
|
||||
translate-seq dup length 59 + 60 /i
|
||||
[ seg out get stream-print ] each-with ;
|
||||
|
||||
: clear-seq ( seq -- ) 0 swap set-length ;
|
||||
|
||||
: do-line ( seq line -- seq )
|
||||
dup first ">;" memq? [
|
||||
over show-seq out get stream-print dup clear-seq
|
||||
] [
|
||||
over push
|
||||
] if ;
|
||||
|
||||
: (reverse-complement) ( seq -- )
|
||||
readln [ do-line (reverse-complement) ] [ show-seq ] if* ;
|
||||
|
||||
: reverse-complement ( infile outfile -- )
|
||||
<file-writer> [
|
||||
stdio get out set
|
||||
<file-reader> [
|
||||
500000 <vector> (reverse-complement)
|
||||
] with-stream
|
||||
] with-stream ;
|
||||
|
||||
{ translate-seq seg clear-seq } [ compile ] each
|
Loading…
Reference in New Issue