diff --git a/library/test/benchmark/reverse-complement-test-in.txt b/library/test/benchmark/reverse-complement-test-in.txt new file mode 100644 index 0000000000..f1caba0d62 --- /dev/null +++ b/library/test/benchmark/reverse-complement-test-in.txt @@ -0,0 +1,171 @@ +>ONE Homo sapiens alu +GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA +TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT +AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG +GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG +CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGT +GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA +GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA +TTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAG +AATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCA +GCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGT +AATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACC +AGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTG +GTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACC +CGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAG +AGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTT +TGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACA +TGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCT +GTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGG +TTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGT +CTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG +CGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCG +TCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTA +CTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCG +AGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCG +GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACC +TGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAA +TACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGA +GGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACT +GCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTC +ACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGT +TCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGC +CGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCG +CTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTG +GGCGACAGAGCGAGACTCCG +>TWO IUB ambiguity codes +cttBtatcatatgctaKggNcataaaSatgtaaaDcDRtBggDtctttataattcBgtcg +tactDtDagcctatttSVHtHttKtgtHMaSattgWaHKHttttagacatWatgtRgaaa +NtactMcSMtYtcMgRtacttctWBacgaaatatagScDtttgaagacacatagtVgYgt +cattHWtMMWcStgttaggKtSgaYaaccWStcgBttgcgaMttBYatcWtgacaYcaga +gtaBDtRacttttcWatMttDBcatWtatcttactaBgaYtcttgttttttttYaaScYa +HgtgttNtSatcMtcVaaaStccRcctDaataataStcYtRDSaMtDttgttSagtRRca +tttHatSttMtWgtcgtatSSagactYaaattcaMtWatttaSgYttaRgKaRtccactt +tattRggaMcDaWaWagttttgacatgttctacaaaRaatataataaMttcgDacgaSSt +acaStYRctVaNMtMgtaggcKatcttttattaaaaagVWaHKYagtttttatttaacct +tacgtVtcVaattVMBcttaMtttaStgacttagattWWacVtgWYagWVRctDattBYt +gtttaagaagattattgacVatMaacattVctgtBSgaVtgWWggaKHaatKWcBScSWa +accRVacacaaactaccScattRatatKVtactatatttHttaagtttSKtRtacaaagt +RDttcaaaaWgcacatWaDgtDKacgaacaattacaRNWaatHtttStgttattaaMtgt +tgDcgtMgcatBtgcttcgcgaDWgagctgcgaggggVtaaScNatttacttaatgacag +cccccacatYScaMgtaggtYaNgttctgaMaacNaMRaacaaacaKctacatagYWctg +ttWaaataaaataRattagHacacaagcgKatacBttRttaagtatttccgatctHSaat +actcNttMaagtattMtgRtgaMgcataatHcMtaBSaRattagttgatHtMttaaKagg +YtaaBataSaVatactWtataVWgKgttaaaacagtgcgRatatacatVtHRtVYataSa +KtWaStVcNKHKttactatccctcatgWHatWaRcttactaggatctataDtDHBttata +aaaHgtacVtagaYttYaKcctattcttcttaataNDaaggaaaDYgcggctaaWSctBa +aNtgctggMBaKctaMVKagBaactaWaDaMaccYVtNtaHtVWtKgRtcaaNtYaNacg +gtttNattgVtttctgtBaWgtaattcaagtcaVWtactNggattctttaYtaaagccgc +tcttagHVggaYtgtNcDaVagctctctKgacgtatagYcctRYHDtgBattDaaDgccK +tcHaaStttMcctagtattgcRgWBaVatHaaaataYtgtttagMDMRtaataaggatMt +ttctWgtNtgtgaaaaMaatatRtttMtDgHHtgtcattttcWattRSHcVagaagtacg +ggtaKVattKYagactNaatgtttgKMMgYNtcccgSKttctaStatatNVataYHgtNa +BKRgNacaactgatttcctttaNcgatttctctataScaHtataRagtcRVttacDSDtt +aRtSatacHgtSKacYagttMHtWataggatgactNtatSaNctataVtttRNKtgRacc +tttYtatgttactttttcctttaaacatacaHactMacacggtWataMtBVacRaSaatc +cgtaBVttccagccBcttaRKtgtgcctttttRtgtcagcRttKtaaacKtaaatctcac +aattgcaNtSBaaccgggttattaaBcKatDagttactcttcattVtttHaaggctKKga +tacatcBggScagtVcacattttgaHaDSgHatRMaHWggtatatRgccDttcgtatcga +aacaHtaagttaRatgaVacttagattVKtaaYttaaatcaNatccRttRRaMScNaaaD +gttVHWgtcHaaHgacVaWtgttScactaagSgttatcttagggDtaccagWattWtRtg +ttHWHacgattBtgVcaYatcggttgagKcWtKKcaVtgaYgWctgYggVctgtHgaNcV +taBtWaaYatcDRaaRtSctgaHaYRttagatMatgcatttNattaDttaattgttctaa +ccctcccctagaWBtttHtBccttagaVaatMcBHagaVcWcagBVttcBtaYMccagat +gaaaaHctctaacgttagNWRtcggattNatcRaNHttcagtKttttgWatWttcSaNgg +gaWtactKKMaacatKatacNattgctWtatctaVgagctatgtRaHtYcWcttagccaa +tYttWttaWSSttaHcaaaaagVacVgtaVaRMgattaVcDactttcHHggHRtgNcctt +tYatcatKgctcctctatVcaaaaKaaaagtatatctgMtWtaaaacaStttMtcgactt +taSatcgDataaactaaacaagtaaVctaggaSccaatMVtaaSKNVattttgHccatca +cBVctgcaVatVttRtactgtVcaattHgtaaattaaattttYtatattaaRSgYtgBag +aHSBDgtagcacRHtYcBgtcacttacactaYcgctWtattgSHtSatcataaatataHt +cgtYaaMNgBaatttaRgaMaatatttBtttaaaHHKaatctgatWatYaacttMctctt +ttVctagctDaaagtaVaKaKRtaacBgtatccaaccactHHaagaagaaggaNaaatBW +attccgStaMSaMatBttgcatgRSacgttVVtaaDMtcSgVatWcaSatcttttVatag +ttactttacgatcaccNtaDVgSRcgVcgtgaacgaNtaNatatagtHtMgtHcMtagaa +attBgtataRaaaacaYKgtRccYtatgaagtaataKgtaaMttgaaRVatgcagaKStc +tHNaaatctBBtcttaYaBWHgtVtgacagcaRcataWctcaBcYacYgatDgtDHccta +>THREE Homo sapiens frequency +aacacttcaccaggtatcgtgaaggctcaagattacccagagaacctttgcaatataaga +atatgtatgcagcattaccctaagtaattatattctttttctgactcaaagtgacaagcc +ctagtgtatattaaatcggtatatttgggaaattcctcaaactatcctaatcaggtagcc +atgaaagtgatcaaaaaagttcgtacttataccatacatgaattctggccaagtaaaaaa +tagattgcgcaaaattcgtaccttaagtctctcgccaagatattaggatcctattactca +tatcgtgtttttctttattgccgccatccccggagtatctcacccatccttctcttaaag +gcctaatattacctatgcaaataaacatatattgttgaaaattgagaacctgatcgtgat +tcttatgtgtaccatatgtatagtaatcacgcgactatatagtgctttagtatcgcccgt +gggtgagtgaatattctgggctagcgtgagatagtttcttgtcctaatatttttcagatc +gaatagcttctatttttgtgtttattgacatatgtcgaaactccttactcagtgaaagtc +atgaccagatccacgaacaatcttcggaatcagtctcgttttacggcggaatcttgagtc +taacttatatcccgtcgcttactttctaacaccccttatgtatttttaaaattacgttta +ttcgaacgtacttggcggaagcgttattttttgaagtaagttacattgggcagactcttg +acattttcgatacgactttctttcatccatcacaggactcgttcgtattgatatcagaag +ctcgtgatgattagttgtcttctttaccaatactttgaggcctattctgcgaaatttttg +ttgccctgcgaacttcacataccaaggaacacctcgcaacatgccttcatatccatcgtt +cattgtaattcttacacaatgaatcctaagtaattacatccctgcgtaaaagatggtagg +ggcactgaggatatattaccaagcatttagttatgagtaatcagcaatgtttcttgtatt +aagttctctaaaatagttacatcgtaatgttatctcgggttccgcgaataaacgagatag +attcattatatatggccctaagcaaaaacctcctcgtattctgttggtaattagaatcac +acaatacgggttgagatattaattatttgtagtacgaagagatataaaaagatgaacaat +tactcaagtcaagatgtatacgggatttataataaaaatcgggtagagatctgctttgca +attcagacgtgccactaaatcgtaatatgtcgcgttacatcagaaagggtaactattatt +aattaataaagggcttaatcactacatattagatcttatccgatagtcttatctattcgt +tgtatttttaagcggttctaattcagtcattatatcagtgctccgagttctttattattg +ttttaaggatgacaaaatgcctcttgttataacgctgggagaagcagactaagagtcgga +gcagttggtagaatgaggctgcaaaagacggtctcgacgaatggacagactttactaaac +caatgaaagacagaagtagagcaaagtctgaagtggtatcagcttaattatgacaaccct +taatacttccctttcgccgaatactggcgtggaaaggttttaaaagtcgaagtagttaga +ggcatctctcgctcataaataggtagactactcgcaatccaatgtgactatgtaatactg +ggaacatcagtccgcgatgcagcgtgtttatcaaccgtccccactcgcctggggagacat +gagaccacccccgtggggattattagtccgcagtaatcgactcttgacaatccttttcga +ttatgtcatagcaatttacgacagttcagcgaagtgactactcggcgaaatggtattact +aaagcattcgaacccacatgaatgtgattcttggcaatttctaatccactaaagcttttc +cgttgaatctggttgtagatatttatataagttcactaattaagatcacggtagtatatt +gatagtgatgtctttgcaagaggttggccgaggaatttacggattctctattgatacaat +ttgtctggcttataactcttaaggctgaaccaggcgtttttagacgacttgatcagctgt +tagaatggtttggactccctctttcatgtcagtaacatttcagccgttattgttacgata +tgcttgaacaatattgatctaccacacacccatagtatattttataggtcatgctgttac +ctacgagcatggtattccacttcccattcaatgagtattcaacatcactagcctcagaga +tgatgacccacctctaataacgtcacgttgcggccatgtgaaacctgaacttgagtagac +gatatcaagcgctttaaattgcatataacatttgagggtaaagctaagcggatgctttat +ataatcaatactcaataataagatttgattgcattttagagttatgacacgacatagttc +actaacgagttactattcccagatctagactgaagtactgatcgagacgatccttacgtc +gatgatcgttagttatcgacttaggtcgggtctctagcggtattggtacttaaccggaca +ctatactaataacccatgatcaaagcataacagaatacagacgataatttcgccaacata +tatgtacagaccccaagcatgagaagctcattgaaagctatcattgaagtcccgctcaca +atgtgtcttttccagacggtttaactggttcccgggagtcctggagtttcgacttacata +aatggaaacaatgtattttgctaatttatctatagcgtcatttggaccaatacagaatat +tatgttgcctagtaatccactataacccgcaagtgctgatagaaaatttttagacgattt +ataaatgccccaagtatccctcccgtgaatcctccgttatactaattagtattcgttcat +acgtataccgcgcatatatgaacatttggcgataaggcgcgtgaattgttacgtgacaga +gatagcagtttcttgtgatatggttaacagacgtacatgaagggaaactttatatctata +gtgatgcttccgtagaaataccgccactggtctgccaatgatgaagtatgtagctttagg +tttgtactatgaggctttcgtttgtttgcagagtataacagttgcgagtgaaaaaccgac +gaatttatactaatacgctttcactattggctacaaaatagggaagagtttcaatcatga +gagggagtatatggatgctttgtagctaaaggtagaacgtatgtatatgctgccgttcat +tcttgaaagatacataagcgataagttacgacaattataagcaacatccctaccttcgta +acgatttcactgttactgcgcttgaaatacactatggggctattggcggagagaagcaga +tcgcgccgagcatatacgagacctataatgttgatgatagagaaggcgtctgaattgata +catcgaagtacactttctttcgtagtatctctcgtcctctttctatctccggacacaaga +attaagttatatatatagagtcttaccaatcatgttgaatcctgattctcagagttcttt +ggcgggccttgtgatgactgagaaacaatgcaatattgctccaaatttcctaagcaaatt +ctcggttatgttatgttatcagcaaagcgttacgttatgttatttaaatctggaatgacg +gagcgaagttcttatgtcggtgtgggaataattcttttgaagacagcactccttaaataa +tatcgctccgtgtttgtatttatcgaatgggtctgtaaccttgcacaagcaaatcggtgg +tgtatatatcggataacaattaatacgatgttcatagtgacagtatactgatcgagtcct +ctaaagtcaattacctcacttaacaatctcattgatgttgtgtcattcccggtatcgccc +gtagtatgtgctctgattgaccgagtgtgaaccaaggaacatctactaatgcctttgtta +ggtaagatctctctgaattccttcgtgccaacttaaaacattatcaaaatttcttctact +tggattaactacttttacgagcatggcaaattcccctgtggaagacggttcattattatc +ggaaaccttatagaaattgcgtgttgactgaaattagatttttattgtaagagttgcatc +tttgcgattcctctggtctagcttccaatgaacagtcctcccttctattcgacatcgggt +ccttcgtacatgtctttgcgatgtaataattaggttcggagtgtggccttaatgggtgca +actaggaatacaacgcaaatttgctgacatgatagcaaatcggtatgccggcaccaaaac +gtgctccttgcttagcttgtgaatgagactcagtagttaaataaatccatatctgcaatc +gattccacaggtattgtccactatctttgaactactctaagagatacaagcttagctgag +accgaggtgtatatgactacgctgatatctgtaaggtaccaatgcaggcaaagtatgcga +gaagctaataccggctgtttccagctttataagattaaaatttggctgtcctggcggcct +cagaattgttctatcgtaatcagttggttcattaattagctaagtacgaggtacaactta +tctgtcccagaacagctccacaagtttttttacagccgaaacccctgtgtgaatcttaat +atccaagcgcgttatctgattagagtttacaactcagtattttatcagtacgttttgttt +ccaacattacccggtatgacaaaatgacgccacgtgtcgaataatggtctgaccaatgta +ggaagtgaaaagataaatat diff --git a/library/test/benchmark/reverse-complement-test-out.txt b/library/test/benchmark/reverse-complement-test-out.txt new file mode 100644 index 0000000000..14d792ade8 --- /dev/null +++ b/library/test/benchmark/reverse-complement-test-out.txt @@ -0,0 +1,171 @@ +>ONE Homo sapiens alu +CGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAAC +CTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACA +GGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCAT +GTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAA +AGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTC +TGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGG +GTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACC +ACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTG +GTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTA +CAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCT +GGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTC +TCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAAT +TTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCT +GACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCA +CCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGC +GCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCC +TCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTA +GTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGAT +CCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCT +TTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTC +ACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTG +GGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGT +TTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGG +CCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAG +TCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCG +CCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGC +GCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGG +CCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGC +TGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCG +CCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCA +AGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCC +CGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTC +GAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGC +GTGAGCCACCGCGCCCGGCC +>TWO IUB ambiguity codes +TAGGDHACHATCRGTRGVTGAGWTATGYTGCTGTCABACDWVTRTAAGAVVAGATTTNDA +GASMTCTGCATBYTTCAAKTTACMTATTACTTCATARGGYACMRTGTTTTYTATACVAAT +TTCTAKGDACKADACTATATNTANTCGTTCACGBCGYSCBHTANGGTGATCGTAAAGTAA +CTATBAAAAGATSTGWATBCSGAKHTTABBAACGTSYCATGCAAVATKTSKTASCGGAAT +WVATTTNTCCTTCTTCTTDDAGTGGTTGGATACVGTTAYMTMTBTACTTTHAGCTAGBAA +AAGAGKAAGTTRATWATCAGATTMDDTTTAAAVAAATATTKTCYTAAATTVCNKTTRACG +ADTATATTTATGATSADSCAATAWAGCGRTAGTGTAAGTGACVGRADYGTGCTACHVSDT +CTVCARCSYTTAATATARAAAATTTAATTTACDAATTGBACAGTAYAABATBTGCAGBVG +TGATGGDCAAAATBNMSTTABKATTGGSTCCTAGBTTACTTGTTTAGTTTATHCGATSTA +AAGTCGAKAAASTGTTTTAWAKCAGATATACTTTTMTTTTGBATAGAGGAGCMATGATRA +AAGGNCAYDCCDDGAAAGTHGBTAATCKYTBTACBGTBCTTTTTGDTAASSWTAAWAARA +TTGGCTAAGWGRADTYACATAGCTCBTAGATAWAGCAATNGTATMATGTTKMMAGTAWTC +CCNTSGAAWATWCAAAAMACTGAADNTYGATNAATCCGAYWNCTAACGTTAGAGDTTTTC +ATCTGGKRTAVGAABVCTGWGBTCTDVGKATTBTCTAAGGVADAAAVWTCTAGGGGAGGG +TTAGAACAATTAAHTAATNAAATGCATKATCTAAYRTDTCAGSAYTTYHGATRTTWAVTA +BGNTCDACAGBCCRCAGWCRTCABTGMMAWGMCTCAACCGATRTGBCAVAATCGTDWDAA +CAYAWAATWCTGGTAHCCCTAAGATAACSCTTAGTGSAACAWTBGTCDTTDGACWDBAAC +HTTTNGSKTYYAAYGGATNTGATTTAARTTAMBAATCTAAGTBTCATYTAACTTADTGTT +TCGATACGAAHGGCYATATACCWDTKYATDCSHTDTCAAAATGTGBACTGSCCVGATGTA +TCMMAGCCTTDAAABAATGAAGAGTAACTHATMGVTTAATAACCCGGTTVSANTGCAATT +GTGAGATTTAMGTTTAMAAYGCTGACAYAAAAAGGCACAMYTAAGVGGCTGGAABVTACG +GATTSTYGTBVAKTATWACCGTGTKAGTDTGTATGTTTAAAGGAAAAAGTAACATARAAA +GGTYCAMNYAAABTATAGNTSATANAGTCATCCTATWADKAACTRGTMSACDGTATSAYT +AAHSHGTAABYGACTYTATADTGSTATAGAGAAATCGNTAAAGGAAATCAGTTGTNCYMV +TNACDRTATBNATATASTAGAAMSCGGGANRCKKMCAAACATTNAGTCTRMAATBMTACC +CGTACTTCTBGDSYAATWGAAAATGACADDCHAKAAAYATATTKTTTTCACANACWAGAA +AKATCCTTATTAYKHKCTAAACARTATTTTDATBTVWCYGCAATACTAGGKAAASTTDGA +MGGCHTTHAATVCAHDRYAGGRCTATACGTCMAGAGAGCTBTHGNACARTCCBDCTAAGA +GCGGCTTTARTAAAGAATCCNAGTAWBTGACTTGAATTACWTVACAGAAABCAATNAAAC +CGTNTRANTTGAYCMAWBADTANABRGGTKTHTWTAGTTVCTMBKTAGMTVKCCAGCANT +TVAGSWTTAGCCGCRHTTTCCTTHNTATTAAGAAGAATAGGMTRAARTCTABGTACDTTT +TATAAVDHAHTATAGATCCTAGTAAGYTWATDWCATGAGGGATAGTAAMDMNGBASTWAM +TSTATRBAYDABATGTATATYCGCACTGTTTTAACMCWBTATAWAGTATBTSTATVTTAR +CCTMTTAAKADATCAACTAATYTSVTAKGDATTATGCKTCAYCAKAATACTTKAANGAGT +ATTSDAGATCGGAAATACTTAAYAAVGTATMCGCTTGTGTDCTAATYTATTTTATTTWAA +CAGWRCTATGTAGMTGTTTGTTYKTNGTTKTCAGAACNTRACCTACKTGSRATGTGGGGG +CTGTCATTAAGTAAATNGSTTABCCCCTCGCAGCTCWHTCGCGAAGCAVATGCKACGHCA +ACAKTTAATAACASAAADATTWNYTGTAATTGTTCGTMHACHTWATGTGCWTTTTGAAHY +ACTTTGTAYAMSAAACTTAADAAATATAGTABMATATYAATGSGGTAGTTTGTGTBYGGT +TWSGSVGWMATTDMTCCWWCABTCSVACAGBAATGTTKATBGTCAATAATCTTCTTAAAC +ARVAATHAGYBWCTRWCABGTWWAATCTAAGTCASTAAAKTAAGVKBAATTBGABACGTA +AGGTTAAATAAAAACTRMDTWBCTTTTTAATAAAAGATMGCCTACKAKNTBAGYRASTGT +ASSTCGTHCGAAKTTATTATATTYTTTGTAGAACATGTCAAAACTWTWTHGKTCCYAATA +AAGTGGAYTMCYTAARCSTAAATWAKTGAATTTRAGTCTSSATACGACWAKAASATDAAA +TGYYACTSAACAAHAKTSHYARGASTATTATTHAGGYGGASTTTBGAKGATSANAACACD +TRGSTTRAAAAAAAACAAGARTCVTAGTAAGATAWATGVHAAKATWGAAAAGTYAHVTAC +TCTGRTGTCAWGATRVAAKTCGCAAVCGASWGGTTRTCSAMCCTAACASGWKKAWDAATG +ACRCBACTATGTGTCTTCAAAHGSCTATATTTCGTVWAGAAGTAYCKGARAKSGKAGTAN +TTTCYACATWATGTCTAAAADMDTWCAATSTKDACAMAADADBSAAATAGGCTHAHAGTA +CGACVGAATTATAAAGAHCCVAYHGHTTTACATSTTTATGNCCMTAGCATATGATAVAAG +>THREE Homo sapiens frequency +ATATTTATCTTTTCACTTCCTACATTGGTCAGACCATTATTCGACACGTGGCGTCATTTT +GTCATACCGGGTAATGTTGGAAACAAAACGTACTGATAAAATACTGAGTTGTAAACTCTA +ATCAGATAACGCGCTTGGATATTAAGATTCACACAGGGGTTTCGGCTGTAAAAAAACTTG +TGGAGCTGTTCTGGGACAGATAAGTTGTACCTCGTACTTAGCTAATTAATGAACCAACTG +ATTACGATAGAACAATTCTGAGGCCGCCAGGACAGCCAAATTTTAATCTTATAAAGCTGG +AAACAGCCGGTATTAGCTTCTCGCATACTTTGCCTGCATTGGTACCTTACAGATATCAGC +GTAGTCATATACACCTCGGTCTCAGCTAAGCTTGTATCTCTTAGAGTAGTTCAAAGATAG +TGGACAATACCTGTGGAATCGATTGCAGATATGGATTTATTTAACTACTGAGTCTCATTC +ACAAGCTAAGCAAGGAGCACGTTTTGGTGCCGGCATACCGATTTGCTATCATGTCAGCAA +ATTTGCGTTGTATTCCTAGTTGCACCCATTAAGGCCACACTCCGAACCTAATTATTACAT +CGCAAAGACATGTACGAAGGACCCGATGTCGAATAGAAGGGAGGACTGTTCATTGGAAGC +TAGACCAGAGGAATCGCAAAGATGCAACTCTTACAATAAAAATCTAATTTCAGTCAACAC +GCAATTTCTATAAGGTTTCCGATAATAATGAACCGTCTTCCACAGGGGAATTTGCCATGC +TCGTAAAAGTAGTTAATCCAAGTAGAAGAAATTTTGATAATGTTTTAAGTTGGCACGAAG +GAATTCAGAGAGATCTTACCTAACAAAGGCATTAGTAGATGTTCCTTGGTTCACACTCGG +TCAATCAGAGCACATACTACGGGCGATACCGGGAATGACACAACATCAATGAGATTGTTA +AGTGAGGTAATTGACTTTAGAGGACTCGATCAGTATACTGTCACTATGAACATCGTATTA +ATTGTTATCCGATATATACACCACCGATTTGCTTGTGCAAGGTTACAGACCCATTCGATA +AATACAAACACGGAGCGATATTATTTAAGGAGTGCTGTCTTCAAAAGAATTATTCCCACA +CCGACATAAGAACTTCGCTCCGTCATTCCAGATTTAAATAACATAACGTAACGCTTTGCT +GATAACATAACATAACCGAGAATTTGCTTAGGAAATTTGGAGCAATATTGCATTGTTTCT +CAGTCATCACAAGGCCCGCCAAAGAACTCTGAGAATCAGGATTCAACATGATTGGTAAGA +CTCTATATATATAACTTAATTCTTGTGTCCGGAGATAGAAAGAGGACGAGAGATACTACG +AAAGAAAGTGTACTTCGATGTATCAATTCAGACGCCTTCTCTATCATCAACATTATAGGT +CTCGTATATGCTCGGCGCGATCTGCTTCTCTCCGCCAATAGCCCCATAGTGTATTTCAAG +CGCAGTAACAGTGAAATCGTTACGAAGGTAGGGATGTTGCTTATAATTGTCGTAACTTAT +CGCTTATGTATCTTTCAAGAATGAACGGCAGCATATACATACGTTCTACCTTTAGCTACA +AAGCATCCATATACTCCCTCTCATGATTGAAACTCTTCCCTATTTTGTAGCCAATAGTGA +AAGCGTATTAGTATAAATTCGTCGGTTTTTCACTCGCAACTGTTATACTCTGCAAACAAA +CGAAAGCCTCATAGTACAAACCTAAAGCTACATACTTCATCATTGGCAGACCAGTGGCGG +TATTTCTACGGAAGCATCACTATAGATATAAAGTTTCCCTTCATGTACGTCTGTTAACCA +TATCACAAGAAACTGCTATCTCTGTCACGTAACAATTCACGCGCCTTATCGCCAAATGTT +CATATATGCGCGGTATACGTATGAACGAATACTAATTAGTATAACGGAGGATTCACGGGA +GGGATACTTGGGGCATTTATAAATCGTCTAAAAATTTTCTATCAGCACTTGCGGGTTATA +GTGGATTACTAGGCAACATAATATTCTGTATTGGTCCAAATGACGCTATAGATAAATTAG +CAAAATACATTGTTTCCATTTATGTAAGTCGAAACTCCAGGACTCCCGGGAACCAGTTAA +ACCGTCTGGAAAAGACACATTGTGAGCGGGACTTCAATGATAGCTTTCAATGAGCTTCTC +ATGCTTGGGGTCTGTACATATATGTTGGCGAAATTATCGTCTGTATTCTGTTATGCTTTG +ATCATGGGTTATTAGTATAGTGTCCGGTTAAGTACCAATACCGCTAGAGACCCGACCTAA +GTCGATAACTAACGATCATCGACGTAAGGATCGTCTCGATCAGTACTTCAGTCTAGATCT +GGGAATAGTAACTCGTTAGTGAACTATGTCGTGTCATAACTCTAAAATGCAATCAAATCT +TATTATTGAGTATTGATTATATAAAGCATCCGCTTAGCTTTACCCTCAAATGTTATATGC +AATTTAAAGCGCTTGATATCGTCTACTCAAGTTCAGGTTTCACATGGCCGCAACGTGACG +TTATTAGAGGTGGGTCATCATCTCTGAGGCTAGTGATGTTGAATACTCATTGAATGGGAA +GTGGAATACCATGCTCGTAGGTAACAGCATGACCTATAAAATATACTATGGGTGTGTGGT +AGATCAATATTGTTCAAGCATATCGTAACAATAACGGCTGAAATGTTACTGACATGAAAG +AGGGAGTCCAAACCATTCTAACAGCTGATCAAGTCGTCTAAAAACGCCTGGTTCAGCCTT +AAGAGTTATAAGCCAGACAAATTGTATCAATAGAGAATCCGTAAATTCCTCGGCCAACCT +CTTGCAAAGACATCACTATCAATATACTACCGTGATCTTAATTAGTGAACTTATATAAAT +ATCTACAACCAGATTCAACGGAAAAGCTTTAGTGGATTAGAAATTGCCAAGAATCACATT +CATGTGGGTTCGAATGCTTTAGTAATACCATTTCGCCGAGTAGTCACTTCGCTGAACTGT +CGTAAATTGCTATGACATAATCGAAAAGGATTGTCAAGAGTCGATTACTGCGGACTAATA +ATCCCCACGGGGGTGGTCTCATGTCTCCCCAGGCGAGTGGGGACGGTTGATAAACACGCT +GCATCGCGGACTGATGTTCCCAGTATTACATAGTCACATTGGATTGCGAGTAGTCTACCT +ATTTATGAGCGAGAGATGCCTCTAACTACTTCGACTTTTAAAACCTTTCCACGCCAGTAT +TCGGCGAAAGGGAAGTATTAAGGGTTGTCATAATTAAGCTGATACCACTTCAGACTTTGC +TCTACTTCTGTCTTTCATTGGTTTAGTAAAGTCTGTCCATTCGTCGAGACCGTCTTTTGC +AGCCTCATTCTACCAACTGCTCCGACTCTTAGTCTGCTTCTCCCAGCGTTATAACAAGAG +GCATTTTGTCATCCTTAAAACAATAATAAAGAACTCGGAGCACTGATATAATGACTGAAT +TAGAACCGCTTAAAAATACAACGAATAGATAAGACTATCGGATAAGATCTAATATGTAGT +GATTAAGCCCTTTATTAATTAATAATAGTTACCCTTTCTGATGTAACGCGACATATTACG +ATTTAGTGGCACGTCTGAATTGCAAAGCAGATCTCTACCCGATTTTTATTATAAATCCCG +TATACATCTTGACTTGAGTAATTGTTCATCTTTTTATATCTCTTCGTACTACAAATAATT +AATATCTCAACCCGTATTGTGTGATTCTAATTACCAACAGAATACGAGGAGGTTTTTGCT +TAGGGCCATATATAATGAATCTATCTCGTTTATTCGCGGAACCCGAGATAACATTACGAT +GTAACTATTTTAGAGAACTTAATACAAGAAACATTGCTGATTACTCATAACTAAATGCTT +GGTAATATATCCTCAGTGCCCCTACCATCTTTTACGCAGGGATGTAATTACTTAGGATTC +ATTGTGTAAGAATTACAATGAACGATGGATATGAAGGCATGTTGCGAGGTGTTCCTTGGT +ATGTGAAGTTCGCAGGGCAACAAAAATTTCGCAGAATAGGCCTCAAAGTATTGGTAAAGA +AGACAACTAATCATCACGAGCTTCTGATATCAATACGAACGAGTCCTGTGATGGATGAAA +GAAAGTCGTATCGAAAATGTCAAGAGTCTGCCCAATGTAACTTACTTCAAAAAATAACGC +TTCCGCCAAGTACGTTCGAATAAACGTAATTTTAAAAATACATAAGGGGTGTTAGAAAGT +AAGCGACGGGATATAAGTTAGACTCAAGATTCCGCCGTAAAACGAGACTGATTCCGAAGA +TTGTTCGTGGATCTGGTCATGACTTTCACTGAGTAAGGAGTTTCGACATATGTCAATAAA +CACAAAAATAGAAGCTATTCGATCTGAAAAATATTAGGACAAGAAACTATCTCACGCTAG +CCCAGAATATTCACTCACCCACGGGCGATACTAAAGCACTATATAGTCGCGTGATTACTA +TACATATGGTACACATAAGAATCACGATCAGGTTCTCAATTTTCAACAATATATGTTTAT +TTGCATAGGTAATATTAGGCCTTTAAGAGAAGGATGGGTGAGATACTCCGGGGATGGCGG +CAATAAAGAAAAACACGATATGAGTAATAGGATCCTAATATCTTGGCGAGAGACTTAAGG +TACGAATTTTGCGCAATCTATTTTTTACTTGGCCAGAATTCATGTATGGTATAAGTACGA +ACTTTTTTGATCACTTTCATGGCTACCTGATTAGGATAGTTTGAGGAATTTCCCAAATAT +ACCGATTTAATATACACTAGGGCTTGTCACTTTGAGTCAGAAAAAGAATATAATTACTTA +GGGTAATGCTGCATACATATTCTTATATTGCAAAGGTTCTCTGGGTAATCTTGAGCCTTC +ACGATACCTGGTGAAGTGTT diff --git a/library/test/benchmark/reverse-complement.factor b/library/test/benchmark/reverse-complement.factor new file mode 100644 index 0000000000..fa2f4834d8 --- /dev/null +++ b/library/test/benchmark/reverse-complement.factor @@ -0,0 +1,59 @@ +IN: temporary +USING: compiler hashtables io kernel math math namespaces +sequences strings vectors words words ; + +! Instead of a variable, we define an inline word which pushes +! the hash on the stack, for performance. +DEFER: trans-hash + +[ + 26 [ CHAR: A + dup set ] each + 26 [ CHAR: a + dup set ] each + + "TGCAAKYRMBDHV" + "ACGTUMRYKVHDB" + 2dup + [ set ] 2each + [ ch>lower set ] 2each +] make-hash + +\ trans-hash swap unit define-compound +\ trans-hash t "inline" set-word-prop + +: translate-seq ( seq -- sbuf ) + [ + 2000000 building set + [ % ] each + building get dup [ trans-hash hash ] inject + ] with-scope ; + +SYMBOL: out + +: seg ( sbuf n -- str ) + 60 * dup 60 + pick length min rot >string ; + +: show-seq ( seq -- ) + translate-seq dup length 59 + 60 /i + [ seg out get stream-print ] each-with ; + +: clear-seq ( seq -- ) 0 swap set-length ; + +: do-line ( seq line -- seq ) + dup first ">;" memq? [ + over show-seq out get stream-print dup clear-seq + ] [ + over push + ] if ; + +: (reverse-complement) ( seq -- ) + readln [ do-line (reverse-complement) ] [ show-seq ] if* ; + +: reverse-complement ( infile outfile -- ) + [ + stdio get out set + [ + 500000 (reverse-complement) + ] with-stream + ] with-stream ; + +{ translate-seq seg clear-seq } [ compile ] each