From 12f990bd1ae894d784c9668fa3eca8b2317bf229 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 4 Jan 2008 20:02:54 -0600 Subject: [PATCH 1/6] change the parser to use file-lines add lines-crc32 update source-files for the contents -> lines change --- core/io/crc32/crc32.factor | 3 +++ core/parser/parser.factor | 6 +++--- core/source-files/source-files.factor | 6 +++--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/core/io/crc32/crc32.factor b/core/io/crc32/crc32.factor index 1c0c2e9f5c..d6a812c3de 100644 --- a/core/io/crc32/crc32.factor +++ b/core/io/crc32/crc32.factor @@ -28,3 +28,6 @@ DEFER: crc32-table inline >r HEX: ffffffff dup r> [ (crc32) ] each bitxor ; : file-crc32 ( path -- n ) file-contents crc32 ; + +: lines-crc32 ( seq -- n ) + HEX: ffffffff tuck [ [ (crc32) ] each ] reduce bitxor ; diff --git a/core/parser/parser.factor b/core/parser/parser.factor index 235d0e935a..2579542af0 100644 --- a/core/parser/parser.factor +++ b/core/parser/parser.factor @@ -382,7 +382,7 @@ SYMBOL: parse-hook dup file set source-file-definitions clone old-definitions set ] [ drop ] if - contents \ contents set ; + lines \ lines set ; : smudged-usage-warning ( usages removed -- ) parser-notes? [ @@ -426,7 +426,7 @@ SYMBOL: parse-hook file get dup [ [ record-form ] keep [ record-modified ] keep - [ \ contents get record-checksum ] keep + [ \ lines get record-checksum ] keep record-definitions forget-smudged ] [ @@ -443,7 +443,7 @@ SYMBOL: parse-hook [ [ start-parsing - \ contents get string-lines parse-fresh + \ lines get parse-fresh dup finish-parsing ] [ ] [ undo-parsing ] cleanup ] no-parse-hook ; diff --git a/core/source-files/source-files.factor b/core/source-files/source-files.factor index 4df59e5dd9..f8fdc45fe9 100644 --- a/core/source-files/source-files.factor +++ b/core/source-files/source-files.factor @@ -33,8 +33,8 @@ uses definitions ; dup source-file-path ?resource-path file-modified swap set-source-file-modified ; -: record-checksum ( source-file contents -- ) - crc32 swap set-source-file-checksum ; +: record-checksum ( source-file lines -- ) + lines-crc32 swap set-source-file-checksum ; : (xref-source) ( source-file -- pathname uses ) dup source-file-path swap source-file-uses @@ -63,7 +63,7 @@ uses definitions ; : reset-checksums ( -- ) source-files get [ swap ?resource-path dup exists? - [ file-contents record-checksum ] [ 2drop ] if + [ file-lines record-checksum ] [ 2drop ] if ] assoc-each ; M: pathname where pathname-string 1 2array ; From 990bb86295279523c989fd9c9c2749bcc420b181 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 4 Jan 2008 20:51:02 -0600 Subject: [PATCH 2/6] Fix the reverse-complement benchmark paths --- .../benchmark/reverse-complement/reverse-complement.factor | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/extra/benchmark/reverse-complement/reverse-complement.factor b/extra/benchmark/reverse-complement/reverse-complement.factor index 4da3972e34..049fcdd9cb 100644 --- a/extra/benchmark/reverse-complement/reverse-complement.factor +++ b/extra/benchmark/reverse-complement/reverse-complement.factor @@ -37,9 +37,9 @@ HINTS: do-line vector string ; ] with-stream ; : reverse-complement-main ( -- ) - "reverse-complement-in.txt" - "reverse-complement-out.txt" - [ home swap path+ ] 2apply + "extra/benchmark/reverse-complement-test-in.txt" + "extra/benchmark/reverse-complement-out.txt" + [ resource-path ] 2apply reverse-complement ; MAIN: reverse-complement-main From df07e61b7b0fbcdb247d5bd618980f3cb537dfb8 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 4 Jan 2008 20:55:58 -0600 Subject: [PATCH 3/6] fix path --- extra/benchmark/reverse-complement/reverse-complement.factor | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extra/benchmark/reverse-complement/reverse-complement.factor b/extra/benchmark/reverse-complement/reverse-complement.factor index 049fcdd9cb..1792f42381 100644 --- a/extra/benchmark/reverse-complement/reverse-complement.factor +++ b/extra/benchmark/reverse-complement/reverse-complement.factor @@ -38,7 +38,7 @@ HINTS: do-line vector string ; : reverse-complement-main ( -- ) "extra/benchmark/reverse-complement-test-in.txt" - "extra/benchmark/reverse-complement-out.txt" + "extra/benchmark/reverse-complement-test-out.txt" [ resource-path ] 2apply reverse-complement ; From 21a489673661dcc487b4c242c21c12dfe8850a21 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 4 Jan 2008 21:02:09 -0600 Subject: [PATCH 4/6] Move reverse-complement files to reverse-complement/ --- .../reverse-complement-test-in.txt | 171 ++++++++++++++++++ .../reverse-complement-test-out.txt | 171 ++++++++++++++++++ .../reverse-complement.factor | 4 +- 3 files changed, 344 insertions(+), 2 deletions(-) create mode 100644 extra/benchmark/reverse-complement/reverse-complement-test-in.txt create mode 100644 extra/benchmark/reverse-complement/reverse-complement-test-out.txt diff --git a/extra/benchmark/reverse-complement/reverse-complement-test-in.txt b/extra/benchmark/reverse-complement/reverse-complement-test-in.txt new file mode 100644 index 0000000000..f1caba0d62 --- /dev/null +++ b/extra/benchmark/reverse-complement/reverse-complement-test-in.txt @@ -0,0 +1,171 @@ +>ONE Homo sapiens alu +GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA +TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT +AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG +GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG +CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGT +GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA +GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA +TTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAG +AATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCA +GCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGT +AATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACC +AGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTG +GTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACC +CGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAG +AGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTT +TGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACA +TGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCT +GTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGG +TTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGT +CTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG +CGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCG +TCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTA +CTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCG +AGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCG +GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACC +TGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAA +TACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGA +GGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACT +GCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTC +ACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGT +TCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGC +CGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCG +CTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTG +GGCGACAGAGCGAGACTCCG +>TWO IUB ambiguity codes +cttBtatcatatgctaKggNcataaaSatgtaaaDcDRtBggDtctttataattcBgtcg +tactDtDagcctatttSVHtHttKtgtHMaSattgWaHKHttttagacatWatgtRgaaa +NtactMcSMtYtcMgRtacttctWBacgaaatatagScDtttgaagacacatagtVgYgt +cattHWtMMWcStgttaggKtSgaYaaccWStcgBttgcgaMttBYatcWtgacaYcaga +gtaBDtRacttttcWatMttDBcatWtatcttactaBgaYtcttgttttttttYaaScYa +HgtgttNtSatcMtcVaaaStccRcctDaataataStcYtRDSaMtDttgttSagtRRca +tttHatSttMtWgtcgtatSSagactYaaattcaMtWatttaSgYttaRgKaRtccactt +tattRggaMcDaWaWagttttgacatgttctacaaaRaatataataaMttcgDacgaSSt +acaStYRctVaNMtMgtaggcKatcttttattaaaaagVWaHKYagtttttatttaacct +tacgtVtcVaattVMBcttaMtttaStgacttagattWWacVtgWYagWVRctDattBYt +gtttaagaagattattgacVatMaacattVctgtBSgaVtgWWggaKHaatKWcBScSWa +accRVacacaaactaccScattRatatKVtactatatttHttaagtttSKtRtacaaagt +RDttcaaaaWgcacatWaDgtDKacgaacaattacaRNWaatHtttStgttattaaMtgt +tgDcgtMgcatBtgcttcgcgaDWgagctgcgaggggVtaaScNatttacttaatgacag +cccccacatYScaMgtaggtYaNgttctgaMaacNaMRaacaaacaKctacatagYWctg +ttWaaataaaataRattagHacacaagcgKatacBttRttaagtatttccgatctHSaat +actcNttMaagtattMtgRtgaMgcataatHcMtaBSaRattagttgatHtMttaaKagg +YtaaBataSaVatactWtataVWgKgttaaaacagtgcgRatatacatVtHRtVYataSa +KtWaStVcNKHKttactatccctcatgWHatWaRcttactaggatctataDtDHBttata +aaaHgtacVtagaYttYaKcctattcttcttaataNDaaggaaaDYgcggctaaWSctBa +aNtgctggMBaKctaMVKagBaactaWaDaMaccYVtNtaHtVWtKgRtcaaNtYaNacg +gtttNattgVtttctgtBaWgtaattcaagtcaVWtactNggattctttaYtaaagccgc +tcttagHVggaYtgtNcDaVagctctctKgacgtatagYcctRYHDtgBattDaaDgccK +tcHaaStttMcctagtattgcRgWBaVatHaaaataYtgtttagMDMRtaataaggatMt +ttctWgtNtgtgaaaaMaatatRtttMtDgHHtgtcattttcWattRSHcVagaagtacg +ggtaKVattKYagactNaatgtttgKMMgYNtcccgSKttctaStatatNVataYHgtNa +BKRgNacaactgatttcctttaNcgatttctctataScaHtataRagtcRVttacDSDtt +aRtSatacHgtSKacYagttMHtWataggatgactNtatSaNctataVtttRNKtgRacc +tttYtatgttactttttcctttaaacatacaHactMacacggtWataMtBVacRaSaatc +cgtaBVttccagccBcttaRKtgtgcctttttRtgtcagcRttKtaaacKtaaatctcac +aattgcaNtSBaaccgggttattaaBcKatDagttactcttcattVtttHaaggctKKga +tacatcBggScagtVcacattttgaHaDSgHatRMaHWggtatatRgccDttcgtatcga +aacaHtaagttaRatgaVacttagattVKtaaYttaaatcaNatccRttRRaMScNaaaD +gttVHWgtcHaaHgacVaWtgttScactaagSgttatcttagggDtaccagWattWtRtg +ttHWHacgattBtgVcaYatcggttgagKcWtKKcaVtgaYgWctgYggVctgtHgaNcV +taBtWaaYatcDRaaRtSctgaHaYRttagatMatgcatttNattaDttaattgttctaa +ccctcccctagaWBtttHtBccttagaVaatMcBHagaVcWcagBVttcBtaYMccagat +gaaaaHctctaacgttagNWRtcggattNatcRaNHttcagtKttttgWatWttcSaNgg +gaWtactKKMaacatKatacNattgctWtatctaVgagctatgtRaHtYcWcttagccaa +tYttWttaWSSttaHcaaaaagVacVgtaVaRMgattaVcDactttcHHggHRtgNcctt +tYatcatKgctcctctatVcaaaaKaaaagtatatctgMtWtaaaacaStttMtcgactt +taSatcgDataaactaaacaagtaaVctaggaSccaatMVtaaSKNVattttgHccatca +cBVctgcaVatVttRtactgtVcaattHgtaaattaaattttYtatattaaRSgYtgBag +aHSBDgtagcacRHtYcBgtcacttacactaYcgctWtattgSHtSatcataaatataHt +cgtYaaMNgBaatttaRgaMaatatttBtttaaaHHKaatctgatWatYaacttMctctt +ttVctagctDaaagtaVaKaKRtaacBgtatccaaccactHHaagaagaaggaNaaatBW +attccgStaMSaMatBttgcatgRSacgttVVtaaDMtcSgVatWcaSatcttttVatag +ttactttacgatcaccNtaDVgSRcgVcgtgaacgaNtaNatatagtHtMgtHcMtagaa +attBgtataRaaaacaYKgtRccYtatgaagtaataKgtaaMttgaaRVatgcagaKStc +tHNaaatctBBtcttaYaBWHgtVtgacagcaRcataWctcaBcYacYgatDgtDHccta +>THREE Homo sapiens frequency +aacacttcaccaggtatcgtgaaggctcaagattacccagagaacctttgcaatataaga +atatgtatgcagcattaccctaagtaattatattctttttctgactcaaagtgacaagcc +ctagtgtatattaaatcggtatatttgggaaattcctcaaactatcctaatcaggtagcc +atgaaagtgatcaaaaaagttcgtacttataccatacatgaattctggccaagtaaaaaa +tagattgcgcaaaattcgtaccttaagtctctcgccaagatattaggatcctattactca +tatcgtgtttttctttattgccgccatccccggagtatctcacccatccttctcttaaag +gcctaatattacctatgcaaataaacatatattgttgaaaattgagaacctgatcgtgat +tcttatgtgtaccatatgtatagtaatcacgcgactatatagtgctttagtatcgcccgt +gggtgagtgaatattctgggctagcgtgagatagtttcttgtcctaatatttttcagatc +gaatagcttctatttttgtgtttattgacatatgtcgaaactccttactcagtgaaagtc +atgaccagatccacgaacaatcttcggaatcagtctcgttttacggcggaatcttgagtc +taacttatatcccgtcgcttactttctaacaccccttatgtatttttaaaattacgttta +ttcgaacgtacttggcggaagcgttattttttgaagtaagttacattgggcagactcttg +acattttcgatacgactttctttcatccatcacaggactcgttcgtattgatatcagaag +ctcgtgatgattagttgtcttctttaccaatactttgaggcctattctgcgaaatttttg +ttgccctgcgaacttcacataccaaggaacacctcgcaacatgccttcatatccatcgtt +cattgtaattcttacacaatgaatcctaagtaattacatccctgcgtaaaagatggtagg +ggcactgaggatatattaccaagcatttagttatgagtaatcagcaatgtttcttgtatt +aagttctctaaaatagttacatcgtaatgttatctcgggttccgcgaataaacgagatag +attcattatatatggccctaagcaaaaacctcctcgtattctgttggtaattagaatcac +acaatacgggttgagatattaattatttgtagtacgaagagatataaaaagatgaacaat +tactcaagtcaagatgtatacgggatttataataaaaatcgggtagagatctgctttgca +attcagacgtgccactaaatcgtaatatgtcgcgttacatcagaaagggtaactattatt +aattaataaagggcttaatcactacatattagatcttatccgatagtcttatctattcgt +tgtatttttaagcggttctaattcagtcattatatcagtgctccgagttctttattattg +ttttaaggatgacaaaatgcctcttgttataacgctgggagaagcagactaagagtcgga +gcagttggtagaatgaggctgcaaaagacggtctcgacgaatggacagactttactaaac +caatgaaagacagaagtagagcaaagtctgaagtggtatcagcttaattatgacaaccct +taatacttccctttcgccgaatactggcgtggaaaggttttaaaagtcgaagtagttaga +ggcatctctcgctcataaataggtagactactcgcaatccaatgtgactatgtaatactg +ggaacatcagtccgcgatgcagcgtgtttatcaaccgtccccactcgcctggggagacat +gagaccacccccgtggggattattagtccgcagtaatcgactcttgacaatccttttcga +ttatgtcatagcaatttacgacagttcagcgaagtgactactcggcgaaatggtattact +aaagcattcgaacccacatgaatgtgattcttggcaatttctaatccactaaagcttttc +cgttgaatctggttgtagatatttatataagttcactaattaagatcacggtagtatatt +gatagtgatgtctttgcaagaggttggccgaggaatttacggattctctattgatacaat +ttgtctggcttataactcttaaggctgaaccaggcgtttttagacgacttgatcagctgt +tagaatggtttggactccctctttcatgtcagtaacatttcagccgttattgttacgata +tgcttgaacaatattgatctaccacacacccatagtatattttataggtcatgctgttac +ctacgagcatggtattccacttcccattcaatgagtattcaacatcactagcctcagaga +tgatgacccacctctaataacgtcacgttgcggccatgtgaaacctgaacttgagtagac +gatatcaagcgctttaaattgcatataacatttgagggtaaagctaagcggatgctttat +ataatcaatactcaataataagatttgattgcattttagagttatgacacgacatagttc +actaacgagttactattcccagatctagactgaagtactgatcgagacgatccttacgtc +gatgatcgttagttatcgacttaggtcgggtctctagcggtattggtacttaaccggaca +ctatactaataacccatgatcaaagcataacagaatacagacgataatttcgccaacata +tatgtacagaccccaagcatgagaagctcattgaaagctatcattgaagtcccgctcaca +atgtgtcttttccagacggtttaactggttcccgggagtcctggagtttcgacttacata +aatggaaacaatgtattttgctaatttatctatagcgtcatttggaccaatacagaatat +tatgttgcctagtaatccactataacccgcaagtgctgatagaaaatttttagacgattt +ataaatgccccaagtatccctcccgtgaatcctccgttatactaattagtattcgttcat +acgtataccgcgcatatatgaacatttggcgataaggcgcgtgaattgttacgtgacaga +gatagcagtttcttgtgatatggttaacagacgtacatgaagggaaactttatatctata +gtgatgcttccgtagaaataccgccactggtctgccaatgatgaagtatgtagctttagg +tttgtactatgaggctttcgtttgtttgcagagtataacagttgcgagtgaaaaaccgac +gaatttatactaatacgctttcactattggctacaaaatagggaagagtttcaatcatga +gagggagtatatggatgctttgtagctaaaggtagaacgtatgtatatgctgccgttcat +tcttgaaagatacataagcgataagttacgacaattataagcaacatccctaccttcgta +acgatttcactgttactgcgcttgaaatacactatggggctattggcggagagaagcaga +tcgcgccgagcatatacgagacctataatgttgatgatagagaaggcgtctgaattgata +catcgaagtacactttctttcgtagtatctctcgtcctctttctatctccggacacaaga +attaagttatatatatagagtcttaccaatcatgttgaatcctgattctcagagttcttt +ggcgggccttgtgatgactgagaaacaatgcaatattgctccaaatttcctaagcaaatt +ctcggttatgttatgttatcagcaaagcgttacgttatgttatttaaatctggaatgacg +gagcgaagttcttatgtcggtgtgggaataattcttttgaagacagcactccttaaataa +tatcgctccgtgtttgtatttatcgaatgggtctgtaaccttgcacaagcaaatcggtgg +tgtatatatcggataacaattaatacgatgttcatagtgacagtatactgatcgagtcct +ctaaagtcaattacctcacttaacaatctcattgatgttgtgtcattcccggtatcgccc +gtagtatgtgctctgattgaccgagtgtgaaccaaggaacatctactaatgcctttgtta +ggtaagatctctctgaattccttcgtgccaacttaaaacattatcaaaatttcttctact +tggattaactacttttacgagcatggcaaattcccctgtggaagacggttcattattatc +ggaaaccttatagaaattgcgtgttgactgaaattagatttttattgtaagagttgcatc +tttgcgattcctctggtctagcttccaatgaacagtcctcccttctattcgacatcgggt +ccttcgtacatgtctttgcgatgtaataattaggttcggagtgtggccttaatgggtgca +actaggaatacaacgcaaatttgctgacatgatagcaaatcggtatgccggcaccaaaac +gtgctccttgcttagcttgtgaatgagactcagtagttaaataaatccatatctgcaatc +gattccacaggtattgtccactatctttgaactactctaagagatacaagcttagctgag +accgaggtgtatatgactacgctgatatctgtaaggtaccaatgcaggcaaagtatgcga +gaagctaataccggctgtttccagctttataagattaaaatttggctgtcctggcggcct +cagaattgttctatcgtaatcagttggttcattaattagctaagtacgaggtacaactta +tctgtcccagaacagctccacaagtttttttacagccgaaacccctgtgtgaatcttaat +atccaagcgcgttatctgattagagtttacaactcagtattttatcagtacgttttgttt +ccaacattacccggtatgacaaaatgacgccacgtgtcgaataatggtctgaccaatgta +ggaagtgaaaagataaatat diff --git a/extra/benchmark/reverse-complement/reverse-complement-test-out.txt b/extra/benchmark/reverse-complement/reverse-complement-test-out.txt new file mode 100644 index 0000000000..14d792ade8 --- /dev/null +++ b/extra/benchmark/reverse-complement/reverse-complement-test-out.txt @@ -0,0 +1,171 @@ +>ONE Homo sapiens alu +CGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAAC +CTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACA +GGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCAT +GTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAA +AGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTC +TGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGG +GTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACC +ACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTG +GTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTA +CAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCT +GGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTC +TCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAAT +TTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCT +GACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCA +CCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGC +GCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCC +TCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTA +GTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGAT +CCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCT +TTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTC +ACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTG +GGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGT +TTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGG +CCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAG +TCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCG +CCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGC +GCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGG +CCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGC +TGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCG +CCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCA +AGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCC +CGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTC +GAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGC +GTGAGCCACCGCGCCCGGCC +>TWO IUB ambiguity codes +TAGGDHACHATCRGTRGVTGAGWTATGYTGCTGTCABACDWVTRTAAGAVVAGATTTNDA +GASMTCTGCATBYTTCAAKTTACMTATTACTTCATARGGYACMRTGTTTTYTATACVAAT +TTCTAKGDACKADACTATATNTANTCGTTCACGBCGYSCBHTANGGTGATCGTAAAGTAA +CTATBAAAAGATSTGWATBCSGAKHTTABBAACGTSYCATGCAAVATKTSKTASCGGAAT +WVATTTNTCCTTCTTCTTDDAGTGGTTGGATACVGTTAYMTMTBTACTTTHAGCTAGBAA +AAGAGKAAGTTRATWATCAGATTMDDTTTAAAVAAATATTKTCYTAAATTVCNKTTRACG +ADTATATTTATGATSADSCAATAWAGCGRTAGTGTAAGTGACVGRADYGTGCTACHVSDT +CTVCARCSYTTAATATARAAAATTTAATTTACDAATTGBACAGTAYAABATBTGCAGBVG +TGATGGDCAAAATBNMSTTABKATTGGSTCCTAGBTTACTTGTTTAGTTTATHCGATSTA +AAGTCGAKAAASTGTTTTAWAKCAGATATACTTTTMTTTTGBATAGAGGAGCMATGATRA +AAGGNCAYDCCDDGAAAGTHGBTAATCKYTBTACBGTBCTTTTTGDTAASSWTAAWAARA +TTGGCTAAGWGRADTYACATAGCTCBTAGATAWAGCAATNGTATMATGTTKMMAGTAWTC +CCNTSGAAWATWCAAAAMACTGAADNTYGATNAATCCGAYWNCTAACGTTAGAGDTTTTC +ATCTGGKRTAVGAABVCTGWGBTCTDVGKATTBTCTAAGGVADAAAVWTCTAGGGGAGGG +TTAGAACAATTAAHTAATNAAATGCATKATCTAAYRTDTCAGSAYTTYHGATRTTWAVTA +BGNTCDACAGBCCRCAGWCRTCABTGMMAWGMCTCAACCGATRTGBCAVAATCGTDWDAA +CAYAWAATWCTGGTAHCCCTAAGATAACSCTTAGTGSAACAWTBGTCDTTDGACWDBAAC +HTTTNGSKTYYAAYGGATNTGATTTAARTTAMBAATCTAAGTBTCATYTAACTTADTGTT +TCGATACGAAHGGCYATATACCWDTKYATDCSHTDTCAAAATGTGBACTGSCCVGATGTA +TCMMAGCCTTDAAABAATGAAGAGTAACTHATMGVTTAATAACCCGGTTVSANTGCAATT +GTGAGATTTAMGTTTAMAAYGCTGACAYAAAAAGGCACAMYTAAGVGGCTGGAABVTACG +GATTSTYGTBVAKTATWACCGTGTKAGTDTGTATGTTTAAAGGAAAAAGTAACATARAAA +GGTYCAMNYAAABTATAGNTSATANAGTCATCCTATWADKAACTRGTMSACDGTATSAYT +AAHSHGTAABYGACTYTATADTGSTATAGAGAAATCGNTAAAGGAAATCAGTTGTNCYMV +TNACDRTATBNATATASTAGAAMSCGGGANRCKKMCAAACATTNAGTCTRMAATBMTACC +CGTACTTCTBGDSYAATWGAAAATGACADDCHAKAAAYATATTKTTTTCACANACWAGAA +AKATCCTTATTAYKHKCTAAACARTATTTTDATBTVWCYGCAATACTAGGKAAASTTDGA +MGGCHTTHAATVCAHDRYAGGRCTATACGTCMAGAGAGCTBTHGNACARTCCBDCTAAGA +GCGGCTTTARTAAAGAATCCNAGTAWBTGACTTGAATTACWTVACAGAAABCAATNAAAC +CGTNTRANTTGAYCMAWBADTANABRGGTKTHTWTAGTTVCTMBKTAGMTVKCCAGCANT +TVAGSWTTAGCCGCRHTTTCCTTHNTATTAAGAAGAATAGGMTRAARTCTABGTACDTTT +TATAAVDHAHTATAGATCCTAGTAAGYTWATDWCATGAGGGATAGTAAMDMNGBASTWAM +TSTATRBAYDABATGTATATYCGCACTGTTTTAACMCWBTATAWAGTATBTSTATVTTAR +CCTMTTAAKADATCAACTAATYTSVTAKGDATTATGCKTCAYCAKAATACTTKAANGAGT +ATTSDAGATCGGAAATACTTAAYAAVGTATMCGCTTGTGTDCTAATYTATTTTATTTWAA +CAGWRCTATGTAGMTGTTTGTTYKTNGTTKTCAGAACNTRACCTACKTGSRATGTGGGGG +CTGTCATTAAGTAAATNGSTTABCCCCTCGCAGCTCWHTCGCGAAGCAVATGCKACGHCA +ACAKTTAATAACASAAADATTWNYTGTAATTGTTCGTMHACHTWATGTGCWTTTTGAAHY +ACTTTGTAYAMSAAACTTAADAAATATAGTABMATATYAATGSGGTAGTTTGTGTBYGGT +TWSGSVGWMATTDMTCCWWCABTCSVACAGBAATGTTKATBGTCAATAATCTTCTTAAAC +ARVAATHAGYBWCTRWCABGTWWAATCTAAGTCASTAAAKTAAGVKBAATTBGABACGTA +AGGTTAAATAAAAACTRMDTWBCTTTTTAATAAAAGATMGCCTACKAKNTBAGYRASTGT +ASSTCGTHCGAAKTTATTATATTYTTTGTAGAACATGTCAAAACTWTWTHGKTCCYAATA +AAGTGGAYTMCYTAARCSTAAATWAKTGAATTTRAGTCTSSATACGACWAKAASATDAAA +TGYYACTSAACAAHAKTSHYARGASTATTATTHAGGYGGASTTTBGAKGATSANAACACD +TRGSTTRAAAAAAAACAAGARTCVTAGTAAGATAWATGVHAAKATWGAAAAGTYAHVTAC +TCTGRTGTCAWGATRVAAKTCGCAAVCGASWGGTTRTCSAMCCTAACASGWKKAWDAATG +ACRCBACTATGTGTCTTCAAAHGSCTATATTTCGTVWAGAAGTAYCKGARAKSGKAGTAN +TTTCYACATWATGTCTAAAADMDTWCAATSTKDACAMAADADBSAAATAGGCTHAHAGTA +CGACVGAATTATAAAGAHCCVAYHGHTTTACATSTTTATGNCCMTAGCATATGATAVAAG +>THREE Homo sapiens frequency +ATATTTATCTTTTCACTTCCTACATTGGTCAGACCATTATTCGACACGTGGCGTCATTTT +GTCATACCGGGTAATGTTGGAAACAAAACGTACTGATAAAATACTGAGTTGTAAACTCTA +ATCAGATAACGCGCTTGGATATTAAGATTCACACAGGGGTTTCGGCTGTAAAAAAACTTG +TGGAGCTGTTCTGGGACAGATAAGTTGTACCTCGTACTTAGCTAATTAATGAACCAACTG +ATTACGATAGAACAATTCTGAGGCCGCCAGGACAGCCAAATTTTAATCTTATAAAGCTGG +AAACAGCCGGTATTAGCTTCTCGCATACTTTGCCTGCATTGGTACCTTACAGATATCAGC +GTAGTCATATACACCTCGGTCTCAGCTAAGCTTGTATCTCTTAGAGTAGTTCAAAGATAG +TGGACAATACCTGTGGAATCGATTGCAGATATGGATTTATTTAACTACTGAGTCTCATTC +ACAAGCTAAGCAAGGAGCACGTTTTGGTGCCGGCATACCGATTTGCTATCATGTCAGCAA +ATTTGCGTTGTATTCCTAGTTGCACCCATTAAGGCCACACTCCGAACCTAATTATTACAT +CGCAAAGACATGTACGAAGGACCCGATGTCGAATAGAAGGGAGGACTGTTCATTGGAAGC +TAGACCAGAGGAATCGCAAAGATGCAACTCTTACAATAAAAATCTAATTTCAGTCAACAC +GCAATTTCTATAAGGTTTCCGATAATAATGAACCGTCTTCCACAGGGGAATTTGCCATGC +TCGTAAAAGTAGTTAATCCAAGTAGAAGAAATTTTGATAATGTTTTAAGTTGGCACGAAG +GAATTCAGAGAGATCTTACCTAACAAAGGCATTAGTAGATGTTCCTTGGTTCACACTCGG +TCAATCAGAGCACATACTACGGGCGATACCGGGAATGACACAACATCAATGAGATTGTTA +AGTGAGGTAATTGACTTTAGAGGACTCGATCAGTATACTGTCACTATGAACATCGTATTA +ATTGTTATCCGATATATACACCACCGATTTGCTTGTGCAAGGTTACAGACCCATTCGATA +AATACAAACACGGAGCGATATTATTTAAGGAGTGCTGTCTTCAAAAGAATTATTCCCACA +CCGACATAAGAACTTCGCTCCGTCATTCCAGATTTAAATAACATAACGTAACGCTTTGCT +GATAACATAACATAACCGAGAATTTGCTTAGGAAATTTGGAGCAATATTGCATTGTTTCT +CAGTCATCACAAGGCCCGCCAAAGAACTCTGAGAATCAGGATTCAACATGATTGGTAAGA +CTCTATATATATAACTTAATTCTTGTGTCCGGAGATAGAAAGAGGACGAGAGATACTACG +AAAGAAAGTGTACTTCGATGTATCAATTCAGACGCCTTCTCTATCATCAACATTATAGGT +CTCGTATATGCTCGGCGCGATCTGCTTCTCTCCGCCAATAGCCCCATAGTGTATTTCAAG +CGCAGTAACAGTGAAATCGTTACGAAGGTAGGGATGTTGCTTATAATTGTCGTAACTTAT +CGCTTATGTATCTTTCAAGAATGAACGGCAGCATATACATACGTTCTACCTTTAGCTACA +AAGCATCCATATACTCCCTCTCATGATTGAAACTCTTCCCTATTTTGTAGCCAATAGTGA +AAGCGTATTAGTATAAATTCGTCGGTTTTTCACTCGCAACTGTTATACTCTGCAAACAAA +CGAAAGCCTCATAGTACAAACCTAAAGCTACATACTTCATCATTGGCAGACCAGTGGCGG +TATTTCTACGGAAGCATCACTATAGATATAAAGTTTCCCTTCATGTACGTCTGTTAACCA +TATCACAAGAAACTGCTATCTCTGTCACGTAACAATTCACGCGCCTTATCGCCAAATGTT +CATATATGCGCGGTATACGTATGAACGAATACTAATTAGTATAACGGAGGATTCACGGGA +GGGATACTTGGGGCATTTATAAATCGTCTAAAAATTTTCTATCAGCACTTGCGGGTTATA +GTGGATTACTAGGCAACATAATATTCTGTATTGGTCCAAATGACGCTATAGATAAATTAG +CAAAATACATTGTTTCCATTTATGTAAGTCGAAACTCCAGGACTCCCGGGAACCAGTTAA +ACCGTCTGGAAAAGACACATTGTGAGCGGGACTTCAATGATAGCTTTCAATGAGCTTCTC +ATGCTTGGGGTCTGTACATATATGTTGGCGAAATTATCGTCTGTATTCTGTTATGCTTTG +ATCATGGGTTATTAGTATAGTGTCCGGTTAAGTACCAATACCGCTAGAGACCCGACCTAA +GTCGATAACTAACGATCATCGACGTAAGGATCGTCTCGATCAGTACTTCAGTCTAGATCT +GGGAATAGTAACTCGTTAGTGAACTATGTCGTGTCATAACTCTAAAATGCAATCAAATCT +TATTATTGAGTATTGATTATATAAAGCATCCGCTTAGCTTTACCCTCAAATGTTATATGC +AATTTAAAGCGCTTGATATCGTCTACTCAAGTTCAGGTTTCACATGGCCGCAACGTGACG +TTATTAGAGGTGGGTCATCATCTCTGAGGCTAGTGATGTTGAATACTCATTGAATGGGAA +GTGGAATACCATGCTCGTAGGTAACAGCATGACCTATAAAATATACTATGGGTGTGTGGT +AGATCAATATTGTTCAAGCATATCGTAACAATAACGGCTGAAATGTTACTGACATGAAAG +AGGGAGTCCAAACCATTCTAACAGCTGATCAAGTCGTCTAAAAACGCCTGGTTCAGCCTT +AAGAGTTATAAGCCAGACAAATTGTATCAATAGAGAATCCGTAAATTCCTCGGCCAACCT +CTTGCAAAGACATCACTATCAATATACTACCGTGATCTTAATTAGTGAACTTATATAAAT +ATCTACAACCAGATTCAACGGAAAAGCTTTAGTGGATTAGAAATTGCCAAGAATCACATT +CATGTGGGTTCGAATGCTTTAGTAATACCATTTCGCCGAGTAGTCACTTCGCTGAACTGT +CGTAAATTGCTATGACATAATCGAAAAGGATTGTCAAGAGTCGATTACTGCGGACTAATA +ATCCCCACGGGGGTGGTCTCATGTCTCCCCAGGCGAGTGGGGACGGTTGATAAACACGCT +GCATCGCGGACTGATGTTCCCAGTATTACATAGTCACATTGGATTGCGAGTAGTCTACCT +ATTTATGAGCGAGAGATGCCTCTAACTACTTCGACTTTTAAAACCTTTCCACGCCAGTAT +TCGGCGAAAGGGAAGTATTAAGGGTTGTCATAATTAAGCTGATACCACTTCAGACTTTGC +TCTACTTCTGTCTTTCATTGGTTTAGTAAAGTCTGTCCATTCGTCGAGACCGTCTTTTGC +AGCCTCATTCTACCAACTGCTCCGACTCTTAGTCTGCTTCTCCCAGCGTTATAACAAGAG +GCATTTTGTCATCCTTAAAACAATAATAAAGAACTCGGAGCACTGATATAATGACTGAAT +TAGAACCGCTTAAAAATACAACGAATAGATAAGACTATCGGATAAGATCTAATATGTAGT +GATTAAGCCCTTTATTAATTAATAATAGTTACCCTTTCTGATGTAACGCGACATATTACG +ATTTAGTGGCACGTCTGAATTGCAAAGCAGATCTCTACCCGATTTTTATTATAAATCCCG +TATACATCTTGACTTGAGTAATTGTTCATCTTTTTATATCTCTTCGTACTACAAATAATT +AATATCTCAACCCGTATTGTGTGATTCTAATTACCAACAGAATACGAGGAGGTTTTTGCT +TAGGGCCATATATAATGAATCTATCTCGTTTATTCGCGGAACCCGAGATAACATTACGAT +GTAACTATTTTAGAGAACTTAATACAAGAAACATTGCTGATTACTCATAACTAAATGCTT +GGTAATATATCCTCAGTGCCCCTACCATCTTTTACGCAGGGATGTAATTACTTAGGATTC +ATTGTGTAAGAATTACAATGAACGATGGATATGAAGGCATGTTGCGAGGTGTTCCTTGGT +ATGTGAAGTTCGCAGGGCAACAAAAATTTCGCAGAATAGGCCTCAAAGTATTGGTAAAGA +AGACAACTAATCATCACGAGCTTCTGATATCAATACGAACGAGTCCTGTGATGGATGAAA +GAAAGTCGTATCGAAAATGTCAAGAGTCTGCCCAATGTAACTTACTTCAAAAAATAACGC +TTCCGCCAAGTACGTTCGAATAAACGTAATTTTAAAAATACATAAGGGGTGTTAGAAAGT +AAGCGACGGGATATAAGTTAGACTCAAGATTCCGCCGTAAAACGAGACTGATTCCGAAGA +TTGTTCGTGGATCTGGTCATGACTTTCACTGAGTAAGGAGTTTCGACATATGTCAATAAA +CACAAAAATAGAAGCTATTCGATCTGAAAAATATTAGGACAAGAAACTATCTCACGCTAG +CCCAGAATATTCACTCACCCACGGGCGATACTAAAGCACTATATAGTCGCGTGATTACTA +TACATATGGTACACATAAGAATCACGATCAGGTTCTCAATTTTCAACAATATATGTTTAT +TTGCATAGGTAATATTAGGCCTTTAAGAGAAGGATGGGTGAGATACTCCGGGGATGGCGG +CAATAAAGAAAAACACGATATGAGTAATAGGATCCTAATATCTTGGCGAGAGACTTAAGG +TACGAATTTTGCGCAATCTATTTTTTACTTGGCCAGAATTCATGTATGGTATAAGTACGA +ACTTTTTTGATCACTTTCATGGCTACCTGATTAGGATAGTTTGAGGAATTTCCCAAATAT +ACCGATTTAATATACACTAGGGCTTGTCACTTTGAGTCAGAAAAAGAATATAATTACTTA +GGGTAATGCTGCATACATATTCTTATATTGCAAAGGTTCTCTGGGTAATCTTGAGCCTTC +ACGATACCTGGTGAAGTGTT diff --git a/extra/benchmark/reverse-complement/reverse-complement.factor b/extra/benchmark/reverse-complement/reverse-complement.factor index 1792f42381..332489abed 100644 --- a/extra/benchmark/reverse-complement/reverse-complement.factor +++ b/extra/benchmark/reverse-complement/reverse-complement.factor @@ -37,8 +37,8 @@ HINTS: do-line vector string ; ] with-stream ; : reverse-complement-main ( -- ) - "extra/benchmark/reverse-complement-test-in.txt" - "extra/benchmark/reverse-complement-test-out.txt" + "extra/benchmark/reverse-complement/reverse-complement-test-in.txt" + "extra/benchmark/reverse-complement/reverse-complement-test-out.txt" [ resource-path ] 2apply reverse-complement ; From 8ca13a55706983d25ff26e60a334e9b7228a8d63 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 4 Jan 2008 21:10:49 -0600 Subject: [PATCH 5/6] renamed two files --- .../benchmark/reverse-complement-test-in.txt | 171 ------------------ .../benchmark/reverse-complement-test-out.txt | 171 ------------------ 2 files changed, 342 deletions(-) delete mode 100644 extra/benchmark/reverse-complement-test-in.txt delete mode 100644 extra/benchmark/reverse-complement-test-out.txt diff --git a/extra/benchmark/reverse-complement-test-in.txt b/extra/benchmark/reverse-complement-test-in.txt deleted file mode 100644 index f1caba0d62..0000000000 --- a/extra/benchmark/reverse-complement-test-in.txt +++ /dev/null @@ -1,171 +0,0 @@ ->ONE Homo sapiens alu -GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA -TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT -AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG -GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG -CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGT -GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA -GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA -TTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAG -AATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCA -GCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGT -AATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACC -AGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTG -GTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACC -CGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAG -AGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTT -TGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACA -TGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCT -GTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGG -TTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGT -CTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG -CGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCG -TCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTA -CTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCG -AGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCG -GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACC -TGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAA -TACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGA -GGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACT -GCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTC -ACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGT -TCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGC -CGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCG -CTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTG -GGCGACAGAGCGAGACTCCG ->TWO IUB ambiguity codes -cttBtatcatatgctaKggNcataaaSatgtaaaDcDRtBggDtctttataattcBgtcg -tactDtDagcctatttSVHtHttKtgtHMaSattgWaHKHttttagacatWatgtRgaaa -NtactMcSMtYtcMgRtacttctWBacgaaatatagScDtttgaagacacatagtVgYgt -cattHWtMMWcStgttaggKtSgaYaaccWStcgBttgcgaMttBYatcWtgacaYcaga -gtaBDtRacttttcWatMttDBcatWtatcttactaBgaYtcttgttttttttYaaScYa -HgtgttNtSatcMtcVaaaStccRcctDaataataStcYtRDSaMtDttgttSagtRRca -tttHatSttMtWgtcgtatSSagactYaaattcaMtWatttaSgYttaRgKaRtccactt -tattRggaMcDaWaWagttttgacatgttctacaaaRaatataataaMttcgDacgaSSt -acaStYRctVaNMtMgtaggcKatcttttattaaaaagVWaHKYagtttttatttaacct -tacgtVtcVaattVMBcttaMtttaStgacttagattWWacVtgWYagWVRctDattBYt -gtttaagaagattattgacVatMaacattVctgtBSgaVtgWWggaKHaatKWcBScSWa -accRVacacaaactaccScattRatatKVtactatatttHttaagtttSKtRtacaaagt -RDttcaaaaWgcacatWaDgtDKacgaacaattacaRNWaatHtttStgttattaaMtgt -tgDcgtMgcatBtgcttcgcgaDWgagctgcgaggggVtaaScNatttacttaatgacag -cccccacatYScaMgtaggtYaNgttctgaMaacNaMRaacaaacaKctacatagYWctg -ttWaaataaaataRattagHacacaagcgKatacBttRttaagtatttccgatctHSaat -actcNttMaagtattMtgRtgaMgcataatHcMtaBSaRattagttgatHtMttaaKagg -YtaaBataSaVatactWtataVWgKgttaaaacagtgcgRatatacatVtHRtVYataSa -KtWaStVcNKHKttactatccctcatgWHatWaRcttactaggatctataDtDHBttata -aaaHgtacVtagaYttYaKcctattcttcttaataNDaaggaaaDYgcggctaaWSctBa -aNtgctggMBaKctaMVKagBaactaWaDaMaccYVtNtaHtVWtKgRtcaaNtYaNacg -gtttNattgVtttctgtBaWgtaattcaagtcaVWtactNggattctttaYtaaagccgc -tcttagHVggaYtgtNcDaVagctctctKgacgtatagYcctRYHDtgBattDaaDgccK -tcHaaStttMcctagtattgcRgWBaVatHaaaataYtgtttagMDMRtaataaggatMt -ttctWgtNtgtgaaaaMaatatRtttMtDgHHtgtcattttcWattRSHcVagaagtacg -ggtaKVattKYagactNaatgtttgKMMgYNtcccgSKttctaStatatNVataYHgtNa -BKRgNacaactgatttcctttaNcgatttctctataScaHtataRagtcRVttacDSDtt -aRtSatacHgtSKacYagttMHtWataggatgactNtatSaNctataVtttRNKtgRacc -tttYtatgttactttttcctttaaacatacaHactMacacggtWataMtBVacRaSaatc -cgtaBVttccagccBcttaRKtgtgcctttttRtgtcagcRttKtaaacKtaaatctcac -aattgcaNtSBaaccgggttattaaBcKatDagttactcttcattVtttHaaggctKKga -tacatcBggScagtVcacattttgaHaDSgHatRMaHWggtatatRgccDttcgtatcga -aacaHtaagttaRatgaVacttagattVKtaaYttaaatcaNatccRttRRaMScNaaaD -gttVHWgtcHaaHgacVaWtgttScactaagSgttatcttagggDtaccagWattWtRtg -ttHWHacgattBtgVcaYatcggttgagKcWtKKcaVtgaYgWctgYggVctgtHgaNcV -taBtWaaYatcDRaaRtSctgaHaYRttagatMatgcatttNattaDttaattgttctaa -ccctcccctagaWBtttHtBccttagaVaatMcBHagaVcWcagBVttcBtaYMccagat -gaaaaHctctaacgttagNWRtcggattNatcRaNHttcagtKttttgWatWttcSaNgg -gaWtactKKMaacatKatacNattgctWtatctaVgagctatgtRaHtYcWcttagccaa -tYttWttaWSSttaHcaaaaagVacVgtaVaRMgattaVcDactttcHHggHRtgNcctt -tYatcatKgctcctctatVcaaaaKaaaagtatatctgMtWtaaaacaStttMtcgactt -taSatcgDataaactaaacaagtaaVctaggaSccaatMVtaaSKNVattttgHccatca -cBVctgcaVatVttRtactgtVcaattHgtaaattaaattttYtatattaaRSgYtgBag -aHSBDgtagcacRHtYcBgtcacttacactaYcgctWtattgSHtSatcataaatataHt -cgtYaaMNgBaatttaRgaMaatatttBtttaaaHHKaatctgatWatYaacttMctctt -ttVctagctDaaagtaVaKaKRtaacBgtatccaaccactHHaagaagaaggaNaaatBW -attccgStaMSaMatBttgcatgRSacgttVVtaaDMtcSgVatWcaSatcttttVatag -ttactttacgatcaccNtaDVgSRcgVcgtgaacgaNtaNatatagtHtMgtHcMtagaa -attBgtataRaaaacaYKgtRccYtatgaagtaataKgtaaMttgaaRVatgcagaKStc -tHNaaatctBBtcttaYaBWHgtVtgacagcaRcataWctcaBcYacYgatDgtDHccta ->THREE Homo sapiens frequency -aacacttcaccaggtatcgtgaaggctcaagattacccagagaacctttgcaatataaga -atatgtatgcagcattaccctaagtaattatattctttttctgactcaaagtgacaagcc -ctagtgtatattaaatcggtatatttgggaaattcctcaaactatcctaatcaggtagcc -atgaaagtgatcaaaaaagttcgtacttataccatacatgaattctggccaagtaaaaaa -tagattgcgcaaaattcgtaccttaagtctctcgccaagatattaggatcctattactca -tatcgtgtttttctttattgccgccatccccggagtatctcacccatccttctcttaaag -gcctaatattacctatgcaaataaacatatattgttgaaaattgagaacctgatcgtgat -tcttatgtgtaccatatgtatagtaatcacgcgactatatagtgctttagtatcgcccgt -gggtgagtgaatattctgggctagcgtgagatagtttcttgtcctaatatttttcagatc -gaatagcttctatttttgtgtttattgacatatgtcgaaactccttactcagtgaaagtc -atgaccagatccacgaacaatcttcggaatcagtctcgttttacggcggaatcttgagtc -taacttatatcccgtcgcttactttctaacaccccttatgtatttttaaaattacgttta -ttcgaacgtacttggcggaagcgttattttttgaagtaagttacattgggcagactcttg -acattttcgatacgactttctttcatccatcacaggactcgttcgtattgatatcagaag -ctcgtgatgattagttgtcttctttaccaatactttgaggcctattctgcgaaatttttg -ttgccctgcgaacttcacataccaaggaacacctcgcaacatgccttcatatccatcgtt -cattgtaattcttacacaatgaatcctaagtaattacatccctgcgtaaaagatggtagg -ggcactgaggatatattaccaagcatttagttatgagtaatcagcaatgtttcttgtatt -aagttctctaaaatagttacatcgtaatgttatctcgggttccgcgaataaacgagatag -attcattatatatggccctaagcaaaaacctcctcgtattctgttggtaattagaatcac -acaatacgggttgagatattaattatttgtagtacgaagagatataaaaagatgaacaat -tactcaagtcaagatgtatacgggatttataataaaaatcgggtagagatctgctttgca -attcagacgtgccactaaatcgtaatatgtcgcgttacatcagaaagggtaactattatt -aattaataaagggcttaatcactacatattagatcttatccgatagtcttatctattcgt -tgtatttttaagcggttctaattcagtcattatatcagtgctccgagttctttattattg -ttttaaggatgacaaaatgcctcttgttataacgctgggagaagcagactaagagtcgga -gcagttggtagaatgaggctgcaaaagacggtctcgacgaatggacagactttactaaac -caatgaaagacagaagtagagcaaagtctgaagtggtatcagcttaattatgacaaccct -taatacttccctttcgccgaatactggcgtggaaaggttttaaaagtcgaagtagttaga -ggcatctctcgctcataaataggtagactactcgcaatccaatgtgactatgtaatactg -ggaacatcagtccgcgatgcagcgtgtttatcaaccgtccccactcgcctggggagacat -gagaccacccccgtggggattattagtccgcagtaatcgactcttgacaatccttttcga -ttatgtcatagcaatttacgacagttcagcgaagtgactactcggcgaaatggtattact -aaagcattcgaacccacatgaatgtgattcttggcaatttctaatccactaaagcttttc -cgttgaatctggttgtagatatttatataagttcactaattaagatcacggtagtatatt -gatagtgatgtctttgcaagaggttggccgaggaatttacggattctctattgatacaat -ttgtctggcttataactcttaaggctgaaccaggcgtttttagacgacttgatcagctgt -tagaatggtttggactccctctttcatgtcagtaacatttcagccgttattgttacgata -tgcttgaacaatattgatctaccacacacccatagtatattttataggtcatgctgttac -ctacgagcatggtattccacttcccattcaatgagtattcaacatcactagcctcagaga -tgatgacccacctctaataacgtcacgttgcggccatgtgaaacctgaacttgagtagac -gatatcaagcgctttaaattgcatataacatttgagggtaaagctaagcggatgctttat -ataatcaatactcaataataagatttgattgcattttagagttatgacacgacatagttc -actaacgagttactattcccagatctagactgaagtactgatcgagacgatccttacgtc -gatgatcgttagttatcgacttaggtcgggtctctagcggtattggtacttaaccggaca -ctatactaataacccatgatcaaagcataacagaatacagacgataatttcgccaacata -tatgtacagaccccaagcatgagaagctcattgaaagctatcattgaagtcccgctcaca -atgtgtcttttccagacggtttaactggttcccgggagtcctggagtttcgacttacata -aatggaaacaatgtattttgctaatttatctatagcgtcatttggaccaatacagaatat -tatgttgcctagtaatccactataacccgcaagtgctgatagaaaatttttagacgattt -ataaatgccccaagtatccctcccgtgaatcctccgttatactaattagtattcgttcat -acgtataccgcgcatatatgaacatttggcgataaggcgcgtgaattgttacgtgacaga -gatagcagtttcttgtgatatggttaacagacgtacatgaagggaaactttatatctata -gtgatgcttccgtagaaataccgccactggtctgccaatgatgaagtatgtagctttagg -tttgtactatgaggctttcgtttgtttgcagagtataacagttgcgagtgaaaaaccgac -gaatttatactaatacgctttcactattggctacaaaatagggaagagtttcaatcatga -gagggagtatatggatgctttgtagctaaaggtagaacgtatgtatatgctgccgttcat -tcttgaaagatacataagcgataagttacgacaattataagcaacatccctaccttcgta -acgatttcactgttactgcgcttgaaatacactatggggctattggcggagagaagcaga -tcgcgccgagcatatacgagacctataatgttgatgatagagaaggcgtctgaattgata -catcgaagtacactttctttcgtagtatctctcgtcctctttctatctccggacacaaga -attaagttatatatatagagtcttaccaatcatgttgaatcctgattctcagagttcttt -ggcgggccttgtgatgactgagaaacaatgcaatattgctccaaatttcctaagcaaatt -ctcggttatgttatgttatcagcaaagcgttacgttatgttatttaaatctggaatgacg -gagcgaagttcttatgtcggtgtgggaataattcttttgaagacagcactccttaaataa -tatcgctccgtgtttgtatttatcgaatgggtctgtaaccttgcacaagcaaatcggtgg -tgtatatatcggataacaattaatacgatgttcatagtgacagtatactgatcgagtcct -ctaaagtcaattacctcacttaacaatctcattgatgttgtgtcattcccggtatcgccc -gtagtatgtgctctgattgaccgagtgtgaaccaaggaacatctactaatgcctttgtta -ggtaagatctctctgaattccttcgtgccaacttaaaacattatcaaaatttcttctact -tggattaactacttttacgagcatggcaaattcccctgtggaagacggttcattattatc -ggaaaccttatagaaattgcgtgttgactgaaattagatttttattgtaagagttgcatc -tttgcgattcctctggtctagcttccaatgaacagtcctcccttctattcgacatcgggt -ccttcgtacatgtctttgcgatgtaataattaggttcggagtgtggccttaatgggtgca -actaggaatacaacgcaaatttgctgacatgatagcaaatcggtatgccggcaccaaaac -gtgctccttgcttagcttgtgaatgagactcagtagttaaataaatccatatctgcaatc -gattccacaggtattgtccactatctttgaactactctaagagatacaagcttagctgag -accgaggtgtatatgactacgctgatatctgtaaggtaccaatgcaggcaaagtatgcga -gaagctaataccggctgtttccagctttataagattaaaatttggctgtcctggcggcct -cagaattgttctatcgtaatcagttggttcattaattagctaagtacgaggtacaactta -tctgtcccagaacagctccacaagtttttttacagccgaaacccctgtgtgaatcttaat -atccaagcgcgttatctgattagagtttacaactcagtattttatcagtacgttttgttt -ccaacattacccggtatgacaaaatgacgccacgtgtcgaataatggtctgaccaatgta -ggaagtgaaaagataaatat diff --git a/extra/benchmark/reverse-complement-test-out.txt b/extra/benchmark/reverse-complement-test-out.txt deleted file mode 100644 index 14d792ade8..0000000000 --- a/extra/benchmark/reverse-complement-test-out.txt +++ /dev/null @@ -1,171 +0,0 @@ ->ONE Homo sapiens alu -CGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAAC -CTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACA -GGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCAT -GTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAA -AGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTC -TGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGG -GTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACC -ACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTG -GTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTA -CAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCT -GGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTC -TCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAAT -TTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCT -GACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCA -CCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGC -GCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCC -TCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTA -GTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGAT -CCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCT -TTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTC -ACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTG -GGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGT -TTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGG -CCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAG -TCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCG -CCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGC -GCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGG -CCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGC -TGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCG -CCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCA -AGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCC -CGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTC -GAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGC -GTGAGCCACCGCGCCCGGCC ->TWO IUB ambiguity codes -TAGGDHACHATCRGTRGVTGAGWTATGYTGCTGTCABACDWVTRTAAGAVVAGATTTNDA -GASMTCTGCATBYTTCAAKTTACMTATTACTTCATARGGYACMRTGTTTTYTATACVAAT -TTCTAKGDACKADACTATATNTANTCGTTCACGBCGYSCBHTANGGTGATCGTAAAGTAA -CTATBAAAAGATSTGWATBCSGAKHTTABBAACGTSYCATGCAAVATKTSKTASCGGAAT -WVATTTNTCCTTCTTCTTDDAGTGGTTGGATACVGTTAYMTMTBTACTTTHAGCTAGBAA -AAGAGKAAGTTRATWATCAGATTMDDTTTAAAVAAATATTKTCYTAAATTVCNKTTRACG -ADTATATTTATGATSADSCAATAWAGCGRTAGTGTAAGTGACVGRADYGTGCTACHVSDT -CTVCARCSYTTAATATARAAAATTTAATTTACDAATTGBACAGTAYAABATBTGCAGBVG -TGATGGDCAAAATBNMSTTABKATTGGSTCCTAGBTTACTTGTTTAGTTTATHCGATSTA -AAGTCGAKAAASTGTTTTAWAKCAGATATACTTTTMTTTTGBATAGAGGAGCMATGATRA -AAGGNCAYDCCDDGAAAGTHGBTAATCKYTBTACBGTBCTTTTTGDTAASSWTAAWAARA -TTGGCTAAGWGRADTYACATAGCTCBTAGATAWAGCAATNGTATMATGTTKMMAGTAWTC -CCNTSGAAWATWCAAAAMACTGAADNTYGATNAATCCGAYWNCTAACGTTAGAGDTTTTC -ATCTGGKRTAVGAABVCTGWGBTCTDVGKATTBTCTAAGGVADAAAVWTCTAGGGGAGGG -TTAGAACAATTAAHTAATNAAATGCATKATCTAAYRTDTCAGSAYTTYHGATRTTWAVTA -BGNTCDACAGBCCRCAGWCRTCABTGMMAWGMCTCAACCGATRTGBCAVAATCGTDWDAA -CAYAWAATWCTGGTAHCCCTAAGATAACSCTTAGTGSAACAWTBGTCDTTDGACWDBAAC -HTTTNGSKTYYAAYGGATNTGATTTAARTTAMBAATCTAAGTBTCATYTAACTTADTGTT -TCGATACGAAHGGCYATATACCWDTKYATDCSHTDTCAAAATGTGBACTGSCCVGATGTA -TCMMAGCCTTDAAABAATGAAGAGTAACTHATMGVTTAATAACCCGGTTVSANTGCAATT -GTGAGATTTAMGTTTAMAAYGCTGACAYAAAAAGGCACAMYTAAGVGGCTGGAABVTACG -GATTSTYGTBVAKTATWACCGTGTKAGTDTGTATGTTTAAAGGAAAAAGTAACATARAAA -GGTYCAMNYAAABTATAGNTSATANAGTCATCCTATWADKAACTRGTMSACDGTATSAYT -AAHSHGTAABYGACTYTATADTGSTATAGAGAAATCGNTAAAGGAAATCAGTTGTNCYMV -TNACDRTATBNATATASTAGAAMSCGGGANRCKKMCAAACATTNAGTCTRMAATBMTACC -CGTACTTCTBGDSYAATWGAAAATGACADDCHAKAAAYATATTKTTTTCACANACWAGAA -AKATCCTTATTAYKHKCTAAACARTATTTTDATBTVWCYGCAATACTAGGKAAASTTDGA -MGGCHTTHAATVCAHDRYAGGRCTATACGTCMAGAGAGCTBTHGNACARTCCBDCTAAGA -GCGGCTTTARTAAAGAATCCNAGTAWBTGACTTGAATTACWTVACAGAAABCAATNAAAC -CGTNTRANTTGAYCMAWBADTANABRGGTKTHTWTAGTTVCTMBKTAGMTVKCCAGCANT -TVAGSWTTAGCCGCRHTTTCCTTHNTATTAAGAAGAATAGGMTRAARTCTABGTACDTTT -TATAAVDHAHTATAGATCCTAGTAAGYTWATDWCATGAGGGATAGTAAMDMNGBASTWAM -TSTATRBAYDABATGTATATYCGCACTGTTTTAACMCWBTATAWAGTATBTSTATVTTAR -CCTMTTAAKADATCAACTAATYTSVTAKGDATTATGCKTCAYCAKAATACTTKAANGAGT -ATTSDAGATCGGAAATACTTAAYAAVGTATMCGCTTGTGTDCTAATYTATTTTATTTWAA -CAGWRCTATGTAGMTGTTTGTTYKTNGTTKTCAGAACNTRACCTACKTGSRATGTGGGGG -CTGTCATTAAGTAAATNGSTTABCCCCTCGCAGCTCWHTCGCGAAGCAVATGCKACGHCA -ACAKTTAATAACASAAADATTWNYTGTAATTGTTCGTMHACHTWATGTGCWTTTTGAAHY -ACTTTGTAYAMSAAACTTAADAAATATAGTABMATATYAATGSGGTAGTTTGTGTBYGGT -TWSGSVGWMATTDMTCCWWCABTCSVACAGBAATGTTKATBGTCAATAATCTTCTTAAAC -ARVAATHAGYBWCTRWCABGTWWAATCTAAGTCASTAAAKTAAGVKBAATTBGABACGTA -AGGTTAAATAAAAACTRMDTWBCTTTTTAATAAAAGATMGCCTACKAKNTBAGYRASTGT -ASSTCGTHCGAAKTTATTATATTYTTTGTAGAACATGTCAAAACTWTWTHGKTCCYAATA -AAGTGGAYTMCYTAARCSTAAATWAKTGAATTTRAGTCTSSATACGACWAKAASATDAAA -TGYYACTSAACAAHAKTSHYARGASTATTATTHAGGYGGASTTTBGAKGATSANAACACD -TRGSTTRAAAAAAAACAAGARTCVTAGTAAGATAWATGVHAAKATWGAAAAGTYAHVTAC -TCTGRTGTCAWGATRVAAKTCGCAAVCGASWGGTTRTCSAMCCTAACASGWKKAWDAATG -ACRCBACTATGTGTCTTCAAAHGSCTATATTTCGTVWAGAAGTAYCKGARAKSGKAGTAN -TTTCYACATWATGTCTAAAADMDTWCAATSTKDACAMAADADBSAAATAGGCTHAHAGTA -CGACVGAATTATAAAGAHCCVAYHGHTTTACATSTTTATGNCCMTAGCATATGATAVAAG ->THREE Homo sapiens frequency -ATATTTATCTTTTCACTTCCTACATTGGTCAGACCATTATTCGACACGTGGCGTCATTTT -GTCATACCGGGTAATGTTGGAAACAAAACGTACTGATAAAATACTGAGTTGTAAACTCTA -ATCAGATAACGCGCTTGGATATTAAGATTCACACAGGGGTTTCGGCTGTAAAAAAACTTG -TGGAGCTGTTCTGGGACAGATAAGTTGTACCTCGTACTTAGCTAATTAATGAACCAACTG -ATTACGATAGAACAATTCTGAGGCCGCCAGGACAGCCAAATTTTAATCTTATAAAGCTGG -AAACAGCCGGTATTAGCTTCTCGCATACTTTGCCTGCATTGGTACCTTACAGATATCAGC -GTAGTCATATACACCTCGGTCTCAGCTAAGCTTGTATCTCTTAGAGTAGTTCAAAGATAG -TGGACAATACCTGTGGAATCGATTGCAGATATGGATTTATTTAACTACTGAGTCTCATTC -ACAAGCTAAGCAAGGAGCACGTTTTGGTGCCGGCATACCGATTTGCTATCATGTCAGCAA -ATTTGCGTTGTATTCCTAGTTGCACCCATTAAGGCCACACTCCGAACCTAATTATTACAT -CGCAAAGACATGTACGAAGGACCCGATGTCGAATAGAAGGGAGGACTGTTCATTGGAAGC -TAGACCAGAGGAATCGCAAAGATGCAACTCTTACAATAAAAATCTAATTTCAGTCAACAC -GCAATTTCTATAAGGTTTCCGATAATAATGAACCGTCTTCCACAGGGGAATTTGCCATGC -TCGTAAAAGTAGTTAATCCAAGTAGAAGAAATTTTGATAATGTTTTAAGTTGGCACGAAG -GAATTCAGAGAGATCTTACCTAACAAAGGCATTAGTAGATGTTCCTTGGTTCACACTCGG -TCAATCAGAGCACATACTACGGGCGATACCGGGAATGACACAACATCAATGAGATTGTTA -AGTGAGGTAATTGACTTTAGAGGACTCGATCAGTATACTGTCACTATGAACATCGTATTA -ATTGTTATCCGATATATACACCACCGATTTGCTTGTGCAAGGTTACAGACCCATTCGATA -AATACAAACACGGAGCGATATTATTTAAGGAGTGCTGTCTTCAAAAGAATTATTCCCACA -CCGACATAAGAACTTCGCTCCGTCATTCCAGATTTAAATAACATAACGTAACGCTTTGCT -GATAACATAACATAACCGAGAATTTGCTTAGGAAATTTGGAGCAATATTGCATTGTTTCT -CAGTCATCACAAGGCCCGCCAAAGAACTCTGAGAATCAGGATTCAACATGATTGGTAAGA -CTCTATATATATAACTTAATTCTTGTGTCCGGAGATAGAAAGAGGACGAGAGATACTACG -AAAGAAAGTGTACTTCGATGTATCAATTCAGACGCCTTCTCTATCATCAACATTATAGGT -CTCGTATATGCTCGGCGCGATCTGCTTCTCTCCGCCAATAGCCCCATAGTGTATTTCAAG -CGCAGTAACAGTGAAATCGTTACGAAGGTAGGGATGTTGCTTATAATTGTCGTAACTTAT -CGCTTATGTATCTTTCAAGAATGAACGGCAGCATATACATACGTTCTACCTTTAGCTACA -AAGCATCCATATACTCCCTCTCATGATTGAAACTCTTCCCTATTTTGTAGCCAATAGTGA -AAGCGTATTAGTATAAATTCGTCGGTTTTTCACTCGCAACTGTTATACTCTGCAAACAAA -CGAAAGCCTCATAGTACAAACCTAAAGCTACATACTTCATCATTGGCAGACCAGTGGCGG -TATTTCTACGGAAGCATCACTATAGATATAAAGTTTCCCTTCATGTACGTCTGTTAACCA -TATCACAAGAAACTGCTATCTCTGTCACGTAACAATTCACGCGCCTTATCGCCAAATGTT -CATATATGCGCGGTATACGTATGAACGAATACTAATTAGTATAACGGAGGATTCACGGGA -GGGATACTTGGGGCATTTATAAATCGTCTAAAAATTTTCTATCAGCACTTGCGGGTTATA -GTGGATTACTAGGCAACATAATATTCTGTATTGGTCCAAATGACGCTATAGATAAATTAG -CAAAATACATTGTTTCCATTTATGTAAGTCGAAACTCCAGGACTCCCGGGAACCAGTTAA -ACCGTCTGGAAAAGACACATTGTGAGCGGGACTTCAATGATAGCTTTCAATGAGCTTCTC -ATGCTTGGGGTCTGTACATATATGTTGGCGAAATTATCGTCTGTATTCTGTTATGCTTTG -ATCATGGGTTATTAGTATAGTGTCCGGTTAAGTACCAATACCGCTAGAGACCCGACCTAA -GTCGATAACTAACGATCATCGACGTAAGGATCGTCTCGATCAGTACTTCAGTCTAGATCT -GGGAATAGTAACTCGTTAGTGAACTATGTCGTGTCATAACTCTAAAATGCAATCAAATCT -TATTATTGAGTATTGATTATATAAAGCATCCGCTTAGCTTTACCCTCAAATGTTATATGC -AATTTAAAGCGCTTGATATCGTCTACTCAAGTTCAGGTTTCACATGGCCGCAACGTGACG -TTATTAGAGGTGGGTCATCATCTCTGAGGCTAGTGATGTTGAATACTCATTGAATGGGAA -GTGGAATACCATGCTCGTAGGTAACAGCATGACCTATAAAATATACTATGGGTGTGTGGT -AGATCAATATTGTTCAAGCATATCGTAACAATAACGGCTGAAATGTTACTGACATGAAAG -AGGGAGTCCAAACCATTCTAACAGCTGATCAAGTCGTCTAAAAACGCCTGGTTCAGCCTT -AAGAGTTATAAGCCAGACAAATTGTATCAATAGAGAATCCGTAAATTCCTCGGCCAACCT -CTTGCAAAGACATCACTATCAATATACTACCGTGATCTTAATTAGTGAACTTATATAAAT -ATCTACAACCAGATTCAACGGAAAAGCTTTAGTGGATTAGAAATTGCCAAGAATCACATT -CATGTGGGTTCGAATGCTTTAGTAATACCATTTCGCCGAGTAGTCACTTCGCTGAACTGT -CGTAAATTGCTATGACATAATCGAAAAGGATTGTCAAGAGTCGATTACTGCGGACTAATA -ATCCCCACGGGGGTGGTCTCATGTCTCCCCAGGCGAGTGGGGACGGTTGATAAACACGCT -GCATCGCGGACTGATGTTCCCAGTATTACATAGTCACATTGGATTGCGAGTAGTCTACCT -ATTTATGAGCGAGAGATGCCTCTAACTACTTCGACTTTTAAAACCTTTCCACGCCAGTAT -TCGGCGAAAGGGAAGTATTAAGGGTTGTCATAATTAAGCTGATACCACTTCAGACTTTGC -TCTACTTCTGTCTTTCATTGGTTTAGTAAAGTCTGTCCATTCGTCGAGACCGTCTTTTGC -AGCCTCATTCTACCAACTGCTCCGACTCTTAGTCTGCTTCTCCCAGCGTTATAACAAGAG -GCATTTTGTCATCCTTAAAACAATAATAAAGAACTCGGAGCACTGATATAATGACTGAAT -TAGAACCGCTTAAAAATACAACGAATAGATAAGACTATCGGATAAGATCTAATATGTAGT -GATTAAGCCCTTTATTAATTAATAATAGTTACCCTTTCTGATGTAACGCGACATATTACG -ATTTAGTGGCACGTCTGAATTGCAAAGCAGATCTCTACCCGATTTTTATTATAAATCCCG -TATACATCTTGACTTGAGTAATTGTTCATCTTTTTATATCTCTTCGTACTACAAATAATT -AATATCTCAACCCGTATTGTGTGATTCTAATTACCAACAGAATACGAGGAGGTTTTTGCT -TAGGGCCATATATAATGAATCTATCTCGTTTATTCGCGGAACCCGAGATAACATTACGAT -GTAACTATTTTAGAGAACTTAATACAAGAAACATTGCTGATTACTCATAACTAAATGCTT -GGTAATATATCCTCAGTGCCCCTACCATCTTTTACGCAGGGATGTAATTACTTAGGATTC -ATTGTGTAAGAATTACAATGAACGATGGATATGAAGGCATGTTGCGAGGTGTTCCTTGGT -ATGTGAAGTTCGCAGGGCAACAAAAATTTCGCAGAATAGGCCTCAAAGTATTGGTAAAGA -AGACAACTAATCATCACGAGCTTCTGATATCAATACGAACGAGTCCTGTGATGGATGAAA -GAAAGTCGTATCGAAAATGTCAAGAGTCTGCCCAATGTAACTTACTTCAAAAAATAACGC -TTCCGCCAAGTACGTTCGAATAAACGTAATTTTAAAAATACATAAGGGGTGTTAGAAAGT -AAGCGACGGGATATAAGTTAGACTCAAGATTCCGCCGTAAAACGAGACTGATTCCGAAGA -TTGTTCGTGGATCTGGTCATGACTTTCACTGAGTAAGGAGTTTCGACATATGTCAATAAA -CACAAAAATAGAAGCTATTCGATCTGAAAAATATTAGGACAAGAAACTATCTCACGCTAG -CCCAGAATATTCACTCACCCACGGGCGATACTAAAGCACTATATAGTCGCGTGATTACTA -TACATATGGTACACATAAGAATCACGATCAGGTTCTCAATTTTCAACAATATATGTTTAT -TTGCATAGGTAATATTAGGCCTTTAAGAGAAGGATGGGTGAGATACTCCGGGGATGGCGG -CAATAAAGAAAAACACGATATGAGTAATAGGATCCTAATATCTTGGCGAGAGACTTAAGG -TACGAATTTTGCGCAATCTATTTTTTACTTGGCCAGAATTCATGTATGGTATAAGTACGA -ACTTTTTTGATCACTTTCATGGCTACCTGATTAGGATAGTTTGAGGAATTTCCCAAATAT -ACCGATTTAATATACACTAGGGCTTGTCACTTTGAGTCAGAAAAAGAATATAATTACTTA -GGGTAATGCTGCATACATATTCTTATATTGCAAAGGTTCTCTGGGTAATCTTGAGCCTTC -ACGATACCTGGTGAAGTGTT From 3f1370f2b54ac50262219641f47257d43ea3fc5b Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Sat, 5 Jan 2008 21:46:39 -0600 Subject: [PATCH 6/6] lines-crc32 now crcs a \n at the end of each line. it is still off by one \n if the last line doesn't have a newline, but it's good enough for the parser --- core/io/crc32/crc32.factor | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/io/crc32/crc32.factor b/core/io/crc32/crc32.factor index d6a812c3de..82af0cdb23 100644 --- a/core/io/crc32/crc32.factor +++ b/core/io/crc32/crc32.factor @@ -30,4 +30,6 @@ DEFER: crc32-table inline : file-crc32 ( path -- n ) file-contents crc32 ; : lines-crc32 ( seq -- n ) - HEX: ffffffff tuck [ [ (crc32) ] each ] reduce bitxor ; + HEX: ffffffff tuck [ + [ (crc32) ] each CHAR: \n (crc32) + ] reduce bitxor ;