New extra/tr/ vocab for fast translation of ASCII strings; improves reverse-complement performance by 11%; add soundex vocab which uses tr
parent
eba4b990af
commit
442bde22e5
|
@ -1,30 +1,20 @@
|
|||
USING: io io.files io.streams.duplex kernel sequences
|
||||
sequences.private strings vectors words memoize splitting
|
||||
grouping hints unicode.case continuations io.encodings.ascii ;
|
||||
grouping hints tr continuations io.encodings.ascii
|
||||
unicode.case ;
|
||||
IN: benchmark.reverse-complement
|
||||
|
||||
MEMO: trans-map ( -- str )
|
||||
256 >string
|
||||
"TGCAAKYRMBDHV" "ACGTUMRYKVHDB"
|
||||
[ pick set-nth ] 2each ;
|
||||
|
||||
: do-trans-map ( str -- )
|
||||
[ ch>upper trans-map nth ] change-each ;
|
||||
|
||||
HINTS: do-trans-map string ;
|
||||
TR: trans-map ch>upper "ACGTUMRYKVHDB" "TGCAAKYRMBDHV" ;
|
||||
|
||||
: translate-seq ( seq -- str )
|
||||
concat dup reverse-here dup do-trans-map ;
|
||||
concat dup reverse-here dup trans-map-fast ;
|
||||
|
||||
: show-seq ( seq -- )
|
||||
translate-seq 60 <groups> [ print ] each ;
|
||||
|
||||
: do-line ( seq line -- seq )
|
||||
dup first ">;" memq? [
|
||||
over show-seq print dup delete-all
|
||||
] [
|
||||
over push
|
||||
] if ;
|
||||
dup first ">;" memq?
|
||||
[ over show-seq print dup delete-all ] [ over push ] if ;
|
||||
|
||||
HINTS: do-line vector string ;
|
||||
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Slava Pestov
|
|
@ -0,0 +1,4 @@
|
|||
IN: soundex.tests
|
||||
USING: soundex tools.test ;
|
||||
|
||||
[ "S162" ] [ "supercalifrag" soundex ] unit-test
|
|
@ -0,0 +1,33 @@
|
|||
! Copyright (C) 2008 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: sequences sequences.lib grouping assocs kernel ascii
|
||||
unicode.case tr ;
|
||||
IN: soundex
|
||||
|
||||
TR: soundex-tr
|
||||
ch>upper
|
||||
"AEHIOUWYBFPVCGJKQSXZDTLMNR"
|
||||
"00000000111122222222334556" ;
|
||||
|
||||
: remove-duplicates ( seq -- seq' )
|
||||
#! Remove _consecutive_ duplicates (unlike prune which removes
|
||||
#! all duplicates).
|
||||
[ 2 <clumps> [ = not ] assoc-filter values ] [ first ] bi prefix ;
|
||||
|
||||
: first>upper ( seq -- seq' ) 1 head >upper ;
|
||||
: trim-first ( seq -- seq' ) dup first [ = ] curry left-trim ;
|
||||
: remove-zeroes ( seq -- seq' ) CHAR: 0 swap remove ;
|
||||
: remove-non-alpha ( seq -- seq' ) [ alpha? ] filter ;
|
||||
: pad-4 ( first seq -- seq' ) "000" 3append 4 head ;
|
||||
|
||||
: soundex ( string -- soundex )
|
||||
remove-non-alpha [ f ] [
|
||||
[ first>upper ]
|
||||
[
|
||||
soundex-tr
|
||||
trim-first
|
||||
remove-duplicates
|
||||
remove-zeroes
|
||||
] bi
|
||||
pad-4
|
||||
] if-empty ;
|
|
@ -0,0 +1 @@
|
|||
Soundex is a phonetic algorithm for indexing names by sound
|
|
@ -0,0 +1 @@
|
|||
Slava Pestov
|
|
@ -0,0 +1 @@
|
|||
Fast character-to-character translation of ASCII strings
|
|
@ -0,0 +1,7 @@
|
|||
IN: tr.tests
|
||||
USING: tr tools.test unicode.case ;
|
||||
|
||||
TR: tr-test ch>upper "ABC" "XYZ" ;
|
||||
|
||||
[ "XXYY" ] [ "aabb" tr-test ] unit-test
|
||||
[ "XXYY" ] [ "AABB" tr-test ] unit-test
|
|
@ -0,0 +1,35 @@
|
|||
! Copyright (C) 2008 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: byte-arrays strings sequences sequences.private
|
||||
fry kernel words parser lexer assocs ;
|
||||
IN: tr
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: compute-tr ( quot from to -- mapping )
|
||||
zip [ 256 ] 2dip '[ [ @ , at ] keep or ] B{ } map-as ; inline
|
||||
|
||||
: tr-hints ( word -- )
|
||||
{ { byte-array } { string } } "specializer" set-word-prop ;
|
||||
|
||||
: create-tr ( token -- word )
|
||||
create-in dup tr-hints ;
|
||||
|
||||
: define-tr ( word mapping -- )
|
||||
'[ [ , nth ] map ]
|
||||
(( seq -- translated ))
|
||||
define-declared ;
|
||||
|
||||
: define-fast-tr ( word mapping -- )
|
||||
'[ [ , nth-unsafe ] change-each ]
|
||||
(( seq -- ))
|
||||
define-declared ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: TR:
|
||||
scan parse-definition
|
||||
unclip-last [ unclip-last ] dip compute-tr
|
||||
[ [ create-tr ] dip define-tr ]
|
||||
[ [ "-fast" append create-tr ] dip define-fast-tr ] 2bi ;
|
||||
parsing
|
Loading…
Reference in New Issue