factor/basis/soundex/soundex.factor

33 lines
1011 B
Factor

! Copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: sequences grouping assocs kernel ascii ascii tr ;
IN: soundex
TR: soundex-tr
ch>upper
"AEHIOUWYBFPVCGJKQSXZDTLMNR"
"00000000111122222222334556" ;
: remove-duplicates ( seq -- seq' )
#! Remove _consecutive_ duplicates (unlike prune which removes
#! all duplicates).
[ 2 <clumps> [ = not ] assoc-filter values ] [ first ] bi prefix ;
: first>upper ( seq -- seq' ) 1 head >upper ;
: trim-first ( seq -- seq' ) dup first [ = ] curry trim-left ;
: remove-zeroes ( seq -- seq' ) CHAR: 0 swap remove ;
: remove-non-alpha ( seq -- seq' ) [ alpha? ] filter ;
: pad-4 ( first seq -- seq' ) "000" 3append 4 head ;
: soundex ( string -- soundex )
remove-non-alpha [ f ] [
[ first>upper ]
[
soundex-tr
[ "" ] [ trim-first ] if-empty
[ "" ] [ remove-duplicates ] if-empty
remove-zeroes
] bi
pad-4
] if-empty ;