2008-07-09 20:25:24 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								! Copyright (C) 2008 Slava Pestov.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								! See http://factorcode.org/license.txt for BSD license.
							 | 
						
					
						
							
								
									
										
										
										
											2009-05-15 00:23:06 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								USING: sequences grouping assocs kernel ascii tr ;
							 | 
						
					
						
							
								
									
										
										
										
											2008-07-09 20:25:24 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								IN: soundex
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								TR: soundex-tr
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    ch>upper
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    "AEHIOUWYBFPVCGJKQSXZDTLMNR"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    "00000000111122222222334556" ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: remove-duplicates ( seq -- seq' )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    #! Remove _consecutive_ duplicates (unlike prune which removes
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    #! all duplicates).
							 | 
						
					
						
							
								
									
										
										
										
											2015-05-12 22:08:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    [ 2 <clumps> [ = ] assoc-reject values ] [ first ] bi prefix ;
							 | 
						
					
						
							
								
									
										
										
										
											2008-07-09 20:25:24 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: first>upper ( seq -- seq' ) 1 head >upper ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-29 23:19:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: trim-first ( seq -- seq' ) dup first [ = ] curry trim-head ;
							 | 
						
					
						
							
								
									
										
										
										
											2008-07-09 20:25:24 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: remove-zeroes ( seq -- seq' ) CHAR: 0 swap remove ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: remove-non-alpha ( seq -- seq' ) [ alpha? ] filter ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: pad-4 ( first seq -- seq' ) "000" 3append 4 head ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: soundex ( string -- soundex )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    remove-non-alpha [ f ] [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        [ first>upper ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            soundex-tr
							 | 
						
					
						
							
								
									
										
										
										
											2008-07-15 21:57:56 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            [ "" ] [ trim-first ] if-empty
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            [ "" ] [ remove-duplicates ] if-empty
							 | 
						
					
						
							
								
									
										
										
										
											2008-07-09 20:25:24 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            remove-zeroes
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ] bi
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        pad-4
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    ] if-empty ;
							 |