79 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Factor
		
	
	
			
		
		
	
	
			79 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Factor
		
	
	
USING: arrays ascii assocs combinators combinators.smart fry
 | 
						|
http.client io.encodings.ascii io.files io.files.temp kernel
 | 
						|
locals math math.statistics memoize sequences sorting splitting
 | 
						|
strings urls ;
 | 
						|
IN: spelling
 | 
						|
 | 
						|
! http://norvig.com/spell-correct.html
 | 
						|
 | 
						|
CONSTANT: ALPHABET "abcdefghijklmnopqrstuvwxyz"
 | 
						|
 | 
						|
: splits ( word -- sequence )
 | 
						|
    dup length iota [ cut 2array ] with map ;
 | 
						|
 | 
						|
: deletes ( sequence -- sequence' )
 | 
						|
    [ second length 0 > ] filter [ first2 rest append ] map ;
 | 
						|
 | 
						|
: transposes ( sequence -- sequence' )
 | 
						|
    [ second length 1 > ] filter [
 | 
						|
        [
 | 
						|
            {
 | 
						|
                [ first ]
 | 
						|
                [ second second 1string ]
 | 
						|
                [ second first 1string ]
 | 
						|
                [ second 2 tail ]
 | 
						|
            } cleave
 | 
						|
        ] "" append-outputs-as
 | 
						|
    ] map ;
 | 
						|
 | 
						|
: replaces ( sequence -- sequence' )
 | 
						|
    [ second length 0 > ] filter [
 | 
						|
        [ ALPHABET ] dip first2
 | 
						|
        '[ 1string _ _ rest surround ] { } map-as
 | 
						|
    ] map concat ;
 | 
						|
 | 
						|
: inserts ( sequence -- sequence' )
 | 
						|
    [
 | 
						|
        ALPHABET
 | 
						|
        [ [ first2 ] dip 1string glue ] with { } map-as
 | 
						|
    ] map concat ;
 | 
						|
 | 
						|
: edits1 ( word -- edits )
 | 
						|
    [
 | 
						|
        splits {
 | 
						|
            [ deletes ]
 | 
						|
            [ transposes ]
 | 
						|
            [ replaces ]
 | 
						|
            [ inserts ]
 | 
						|
        } cleave
 | 
						|
    ] append-outputs ;
 | 
						|
 | 
						|
: edits2 ( word -- edits )
 | 
						|
    edits1 [ edits1 ] map concat ;
 | 
						|
 | 
						|
: filter-known ( words dictionary -- words' )
 | 
						|
    '[ _ key? ] filter ;
 | 
						|
 | 
						|
:: corrections ( word dictionary -- words )
 | 
						|
    word 1array dictionary filter-known
 | 
						|
    [ word edits1 dictionary filter-known ] when-empty
 | 
						|
    [ word edits2 dictionary filter-known ] when-empty
 | 
						|
    [ dictionary at 1 or ] sort-with ;
 | 
						|
 | 
						|
: words ( string -- words )
 | 
						|
    >lower [ letter? not ] split-when harvest ;
 | 
						|
 | 
						|
: load-dictionary ( file -- assoc )
 | 
						|
    ascii file-contents words histogram ;
 | 
						|
 | 
						|
MEMO: default-dictionary ( -- counts )
 | 
						|
    "big.txt" temp-file dup exists?
 | 
						|
    [ URL" http://norvig.com/big.txt" over download-to ] unless
 | 
						|
    load-dictionary ;
 | 
						|
 | 
						|
: (correct) ( word dictionary -- word/f )
 | 
						|
    corrections ?first ;
 | 
						|
 | 
						|
: correct ( word -- word/f )
 | 
						|
    default-dictionary (correct) ;
 |