79 lines
2.1 KiB
Factor
79 lines
2.1 KiB
Factor
USING: arrays ascii assocs combinators combinators.smart fry
|
|
http.client io.encodings.ascii io.files io.files.temp kernel
|
|
locals math math.statistics memoize sequences sorting splitting
|
|
strings urls ;
|
|
IN: spelling
|
|
|
|
! http://norvig.com/spell-correct.html
|
|
|
|
CONSTANT: ALPHABET "abcdefghijklmnopqrstuvwxyz"
|
|
|
|
: splits ( word -- sequence )
|
|
dup length iota [ cut 2array ] with map ;
|
|
|
|
: deletes ( sequence -- sequence' )
|
|
[ second length 0 > ] filter [ first2 rest append ] map ;
|
|
|
|
: transposes ( sequence -- sequence' )
|
|
[ second length 1 > ] filter [
|
|
[
|
|
{
|
|
[ first ]
|
|
[ second second 1string ]
|
|
[ second first 1string ]
|
|
[ second 2 tail ]
|
|
} cleave
|
|
] "" append-outputs-as
|
|
] map ;
|
|
|
|
: replaces ( sequence -- sequence' )
|
|
[ second length 0 > ] filter [
|
|
[ ALPHABET ] dip first2
|
|
'[ 1string _ _ rest surround ] { } map-as
|
|
] map concat ;
|
|
|
|
: inserts ( sequence -- sequence' )
|
|
[
|
|
ALPHABET
|
|
[ [ first2 ] dip 1string glue ] with { } map-as
|
|
] map concat ;
|
|
|
|
: edits1 ( word -- edits )
|
|
[
|
|
splits {
|
|
[ deletes ]
|
|
[ transposes ]
|
|
[ replaces ]
|
|
[ inserts ]
|
|
} cleave
|
|
] append-outputs ;
|
|
|
|
: edits2 ( word -- edits )
|
|
edits1 [ edits1 ] map concat ;
|
|
|
|
: filter-known ( words dictionary -- words' )
|
|
'[ _ key? ] filter ;
|
|
|
|
:: corrections ( word dictionary -- words )
|
|
word 1array dictionary filter-known
|
|
[ word edits1 dictionary filter-known ] when-empty
|
|
[ word edits2 dictionary filter-known ] when-empty
|
|
[ dictionary at 1 or ] sort-with ;
|
|
|
|
: words ( string -- words )
|
|
>lower [ letter? not ] split-when harvest ;
|
|
|
|
: load-dictionary ( file -- assoc )
|
|
ascii file-contents words histogram ;
|
|
|
|
MEMO: default-dictionary ( -- counts )
|
|
"big.txt" temp-file dup exists?
|
|
[ URL" http://norvig.com/big.txt" over download-to ] unless
|
|
load-dictionary ;
|
|
|
|
: (correct) ( word dictionary -- word/f )
|
|
corrections [ f ] [ first ] if-empty ;
|
|
|
|
: correct ( word -- word/f )
|
|
default-dictionary (correct) ;
|