factor/extra/spelling/spelling.factor

70 lines
1.9 KiB
Factor

USING: arrays ascii assocs combinators combinators.smart fry
http.client io.encodings.ascii io.files io.files.temp kernel
literals locals math math.ranges math.statistics memoize
sequences sequences.private sets sorting splitting strings urls ;
IN: spelling
! http://norvig.com/spell-correct.html
CONSTANT: ALPHABET "abcdefghijklmnopqrstuvwxyz"
: deletes ( word -- edits )
[ length iota ] keep '[ _ remove-nth ] map ;
: transposes ( word -- edits )
[ length [1,b) ] keep '[
dup 1 - _ clone [ exchange-unsafe ] keep
] map ;
: replaces ( word -- edits )
[ length iota ] keep '[
ALPHABET [
swap _ clone [ set-nth-unsafe ] keep
] with { } map-as
] map concat ;
: inserts ( word -- edits )
[ length [0,b] ] keep '[
CHAR: ? over _ insert-nth ALPHABET swap [
swapd clone [ set-nth-unsafe ] keep
] curry with { } map-as
] map concat ;
: edits1 ( word -- edits )
[
{
[ deletes ]
[ transposes ]
[ replaces ]
[ inserts ]
} cleave
] append-outputs ;
: edits2 ( word -- edits )
edits1 [ edits1 ] map concat ;
: filter-known ( edits dictionary -- words )
'[ _ key? ] filter ;
:: corrections ( word dictionary -- words )
word 1array dictionary filter-known
[ word edits1 dictionary filter-known ] when-empty
[ word edits2 dictionary filter-known ] when-empty
[ dictionary at ] sort-with reverse! ;
: words ( string -- words )
>lower [ letter? not ] split-when harvest ;
: load-dictionary ( file -- assoc )
ascii file-contents words histogram ;
MEMO: default-dictionary ( -- counts )
URL" http://norvig.com/big.txt" "big.txt" temp-file
[ ?download-to ] [ load-dictionary ] bi ;
: (correct) ( word dictionary -- word/f )
corrections ?first ;
: correct ( word -- word/f )
default-dictionary (correct) ;