68 lines
1.9 KiB
Factor
68 lines
1.9 KiB
Factor
! Copyright (C) 2010 John Benediktsson
|
|
! See http://factorcode.org/license.txt for BSD license
|
|
USING: arrays ascii assocs combinators combinators.smart fry
|
|
http.client io.encodings.ascii io.files io.files.temp kernel
|
|
locals math math.ranges math.statistics memoize sequences
|
|
sequences.private sorting splitting urls ;
|
|
IN: spelling
|
|
|
|
! http://norvig.com/spell-correct.html
|
|
|
|
CONSTANT: ALPHABET "abcdefghijklmnopqrstuvwxyz"
|
|
|
|
: deletes ( word -- edits )
|
|
[ length <iota> ] keep '[ _ remove-nth ] map ;
|
|
|
|
: transposes ( word -- edits )
|
|
[ length [1,b) ] keep
|
|
'[ dup 1 - _ clone [ exchange-unsafe ] keep ] map ;
|
|
|
|
: replace1 ( i word -- words )
|
|
[ ALPHABET ] 2dip bounds-check
|
|
'[ _ _ clone [ set-nth-unsafe ] keep ] { } map-as ;
|
|
|
|
: replaces ( word -- edits )
|
|
[ length <iota> ] keep '[ _ replace1 ] map concat ;
|
|
|
|
: inserts ( word -- edits )
|
|
[ length [0,b] ] keep
|
|
'[ CHAR: ? over _ insert-nth replace1 ] map concat ;
|
|
|
|
: edits1 ( word -- edits )
|
|
[
|
|
{
|
|
[ deletes ]
|
|
[ transposes ]
|
|
[ replaces ]
|
|
[ inserts ]
|
|
} cleave
|
|
] append-outputs ;
|
|
|
|
: edits2 ( word -- edits )
|
|
edits1 [ edits1 ] map concat ;
|
|
|
|
: filter-known ( edits dictionary -- words )
|
|
'[ _ key? ] filter ;
|
|
|
|
:: corrections ( word dictionary -- words )
|
|
word 1array dictionary filter-known
|
|
[ word edits1 dictionary filter-known ] when-empty
|
|
[ word edits2 dictionary filter-known ] when-empty
|
|
[ dictionary at ] sort-with reverse! ;
|
|
|
|
: words ( string -- words )
|
|
>lower [ letter? not ] split-when harvest ;
|
|
|
|
: load-dictionary ( file -- assoc )
|
|
ascii file-contents words histogram ;
|
|
|
|
MEMO: default-dictionary ( -- counts )
|
|
URL" http://norvig.com/big.txt" "big.txt" temp-file
|
|
[ ?download-to ] [ load-dictionary ] bi ;
|
|
|
|
: (correct) ( word dictionary -- word/f )
|
|
corrections ?first ;
|
|
|
|
: correct ( word -- word/f )
|
|
default-dictionary (correct) ;
|