spelling: fix splits and sorting of corrections, other cleanup.

db4
John Benediktsson 2013-05-22 11:02:36 -07:00
parent 3394b6e727
commit 61c6b37e5e
1 changed files with 18 additions and 29 deletions

View File

@ -1,42 +1,31 @@
USING: arrays ascii assocs combinators combinators.smart fry USING: arrays ascii assocs combinators combinators.smart fry
http.client io.encodings.ascii io.files io.files.temp kernel http.client io.encodings.ascii io.files io.files.temp kernel
locals math math.statistics memoize sequences sorting splitting literals locals math math.ranges math.statistics memoize
strings urls ; sequences sets sorting splitting strings urls ;
IN: spelling IN: spelling
! http://norvig.com/spell-correct.html ! http://norvig.com/spell-correct.html
CONSTANT: ALPHABET "abcdefghijklmnopqrstuvwxyz" CONSTANT: ALPHABET $[
"abcdefghijklmnopqrstuvwxyz" [ 1string ] { } map-as
]
: splits ( word -- sequence ) : splits ( word -- splits )
dup length iota [ cut 2array ] with map ; dup length [0,b] [ cut 2array ] with map ;
: deletes ( sequence -- sequence' ) : deletes ( splits -- edits )
[ second length 0 > ] filter [ first2 rest append ] map ; [ second length 0 > ] filter [ first2 rest append ] map ;
: transposes ( sequence -- sequence' ) : transposes ( splits -- edits )
[ second length 1 > ] filter [ [ second length 1 > ] filter
[ [ first2 2 cut swap reverse! glue ] map ;
{
[ first ]
[ second second 1string ]
[ second first 1string ]
[ second 2 tail ]
} cleave
] "" append-outputs-as
] map ;
: replaces ( sequence -- sequence' ) : replaces ( splits -- edits )
[ second length 0 > ] filter [ [ second length 0 > ] filter ALPHABET
[ ALPHABET ] dip first2 [ [ first2 rest ] [ glue ] bi* ] cartesian-map concat ;
'[ 1string _ _ rest surround ] { } map-as
] map concat ;
: inserts ( sequence -- sequence' ) : inserts ( splits -- edits )
[ ALPHABET [ [ first2 ] [ glue ] bi* ] cartesian-map concat ;
ALPHABET
[ [ first2 ] dip 1string glue ] with { } map-as
] map concat ;
: edits1 ( word -- edits ) : edits1 ( word -- edits )
[ [
@ -51,14 +40,14 @@ CONSTANT: ALPHABET "abcdefghijklmnopqrstuvwxyz"
: edits2 ( word -- edits ) : edits2 ( word -- edits )
edits1 [ edits1 ] map concat ; edits1 [ edits1 ] map concat ;
: filter-known ( words dictionary -- words' ) : filter-known ( edits dictionary -- words )
'[ _ key? ] filter ; '[ _ key? ] filter ;
:: corrections ( word dictionary -- words ) :: corrections ( word dictionary -- words )
word 1array dictionary filter-known word 1array dictionary filter-known
[ word edits1 dictionary filter-known ] when-empty [ word edits1 dictionary filter-known ] when-empty
[ word edits2 dictionary filter-known ] when-empty [ word edits2 dictionary filter-known ] when-empty
[ dictionary at 1 or ] sort-with ; [ dictionary at ] sort-with reverse! ;
: words ( string -- words ) : words ( string -- words )
>lower [ letter? not ] split-when harvest ; >lower [ letter? not ] split-when harvest ;