spelling: fix splits and sorting of corrections, other cleanup.
parent
3394b6e727
commit
61c6b37e5e
|
@ -1,42 +1,31 @@
|
|||
USING: arrays ascii assocs combinators combinators.smart fry
|
||||
http.client io.encodings.ascii io.files io.files.temp kernel
|
||||
locals math math.statistics memoize sequences sorting splitting
|
||||
strings urls ;
|
||||
literals locals math math.ranges math.statistics memoize
|
||||
sequences sets sorting splitting strings urls ;
|
||||
IN: spelling
|
||||
|
||||
! http://norvig.com/spell-correct.html
|
||||
|
||||
CONSTANT: ALPHABET "abcdefghijklmnopqrstuvwxyz"
|
||||
CONSTANT: ALPHABET $[
|
||||
"abcdefghijklmnopqrstuvwxyz" [ 1string ] { } map-as
|
||||
]
|
||||
|
||||
: splits ( word -- sequence )
|
||||
dup length iota [ cut 2array ] with map ;
|
||||
: splits ( word -- splits )
|
||||
dup length [0,b] [ cut 2array ] with map ;
|
||||
|
||||
: deletes ( sequence -- sequence' )
|
||||
: deletes ( splits -- edits )
|
||||
[ second length 0 > ] filter [ first2 rest append ] map ;
|
||||
|
||||
: transposes ( sequence -- sequence' )
|
||||
[ second length 1 > ] filter [
|
||||
[
|
||||
{
|
||||
[ first ]
|
||||
[ second second 1string ]
|
||||
[ second first 1string ]
|
||||
[ second 2 tail ]
|
||||
} cleave
|
||||
] "" append-outputs-as
|
||||
] map ;
|
||||
: transposes ( splits -- edits )
|
||||
[ second length 1 > ] filter
|
||||
[ first2 2 cut swap reverse! glue ] map ;
|
||||
|
||||
: replaces ( sequence -- sequence' )
|
||||
[ second length 0 > ] filter [
|
||||
[ ALPHABET ] dip first2
|
||||
'[ 1string _ _ rest surround ] { } map-as
|
||||
] map concat ;
|
||||
: replaces ( splits -- edits )
|
||||
[ second length 0 > ] filter ALPHABET
|
||||
[ [ first2 rest ] [ glue ] bi* ] cartesian-map concat ;
|
||||
|
||||
: inserts ( sequence -- sequence' )
|
||||
[
|
||||
ALPHABET
|
||||
[ [ first2 ] dip 1string glue ] with { } map-as
|
||||
] map concat ;
|
||||
: inserts ( splits -- edits )
|
||||
ALPHABET [ [ first2 ] [ glue ] bi* ] cartesian-map concat ;
|
||||
|
||||
: edits1 ( word -- edits )
|
||||
[
|
||||
|
@ -51,14 +40,14 @@ CONSTANT: ALPHABET "abcdefghijklmnopqrstuvwxyz"
|
|||
: edits2 ( word -- edits )
|
||||
edits1 [ edits1 ] map concat ;
|
||||
|
||||
: filter-known ( words dictionary -- words' )
|
||||
: filter-known ( edits dictionary -- words )
|
||||
'[ _ key? ] filter ;
|
||||
|
||||
:: corrections ( word dictionary -- words )
|
||||
word 1array dictionary filter-known
|
||||
[ word edits1 dictionary filter-known ] when-empty
|
||||
[ word edits2 dictionary filter-known ] when-empty
|
||||
[ dictionary at 1 or ] sort-with ;
|
||||
[ dictionary at ] sort-with reverse! ;
|
||||
|
||||
: words ( string -- words )
|
||||
>lower [ letter? not ] split-when harvest ;
|
||||
|
|
Loading…
Reference in New Issue