spelling: some performance improvements, and minor fixes.

db4
John Benediktsson 2013-06-25 15:50:42 -07:00
parent aa779eaba8
commit c9f24cd04f
2 changed files with 27 additions and 19 deletions

View File

@ -1,6 +1,11 @@
USING: spelling tools.test memoize ; USING: kernel sequences spelling tools.test memoize ;
IN: spelling.tests IN: spelling.tests
{ { "bc" "ac" "ab" } } [ "abc" deletes ] unit-test
{ { "bac" "acb" } } [ "abc" transposes ] unit-test
{ t } [ "a" replaces concat ALPHABET = ] unit-test
{ 104 } [ "abc" inserts length ] unit-test
MEMO: test-dictionary ( -- assoc ) MEMO: test-dictionary ( -- assoc )
"vocab:spelling/test.txt" load-dictionary ; "vocab:spelling/test.txt" load-dictionary ;

View File

@ -1,35 +1,38 @@
USING: arrays ascii assocs combinators combinators.smart fry USING: arrays ascii assocs combinators combinators.smart fry
http.client io.encodings.ascii io.files io.files.temp kernel http.client io.encodings.ascii io.files io.files.temp kernel
literals locals math math.ranges math.statistics memoize literals locals math math.ranges math.statistics memoize
sequences sets sorting splitting strings urls ; sequences sequences.private sets sorting splitting strings urls ;
IN: spelling IN: spelling
! http://norvig.com/spell-correct.html ! http://norvig.com/spell-correct.html
CONSTANT: ALPHABET $[ CONSTANT: ALPHABET "abcdefghijklmnopqrstuvwxyz"
"abcdefghijklmnopqrstuvwxyz" [ 1string ] { } map-as
]
: splits ( word -- splits ) : deletes ( word -- edits )
dup length [0,b] [ cut 2array ] with map ; [ length iota ] keep '[ _ remove-nth ] map ;
: deletes ( splits -- edits ) : transposes ( word -- edits )
[ second length 0 > ] filter [ first2 rest append ] map ; [ length [1,b) ] keep '[
dup 1 - _ clone [ exchange-unsafe ] keep
] map ;
: transposes ( splits -- edits ) : replaces ( word -- edits )
[ second length 1 > ] filter [ length iota ] keep '[
[ first2 2 cut swap reverse! glue ] map ; ALPHABET [
swap _ clone [ set-nth-unsafe ] keep
] with { } map-as
] map concat ;
: replaces ( splits -- edits ) : inserts ( word -- edits )
[ second length 0 > ] filter ALPHABET [ length [0,b] ] keep '[
[ [ first2 rest ] [ glue ] bi* ] cartesian-map concat ; CHAR: ? over _ insert-nth ALPHABET swap [
swapd clone [ set-nth-unsafe ] keep
: inserts ( splits -- edits ) ] curry with { } map-as
ALPHABET [ [ first2 ] [ glue ] bi* ] cartesian-map concat ; ] map concat ;
: edits1 ( word -- edits ) : edits1 ( word -- edits )
[ [
splits { {
[ deletes ] [ deletes ]
[ transposes ] [ transposes ]
[ replaces ] [ replaces ]