From c9f24cd04f51f8fbf779f1c8415d433d34488573 Mon Sep 17 00:00:00 2001 From: John Benediktsson Date: Tue, 25 Jun 2013 15:50:42 -0700 Subject: [PATCH] spelling: some performance improvements, and minor fixes. --- extra/spelling/spelling-tests.factor | 7 ++++- extra/spelling/spelling.factor | 39 +++++++++++++++------------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/extra/spelling/spelling-tests.factor b/extra/spelling/spelling-tests.factor index f32363854b..7ffc617122 100644 --- a/extra/spelling/spelling-tests.factor +++ b/extra/spelling/spelling-tests.factor @@ -1,6 +1,11 @@ -USING: spelling tools.test memoize ; +USING: kernel sequences spelling tools.test memoize ; IN: spelling.tests +{ { "bc" "ac" "ab" } } [ "abc" deletes ] unit-test +{ { "bac" "acb" } } [ "abc" transposes ] unit-test +{ t } [ "a" replaces concat ALPHABET = ] unit-test +{ 104 } [ "abc" inserts length ] unit-test + MEMO: test-dictionary ( -- assoc ) "vocab:spelling/test.txt" load-dictionary ; diff --git a/extra/spelling/spelling.factor b/extra/spelling/spelling.factor index 6b0868765a..0bf702da5a 100644 --- a/extra/spelling/spelling.factor +++ b/extra/spelling/spelling.factor @@ -1,35 +1,38 @@ USING: arrays ascii assocs combinators combinators.smart fry http.client io.encodings.ascii io.files io.files.temp kernel literals locals math math.ranges math.statistics memoize -sequences sets sorting splitting strings urls ; +sequences sequences.private sets sorting splitting strings urls ; IN: spelling ! http://norvig.com/spell-correct.html -CONSTANT: ALPHABET $[ - "abcdefghijklmnopqrstuvwxyz" [ 1string ] { } map-as -] +CONSTANT: ALPHABET "abcdefghijklmnopqrstuvwxyz" -: splits ( word -- splits ) - dup length [0,b] [ cut 2array ] with map ; +: deletes ( word -- edits ) + [ length iota ] keep '[ _ remove-nth ] map ; -: deletes ( splits -- edits ) - [ second length 0 > ] filter [ first2 rest append ] map ; +: transposes ( word -- edits ) + [ length [1,b) ] keep '[ + dup 1 - _ clone [ exchange-unsafe ] keep + ] map ; -: transposes ( splits -- edits ) - [ second length 1 > ] filter - [ first2 2 cut swap reverse! glue ] map ; +: replaces ( word -- edits ) + [ length iota ] keep '[ + ALPHABET [ + swap _ clone [ set-nth-unsafe ] keep + ] with { } map-as + ] map concat ; -: replaces ( splits -- edits ) - [ second length 0 > ] filter ALPHABET - [ [ first2 rest ] [ glue ] bi* ] cartesian-map concat ; - -: inserts ( splits -- edits ) - ALPHABET [ [ first2 ] [ glue ] bi* ] cartesian-map concat ; +: inserts ( word -- edits ) + [ length [0,b] ] keep '[ + CHAR: ? over _ insert-nth ALPHABET swap [ + swapd clone [ set-nth-unsafe ] keep + ] curry with { } map-as + ] map concat ; : edits1 ( word -- edits ) [ - splits { + { [ deletes ] [ transposes ] [ replaces ]