diff --git a/extra/suffix-arrays/suffix-arrays-docs.factor b/extra/suffix-arrays/suffix-arrays-docs.factor index 3bea1d26fd..879839d8cd 100755 --- a/extra/suffix-arrays/suffix-arrays-docs.factor +++ b/extra/suffix-arrays/suffix-arrays-docs.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2008 Marc Fauconneau. ! See http://factorcode.org/license.txt for BSD license. USING: arrays help.markup help.syntax io.streams.string -sequences strings math ; +sequences strings math suffix-arrays.private ; IN: suffix-arrays HELP: >suffix-array @@ -32,7 +32,7 @@ HELP: query { $description "Returns " } ; ARTICLE: "suffix-arrays" "Suffix arrays" -"The " { $vocab-link "suffix-arrays" } " vocabulary implements the suffix array data structure for efficient lookup of subsequences." $nl +"The " { $vocab-link "suffix-arrays" } " vocabulary implements the suffix array data structure for efficient lookup of subsequences. This suffix array implementation is a sorted array of suffixes. Querying it for matches uses binary search for efficiency." $nl "Creating new suffix arrays:" { $subsection >suffix-array } diff --git a/extra/suffix-arrays/suffix-arrays-tests.factor b/extra/suffix-arrays/suffix-arrays-tests.factor index f4efd8e002..8904d35362 100755 --- a/extra/suffix-arrays/suffix-arrays-tests.factor +++ b/extra/suffix-arrays/suffix-arrays-tests.factor @@ -1,27 +1,30 @@ ! Copyright (C) 2008 Marc Fauconneau. ! See http://factorcode.org/license.txt for BSD license. -USING: tools.test suffix-arrays kernel namespaces ; +USING: tools.test suffix-arrays kernel namespaces sequences ; IN: suffix-arrays.tests ! built from [ all-words 10 head [ name>> ] map ] -{ - "run-tests" - "must-fail-with" - "test-all" - "short-effect" - "failure" - "test" - "" - "this-test" - "(unit-test)" - "unit-test" -} "strings" set +[ ] [ + SA{ + "run-tests" + "must-fail-with" + "test-all" + "short-effect" + "failure" + "test" + "" + "this-test" + "(unit-test)" + "unit-test" + } "suffix-array" set +] unit-test -[ "strings" get >suffix-array "" swap query ] must-fail +[ "suffix-array" get "" swap query ] must-fail -[ { } >suffix-array "something" swap query ] must-fail +[ SA{ } "something" swap query ] must-fail [ V{ "unit-test" "(unit-test)" } ] -[ "strings" get >suffix-array "unit-test" swap query ] unit-test +[ "suffix-array" get "unit-test" swap query ] unit-test -[ V{ } ] [ "strings" get >suffix-array "something else" swap query ] unit-test +[ t ] +[ "suffix-array" get "something else" swap query empty? ] unit-test diff --git a/extra/suffix-arrays/suffix-arrays.factor b/extra/suffix-arrays/suffix-arrays.factor index d51548017b..2cf2076732 100755 --- a/extra/suffix-arrays/suffix-arrays.factor +++ b/extra/suffix-arrays/suffix-arrays.factor @@ -4,28 +4,27 @@ USING: parser kernel arrays math accessors sequences math.vectors math.order sorting binary-search sets assocs fry ; IN: suffix-arrays -! this suffix array is a sorted array of suffixes -! query is efficient through binary searches - + ( begin seq -- <=> ) + [ <=> ] [ swap head? ] 2bi [ drop +eq+ ] when ; + +: find-index ( begin suffix-array -- index ) + [ prefix<=> ] with search drop ; + +: from-to ( index begin suffix-array -- from to ) + swap '[ _ head? not ] + [ find-last-from drop 1+ ] + [ find-from drop ] 3bi ; +PRIVATE> + : >suffix-array ( seq -- array ) [ suffixes ] map concat natural-sort ; : SA{ \ } [ >suffix-array ] parse-literal ; parsing -: prefix<=> ( seq begin -- <=> ) - [ swap <=> ] [ head? ] 2bi [ drop +eq+ ] when ; - -: find-index ( suffix-array begin -- index ) - '[ _ prefix<=> ] search drop ; - -: from-to ( index suffix-array begin -- from to ) - '[ _ head? not ] - [ find-last-from drop 1+ ] - [ find-from drop ] 3bi ; - : query ( begin suffix-array -- matches ) - [ swap [ find-index ] 2keep from-to [ min ] keep ] keep + [ [ find-index ] 2keep from-to [ min ] keep ] keep [ seq>> ] map prune ;