diff --git a/extra/suffix-arrays/suffix-arrays-docs.factor b/extra/suffix-arrays/suffix-arrays-docs.factor index 879839d8cd..55a78b2cb6 100755 --- a/extra/suffix-arrays/suffix-arrays-docs.factor +++ b/extra/suffix-arrays/suffix-arrays-docs.factor @@ -8,7 +8,7 @@ HELP: >suffix-array { $values { "seq" sequence } { "array" array } } -{ $description "Creates a suffix array from the input sequence." } ; +{ $description "Creates a suffix array from the input sequence. Suffix arrays are arrays of slices." } ; HELP: SA{ { $description "Creates a new literal suffix array at parse-time." } ; @@ -21,15 +21,15 @@ HELP: suffixes HELP: from-to { $values - { "index" integer } { "suffix-array" "a suffix-array" } { "begin" string } + { "index" integer } { "suffix-array" "a suffix-array" } { "begin" sequence } { "from" integer } { "to" integer } } { $notes "Slices are [m,n) and we want (m,n) so we increment." } ; HELP: query { $values - { "begin" string } { "suffix-array" "a suffix-array" } + { "begin" sequence } { "suffix-array" "a suffix-array" } { "matches" array } } -{ $description "Returns " } ; +{ $description "Returns a sequence of sequences from the suffix-array that contain the input sequence. An empty array is returned when there are no matches." } ; ARTICLE: "suffix-arrays" "Suffix arrays" "The " { $vocab-link "suffix-arrays" } " vocabulary implements the suffix array data structure for efficient lookup of subsequences. This suffix array implementation is a sorted array of suffixes. Querying it for matches uses binary search for efficiency." $nl diff --git a/extra/suffix-arrays/suffix-arrays-tests.factor b/extra/suffix-arrays/suffix-arrays-tests.factor index 8904d35362..5149804ce6 100755 --- a/extra/suffix-arrays/suffix-arrays-tests.factor +++ b/extra/suffix-arrays/suffix-arrays-tests.factor @@ -4,8 +4,8 @@ USING: tools.test suffix-arrays kernel namespaces sequences ; IN: suffix-arrays.tests ! built from [ all-words 10 head [ name>> ] map ] -[ ] [ - SA{ +[ ] [ + { "run-tests" "must-fail-with" "test-all" @@ -16,15 +16,23 @@ IN: suffix-arrays.tests "this-test" "(unit-test)" "unit-test" - } "suffix-array" set + } >suffix-array "suffix-array" set ] unit-test -[ "suffix-array" get "" swap query ] must-fail +[ t ] +[ "suffix-array" get "" swap query empty? not ] unit-test -[ SA{ } "something" swap query ] must-fail +[ { } ] +[ SA{ } "something" swap query ] unit-test [ V{ "unit-test" "(unit-test)" } ] [ "suffix-array" get "unit-test" swap query ] unit-test [ t ] [ "suffix-array" get "something else" swap query empty? ] unit-test + +[ V{ "rofl" } ] [ SA{ "rofl" } "r" swap query ] unit-test +[ V{ "rofl" } ] [ SA{ "rofl" } "o" swap query ] unit-test +[ V{ "rofl" } ] [ SA{ "rofl" } "f" swap query ] unit-test +[ V{ "rofl" } ] [ SA{ "rofl" } "l" swap query ] unit-test +[ V{ } ] [ SA{ "rofl" } "t" swap query ] unit-test diff --git a/extra/suffix-arrays/suffix-arrays.factor b/extra/suffix-arrays/suffix-arrays.factor index 2cf2076732..719496243c 100755 --- a/extra/suffix-arrays/suffix-arrays.factor +++ b/extra/suffix-arrays/suffix-arrays.factor @@ -11,13 +11,22 @@ IN: suffix-arrays : prefix<=> ( begin seq -- <=> ) [ <=> ] [ swap head? ] 2bi [ drop +eq+ ] when ; -: find-index ( begin suffix-array -- index ) +: find-index ( begin suffix-array -- index/f ) [ prefix<=> ] with search drop ; -: from-to ( index begin suffix-array -- from to ) +: from-to ( index begin suffix-array -- from/f to/f ) swap '[ _ head? not ] - [ find-last-from drop 1+ ] + [ find-last-from drop dup [ 1+ ] when ] [ find-from drop ] 3bi ; + +: ( from/f to/f seq -- slice ) + [ + tuck + [ drop [ 0 ] unless* ] + [ dupd length ? ] 2bi* + [ min ] keep + ] keep ; + PRIVATE> : >suffix-array ( seq -- array ) @@ -26,5 +35,6 @@ PRIVATE> : SA{ \ } [ >suffix-array ] parse-literal ; parsing : query ( begin suffix-array -- matches ) - [ [ find-index ] 2keep from-to [ min ] keep ] keep - [ seq>> ] map prune ; + 2dup find-index + [ -rot [ from-to ] keep [ seq>> ] map prune ] + [ 2drop { } ] if* ;