handle boundary cases better in suffix-arrays. more unit tests
parent
4c502165e7
commit
64af1a9e67
|
@ -8,7 +8,7 @@ HELP: >suffix-array
|
||||||
{ $values
|
{ $values
|
||||||
{ "seq" sequence }
|
{ "seq" sequence }
|
||||||
{ "array" array } }
|
{ "array" array } }
|
||||||
{ $description "Creates a suffix array from the input sequence." } ;
|
{ $description "Creates a suffix array from the input sequence. Suffix arrays are arrays of slices." } ;
|
||||||
|
|
||||||
HELP: SA{
|
HELP: SA{
|
||||||
{ $description "Creates a new literal suffix array at parse-time." } ;
|
{ $description "Creates a new literal suffix array at parse-time." } ;
|
||||||
|
@ -21,15 +21,15 @@ HELP: suffixes
|
||||||
|
|
||||||
HELP: from-to
|
HELP: from-to
|
||||||
{ $values
|
{ $values
|
||||||
{ "index" integer } { "suffix-array" "a suffix-array" } { "begin" string }
|
{ "index" integer } { "suffix-array" "a suffix-array" } { "begin" sequence }
|
||||||
{ "from" integer } { "to" integer } }
|
{ "from" integer } { "to" integer } }
|
||||||
{ $notes "Slices are [m,n) and we want (m,n) so we increment." } ;
|
{ $notes "Slices are [m,n) and we want (m,n) so we increment." } ;
|
||||||
|
|
||||||
HELP: query
|
HELP: query
|
||||||
{ $values
|
{ $values
|
||||||
{ "begin" string } { "suffix-array" "a suffix-array" }
|
{ "begin" sequence } { "suffix-array" "a suffix-array" }
|
||||||
{ "matches" array } }
|
{ "matches" array } }
|
||||||
{ $description "Returns " } ;
|
{ $description "Returns a sequence of sequences from the suffix-array that contain the input sequence. An empty array is returned when there are no matches." } ;
|
||||||
|
|
||||||
ARTICLE: "suffix-arrays" "Suffix arrays"
|
ARTICLE: "suffix-arrays" "Suffix arrays"
|
||||||
"The " { $vocab-link "suffix-arrays" } " vocabulary implements the suffix array data structure for efficient lookup of subsequences. This suffix array implementation is a sorted array of suffixes. Querying it for matches uses binary search for efficiency." $nl
|
"The " { $vocab-link "suffix-arrays" } " vocabulary implements the suffix array data structure for efficient lookup of subsequences. This suffix array implementation is a sorted array of suffixes. Querying it for matches uses binary search for efficiency." $nl
|
||||||
|
|
|
@ -4,8 +4,8 @@ USING: tools.test suffix-arrays kernel namespaces sequences ;
|
||||||
IN: suffix-arrays.tests
|
IN: suffix-arrays.tests
|
||||||
|
|
||||||
! built from [ all-words 10 head [ name>> ] map ]
|
! built from [ all-words 10 head [ name>> ] map ]
|
||||||
[ ] [
|
[ ] [
|
||||||
SA{
|
{
|
||||||
"run-tests"
|
"run-tests"
|
||||||
"must-fail-with"
|
"must-fail-with"
|
||||||
"test-all"
|
"test-all"
|
||||||
|
@ -16,15 +16,23 @@ IN: suffix-arrays.tests
|
||||||
"this-test"
|
"this-test"
|
||||||
"(unit-test)"
|
"(unit-test)"
|
||||||
"unit-test"
|
"unit-test"
|
||||||
} "suffix-array" set
|
} >suffix-array "suffix-array" set
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
[ "suffix-array" get "" swap query ] must-fail
|
[ t ]
|
||||||
|
[ "suffix-array" get "" swap query empty? not ] unit-test
|
||||||
|
|
||||||
[ SA{ } "something" swap query ] must-fail
|
[ { } ]
|
||||||
|
[ SA{ } "something" swap query ] unit-test
|
||||||
|
|
||||||
[ V{ "unit-test" "(unit-test)" } ]
|
[ V{ "unit-test" "(unit-test)" } ]
|
||||||
[ "suffix-array" get "unit-test" swap query ] unit-test
|
[ "suffix-array" get "unit-test" swap query ] unit-test
|
||||||
|
|
||||||
[ t ]
|
[ t ]
|
||||||
[ "suffix-array" get "something else" swap query empty? ] unit-test
|
[ "suffix-array" get "something else" swap query empty? ] unit-test
|
||||||
|
|
||||||
|
[ V{ "rofl" } ] [ SA{ "rofl" } "r" swap query ] unit-test
|
||||||
|
[ V{ "rofl" } ] [ SA{ "rofl" } "o" swap query ] unit-test
|
||||||
|
[ V{ "rofl" } ] [ SA{ "rofl" } "f" swap query ] unit-test
|
||||||
|
[ V{ "rofl" } ] [ SA{ "rofl" } "l" swap query ] unit-test
|
||||||
|
[ V{ } ] [ SA{ "rofl" } "t" swap query ] unit-test
|
||||||
|
|
|
@ -11,13 +11,22 @@ IN: suffix-arrays
|
||||||
: prefix<=> ( begin seq -- <=> )
|
: prefix<=> ( begin seq -- <=> )
|
||||||
[ <=> ] [ swap head? ] 2bi [ drop +eq+ ] when ;
|
[ <=> ] [ swap head? ] 2bi [ drop +eq+ ] when ;
|
||||||
|
|
||||||
: find-index ( begin suffix-array -- index )
|
: find-index ( begin suffix-array -- index/f )
|
||||||
[ prefix<=> ] with search drop ;
|
[ prefix<=> ] with search drop ;
|
||||||
|
|
||||||
: from-to ( index begin suffix-array -- from to )
|
: from-to ( index begin suffix-array -- from/f to/f )
|
||||||
swap '[ _ head? not ]
|
swap '[ _ head? not ]
|
||||||
[ find-last-from drop 1+ ]
|
[ find-last-from drop dup [ 1+ ] when ]
|
||||||
[ find-from drop ] 3bi ;
|
[ find-from drop ] 3bi ;
|
||||||
|
|
||||||
|
: <funky-slice> ( from/f to/f seq -- slice )
|
||||||
|
[
|
||||||
|
tuck
|
||||||
|
[ drop [ 0 ] unless* ]
|
||||||
|
[ dupd length ? ] 2bi*
|
||||||
|
[ min ] keep
|
||||||
|
] keep <slice> ;
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
: >suffix-array ( seq -- array )
|
: >suffix-array ( seq -- array )
|
||||||
|
@ -26,5 +35,6 @@ PRIVATE>
|
||||||
: SA{ \ } [ >suffix-array ] parse-literal ; parsing
|
: SA{ \ } [ >suffix-array ] parse-literal ; parsing
|
||||||
|
|
||||||
: query ( begin suffix-array -- matches )
|
: query ( begin suffix-array -- matches )
|
||||||
[ [ find-index ] 2keep from-to [ min ] keep ] keep
|
2dup find-index
|
||||||
<slice> [ seq>> ] map prune ;
|
[ -rot [ from-to ] keep <funky-slice> [ seq>> ] map prune ]
|
||||||
|
[ 2drop { } ] if* ;
|
||||||
|
|
Loading…
Reference in New Issue