a little bit of cleanup, trying to get rid of some stack shuffling
parent
27c38f6d30
commit
4c502165e7
|
@ -1,7 +1,7 @@
|
||||||
! Copyright (C) 2008 Marc Fauconneau.
|
! Copyright (C) 2008 Marc Fauconneau.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: arrays help.markup help.syntax io.streams.string
|
USING: arrays help.markup help.syntax io.streams.string
|
||||||
sequences strings math ;
|
sequences strings math suffix-arrays.private ;
|
||||||
IN: suffix-arrays
|
IN: suffix-arrays
|
||||||
|
|
||||||
HELP: >suffix-array
|
HELP: >suffix-array
|
||||||
|
@ -32,7 +32,7 @@ HELP: query
|
||||||
{ $description "Returns " } ;
|
{ $description "Returns " } ;
|
||||||
|
|
||||||
ARTICLE: "suffix-arrays" "Suffix arrays"
|
ARTICLE: "suffix-arrays" "Suffix arrays"
|
||||||
"The " { $vocab-link "suffix-arrays" } " vocabulary implements the suffix array data structure for efficient lookup of subsequences." $nl
|
"The " { $vocab-link "suffix-arrays" } " vocabulary implements the suffix array data structure for efficient lookup of subsequences. This suffix array implementation is a sorted array of suffixes. Querying it for matches uses binary search for efficiency." $nl
|
||||||
|
|
||||||
"Creating new suffix arrays:"
|
"Creating new suffix arrays:"
|
||||||
{ $subsection >suffix-array }
|
{ $subsection >suffix-array }
|
||||||
|
|
|
@ -1,27 +1,30 @@
|
||||||
! Copyright (C) 2008 Marc Fauconneau.
|
! Copyright (C) 2008 Marc Fauconneau.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: tools.test suffix-arrays kernel namespaces ;
|
USING: tools.test suffix-arrays kernel namespaces sequences ;
|
||||||
IN: suffix-arrays.tests
|
IN: suffix-arrays.tests
|
||||||
|
|
||||||
! built from [ all-words 10 head [ name>> ] map ]
|
! built from [ all-words 10 head [ name>> ] map ]
|
||||||
{
|
[ ] [
|
||||||
"run-tests"
|
SA{
|
||||||
"must-fail-with"
|
"run-tests"
|
||||||
"test-all"
|
"must-fail-with"
|
||||||
"short-effect"
|
"test-all"
|
||||||
"failure"
|
"short-effect"
|
||||||
"test"
|
"failure"
|
||||||
"<failure>"
|
"test"
|
||||||
"this-test"
|
"<failure>"
|
||||||
"(unit-test)"
|
"this-test"
|
||||||
"unit-test"
|
"(unit-test)"
|
||||||
} "strings" set
|
"unit-test"
|
||||||
|
} "suffix-array" set
|
||||||
|
] unit-test
|
||||||
|
|
||||||
[ "strings" get >suffix-array "" swap query ] must-fail
|
[ "suffix-array" get "" swap query ] must-fail
|
||||||
|
|
||||||
[ { } >suffix-array "something" swap query ] must-fail
|
[ SA{ } "something" swap query ] must-fail
|
||||||
|
|
||||||
[ V{ "unit-test" "(unit-test)" } ]
|
[ V{ "unit-test" "(unit-test)" } ]
|
||||||
[ "strings" get >suffix-array "unit-test" swap query ] unit-test
|
[ "suffix-array" get "unit-test" swap query ] unit-test
|
||||||
|
|
||||||
[ V{ } ] [ "strings" get >suffix-array "something else" swap query ] unit-test
|
[ t ]
|
||||||
|
[ "suffix-array" get "something else" swap query empty? ] unit-test
|
||||||
|
|
|
@ -4,28 +4,27 @@ USING: parser kernel arrays math accessors sequences
|
||||||
math.vectors math.order sorting binary-search sets assocs fry ;
|
math.vectors math.order sorting binary-search sets assocs fry ;
|
||||||
IN: suffix-arrays
|
IN: suffix-arrays
|
||||||
|
|
||||||
! this suffix array is a sorted array of suffixes
|
<PRIVATE
|
||||||
! query is efficient through binary searches
|
|
||||||
|
|
||||||
: suffixes ( string -- suffixes-seq )
|
: suffixes ( string -- suffixes-seq )
|
||||||
dup length [ tail-slice ] with map ;
|
dup length [ tail-slice ] with map ;
|
||||||
|
|
||||||
|
: prefix<=> ( begin seq -- <=> )
|
||||||
|
[ <=> ] [ swap head? ] 2bi [ drop +eq+ ] when ;
|
||||||
|
|
||||||
|
: find-index ( begin suffix-array -- index )
|
||||||
|
[ prefix<=> ] with search drop ;
|
||||||
|
|
||||||
|
: from-to ( index begin suffix-array -- from to )
|
||||||
|
swap '[ _ head? not ]
|
||||||
|
[ find-last-from drop 1+ ]
|
||||||
|
[ find-from drop ] 3bi ;
|
||||||
|
PRIVATE>
|
||||||
|
|
||||||
: >suffix-array ( seq -- array )
|
: >suffix-array ( seq -- array )
|
||||||
[ suffixes ] map concat natural-sort ;
|
[ suffixes ] map concat natural-sort ;
|
||||||
|
|
||||||
: SA{ \ } [ >suffix-array ] parse-literal ; parsing
|
: SA{ \ } [ >suffix-array ] parse-literal ; parsing
|
||||||
|
|
||||||
: prefix<=> ( seq begin -- <=> )
|
|
||||||
[ swap <=> ] [ head? ] 2bi [ drop +eq+ ] when ;
|
|
||||||
|
|
||||||
: find-index ( suffix-array begin -- index )
|
|
||||||
'[ _ prefix<=> ] search drop ;
|
|
||||||
|
|
||||||
: from-to ( index suffix-array begin -- from to )
|
|
||||||
'[ _ head? not ]
|
|
||||||
[ find-last-from drop 1+ ]
|
|
||||||
[ find-from drop ] 3bi ;
|
|
||||||
|
|
||||||
: query ( begin suffix-array -- matches )
|
: query ( begin suffix-array -- matches )
|
||||||
[ swap [ find-index ] 2keep from-to [ min ] keep ] keep
|
[ [ find-index ] 2keep from-to [ min ] keep ] keep
|
||||||
<slice> [ seq>> ] map prune ;
|
<slice> [ seq>> ] map prune ;
|
||||||
|
|
Loading…
Reference in New Issue