checkin for prunedtree, i wrote some docs and spaced things a little better

db4
Doug Coleman 2008-10-01 14:19:28 -05:00
parent 3360f5a3ac
commit 27c38f6d30
7 changed files with 124 additions and 0 deletions

View File

@ -0,0 +1 @@
Marc Fauconneau

View File

@ -0,0 +1,44 @@
! Copyright (C) 2008 Marc Fauconneau.
! See http://factorcode.org/license.txt for BSD license.
USING: arrays help.markup help.syntax io.streams.string
sequences strings math ;
IN: suffix-arrays
! Help entry for >suffix-array: one input sequence, one output array.
! NOTE(review): the implementation maps suffixes over each element of the
! input, so "seq" is presumably a sequence of strings -- confirm.
HELP: >suffix-array
{ $values
{ "seq" sequence }
{ "array" array } }
{ $description "Creates a suffix array from the input sequence." } ;
! Help entry for the SA{ literal syntax (a parsing word, so it has no
! $values section).
HELP: SA{
{ $description "Creates a new literal suffix array at parse-time." } ;
! Help entry for suffixes: the output elements are tail slices of the
! input, not copies.
HELP: suffixes
{ $values
{ "string" string }
{ "suffixes-seq" "a sequence of slices" } }
{ $description "Returns a sequence of tail slices of the input string." } ;
! Help entry for from-to. Values are listed in stack-effect order:
! ( index suffix-array begin -- from to ).
HELP: from-to
{ $values
    { "index" integer } { "suffix-array" "a suffix-array" } { "begin" string }
    { "from" integer } { "to" integer } }
{ $description "Starting at " { $snippet "index" } ", searches backwards and forwards through " { $snippet "suffix-array" } " for the nearest suffixes that do not start with " { $snippet "begin" } ", and outputs the bounds of the run of suffixes lying between them." }
{ $notes "Slices are [m,n) and we want (m,n) so we increment." } ;
! Help entry for query. Values are listed in stack-effect order:
! ( begin suffix-array -- matches ).
HELP: query
{ $values
    { "begin" string } { "suffix-array" "a suffix-array" }
    { "matches" "a sequence of strings" } }
{ $description "Returns the strings from which " { $snippet "suffix-array" } " was built that contain " { $snippet "begin" } " as a subsequence, with duplicates removed. The result is empty when nothing matches." }
{ $notes "An error is thrown if " { $snippet "begin" } " is empty or if the suffix array is empty." } ;
! Overview article for the vocabulary; links the constructor, the literal
! syntax and the query word. ABOUT: makes it the vocabulary's main help page.
ARTICLE: "suffix-arrays" "Suffix arrays"
"The " { $vocab-link "suffix-arrays" } " vocabulary implements the suffix array data structure for efficient lookup of subsequences." $nl
"Creating new suffix arrays:"
{ $subsection >suffix-array }
"Literal suffix arrays:"
! POSTPONE: keeps the parsing word SA{ from being executed inside this literal.
{ $subsection POSTPONE: SA{ }
"Querying suffix arrays:"
{ $subsection query } ;
ABOUT: "suffix-arrays"

View File

@ -0,0 +1,27 @@
! Copyright (C) 2008 Marc Fauconneau.
! See http://factorcode.org/license.txt for BSD license.
USING: tools.test suffix-arrays kernel namespaces ;
IN: suffix-arrays.tests
! Sample data, built from [ all-words 10 head [ name>> ] map ]
{
    "run-tests" "must-fail-with" "test-all" "short-effect" "failure"
    "test" "<failure>" "this-test" "(unit-test)" "unit-test"
} "strings" set

! An empty query string is expected to throw.
[ "" "strings" get >suffix-array query ] must-fail

! Querying an empty suffix array is expected to throw.
[ "something" { } >suffix-array query ] must-fail

! Every sample string containing "unit-test" is reported once.
[ V{ "unit-test" "(unit-test)" } ]
[ "unit-test" "strings" get >suffix-array query ] unit-test

! A string that occurs nowhere yields an empty result.
[ V{ } ]
[ "something else" "strings" get >suffix-array query ] unit-test

View File

@ -0,0 +1,31 @@
! Copyright (C) 2008 Marc Fauconneau.
! See http://factorcode.org/license.txt for BSD license.
USING: parser kernel arrays math accessors sequences
math.vectors math.order sorting binary-search sets assocs fry ;
IN: suffix-arrays
! A suffix array here is a sorted array of the suffixes (tail slices) of
! every input string.
! Queries are efficient because matches are located with a binary search.
! Outputs every non-empty suffix of string as a tail slice, longest first:
! one slice for each start index from 0 up to length - 1.
: suffixes ( string -- suffixes-seq )
    [ length ] keep '[ _ swap tail-slice ] map ;
! Collects the suffixes of every element of seq into one sequence and
! sorts it with <=>.
: >suffix-array ( seq -- array )
    [ suffixes ] map concat
    [ <=> ] sort ;
! Parsing word for suffix array literals: parse-literal is given the
! closing delimiter } and the quotation [ >suffix-array ], so the suffix
! array is built at parse time.
: SA{ \ } [ >suffix-array ] parse-literal ; parsing
! Ordering used by the binary search: a seq that starts with begin counts
! as equal; otherwise the result is begin compared against seq.
: prefix<=> ( seq begin -- <=> )
    2dup head?
    [ 2drop +eq+ ]
    [ swap <=> ] if ;
! Binary-searches suffix-array with prefix<=> against begin and outputs
! only the index that search reports; the element is dropped.
: find-index ( suffix-array begin -- index )
    [ prefix<=> ] curry search drop ;
! Finds the bounds of the run of suffixes around index that start with begin.
! from is one past the nearest suffix at or before index that does not
! start with begin; to is the index of the nearest such suffix at or
! after index.
! NOTE(review): if no non-matching suffix exists in one direction, the find
! words presumably yield f, so 1+ throws (or to comes out as f) -- confirm
! whether matches touching either end of the array are meant to work.
: from-to ( index suffix-array begin -- from to )
    [ head? not ] curry
    [ find-last-from drop 1+ ] 3keep
    find-from drop ;
! Outputs the distinct underlying sequences whose suffixes start with begin.
! The index from find-index is widened by from-to into a range; from is
! clamped to at most to, so a miss produces an empty slice.
: query ( begin suffix-array -- matches )
    [
        swap 2dup find-index -rot from-to
        [ min ] keep
    ] keep <slice>
    ! each element is a tail slice; seq>> recovers the sequence it slices
    [ seq>> ] map prune ;

View File

@ -0,0 +1 @@
Suffix arrays

1
extra/suffix-arrays/tags.txt Executable file
View File

@ -0,0 +1 @@
collections

View File

@ -0,0 +1,19 @@
! Copyright (C) 2008 Marc Fauconneau.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel arrays math accessors sequences math.vectors
math.order sorting binary-search sets assocs fry suffix-arrays ;
IN: suffix-arrays.words
! to search on word names
! Builds a suffix array from the names (name>> slot) of the given words.
: new-word-sa ( words -- sa )
[ name>> ] map >suffix-array ;
! Builds a hashtable from each word name to a vector of the words bearing
! that name: first one empty vector per name, then every word is pushed
! onto the vector stored under its name.
: name>word-map ( words -- map )
    [ [ name>> V{ } clone ] H{ } map>assoc ] keep
    over '[ dup name>> _ at push ] each ;
! Looks up every word whose name contains begin.
! map is a name -> words table and sa is a suffix array over the same
! words' names.
: query-word-sa ( map begin sa -- matches )
    ! query leaves ( map names ) on the stack; swap so that fry captures
    ! the table and the names are the sequence being mapped over.
    query swap '[ _ at ] map concat ;

! usage example :
! clear all-words 100 head dup name>word-map "test" rot new-word-sa query-word-sa .