Refactor binary search

db4
Slava Pestov 2008-07-15 17:16:08 -05:00
parent b4fc1e0d5f
commit ad87a38ab8
12 changed files with 136 additions and 80 deletions

View File

@ -0,0 +1,43 @@
IN: binary-search
USING: help.markup help.syntax sequences kernel math.order ;
ARTICLE: "binary-search" "Binary search"
"The " { $emphasis "binary search" } " algorithm allows elements to be located in sorted sequence in " { $snippet "O(log n)" } " time."
{ $subsection search }
"Variants of sequence words optimized for sorted sequences:"
{ $subsection sorted-index }
{ $subsection sorted-member? }
{ $subsection sorted-memq? }
{ $see-also "order-specifiers" "sequences-sorting" } ;
ABOUT: "binary-search"
HELP: search
{ $values { "seq" "a sorted sequence" } { "quot" "a quotation with stack effect " { $snippet "( elt -- <=> )" } } { "i" "an index, or " { $link f } } { "elt" "an element, or " { $link f } } }
{ $description "Performs a binary search on a sequence, calling the quotation to decide whether to end the search (" { $link +eq+ } "), search lower (" { $link +lt+ } ") or search higher (" { $link +gt+ } ")."
$nl
"If the sequence is non-empty, outputs the index and value of the closest match, which is either an element for which the quotation output " { $link +eq+ } ", or failing that, least element for which the quotation output " { $link +lt+ } "."
$nl
"If the sequence is empty, outputs " { $link f } " " { $link f } "." }
{ $notes "If the sequence has at least one element, this word always outputs a valid index, because it finds the closest match, not necessarily an exact one. In this respect its behavior differs from " { $link find } "." } ;
{ find find-from find-last find-last find-last-from search } related-words
HELP: sorted-index
{ $values { "elt" object } { "seq" "a sorted sequence" } { "i" "an index, or " { $link f } } { "elt" "an element, or " { $link f } } }
{ $description "Outputs the index and value of the element closest to " { $snippet "elt" } " in the sequence. See " { $link search } " for details." }
{ $notes "If the sequence has at least one element, this word always outputs a valid index, because it finds the closest match, not necessarily an exact one. In this respect its behavior differs from " { $link index } "." } ;
{ index index-from last-index last-index-from sorted-index } related-words
HELP: sorted-member?
{ $values { "elt" object } { "seq" "a sorted sequence" } { "?" "a boolean" } }
{ $description "Tests if the sorted sequence contains " { $snippet "elt" } ". Equality is tested with " { $link = } "." } ;
{ member? sorted-member? } related-words
HELP: sorted-memq?
{ $values { "elt" object } { "seq" "a sorted sequence" } { "?" "a boolean" } }
{ $description "Tests if the sorted sequence contains " { $snippet "elt" } ". Equality is tested with " { $link eq? } "." } ;
{ memq? sorted-memq? } related-words

View File

@ -0,0 +1,17 @@
IN: binary-search.tests
USING: binary-search math.order vectors kernel tools.test ;
\ sorted-member? must-infer
[ f ] [ 3 { } [ <=> ] with search drop ] unit-test
[ 0 ] [ 3 { 3 } [ <=> ] with search drop ] unit-test
[ 1 ] [ 2 { 1 2 3 } [ <=> ] with search drop ] unit-test
[ 3 ] [ 4 { 1 2 3 4 5 6 } [ <=> ] with search drop ] unit-test
[ 2 ] [ 3.5 { 1 2 3 4 5 6 7 8 } [ <=> ] with search drop ] unit-test
[ 4 ] [ 5.5 { 1 2 3 4 5 6 7 8 } [ <=> ] with search drop ] unit-test
[ 10 ] [ 10 20 >vector [ <=> ] with search drop ] unit-test
[ t ] [ "hello" { "alligrator" "cat" "fish" "hello" "ikarus" "java" } sorted-member? ] unit-test
[ 3 ] [ "hey" { "alligrator" "cat" "fish" "hello" "ikarus" "java" } sorted-index ] unit-test
[ f ] [ "hello" { "alligrator" "cat" "fish" "ikarus" "java" } sorted-member? ] unit-test
[ f ] [ "zebra" { "alligrator" "cat" "fish" "ikarus" "java" } sorted-member? ] unit-test

View File

@ -0,0 +1,46 @@
! Copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences sequences.private accessors math
math.order combinators ;
IN: binary-search
<PRIVATE
: midpoint ( seq -- elt )
[ midpoint@ ] keep nth-unsafe ; inline
: decide ( quot seq -- quot seq <=> )
[ midpoint swap call ] 2keep rot ; inline
: finish ( quot slice -- i elt )
[ [ from>> ] [ midpoint@ ] bi + ] [ seq>> ] bi
[ drop ] [ dup ] [ ] tri* nth ; inline
: (search) ( quot seq -- i elt )
dup length 1 <= [
finish
] [
decide {
{ +eq+ [ finish ] }
{ +lt+ [ dup midpoint@ head-slice (search) ] }
{ +gt+ [ dup midpoint@ tail-slice (search) ] }
} case
] if ; inline
PRIVATE>
: search ( seq quot -- i elt )
over empty? [ 2drop f f ] [ swap <flat-slice> (search) ] if ;
inline
: natural-search ( obj seq -- i elt )
[ <=> ] with search ;
: sorted-index ( obj seq -- i )
natural-search drop ;
: sorted-member? ( obj seq -- ? )
dupd natural-search nip = ;
: sorted-memq? ( obj seq -- ? )
dupd natural-search nip eq? ;

View File

@ -243,6 +243,7 @@ $nl
{ $subsection "sequences-destructive" }
{ $subsection "sequences-stacks" }
{ $subsection "sequences-sorting" }
{ $subsection "binary-search" }
{ $subsection "sets" }
"For inner loops:"
{ $subsection "sequences-unsafe" } ;
@ -585,8 +586,6 @@ HELP: index
{ $values { "obj" object } { "seq" sequence } { "n" "an index" } }
{ $description "Outputs the index of the first element in the sequence equal to " { $snippet "obj" } ". If no element is found, outputs " { $link f } "." } ;
{ index index-from last-index last-index-from member? memq? } related-words
HELP: index-from
{ $values { "obj" object } { "i" "a start index" } { "seq" sequence } { "n" "an index" } }
{ $description "Outputs the index of the first element in the sequence equal to " { $snippet "obj" } ", starting the search from the " { $snippet "i" } "th element. If no element is found, outputs " { $link f } "." } ;

View File

@ -2,18 +2,15 @@ USING: help.markup help.syntax kernel words math
sequences math.order ;
IN: sorting
ARTICLE: "sequences-sorting" "Sorting and binary search"
"Sorting and binary search combinators all take comparator quotations with stack effect " { $snippet "( elt1 elt2 -- <=> )" } ", where the output value is one of the three " { $link "order-specifiers" } "."
ARTICLE: "sequences-sorting" "Sorting sequences"
"Sorting combinators all take comparator quotations with stack effect " { $snippet "( elt1 elt2 -- <=> )" } ", where the output value is one of the three " { $link "order-specifiers" } "."
$nl
"Sorting a sequence with a custom comparator:"
{ $subsection sort }
"Sorting a sequence with common comparators:"
{ $subsection natural-sort }
{ $subsection sort-keys }
{ $subsection sort-values }
"Binary search:"
{ $subsection binsearch }
{ $subsection binsearch* } ;
{ $subsection sort-values } ;
ABOUT: "sequences-sorting"
@ -41,24 +38,4 @@ HELP: midpoint@
{ $values { "seq" "a sequence" } { "n" integer } }
{ $description "Outputs the index of the midpoint of " { $snippet "seq" } "." } ;
HELP: midpoint
{ $values { "seq" "a sequence" } { "elt" object } }
{ $description "Outputs the element at the midpoint of a sequence." } ;
HELP: partition
{ $values { "seq" "a sequence" } { "n" integer } { "slice" slice } }
{ $description "Outputs a slice of the first or second half of the sequence, respectively, depending on the integer's sign." } ;
HELP: binsearch
{ $values { "elt" object } { "seq" "a sorted sequence" } { "quot" "a quotation with stack effect " { $snippet "( obj1 obj2 -- <=> )" } } { "i" "the index of the search result" } }
{ $description "Given a sequence that is sorted with respect to the " { $snippet "quot" } " comparator, searches for an element equal to " { $snippet "elt" } ", or failing that, the greatest element smaller than " { $snippet "elt" } ". Comparison is performed with " { $snippet "quot" } "."
$nl
"Outputs f if the sequence is empty. If the sequence has at least one element, this word always outputs a valid index." } ;
HELP: binsearch*
{ $values { "elt" object } { "seq" "a sorted sequence" } { "quot" "a quotation with stack effect " { $snippet "( obj1 obj2 -- <=> )" } } { "result" "the search result" } }
{ $description "Variant of " { $link binsearch } " which outputs the found element rather than its index in the sequence."
$nl
"Outputs " { $link f } " if the sequence is empty. If the sequence has at least one element, this word always outputs a sequence element." } ;
{ <=> compare natural-sort sort-keys sort-values } related-words

View File

@ -16,13 +16,3 @@ unit-test
] unit-test
[ ] [ { 1 2 } [ 2drop 1 ] sort drop ] unit-test
[ 3 ] [ { 1 2 3 4 } midpoint ] unit-test
[ f ] [ 3 { } [ <=> ] binsearch ] unit-test
[ 0 ] [ 3 { 3 } [ <=> ] binsearch ] unit-test
[ 1 ] [ 2 { 1 2 3 } [ <=> ] binsearch ] unit-test
[ 3 ] [ 4 { 1 2 3 4 5 6 } [ <=> ] binsearch ] unit-test
[ 2 ] [ 3.5 { 1 2 3 4 5 6 7 8 } [ <=> ] binsearch ] unit-test
[ 4 ] [ 5.5 { 1 2 3 4 5 6 7 8 } [ <=> ] binsearch ] unit-test
[ 10 ] [ 10 20 >vector [ <=> ] binsearch ] unit-test

View File

@ -1,4 +1,4 @@
! Copyright (C) 2005, 2007 Slava Pestov.
! Copyright (C) 2005, 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays kernel math sequences vectors math.order
sequences sequences.private math.order ;
@ -53,25 +53,3 @@ PRIVATE>
: sort-values ( seq -- sortedseq ) [ [ second ] compare ] sort ;
: sort-pair ( a b -- c d ) 2dup after? [ swap ] when ;
: midpoint ( seq -- elt )
[ midpoint@ ] keep nth-unsafe ; inline
: partition ( seq n -- slice )
+gt+ eq? not swap halves ? ; inline
: (binsearch) ( elt quot seq -- i )
dup length 1 <= [
slice-from 2nip
] [
[ midpoint swap call ] 3keep roll dup +eq+ eq?
[ drop dup slice-from swap midpoint@ + 2nip ]
[ partition (binsearch) ] if
] if ; inline
: binsearch ( elt seq quot -- i )
swap dup empty?
[ 3drop f ] [ <flat-slice> (binsearch) ] if ; inline
: binsearch* ( elt seq quot -- result )
over >r binsearch [ r> ?nth ] [ r> drop f ] if* ; inline

View File

@ -1,7 +1,7 @@
! Copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors assocs sequences sorting math math.order
arrays combinators kernel ;
USING: accessors assocs sequences sorting binary-search math
math.order arrays combinators kernel ;
IN: cords
<PRIVATE
@ -23,7 +23,7 @@ M: multi-cord length count>> ;
M: multi-cord virtual@
dupd
seqs>> [ first <=> ] binsearch*
seqs>> [ first <=> ] with search nip
[ first - ] [ second ] bi ;
M: multi-cord virtual-seq

View File

@ -1,5 +1,7 @@
USING: kernel sequences arrays accessors grouping
math.order sorting math assocs locals namespaces ;
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences arrays accessors grouping math.order
sorting binary-search math assocs locals namespaces ;
IN: interval-maps
TUPLE: interval-map array ;
@ -7,7 +9,7 @@ TUPLE: interval-map array ;
<PRIVATE
: find-interval ( key interval-map -- interval-node )
[ first <=> ] binsearch* ;
[ first <=> ] with search nip ;
: interval-contains? ( key interval-node -- ? )
first2 between? ;

View File

@ -1,7 +1,8 @@
! Copyright (C) 2007 Samuel Tardieu.
! See http://factorcode.org/license.txt for BSD license.
USING: combinators kernel lists.lazy math math.functions math.miller-rabin
math.order math.primes.list math.ranges sequences sorting ;
math.order math.primes.list math.ranges sequences sorting
binary-search ;
IN: math.primes
<PRIVATE
@ -13,14 +14,14 @@ PRIVATE>
: next-prime ( n -- p )
dup 999983 < [
primes-under-million [ [ <=> ] binsearch 1+ ] keep nth
primes-under-million [ natural-search drop 1+ ] keep nth
] [
next-odd find-prime-miller-rabin
] if ; foldable
: prime? ( n -- ? )
dup 1000000 < [
dup primes-under-million [ <=> ] binsearch* =
dup primes-under-million natural-search nip =
] [
miller-rabin
] if ; foldable
@ -37,7 +38,7 @@ PRIVATE>
{
{ [ dup 2 < ] [ drop { } ] }
{ [ dup 1000003 < ]
[ primes-under-million [ [ <=> ] binsearch 1+ 0 swap ] keep <slice> ] }
[ primes-under-million [ natural-search drop 1+ 0 swap ] keep <slice> ] }
[ primes-under-million 1000003 lprimes-from
rot [ <= ] curry lwhile list>array append ]
} cond ; foldable
@ -45,6 +46,6 @@ PRIVATE>
: primes-between ( low high -- seq )
primes-upto
[ 1- next-prime ] dip
[ [ <=> ] binsearch ] keep [ length ] keep <slice> ; foldable
[ natural-search drop ] keep [ length ] keep <slice> ; foldable
: coprime? ( a b -- ? ) gcd nip 1 = ; foldable

View File

@ -1,9 +1,9 @@
! Copyright (C) 2005, 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays hashtables kernel models math namespaces
sequences quotations math.vectors combinators sorting vectors
dlists dequeues models threads concurrency.flags
math.order math.geometry.rect ;
sequences quotations math.vectors combinators sorting
binary-search vectors dlists dequeues models threads
concurrency.flags math.order math.geometry.rect ;
IN: ui.gadgets
@ -70,12 +70,15 @@ GENERIC: children-on ( rect/point gadget -- seq )
M: gadget children-on nip children>> ;
: (fast-children-on) ( dim axis gadgets -- i )
swapd [ rect-loc v- over v. 0 <=> ] binsearch nip ;
: ((fast-children-on)) ( gadget dim axis -- <=> )
[ swap loc>> v- ] dip v. 0 <=> ;
: (fast-children-on) ( dim axis children -- i )
-rot [ ((fast-children-on)) ] 2curry search drop ;
: fast-children-on ( rect axis children -- from to )
[ >r >r rect-loc r> r> (fast-children-on) 0 or ]
[ >r >r dup rect-loc swap rect-dim v+ r> r> (fast-children-on) ?1+ ]
[ [ rect-loc ] 2dip (fast-children-on) 0 or ]
[ [ rect-bounds v+ ] 2dip (fast-children-on) ?1+ ]
3bi ;
: inside? ( bounds gadget -- ? )

View File

@ -50,4 +50,4 @@ MEMO: cities-named-in ( name state -- cities )
] with with filter ;
: find-zip-code ( code -- city )
cities [ first-zip>> <=> ] binsearch* ;
cities [ first-zip>> <=> ] with search nip ;