Faster mergesort conses less and no longer does slice fiddling

db4
Slava Pestov 2008-07-15 20:48:25 -05:00
parent ad87a38ab8
commit 2a1f6885fb
3 changed files with 121 additions and 30 deletions

View File

@ -143,6 +143,14 @@ IN: optimizer.known-words
{ [ dup optimize-instance? ] [ optimize-instance ] }
} define-optimizers
! This is a special-case hack
: redundant-array-capacity-check? ( #call -- ? )
dup in-d>> first node-literal [ 0 = ] [ fixnum? ] bi and ;
\ array-capacity? {
{ [ dup redundant-array-capacity-check? ] [ [ drop t ] f splice-quot ] }
} define-optimizers
! eq? on the same object is always t
{ eq? = } {
{ { @ @ } [ 2drop t ] }

View File

@ -1,8 +1,8 @@
USING: sorting sequences kernel math math.order random
tools.test vectors ;
tools.test vectors sets ;
IN: sorting.tests
[ [ ] ] [ [ ] natural-sort ] unit-test
[ { } ] [ { } natural-sort ] unit-test
[ { 270000000 270000001 } ]
[ T{ slice f 270000000 270000002 270000002 } natural-sort ]
@ -11,7 +11,9 @@ unit-test
[ t ] [
100 [
drop
100 [ 20 random [ 1000 random ] replicate ] replicate natural-sort [ before=? ] monotonic?
100 [ 20 random [ 1000 random ] replicate ] replicate
dup natural-sort
[ set= ] [ nip [ before=? ] monotonic? ] 2bi and
] all?
] unit-test

View File

@ -4,46 +4,127 @@ USING: accessors arrays kernel math sequences vectors math.order
sequences sequences.private math.order ;
IN: sorting
DEFER: sort
! Optimized merge-sort:
!
! 1) only allocates 2 temporary arrays
! 2) first phase (interchanging pairs x[i], x[i+1] where
! x[i] > x[i+1]) is handled specially
<PRIVATE
: <iterator> 0 tail-slice ; inline
TUPLE: merge
{ seq array }
{ accum vector }
{ accum1 vector }
{ accum2 vector }
{ from1 array-capacity }
{ to1 array-capacity }
{ from2 array-capacity }
{ to2 array-capacity } ;
: this ( slice -- obj )
dup slice-from swap slice-seq nth-unsafe ; inline
: dump ( from to seq accum -- )
#! Optimize common case where to - from = 1.
>r >r 2dup swap - 1 =
[ drop r> nth-unsafe r> push ]
[ r> <slice> r> push-all ]
if ; inline
: next ( iterator -- )
dup slice-from 1+ swap set-slice-from ; inline
: l-elt [ from1>> ] [ seq>> ] bi nth-unsafe ; inline
: r-elt [ from2>> ] [ seq>> ] bi nth-unsafe ; inline
: l-done? [ from1>> ] [ to1>> ] bi number= ; inline
: r-done? [ from2>> ] [ to2>> ] bi number= ; inline
: dump-l [ [ from1>> ] [ to1>> ] [ seq>> ] tri ] [ accum>> ] bi dump ; inline
: dump-r [ [ from2>> ] [ to2>> ] [ seq>> ] tri ] [ accum>> ] bi dump ; inline
: l-next [ [ l-elt ] [ [ 1+ ] change-from1 drop ] bi ] [ accum>> ] bi push ; inline
: r-next [ [ r-elt ] [ [ 1+ ] change-from2 drop ] bi ] [ accum>> ] bi push ; inline
: decide [ [ l-elt ] [ r-elt ] bi ] dip call +lt+ eq? ; inline
: smallest ( iter1 iter2 quot -- elt )
>r over this over this r> call +lt+ eq?
-rot ? [ this ] keep next ; inline
: (merge) ( iter1 iter2 quot accum -- )
>r pick empty? [
drop nip r> push-all
] [
over empty? [
2drop r> push-all
] [
3dup smallest r> [ push ] keep (merge)
: (merge) ( merge quot -- )
over l-done? [ drop dump-r ] [
over r-done? [ drop dump-l ] [
2dup decide
[ over l-next ] [ over r-next ] if
(merge)
] if
] if ; inline
: merge ( sorted1 sorted2 quot -- result )
>r [ [ <iterator> ] bi@ ] 2keep r>
rot length rot length + <vector>
[ (merge) ] [ underlying>> ] bi ; inline
: flip-accum ( merge -- )
dup [ accum>> ] [ accum1>> ] bi eq? [
dup accum1>> underlying>> >>seq
dup accum2>> >>accum
] [
dup accum1>> >>accum
dup accum2>> underlying>> >>seq
] if
dup accum>> 0 >>length 2drop ; inline
: conquer ( first second quot -- result )
[ tuck >r >r sort r> r> sort ] keep merge ; inline
: <merge> ( seq -- merge )
\ merge new
over >vector >>accum1
swap length <vector> >>accum2
dup accum1>> underlying>> >>seq
dup accum2>> >>accum
dup accum>> 0 >>length drop ; inline
: compute-midpoint ( merge -- merge )
dup [ from1>> ] [ to2>> ] bi + 2/ >>to1 ; inline
: merging ( from to merge -- )
swap >>to2
swap >>from1
compute-midpoint
dup [ to1>> ] [ seq>> length ] bi min >>to1
dup [ to2>> ] [ seq>> length ] bi min >>to2
dup to1>> >>from2
drop ; inline
: nth-chunk ( n size -- from to ) [ * dup ] keep + ; inline
: chunks ( length size -- n ) [ align ] keep /i ; inline
: each-chunk ( length size quot -- )
[ [ chunks ] keep ] dip
[ nth-chunk ] prepose curry
each-integer ; inline
: merge ( from to merge quot -- )
[ [ merging ] keep ] dip (merge) ; inline
: sort-pass ( merge size quot -- )
[
over flip-accum
over [ seq>> length ] 2dip
] dip
[ merge ] 2curry each-chunk ; inline
: sort-loop ( merge quot -- )
2 swap
[ pick seq>> length pick > ]
[ [ dup ] [ 1 shift ] [ ] tri* [ sort-pass ] 2keep ]
[ ] while 3drop ; inline
: each-pair ( seq quot -- )
[ [ length 1+ 2/ ] keep ] dip
[ [ 1 shift dup 1+ ] dip ] prepose curry each-integer ; inline
: (sort-pairs) ( i1 i2 seq quot accum -- )
>r >r 2dup length = [
nip nth r> drop r> push
] [
tuck [ nth-unsafe ] 2bi@ 2dup r> call +gt+ eq?
[ swap ] when r> tuck [ push ] 2bi@
] if ; inline
: sort-pairs ( merge quot -- )
[ [ seq>> ] [ accum>> ] bi ] dip swap
[ (sort-pairs) ] 2curry each-pair ; inline
PRIVATE>
: sort ( seq quot -- sortedseq )
over length 1 <=
[ drop ] [ over >r >r halves r> conquer r> like ] if ;
: sort ( seq quot -- seq' )
[ <merge> ] dip
[ sort-pairs ] [ sort-loop ] [ drop accum>> underlying>> ] 2tri ;
inline
: natural-sort ( seq -- sortedseq ) [ <=> ] sort ;