bloom-filters: tons of speed.

db4
John Benediktsson 2013-04-04 07:10:13 -07:00
parent 1f45b8a4e3
commit 1cd3be1fb2
2 changed files with 22 additions and 25 deletions

View File

@ -38,8 +38,6 @@ IN: bloom-filters.tests
! lot of hash codes, and it's better to do this earlier than later. ! lot of hash codes, and it's better to do this earlier than later.
[ t ] [ 10000 iota [ hashcodes-from-object [ fixnum? ] both? ] map [ ] all? ] unit-test [ t ] [ 10000 iota [ hashcodes-from-object [ fixnum? ] both? ] map [ ] all? ] unit-test
[ ?{ t f t f t f } ] [ { 0 2 4 } 6 <bit-array> [ set-indices ] keep ] unit-test
: empty-bloom-filter ( -- bloom-filter ) : empty-bloom-filter ( -- bloom-filter )
0.01 2000 <bloom-filter> ; 0.01 2000 <bloom-filter> ;

View File

@ -1,8 +1,8 @@
! Copyright (C) 2009 Alec Berryman. ! Copyright (C) 2009 Alec Berryman.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays bit-arrays fry kernel layouts locals USING: accessors arrays bit-arrays fry kernel kernel.private
math math.functions math.order math.private multiline sequences layouts locals math math.functions math.order math.private
sequences.private typed ; multiline sequences sequences.private typed ;
FROM: math.ranges => [1,b] ; FROM: math.ranges => [1,b] ;
IN: bloom-filters IN: bloom-filters
@ -110,38 +110,37 @@ PRIVATE>
! Dillinger and Panagiotis Manolios, section 5.2, "Enhanced ! Dillinger and Panagiotis Manolios, section 5.2, "Enhanced
! Double Hashing": ! Double Hashing":
! http://www.cc.gatech.edu/~manolios/research/bloom-filters-verification.html ! http://www.cc.gatech.edu/~manolios/research/bloom-filters-verification.html
TYPED:: enhanced-double-hash ( index: fixnum hash0: fixnum hash1: fixnum -- hash ) : enhanced-double-hash ( index hash0 hash1 -- hash )
hash0 index fixnum*fast hash1 fixnum+fast { fixnum fixnum fixnum } declare
index 3 ^ index - 6 /i + abs ; [ [ [ 3 ^ ] [ - ] bi 6 /i ] keep ]
[ fixnum*fast ] [ fixnum+fast ] tri* + abs ;
: enhanced-double-hashes ( hash0 hash1 n -- seq ) : enhanced-double-hashes ( hash0 hash1 length -- quot: ( elt -- n ) )
-rot '[ _ _ enhanced-double-hash ] { } map-integers ; '[ _ _ enhanced-double-hash _ mod ] ; inline
! Make sure it's a fixnum here to speed up double-hashing. ! Make sure it's a fixnum here to speed up double-hashing.
: hashcodes-from-object ( obj -- n n ) : hashcodes-from-object ( object -- n n )
hashcode >fixnum dup most-positive-fixnum bitxor >fixnum ; hashcode >fixnum dup most-positive-fixnum bitxor >fixnum ;
TYPED: set-indices ( indices: array bit-array: bit-array -- ) : increment-n-objects ( bloom-filter -- )
[ t ] 2dip [ set-nth-unsafe ] curry with each ; inline
TYPED: increment-n-objects ( bloom-filter: bloom-filter -- )
[ 1 + ] change-current-n-objects drop ; inline [ 1 + ] change-current-n-objects drop ; inline
TYPED: n-hashes-and-length ( bloom-filter: bloom-filter -- n-hashes length ) : n-hashes-and-length ( bloom-filter -- n-hashes length )
[ n-hashes>> ] [ bits>> length ] bi ; [ n-hashes>> ] [ bits>> length ] bi ; inline
TYPED: relevant-indices ( value bloom-filter: bloom-filter -- indices ) : relevant-indices ( object bloom-filter -- n quot: ( elt -- n ) )
[ hashcodes-from-object ] [ n-hashes-and-length ] bi* [ hashcodes-from-object ] [ n-hashes-and-length ] bi*
[ enhanced-double-hashes ] dip '[ _ mod ] map ; [ -rot ] dip enhanced-double-hashes ; inline
PRIVATE> PRIVATE>
: bloom-filter-insert ( object bloom-filter -- ) TYPED: bloom-filter-insert ( object bloom-filter: bloom-filter -- )
[ increment-n-objects ] [ increment-n-objects ]
[ relevant-indices ] [ relevant-indices ]
[ bits>> set-indices ] [ bits>> [ [ t ] 2dip set-nth-unsafe ] curry ]
tri ; tri compose each-integer ;
: bloom-filter-member? ( object bloom-filter -- ? ) TYPED: bloom-filter-member? ( object bloom-filter: bloom-filter -- ? )
[ relevant-indices ] [ bits>> ] bi [ relevant-indices ]
[ nth-unsafe ] curry all? ; [ bits>> [ nth-unsafe ] curry ]
bi compose all-integers? ;