bloom-filters: simplify several functions
parent
640ed967c9
commit
c2482fe2bf
|
@ -29,20 +29,20 @@ IN: bloom-filters.tests
|
||||||
|
|
||||||
! Should not generate bignum hash codes. Enhanced double hashing may generate a
|
! Should not generate bignum hash codes. Enhanced double hashing may generate a
|
||||||
! lot of hash codes, and it's better to do this earlier than later.
|
! lot of hash codes, and it's better to do this earlier than later.
|
||||||
[ t ] [ 10000 iota [ hashcodes-from-object [ fixnum? ] both? ] map [ t = ] all? ] unit-test
|
[ t ] [ 10000 iota [ hashcodes-from-object [ fixnum? ] both? ] map [ ] all? ] unit-test
|
||||||
|
|
||||||
[ ?{ t f t f t f } ] [ { 0 2 4 } 6 <bit-array> [ set-indices ] keep ] unit-test
|
[ ?{ t f t f t f } ] [ { 0 2 4 } 6 <bit-array> [ set-indices ] keep ] unit-test
|
||||||
|
|
||||||
: empty-bloom-filter ( -- bloom-filter )
|
: empty-bloom-filter ( -- bloom-filter )
|
||||||
0.01 2000 <bloom-filter> ;
|
0.01 2000 <bloom-filter> ;
|
||||||
|
|
||||||
[ 1 ] [ empty-bloom-filter [ increment-n-objects ] keep current-n-objects>> ] unit-test
|
[ 1 ] [ empty-bloom-filter increment-n-objects current-n-objects>> ] unit-test
|
||||||
|
|
||||||
: basic-insert-test-setup ( -- bloom-filter )
|
: basic-insert-test-setup ( -- bloom-filter )
|
||||||
1 empty-bloom-filter [ bloom-filter-insert ] keep ;
|
1 empty-bloom-filter [ bloom-filter-insert ] keep ;
|
||||||
|
|
||||||
! Basic tests that insert does something
|
! Basic tests that insert does something
|
||||||
[ t ] [ basic-insert-test-setup bits>> [ t = ] any? ] unit-test
|
[ t ] [ basic-insert-test-setup bits>> [ ] any? ] unit-test
|
||||||
[ 1 ] [ basic-insert-test-setup current-n-objects>> ] unit-test
|
[ 1 ] [ basic-insert-test-setup current-n-objects>> ] unit-test
|
||||||
|
|
||||||
: non-empty-bloom-filter ( -- bloom-filter )
|
: non-empty-bloom-filter ( -- bloom-filter )
|
||||||
|
@ -59,13 +59,13 @@ IN: bloom-filters.tests
|
||||||
[ t ] [ 2000 iota
|
[ t ] [ 2000 iota
|
||||||
full-bloom-filter
|
full-bloom-filter
|
||||||
[ bloom-filter-member? ] curry map
|
[ bloom-filter-member? ] curry map
|
||||||
[ t = ] all? ] unit-test
|
[ ] all? ] unit-test
|
||||||
|
|
||||||
! We shouldn't have more than 0.01 false-positive rate.
|
! We shouldn't have more than 0.01 false-positive rate.
|
||||||
[ t ] [ 1000 iota [ drop most-positive-fixnum random 1000 + ] map
|
[ t ] [ 1000 iota [ drop most-positive-fixnum random 1000 + ] map
|
||||||
full-bloom-filter
|
full-bloom-filter
|
||||||
[ bloom-filter-member? ] curry map
|
[ bloom-filter-member? ] curry map
|
||||||
[ t = ] filter
|
[ ] filter
|
||||||
! TODO: This should be 10, but the false positive rate is currently very
|
! TODO: This should be 10, but the false positive rate is currently very
|
||||||
! high. It shouldn't be much more than this.
|
! high. It shouldn't be much more than this.
|
||||||
length 150 <= ] unit-test
|
length 150 <= ] unit-test
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
! Copyright (C) 2009 Alec Berryman.
|
! Copyright (C) 2009 Alec Berryman.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors arrays assocs bit-arrays kernel layouts locals math
|
USING: accessors arrays bit-arrays fry kernel layouts locals math math.functions
|
||||||
math.functions math.ranges multiline sequences ;
|
math.ranges multiline sequences ;
|
||||||
IN: bloom-filters
|
IN: bloom-filters
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -70,8 +70,8 @@ TUPLE: bloom-filter
|
||||||
map
|
map
|
||||||
n-hashes-range zip ;
|
n-hashes-range zip ;
|
||||||
|
|
||||||
:: smallest-first ( seq1 seq2 -- seq )
|
: smallest-first ( seq1 seq2 -- seq )
|
||||||
seq1 first seq2 first <= [ seq1 ] [ seq2 ] if ;
|
[ [ first ] bi@ <= ] most ;
|
||||||
|
|
||||||
! The consensus on the tradeoff between increasing the number of bits and
|
! The consensus on the tradeoff between increasing the number of bits and
|
||||||
! increasing the number of hash functions seems to be "go for the smallest
|
! increasing the number of hash functions seems to be "go for the smallest
|
||||||
|
@ -118,9 +118,7 @@ PRIVATE>
|
||||||
array-size mod ;
|
array-size mod ;
|
||||||
|
|
||||||
: enhanced-double-hashes ( n hash0 hash1 array-size -- seq )
|
: enhanced-double-hashes ( n hash0 hash1 array-size -- seq )
|
||||||
[ enhanced-double-hash ] 3curry
|
'[ _ _ _ enhanced-double-hash ] [ [0,b) ] dip map ;
|
||||||
[ [0,b) ] dip
|
|
||||||
map ;
|
|
||||||
|
|
||||||
! Stupid, should pick something good.
|
! Stupid, should pick something good.
|
||||||
: hashcodes-from-hashcode ( n -- n n )
|
: hashcodes-from-hashcode ( n -- n n )
|
||||||
|
@ -138,24 +136,23 @@ PRIVATE>
|
||||||
: set-indices ( indices bit-array -- )
|
: set-indices ( indices bit-array -- )
|
||||||
[ [ drop t ] change-nth ] curry each ;
|
[ [ drop t ] change-nth ] curry each ;
|
||||||
|
|
||||||
: increment-n-objects ( bloom-filter -- )
|
: increment-n-objects ( bloom-filter -- bloom-filter )
|
||||||
dup current-n-objects>> 1 + >>current-n-objects drop ;
|
[ 1 + ] change-current-n-objects ;
|
||||||
|
|
||||||
|
: n-hashes-and-bits ( bloom-filter -- n-hashes n-bits )
|
||||||
|
[ n-hashes>> ] [ bits>> length ] bi ;
|
||||||
|
|
||||||
! This would be better as an each-relevant-hash that didn't cons.
|
|
||||||
: relevant-indices ( value bloom-filter -- indices )
|
: relevant-indices ( value bloom-filter -- indices )
|
||||||
[ n-hashes>> ] [ bits>> length ] bi ! value n array-size
|
n-hashes-and-bits
|
||||||
swapd [ hashcodes-from-object ] dip ! n value1 value2 array-size
|
[ swap hashcodes-from-object ] dip
|
||||||
enhanced-double-hashes ;
|
enhanced-double-hashes ;
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
: bloom-filter-insert ( object bloom-filter -- )
|
: bloom-filter-insert ( object bloom-filter -- )
|
||||||
[ relevant-indices ]
|
increment-n-objects
|
||||||
[ bits>> set-indices ]
|
[ relevant-indices ] [ bits>> set-indices ] bi ;
|
||||||
[ increment-n-objects ]
|
|
||||||
tri ;
|
|
||||||
|
|
||||||
: bloom-filter-member? ( value bloom-filter -- ? )
|
: bloom-filter-member? ( value bloom-filter -- ? )
|
||||||
[ relevant-indices ]
|
[ relevant-indices ] keep
|
||||||
[ bits>> [ nth ] curry map [ t = ] all? ]
|
bits>> nths [ ] all? ;
|
||||||
bi ;
|
|
||||||
|
|
Loading…
Reference in New Issue