diff --git a/extra/bloom-filters/bloom-filters-tests.factor b/extra/bloom-filters/bloom-filters-tests.factor index b7a5d7ebc2..40fd1469b2 100644 --- a/extra/bloom-filters/bloom-filters-tests.factor +++ b/extra/bloom-filters/bloom-filters-tests.factor @@ -29,20 +29,20 @@ IN: bloom-filters.tests ! Should not generate bignum hash codes. Enhanced double hashing may generate a ! lot of hash codes, and it's better to do this earlier than later. -[ t ] [ 10000 iota [ hashcodes-from-object [ fixnum? ] both? ] map [ t = ] all? ] unit-test +[ t ] [ 10000 iota [ hashcodes-from-object [ fixnum? ] both? ] map [ ] all? ] unit-test [ ?{ t f t f t f } ] [ { 0 2 4 } 6 [ set-indices ] keep ] unit-test : empty-bloom-filter ( -- bloom-filter ) 0.01 2000 ; -[ 1 ] [ empty-bloom-filter [ increment-n-objects ] keep current-n-objects>> ] unit-test +[ 1 ] [ empty-bloom-filter increment-n-objects current-n-objects>> ] unit-test : basic-insert-test-setup ( -- bloom-filter ) 1 empty-bloom-filter [ bloom-filter-insert ] keep ; ! Basic tests that insert does something -[ t ] [ basic-insert-test-setup bits>> [ t = ] any? ] unit-test +[ t ] [ basic-insert-test-setup bits>> [ ] any? ] unit-test [ 1 ] [ basic-insert-test-setup current-n-objects>> ] unit-test : non-empty-bloom-filter ( -- bloom-filter ) @@ -59,13 +59,13 @@ IN: bloom-filters.tests [ t ] [ 2000 iota full-bloom-filter [ bloom-filter-member? ] curry map - [ t = ] all? ] unit-test + [ ] all? ] unit-test ! We shouldn't have more than 0.01 false-positive rate. [ t ] [ 1000 iota [ drop most-positive-fixnum random 1000 + ] map full-bloom-filter [ bloom-filter-member? ] curry map - [ t = ] filter + [ ] filter ! TODO: This should be 10, but the false positive rate is currently very ! high. It shouldn't be much more than this. length 150 <= ] unit-test diff --git a/extra/bloom-filters/bloom-filters.factor b/extra/bloom-filters/bloom-filters.factor index 94d0dd070f..3e0aba175c 100644 --- a/extra/bloom-filters/bloom-filters.factor +++ b/extra/bloom-filters/bloom-filters.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2009 Alec Berryman. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors arrays assocs bit-arrays kernel layouts locals math -math.functions math.ranges multiline sequences ; +USING: accessors arrays bit-arrays fry kernel layouts locals math math.functions +math.ranges multiline sequences ; IN: bloom-filters /* @@ -70,8 +70,8 @@ TUPLE: bloom-filter map n-hashes-range zip ; -:: smallest-first ( seq1 seq2 -- seq ) - seq1 first seq2 first <= [ seq1 ] [ seq2 ] if ; +: smallest-first ( seq1 seq2 -- seq ) + [ [ first ] bi@ <= ] most ; ! The consensus on the tradeoff between increasing the number of bits and ! increasing the number of hash functions seems to be "go for the smallest @@ -118,9 +118,7 @@ PRIVATE> array-size mod ; : enhanced-double-hashes ( n hash0 hash1 array-size -- seq ) - [ enhanced-double-hash ] 3curry - [ [0,b) ] dip - map ; + '[ _ _ _ enhanced-double-hash ] [ [0,b) ] dip map ; ! Stupid, should pick something good. : hashcodes-from-hashcode ( n -- n n ) @@ -138,24 +136,23 @@ PRIVATE> : set-indices ( indices bit-array -- ) [ [ drop t ] change-nth ] curry each ; -: increment-n-objects ( bloom-filter -- ) - dup current-n-objects>> 1 + >>current-n-objects drop ; +: increment-n-objects ( bloom-filter -- bloom-filter ) + [ 1 + ] change-current-n-objects ; + +: n-hashes-and-bits ( bloom-filter -- n-hashes n-bits ) + [ n-hashes>> ] [ bits>> length ] bi ; -! This would be better as an each-relevant-hash that didn't cons. : relevant-indices ( value bloom-filter -- indices ) - [ n-hashes>> ] [ bits>> length ] bi ! value n array-size - swapd [ hashcodes-from-object ] dip ! n value1 value2 array-size + n-hashes-and-bits + [ swap hashcodes-from-object ] dip enhanced-double-hashes ; PRIVATE> : bloom-filter-insert ( object bloom-filter -- ) - [ relevant-indices ] - [ bits>> set-indices ] - [ increment-n-objects ] - tri ; + increment-n-objects + [ relevant-indices ] [ bits>> set-indices ] bi ; : bloom-filter-member? ( value bloom-filter -- ? ) - [ relevant-indices ] - [ bits>> [ nth ] curry map [ t = ] all? ] - bi ; + [ relevant-indices ] keep + bits>> nths [ ] all? ;