bloom-filters: tons of speed.
							parent
							
								
									1f45b8a4e3
								
							
						
					
					
						commit
						1cd3be1fb2
					
				| 
						 | 
					@ -38,8 +38,6 @@ IN: bloom-filters.tests
 | 
				
			||||||
! lot of hash codes, and it's better to do this earlier than later.
 | 
					! lot of hash codes, and it's better to do this earlier than later.
 | 
				
			||||||
[ t ] [ 10000 iota [ hashcodes-from-object [ fixnum? ] both? ] map [ ] all? ] unit-test
 | 
					[ t ] [ 10000 iota [ hashcodes-from-object [ fixnum? ] both? ] map [ ] all? ] unit-test
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[ ?{ t f t f t f } ] [ { 0 2 4 } 6 <bit-array> [ set-indices ] keep ] unit-test
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
: empty-bloom-filter ( -- bloom-filter )
 | 
					: empty-bloom-filter ( -- bloom-filter )
 | 
				
			||||||
    0.01 2000 <bloom-filter> ;
 | 
					    0.01 2000 <bloom-filter> ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,8 +1,8 @@
 | 
				
			||||||
! Copyright (C) 2009 Alec Berryman.
 | 
					! Copyright (C) 2009 Alec Berryman.
 | 
				
			||||||
! See http://factorcode.org/license.txt for BSD license.
 | 
					! See http://factorcode.org/license.txt for BSD license.
 | 
				
			||||||
USING: accessors arrays bit-arrays fry kernel layouts locals
 | 
					USING: accessors arrays bit-arrays fry kernel kernel.private
 | 
				
			||||||
math math.functions math.order math.private multiline sequences
 | 
					layouts locals math math.functions math.order math.private
 | 
				
			||||||
sequences.private typed ;
 | 
					multiline sequences sequences.private typed ;
 | 
				
			||||||
FROM: math.ranges => [1,b] ;
 | 
					FROM: math.ranges => [1,b] ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
IN: bloom-filters
 | 
					IN: bloom-filters
 | 
				
			||||||
| 
						 | 
					@ -110,38 +110,37 @@ PRIVATE>
 | 
				
			||||||
! Dillinger and Panagiotis Manolios, section 5.2, "Enhanced
 | 
					! Dillinger and Panagiotis Manolios, section 5.2, "Enhanced
 | 
				
			||||||
! Double Hashing":
 | 
					! Double Hashing":
 | 
				
			||||||
! http://www.cc.gatech.edu/~manolios/research/bloom-filters-verification.html
 | 
					! http://www.cc.gatech.edu/~manolios/research/bloom-filters-verification.html
 | 
				
			||||||
TYPED:: enhanced-double-hash ( index: fixnum hash0: fixnum hash1: fixnum -- hash )
 | 
					: enhanced-double-hash ( index hash0 hash1 -- hash )
 | 
				
			||||||
    hash0 index fixnum*fast hash1 fixnum+fast
 | 
					    { fixnum fixnum fixnum } declare
 | 
				
			||||||
    index 3 ^ index - 6 /i + abs ;
 | 
					    [ [ [ 3 ^ ] [ - ] bi 6 /i ] keep ]
 | 
				
			||||||
 | 
					    [ fixnum*fast ] [ fixnum+fast ] tri* + abs ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
: enhanced-double-hashes ( hash0 hash1 n -- seq )
 | 
					: enhanced-double-hashes ( hash0 hash1 length -- quot: ( elt -- n ) )
 | 
				
			||||||
    -rot '[ _ _ enhanced-double-hash ] { } map-integers ;
 | 
					    '[ _ _ enhanced-double-hash _ mod ] ; inline
 | 
				
			||||||
 | 
					
 | 
				
			||||||
! Make sure it's a fixnum here to speed up double-hashing.
 | 
					! Make sure it's a fixnum here to speed up double-hashing.
 | 
				
			||||||
: hashcodes-from-object ( obj -- n n )
 | 
					: hashcodes-from-object ( object -- n n )
 | 
				
			||||||
    hashcode >fixnum dup most-positive-fixnum bitxor >fixnum ;
 | 
					    hashcode >fixnum dup most-positive-fixnum bitxor >fixnum ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
TYPED: set-indices ( indices: array bit-array: bit-array -- )
 | 
					: increment-n-objects ( bloom-filter -- )
 | 
				
			||||||
    [ t ] 2dip [ set-nth-unsafe ] curry with each ; inline
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
TYPED: increment-n-objects ( bloom-filter: bloom-filter -- )
 | 
					 | 
				
			||||||
    [ 1 + ] change-current-n-objects drop ; inline
 | 
					    [ 1 + ] change-current-n-objects drop ; inline
 | 
				
			||||||
 | 
					
 | 
				
			||||||
TYPED: n-hashes-and-length ( bloom-filter: bloom-filter -- n-hashes length )
 | 
					: n-hashes-and-length ( bloom-filter -- n-hashes length )
 | 
				
			||||||
    [ n-hashes>> ] [ bits>> length ] bi ;
 | 
					    [ n-hashes>> ] [ bits>> length ] bi ; inline
 | 
				
			||||||
 | 
					
 | 
				
			||||||
TYPED: relevant-indices ( value bloom-filter: bloom-filter -- indices )
 | 
					: relevant-indices ( object bloom-filter -- n quot: ( elt -- n ) )
 | 
				
			||||||
    [ hashcodes-from-object ] [ n-hashes-and-length ] bi*
 | 
					    [ hashcodes-from-object ] [ n-hashes-and-length ] bi*
 | 
				
			||||||
    [ enhanced-double-hashes ] dip '[ _ mod ] map ;
 | 
					    [ -rot ] dip enhanced-double-hashes ; inline
 | 
				
			||||||
 | 
					
 | 
				
			||||||
PRIVATE>
 | 
					PRIVATE>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
: bloom-filter-insert ( object bloom-filter -- )
 | 
					TYPED: bloom-filter-insert ( object bloom-filter: bloom-filter -- )
 | 
				
			||||||
    [ increment-n-objects ]
 | 
					    [ increment-n-objects ]
 | 
				
			||||||
    [ relevant-indices ]
 | 
					    [ relevant-indices ]
 | 
				
			||||||
    [ bits>> set-indices ]
 | 
					    [ bits>> [ [ t ] 2dip set-nth-unsafe ] curry ]
 | 
				
			||||||
    tri ;
 | 
					    tri compose each-integer ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
: bloom-filter-member? ( object bloom-filter -- ? )
 | 
					TYPED: bloom-filter-member? ( object bloom-filter: bloom-filter -- ? )
 | 
				
			||||||
    [ relevant-indices ] [ bits>> ] bi
 | 
					    [ relevant-indices ]
 | 
				
			||||||
    [ nth-unsafe ] curry all? ;
 | 
					    [ bits>> [ nth-unsafe ] curry ]
 | 
				
			||||||
 | 
					    bi compose all-integers? ;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue