math.cardinality: adding a cardinality estimator.
							parent
							
								
									ec4aaee505
								
							
						
					
					
						commit
						7e9f3d665b
					
				| 
						 | 
					@ -0,0 +1 @@
 | 
				
			||||||
 | 
					John Benediktsson
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,12 @@
 | 
				
			||||||
 | 
					! Copyright (C) 2012 John Benediktsson
 | 
				
			||||||
 | 
					! See http://factorcode.org/license.txt for BSD license
 | 
				
			||||||
 | 
					USING: help.markup help.syntax layouts math sequences ;
 | 
				
			||||||
 | 
					IN: math.cardinality
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					! The only method defined on this generic is for fixnum, so the
					! documented input type is fixnum rather than the wider number.
					HELP: trailing-zeros
					{ $values { "m" fixnum } { "n" integer } }
					{ $description "Counts the number of trailing 0 bits in " { $snippet "m" } ", returning " { $link fixnum-bits } " if the number is zero." } ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					! k is used as a shift count and a power-of-two exponent, so it is
					! an integer; the result is scaled by a float constant, so it is a float.
					HELP: estimate-cardinality
					{ $values { "seq" sequence } { "k" integer } { "n" float } }
					{ $description "Estimates the number of unique elements in " { $snippet "seq" } "." $nl "The number " { $snippet "k" } " controls how many bits of hash to use, creating " { $snippet "2^k" } " buckets." } ;
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,25 @@
 | 
				
			||||||
 | 
					! Copyright (C) 2012 John Benediktsson
 | 
				
			||||||
 | 
					! See http://factorcode.org/license.txt for BSD license
 | 
				
			||||||
 | 
					USING: arrays kernel layouts locals math math.functions
 | 
				
			||||||
 | 
					math.order math.statistics sequences ;
 | 
				
			||||||
 | 
					IN: math.cardinality
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					! Number of consecutive 0 bits at the low end of m.
					GENERIC: trailing-zeros ( m -- n )

					! Zero has no lowest set bit, so fixnum-bits is returned for it.
					! Otherwise halve m until it becomes odd, counting the halvings.
					M: fixnum trailing-zeros
					    dup zero? [ drop fixnum-bits ] [
					        ! stack inside the loop: count m
					        0 swap [ dup odd? ] [ 2/ [ 1 + ] dip ] until drop
					    ] if ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					! Estimates how many distinct elements seq holds.
					! Each element's hashcode is split in two: the low k bits select one
					! of 2^k buckets, and the remaining bits are fed to trailing-zeros.
					! Every bucket keeps the largest trailing-zeros count it has seen.
					! The result is 2^(mean of the bucket maxima) * bucket count * 0.79402.
					:: estimate-cardinality ( seq k -- n )
					    k 2^ :> num-buckets
					    num-buckets 1 - :> bucket-mask
					    num-buckets 0 <array> :> max-zeros
					    seq [
					        hashcode >fixnum :> h
					        ! low k bits pick the bucket; the shifted-down rest is measured
					        h bucket-mask bitand max-zeros
					        [ h k neg shift trailing-zeros max ] change-nth
					    ] each
					    max-zeros mean 2 swap ^
					    max-zeros length *
					    0.79402 * ;
 | 
				
			||||||
		Loading…
	
		Reference in New Issue