math.cardinality: adding a cardinality estimator.
parent
ec4aaee505
commit
7e9f3d665b
|
@ -0,0 +1 @@
|
|||
John Benediktsson
|
|
@ -0,0 +1,12 @@
|
|||
! Copyright (C) 2012 John Benediktsson
|
||||
! See http://factorcode.org/license.txt for BSD license
|
||||
USING: help.markup help.syntax layouts math sequences ;
|
||||
IN: math.cardinality
|
||||
|
||||
! The only trailing-zeros method in this vocabulary dispatches on fixnum,
! so the values are documented as fixnum rather than the broader number class.
HELP: trailing-zeros
{ $values { "m" fixnum } { "n" fixnum } }
{ $description "Counts the number of trailing 0 bits in " { $snippet "m" } ", returning " { $link fixnum-bits } " if the number is zero." } ;
|
||||
|
||||
! k is used by the implementation as a power-of-two exponent, an array
! length and a shift count, so it is documented as an integer.
HELP: estimate-cardinality
{ $values { "seq" sequence } { "k" integer } { "n" number } }
{ $description "Estimates the number of unique elements in " { $snippet "seq" } "." $nl "The number " { $snippet "k" } " controls how many bits of hash to use, creating " { $snippet "2^k" } " buckets." } ;
|
|
@ -0,0 +1,25 @@
|
|||
! Copyright (C) 2012 John Benediktsson
|
||||
! See http://factorcode.org/license.txt for BSD license
|
||||
USING: arrays kernel layouts locals math math.functions
|
||||
math.order math.statistics sequences ;
|
||||
IN: math.cardinality
|
||||
|
||||
! Number of consecutive 0 bits at the low end of m.
GENERIC: trailing-zeros ( m -- n )

! Zero has no set bit to stop at, so it reports fixnum-bits instead of
! looping forever. Any other fixnum is halved until it becomes odd, with
! the count of halvings carried underneath it on the stack.
M: fixnum trailing-zeros
    dup zero? [ drop fixnum-bits ] [
        0 swap
        [ dup even? ] [ 2/ [ 1 + ] dip ] while
        drop
    ] if ;
|
||||
|
||||
! Estimates how many distinct elements seq contains by hashing each
! element into one of 2^k buckets and tracking, per bucket, the longest
! run of trailing zero bits seen in the remaining hash bits.
!
! seq: sequence of objects that respond to hashcode
! k:   integer; the low k bits of each hashcode choose the bucket
! n:   estimated number of unique elements (0.0 for an empty sequence)
:: estimate-cardinality ( seq k -- n )
    seq empty? [
        ! Nothing was observed, so the answer is exactly zero. Without
        ! this guard every bucket stays at 0 and the formula below would
        ! report 0.79402 * 2^k elements for an empty input.
        0.0
    ] [
        k 2^ :> num-buckets
        num-buckets 0 <array> :> max-zeros
        seq [
            hashcode >fixnum :> h
            ! Low k bits select the bucket ...
            h num-buckets 1 - bitand :> bucket
            ! ... and the bits above them are scanned for trailing zeros.
            h k neg shift :> bucket-hash
            bucket max-zeros [
                bucket-hash trailing-zeros max
            ] change-nth
        ] each
        ! 2^(mean of the per-bucket maxima) * bucket count, scaled by
        ! 0.79402 (presumably the LogLog bias-correction constant; confirm).
        max-zeros [ mean 2 swap ^ ] [ length * ] bi 0.79402 *
    ] if ;
|
Loading…
Reference in New Issue