diff --git a/extra/cuckoo-filters/authors.txt b/extra/cuckoo-filters/authors.txt
new file mode 100644
index 0000000000..e091bb8164
--- /dev/null
+++ b/extra/cuckoo-filters/authors.txt
@@ -0,0 +1 @@
+John Benediktsson
diff --git a/extra/cuckoo-filters/cuckoo-filters-tests.factor b/extra/cuckoo-filters/cuckoo-filters-tests.factor
new file mode 100644
index 0000000000..3255d2da40
--- /dev/null
+++ b/extra/cuckoo-filters/cuckoo-filters-tests.factor
@@ -0,0 +1,28 @@
+USING: accessors combinators combinators.short-circuit
+cuckoo-filters kernel math.parser sequences tools.test ;
+
+{ t 1 t t f 0 } [
+    "factor" 100 <cuckoo-filter> {
+        [ cuckoo-insert ]
+        [ nip size>> ]
+        [ cuckoo-lookup ]
+        [ cuckoo-delete ]
+        [ cuckoo-lookup ]
+        [ nip size>> ]
+    } 2cleave
+] unit-test
+
+{ 250,000 0 } [
+    250,000 <cuckoo-filter>
+    250,000 [ number>string ] { } map-integers
+    [
+        [
+            {
+                [ over cuckoo-lookup not ]
+                [ over cuckoo-insert ]
+            } 1&&
+        ] count swap
+    ]
+    [ [ over cuckoo-delete drop ] each ] bi
+    size>>
+] unit-test
diff --git a/extra/cuckoo-filters/cuckoo-filters.factor b/extra/cuckoo-filters/cuckoo-filters.factor
new file mode 100644
index 0000000000..8c3db68220
--- /dev/null
+++ b/extra/cuckoo-filters/cuckoo-filters.factor
@@ -0,0 +1,114 @@
+! Copyright (C) 2016 John Benediktsson
+! See http://factorcode.org/license.txt for BSD license
+
+USING: accessors arrays checksums checksums.sha
+combinators.short-circuit io.binary kernel locals math
+math.bitwise random sequences ;
+
+IN: cuckoo-filters
+
+<PRIVATE
+
+! NOTE(review): the three constants and the head of #buckets were
+! missing from the mangled patch text and are reconstructed here;
+! confirm the values against the upstream cuckoo-filters vocabulary.
+
+! The maximum number of times we kick down items/displace from
+! their buckets
+CONSTANT: max-cuckoo-count 500
+
+! The maximum load factor we allow before growing the capacity
+CONSTANT: max-load-factor 0.96
+
+! The number of tags to store in each bucket
+CONSTANT: bucket-size 4
+
+! Number of buckets for the requested capacity: a power of two,
+! doubled when the load factor would exceed max-load-factor.
+: #buckets ( capacity -- #buckets )
+    [ bucket-size /i next-power-of-2 ] keep
+    over / bucket-size / max-load-factor > [ 2 * ] when ;
+
+! Allocate the bucket table; an empty slot holds f.
+: <cuckoo-buckets> ( capacity -- buckets )
+    #buckets [ bucket-size f <array> ] replicate ;
+
+! Split a hash: first 4 bytes are the tag, next 4 the index.
+: tag-index ( hash -- tag index )
+    4 cut 4 head [ be> ] bi@ ;
+
+! Alternate index: tag times 0x5bd1e995 (via w*), xor index.
+: alt-index ( tag index -- altindex )
+    [ 0x5bd1e995 w* ] [ bitxor ] bi* ;
+
+! Hash the bytes with the filter's checksum; return the tag and
+! both candidate bucket indices (not yet reduced mod table size).
+: tag-indices ( bytes cuckoo-filter -- tag i1 i2 )
+    checksum>> checksum-bytes tag-index 2dup alt-index ;
+
+: bucket-lookup ( fingerprint bucket -- ? )
+    member? ;
+
+! Store the fingerprint in the first empty (f) slot, if any.
+: bucket-insert ( fingerprint bucket -- ? )
+    dup [ not ] find drop [ swap set-nth t ] [ 2drop f ] if* ;
+
+! Clear the first slot holding the fingerprint, if any.
+: bucket-delete ( fingerprint bucket -- ? )
+    [ f ] 2dip [ index ] keep over [ set-nth t ] [ 3drop f ] if ;
+
+! Exchange the fingerprint with a randomly chosen slot.
+: bucket-swap ( fingerprint bucket -- fingerprint' )
+    [ length random ] keep [ swap ] change-nth ;
+
+PRIVATE>
+
+TUPLE: cuckoo-filter buckets checksum size ;
+
+! Create an empty filter using sha1 as its checksum.
+: <cuckoo-filter> ( capacity -- cuckoo-filter )
+    <cuckoo-buckets> sha1 0 cuckoo-filter boa ;
+
+! Insert obj; when both candidate buckets are full, displace
+! entries up to max-cuckoo-count times. Returns f on failure.
+:: cuckoo-insert ( obj cuckoo-filter -- ? )
+    obj cuckoo-filter tag-indices :> ( tag! i1 i2 )
+    cuckoo-filter buckets>> :> buckets
+    buckets length :> cuckoo-size
+    {
+        [ tag i1 cuckoo-size mod buckets nth bucket-insert ]
+        [ tag i2 cuckoo-size mod buckets nth bucket-insert ]
+    } 0|| [
+        cuckoo-filter [ 1 + ] change-size drop t
+    ] [
+        { i1 i2 } random :> i!
+        max-cuckoo-count [
+            drop
+            tag i cuckoo-size mod buckets nth bucket-swap tag!
+            tag i alt-index i!
+
+            tag i cuckoo-size mod buckets nth bucket-insert
+            dup [ cuckoo-filter [ 1 + ] change-size drop ] when
+        ] find-integer >boolean
+    ] if ;
+
+! Test whether obj's tag is in either candidate bucket.
+:: cuckoo-lookup ( obj cuckoo-filter -- ? )
+    obj cuckoo-filter tag-indices :> ( tag i1 i2 )
+    cuckoo-filter buckets>> :> buckets
+    buckets length :> cuckoo-size
+    {
+        [ tag i1 cuckoo-size mod buckets nth bucket-lookup ]
+        [ tag i2 cuckoo-size mod buckets nth bucket-lookup ]
+    } 0|| ;
+
+! Remove one copy of obj's tag; decrement size on success.
+:: cuckoo-delete ( obj cuckoo-filter -- ? )
+    obj cuckoo-filter tag-indices :> ( tag i1 i2 )
+    cuckoo-filter buckets>> :> buckets
+    buckets length :> cuckoo-size
+    {
+        [ tag i1 cuckoo-size mod buckets nth bucket-delete ]
+        [ tag i2 cuckoo-size mod buckets nth bucket-delete ]
+    } 0||
+    dup [ cuckoo-filter [ 1 - ] change-size drop ] when ;
diff --git a/extra/cuckoo-filters/summary.txt b/extra/cuckoo-filters/summary.txt
new file mode 100644
index 0000000000..078b781f7a
--- /dev/null
+++ b/extra/cuckoo-filters/summary.txt
@@ -0,0 +1 @@
+Cuckoo filters