checksums.metrohash: adding a first version of MetroHash algorithm.
parent
b2de82042c
commit
7a23242460
|
@ -0,0 +1 @@
|
|||
John Benediktsson
|
|
@ -0,0 +1,16 @@
|
|||
USING: help.markup help.syntax ;
|
||||
IN: checksums.metrohash
|
||||
|
||||
HELP: metrohash-64
|
||||
{ $class-description "MetroHash 64-bit checksum algorithm." } ;
|
||||
|
||||
HELP: metrohash-128
|
||||
{ $class-description "MetroHash 128-bit checksum algorithm." } ;
|
||||
|
||||
ARTICLE: "checksums.metrohash" "MetroHash checksum"
|
||||
"MetroHash is a set of non-cryptographic hash functions."
|
||||
{ $subsections
|
||||
metrohash-64
|
||||
metrohash-128 } ;
|
||||
|
||||
ABOUT: "checksums.metrohash"
|
|
@ -0,0 +1,86 @@
|
|||
|
||||
USING: checksums checksums.metrohash tools.test ;
|
||||
|
||||
{ 17099979927131455419 } [
|
||||
"abc" T{ metrohash-64 { seed 0 } } checksum-bytes
|
||||
] unit-test
|
||||
|
||||
{ 5688461416820429545 } [
|
||||
"abc" T{ metrohash-64 { seed 1234 } } checksum-bytes
|
||||
] unit-test
|
||||
|
||||
{ 1767508563557181619 } [
|
||||
"abcdefghijklmnopqrstuvwxyz"
|
||||
T{ metrohash-64 { seed 0 } } checksum-bytes
|
||||
] unit-test
|
||||
|
||||
{ 2460573209396975646 } [
|
||||
"abcdefghijklmnopqrstuvwxyz"
|
||||
T{ metrohash-64 { seed 1234 } } checksum-bytes
|
||||
] unit-test
|
||||
|
||||
{ 878430475465696418 } [
|
||||
"this is a really long sentence that needs to be hashed"
|
||||
T{ metrohash-64 { seed 0 } } checksum-bytes
|
||||
] unit-test
|
||||
|
||||
{ 14883773106412686490 } [
|
||||
"this is a really long sentence that needs to be hashed"
|
||||
T{ metrohash-64 { seed 1234 } } checksum-bytes
|
||||
] unit-test
|
||||
|
||||
{ 14883773106412686490 } [
|
||||
"this is a really long sentence that needs to be hashed"
|
||||
>byte-array T{ metrohash-64 { seed 1234 } } checksum-bytes
|
||||
] unit-test
|
||||
|
||||
{ 14883773106412686490 } [
|
||||
T{ metrohash-64 { seed 1234 } } [
|
||||
"this is a really " add-checksum-bytes
|
||||
"long sentence that " add-checksum-bytes
|
||||
"needs to be hashed" add-checksum-bytes
|
||||
get-checksum
|
||||
] with-checksum-state
|
||||
] unit-test
|
||||
|
||||
{ 182995299641628952910564950850867298725 } [
|
||||
"abc" T{ metrohash-128 { seed 0 } } checksum-bytes
|
||||
] unit-test
|
||||
|
||||
{ 61180998041120637609836805276498424729 } [
|
||||
"abc" T{ metrohash-128 { seed 1234 } } checksum-bytes
|
||||
] unit-test
|
||||
|
||||
{ 34499071879213198976518413085708640177 } [
|
||||
"abcdefghijklmnopqrstuvwxyz"
|
||||
T{ metrohash-128 { seed 0 } } checksum-bytes
|
||||
] unit-test
|
||||
|
||||
{ 179174851912813597938406577526685531497 } [
|
||||
"abcdefghijklmnopqrstuvwxyz"
|
||||
T{ metrohash-128 { seed 1234 } } checksum-bytes
|
||||
] unit-test
|
||||
|
||||
{ 212255213697664751676685499681764114896 } [
|
||||
"this is a really long sentence that needs to be hashed"
|
||||
T{ metrohash-128 { seed 0 } } checksum-bytes
|
||||
] unit-test
|
||||
|
||||
{ 182531630340317658385091745884975528732 } [
|
||||
"this is a really long sentence that needs to be hashed"
|
||||
T{ metrohash-128 { seed 1234 } } checksum-bytes
|
||||
] unit-test
|
||||
|
||||
{ 182531630340317658385091745884975528732 } [
|
||||
"this is a really long sentence that needs to be hashed"
|
||||
>byte-array T{ metrohash-128 { seed 1234 } } checksum-bytes
|
||||
] unit-test
|
||||
|
||||
{ 182531630340317658385091745884975528732 } [
|
||||
T{ metrohash-128 { seed 1234 } } [
|
||||
"this is a really " add-checksum-bytes
|
||||
"long sentence that " add-checksum-bytes
|
||||
"needs to be hashed" add-checksum-bytes
|
||||
get-checksum
|
||||
] with-checksum-state
|
||||
] unit-test
|
|
@ -0,0 +1,175 @@
|
|||
! Copyright (C) 2018 John Benediktsson.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors alien.c-types alien.data byte-arrays checksums
|
||||
combinators grouping io.binary kernel locals math math.bitwise
|
||||
sequences specialized-arrays ;
|
||||
SPECIALIZED-ARRAY: uint64_t
|
||||
SPECIALIZED-ARRAY: uint32_t
|
||||
SPECIALIZED-ARRAY: uint16_t
|
||||
SPECIALIZED-ARRAY: uint8_t
|
||||
|
||||
IN: checksums.metrohash
|
||||
|
||||
TUPLE: metrohash-64 seed ;
|
||||
|
||||
C: <metrohash-64> metrohash-64
|
||||
|
||||
<PRIVATE
|
||||
|
||||
:: native-mapper ( from to bytes c-type -- seq )
|
||||
from to bytes <slice>
|
||||
bytes byte-array? little-endian? and
|
||||
[ c-type cast-array ]
|
||||
[ c-type heap-size <groups> [ le> ] map ] if ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
M:: metrohash-64 checksum-bytes ( bytes checksum -- value )
|
||||
0xD6D018F5 :> k0
|
||||
0xA2AA033B :> k1
|
||||
0x62992FC1 :> k2
|
||||
0x30BC5B29 :> k3
|
||||
|
||||
checksum seed>> :> seed
|
||||
bytes length :> len
|
||||
|
||||
len dup 32 mod - :> len/32
|
||||
len dup 16 mod - :> len/16
|
||||
len dup 8 mod - :> len/8
|
||||
len dup 4 mod - :> len/4
|
||||
len dup 2 mod - :> len/2
|
||||
|
||||
seed k2 W+ k0 W* :> h
|
||||
|
||||
h h h h :> ( v0! v1! v2! v3! )
|
||||
|
||||
len 32 >= [
|
||||
0 len/32 bytes uint64_t native-mapper 4 <groups> [
|
||||
first4 {
|
||||
[ k0 W* v0 W+ -29 bitroll-64 v2 W+ v0! ]
|
||||
[ k1 W* v1 W+ -29 bitroll-64 v3 W+ v1! ]
|
||||
[ k2 W* v2 W+ -29 bitroll-64 v0 W+ v2! ]
|
||||
[ k3 W* v3 W+ -29 bitroll-64 v1 W+ v3! ]
|
||||
} spread
|
||||
] each
|
||||
|
||||
v0 v3 W+ k0 W* v1 W+ -37 bitroll-64 k1 W* v2 bitxor v2!
|
||||
v1 v2 W+ k1 W* v0 W+ -37 bitroll-64 k0 W* v3 bitxor v3!
|
||||
v0 v2 W+ k0 W* v3 W+ -37 bitroll-64 k1 W* v0 bitxor v0!
|
||||
v1 v3 W+ k1 W* v2 W+ -37 bitroll-64 k0 W* v1 bitxor v1!
|
||||
|
||||
v0 v1 bitxor h W+ v0!
|
||||
] when
|
||||
|
||||
len/32 len/16 bytes uint64_t native-mapper [
|
||||
first2
|
||||
[ k2 W* v0 W+ -29 bitroll-64 k3 W* v1! ]
|
||||
[ k2 W* v0 W+ -29 bitroll-64 k3 W* v2! ] bi*
|
||||
v1 k0 W* -21 bitroll-64 v2 W+ v1 bitxor v1!
|
||||
v2 k3 W* -21 bitroll-64 v1 W+ v2 bitxor v2!
|
||||
v2 v0 W+ v0!
|
||||
] unless-empty
|
||||
|
||||
len/16 len/8 bytes uint64_t native-mapper [
|
||||
first k3 W* v0 W+ v0!
|
||||
v0 -55 bitroll-64 k1 W* v0 bitxor v0!
|
||||
] unless-empty
|
||||
|
||||
len/8 len/4 bytes uint32_t native-mapper [
|
||||
first k3 W* v0 W+ v0!
|
||||
v0 -26 bitroll-64 k1 W* v0 bitxor v0!
|
||||
] unless-empty
|
||||
|
||||
len/4 len/2 bytes uint16_t native-mapper [
|
||||
first k3 W* v0 W+ v0!
|
||||
v0 -48 bitroll-64 k1 W* v0 bitxor v0!
|
||||
] unless-empty
|
||||
|
||||
bytes len/2 tail-slice [
|
||||
first k3 W* v0 W+ v0!
|
||||
v0 -37 bitroll-64 k1 W* v0 bitxor v0!
|
||||
] unless-empty
|
||||
|
||||
v0 -28 bitroll-64 v0 bitxor v0!
|
||||
v0 k0 W* v0!
|
||||
v0 -29 bitroll-64 v0 bitxor v0!
|
||||
v0 ;
|
||||
|
||||
INSTANCE: metrohash-64 checksum
|
||||
|
||||
TUPLE: metrohash-128 seed ;
|
||||
|
||||
C: <metrohash-128> metrohash-128
|
||||
|
||||
M:: metrohash-128 checksum-bytes ( bytes checksum -- value )
|
||||
0xC83A91E1 :> k0
|
||||
0x8648DBDB :> k1
|
||||
0x7BDEC03B :> k2
|
||||
0x2F5870A5 :> k3
|
||||
|
||||
checksum seed>> :> seed
|
||||
bytes length :> len
|
||||
|
||||
len dup 32 mod - :> len/32
|
||||
len dup 16 mod - :> len/16
|
||||
len dup 8 mod - :> len/8
|
||||
len dup 4 mod - :> len/4
|
||||
len dup 2 mod - :> len/2
|
||||
|
||||
seed k0 W- k3 W* :> v0!
|
||||
seed k1 W+ k2 W* :> v1!
|
||||
seed k0 W+ k2 W* :> v2!
|
||||
seed k1 W- k3 W* :> v3!
|
||||
|
||||
len 32 >= [
|
||||
0 len/32 bytes uint64_t native-mapper 4 <groups> [
|
||||
first4 {
|
||||
[ k0 W* v0 W+ -29 bitroll-64 v2 W+ v0! ]
|
||||
[ k1 W* v1 W+ -29 bitroll-64 v3 W+ v1! ]
|
||||
[ k2 W* v2 W+ -29 bitroll-64 v0 W+ v2! ]
|
||||
[ k3 W* v3 W+ -29 bitroll-64 v1 W+ v3! ]
|
||||
} spread
|
||||
] each
|
||||
|
||||
v0 v3 W+ k0 W* v1 W+ -21 bitroll-64 k1 W* v2 bitxor v2!
|
||||
v1 v2 W+ k1 W* v0 W+ -21 bitroll-64 k0 W* v3 bitxor v3!
|
||||
v0 v2 W+ k0 W* v3 W+ -21 bitroll-64 k1 W* v0 bitxor v0!
|
||||
v1 v3 W+ k1 W* v2 W+ -21 bitroll-64 k0 W* v1 bitxor v1!
|
||||
] when
|
||||
|
||||
len/32 len/16 bytes uint64_t native-mapper [
|
||||
first2
|
||||
[ k2 W* v0 W+ -33 bitroll-64 k3 W* v0! ]
|
||||
[ k2 W* v1 W+ -33 bitroll-64 k3 W* v1! ] bi*
|
||||
v0 k2 W* v1 W+ -45 bitroll-64 k1 W* v0 bitxor v0!
|
||||
v1 k3 W* v0 W+ -45 bitroll-64 k0 W* v1 bitxor v1!
|
||||
] unless-empty
|
||||
|
||||
len/16 len/8 bytes uint64_t native-mapper [
|
||||
first k2 W* v0 W+ -33 bitroll-64 k3 W* v0!
|
||||
v0 k2 W* v1 W+ -27 bitroll-64 k1 W* v0 bitxor v0!
|
||||
] unless-empty
|
||||
|
||||
len/8 len/4 bytes uint32_t native-mapper [
|
||||
first k2 W* v1 W+ -33 bitroll-64 k3 W* v1!
|
||||
v1 k3 W* v0 W+ -46 bitroll-64 k0 W* v1 bitxor v1!
|
||||
] unless-empty
|
||||
|
||||
len/4 len/2 bytes uint16_t native-mapper [
|
||||
first k2 W* v0 W+ -33 bitroll-64 k3 W* v0!
|
||||
v0 k2 W* v1 W+ -22 bitroll-64 k1 W* v0 bitxor v0!
|
||||
] unless-empty
|
||||
|
||||
bytes len/2 tail-slice [
|
||||
first k2 W* v1 W+ -33 bitroll-64 k3 W* v1!
|
||||
v1 k3 W* v0 W+ -58 bitroll-64 k0 W* v1 bitxor v1!
|
||||
] unless-empty
|
||||
|
||||
v0 k0 W* v1 W+ -13 bitroll-64 v0 W+ v0!
|
||||
v1 k1 W* v0 W+ -37 bitroll-64 v1 W+ v1!
|
||||
v0 k2 W* v1 W+ -13 bitroll-64 v0 W+ v0!
|
||||
v1 k3 W* v0 W+ -37 bitroll-64 v1 W+ v1!
|
||||
|
||||
v0 64 shift v1 + ;
|
||||
|
||||
INSTANCE: metrohash-128 checksum
|
|
@ -0,0 +1 @@
|
|||
MetroHash checksum algorithm
|
Loading…
Reference in New Issue