ulid: new vocab

This corresponds to commit 1bd3b5681bf1ba7155a42e75a937ec4b2520a202 of the
original repository at https://github.com/AlexIljin/ulid.
freebsd-work
Alexander Iljin 2019-01-01 21:05:02 +01:00 committed by Doug Coleman
parent 4ba2ac3e78
commit bb1e6943e6
5 changed files with 218 additions and 0 deletions

1
extra/ulid/authors.txt Normal file
View File

@ -0,0 +1 @@
Alexander Ilin

1
extra/ulid/summary.txt Normal file
View File

@ -0,0 +1 @@
Universally Unique Lexicographically Sortable Identifier

View File

@ -0,0 +1,79 @@
! Copyright (C) 2019 Alexander Ilin.
! See http://factorcode.org/license.txt for BSD license.
USING: byte-arrays help.markup help.syntax kernel math strings
ulid.private ;
IN: ulid
ABOUT: "ulid"
ARTICLE: "ulid" "Universally Unique Lexicographically Sortable Identifier"
"The " { $vocab-link "ulid" } " vocab implements the Universally Unique Lexicographically Sortable Identifier gereration according to the specification: " { $url "https://github.com/ulid/spec" } ". The main word to call is:"
{ $subsections ulid }
"Binary convertion interface:"
{ $subsections ulid>bytes bytes>ulid }
"Helpers:"
{ $subsections normalize-ulid }
;
HELP: bytes>ulid
{ $values
{ "byte-array" byte-array }
{ "ulid" string }
}
{ $description "Convert a binary ULID to its string representation using the Crockford's base32 " { $link encoding } ". The " { $snippet "byte-array" } " must be exactly 16 bytes long, the resulting " { $snippet "ulid" } " string is always 26 characters long."
$nl
"The following errors may be thrown during the conversion:"
{ $subsections bytes>ulid-bad-length } } ;
HELP: bytes>ulid-bad-length
{ $values
{ "n" number }
}
{ $description "Throws a " { $link bytes>ulid-bad-length } " error." }
{ $error-description "This error is thrown if the input array for the " { $link bytes>ulid } " conversion has length " { $snippet "n" } " instead of 16." } ;
HELP: normalize-ulid
{ $values
{ "str" string }
{ "str'" string }
}
{ $description "Convert the " { $snippet "str" } " to upper-case and substitute some ambiguous characters according to the Crockford's convention: \"L\" and \"I\" -> \"1\", \"O\" -> \"0\". This may be useful to run on a user-provided string to make sure it was typed in correctly. Subsequent " { $link ulid>bytes } " conversion could be used to make sure the decoded contents constitute a valid ULID." } ;
HELP: ulid
{ $values
{ "ulid" string }
}
{ $description "Generate a new 128-bit ULID using and return its string representation in the Crockford's base32 " { $link encoding } ". The current system time is encoded in the high 48 bits as the Unix time in milliseconds, the low 80 bits are random."
$nl
"At the time of this writing the Factor implementation is not able to produce multiple ULIDs within less than one millisecond of each other, but a provision is made to make sure that if that ever happens in the future, the subsequent ULIDs inside of a millisecond will be an increment of the previous ones to guarentee the sorting order of the identifiers, as per the specification."
$nl
"In case an overflow happens during such incrementing, the " { $link ulid-overflow } " error will be thrown." } ;
HELP: ulid-overflow
{ $description "Throws an " { $link ulid-overflow } " error." }
{ $error-description "This error is thrown if by chance the 80-bit random number generated by the " { $link ulid } " word matches " { $link 80-bits } ", and a new ULID is requested " { $strong "within the same millisecond." } " In this case the specification requires an error to be thrown, because it was not possible to produce two ULIDs, while guarenteeing their sorting order. The best course of action is to retry ULID generation when the next millisecond is on the system clock." } ;
HELP: ulid>bytes
{ $values
{ "ulid" string }
{ "byte-array" byte-array }
}
{ $description "Convert a string " { $snippet "ulid" } " into its binary representation. Some errors may be thrown in the process:" { $subsections ulid>bytes-bad-length ulid>bytes-bad-character ulid>bytes-overflow } } ;
HELP: ulid>bytes-bad-character
{ $values
{ "ch" "a character" }
}
{ $description "Throws a " { $link ulid>bytes-bad-character } " error." }
{ $error-description "This error is thrown if during ULID to byte-array conversion a character " { $snippet "ch" } " is encountered in the input string, which is not part of the supported " { $link encoding } ". Try using " { $link normalize-ulid } " before the conversion." } ;
HELP: ulid>bytes-bad-length
{ $values
{ "n" number }
}
{ $description "Throws a " { $link ulid>bytes-bad-length } " error." }
{ $error-description "This error is thrown if the input string has length " { $snippet "n" } " instead of 26." } ;
HELP: ulid>bytes-overflow
{ $description "Throws a " { $link ulid>bytes-overflow } " error." }
{ $error-description "This error is thrown if the first character of the ULID string is greater than \"7\" in the " { $link encoding } ". This can only mean that the input string is not a valid ULID according to the specification." } ;

View File

@ -0,0 +1,51 @@
! Copyright (C) 2019 Alexander Ilin.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors io.binary kernel math namespaces sequences
strings tools.test ulid ulid.private ;
IN: ulid.tests
{ "0123456789ABCDEFGH1JK1MN0PQRSTUVWXYZ" }
[ "0123456789abcdefghijklmnopqrstuvwxyz" normalize-ulid ] unit-test
{ "ABCDEFGH1JK1MN0PQRSTUVWXYZ" }
[ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" normalize-ulid ] unit-test
[ "aoeu" ulid>bytes ] [
[ ulid>bytes-bad-length? ] keep n>> 4 = and
] must-fail-with
[ "aBCDEFGH1JK1MN0PQRSTUVWXYZ" ulid>bytes ] [
[ ulid>bytes-bad-character? ] keep ch>> CHAR: a = and
] must-fail-with
[ "ABCDEFGH1JK1MN0PQRSTUVWXYZ" ulid>bytes ] [
[ ulid>bytes-bad-character? ] keep ch>> CHAR: U = and
] must-fail-with
[ "ABCDEFGH1JK1MN0PQRST0VWXYZ" ulid>bytes ]
[ ulid>bytes-overflow? ] must-fail-with
{ B{ 235 99 92 248 68 50 152 105 80 90 248 206 129 190 119 223 } }
[ "7BCDEFGH1JK1MN0PQRST0VWXYZ" ulid>bytes ] unit-test
{ "7BCDEFGH1JK1MN0PQRST0VWXYZ" }
[ B{ 235 99 92 248 68 50 152 105 80 90 248 206 129 190 119 223 } bytes>ulid ] unit-test
[ B{ 235 99 92 248 68 50 152 105 80 90 248 206 129 190 119 } bytes>ulid ] [
[ bytes>ulid-bad-length? ] keep n>> 15 = and
] must-fail-with
{ t } [ ulid string? ] unit-test
{ 26 } [ ulid length ] unit-test
{ f } [ ulid ulid = ] unit-test
: ulid-less-than-80-bits ( -- ulid )
ulid last-random-bits get 80-bits >=
[ drop ulid-less-than-80-bits ] when ;
{ t } [
ulid-less-than-80-bits t (ulid) [ ulid>bytes be> ] bi@ 1 - =
] unit-test
[ 80-bits \ last-random-bits set t (ulid) ]
[ ulid-overflow? ] must-fail-with

86
extra/ulid/ulid.factor Normal file
View File

@ -0,0 +1,86 @@
! Copyright (C) 2018, 2019 Alexander Ilin.
! See http://factorcode.org/license.txt for BSD license.
USING: ascii binary-search calendar io.binary kernel make math
math.bitwise math.order namespaces random sequences splitting
summary system tr ;
IN: ulid
ERROR: ulid-overflow ;
M: ulid-overflow summary drop "Too many ULIDs generated per msec" ;
<PRIVATE
CONSTANT: encoding "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
CONSTANT: base 32
CONSTANT: 80-bits 0xFFFFFFFFFFFFFFFFFFFF
SYMBOL: last-time-string
SYMBOL: last-random-bits
: encode-bits ( n chars -- string )
[ base /mod encoding nth ] "" replicate-as nip reverse! ;
: encode-random-bits ( n -- string )
16 encode-bits ;
: encode-time ( timestamp -- string )
timestamp>millis 10 encode-bits ;
: same-msec? ( -- ? )
nano-count 1000 /i dup \ same-msec? get =
[ drop t ] [ \ same-msec? set f ] if ;
: pack-bits ( seq -- seq' )
5 swap [ first ] [ rest ] bi [
[ ! can-take-bits overflow-byte elt
pick 5 >= [
swap 5 shift bitor swap 5 - [ , 0 8 ] when-zero swap
] [
3dup rot [ shift ] [ 5 - shift ] bi-curry bi* bitor ,
nip 5 rot - [ bits 8 ] keep - swap
] if
] each 2drop
] B{ } make ;
TR: (normalize-ulid) "ILO" "110" ; inline
: (ulid) ( same-msec? -- ulid )
[
last-time-string get last-random-bits get 1 +
dup 80-bits > [ ulid-overflow ] when
] [
now encode-time dup last-time-string set
80 random-bits
] if dup last-random-bits set encode-random-bits append ;
PRIVATE>
: ulid ( -- ulid )
same-msec? (ulid) ;
ERROR: ulid>bytes-bad-length n ;
M: ulid>bytes-bad-length summary drop "Invalid ULID length" ;
ERROR: ulid>bytes-bad-character ch ;
M: ulid>bytes-bad-character summary drop "Invalid character in ULID" ;
ERROR: ulid>bytes-overflow ;
M: ulid>bytes-overflow summary drop "Overflow error in ULID" ;
: ulid>bytes ( ulid -- byte-array )
dup length dup 26 = [ drop ] [ ulid>bytes-bad-length ] if
[
dup [ >=< ] curry encoding swap search pick =
[ nip ] [ drop ulid>bytes-bad-character ] if
] B{ } map-as dup first 7 > [ ulid>bytes-overflow ] when pack-bits ;
: normalize-ulid ( str -- str' )
>upper (normalize-ulid) ;
ERROR: bytes>ulid-bad-length n ;
M: bytes>ulid-bad-length summary drop "Invalid ULID byte-array length" ;
: bytes>ulid ( byte-array -- ulid )
dup length dup 16 = [ drop ] [ bytes>ulid-bad-length ] if
be> 26 encode-bits ;