ulid: new vocab

This corresponds to commit 1bd3b5681bf1ba7155a42e75a937ec4b2520a202 of the original repository at https://github.com/AlexIljin/ulid.
2019-01-01 21:05:02 +01:00 · 2019-01-01 21:05:02 +01:00 · bb1e6943e6
parent 4ba2ac3e78
commit bb1e6943e6
5 changed files with 218 additions and 0 deletions
--- a/extra/ulid/authors.txt
+++ b/extra/ulid/authors.txt
@ -0,0 +1 @@
+Alexander Ilin
--- a/extra/ulid/summary.txt
+++ b/extra/ulid/summary.txt
@ -0,0 +1 @@
+Universally Unique Lexicographically Sortable Identifier
--- a/extra/ulid/ulid-docs.factor
+++ b/extra/ulid/ulid-docs.factor
@ -0,0 +1,79 @@
+! Copyright (C) 2019 Alexander Ilin.
+! See http://factorcode.org/license.txt for BSD license.
+USING: byte-arrays help.markup help.syntax kernel math strings
+ulid.private ;
+IN: ulid
+
+ABOUT: "ulid"
+
+ARTICLE: "ulid" "Universally Unique Lexicographically Sortable Identifier"
+"The " { $vocab-link "ulid" } " vocab implements the Universally Unique Lexicographically Sortable Identifier gereration according to the specification: " { $url "https://github.com/ulid/spec" } ". The main word to call is:"
+{ $subsections ulid }
+"Binary convertion interface:"
+{ $subsections ulid>bytes bytes>ulid }
+"Helpers:"
+{ $subsections normalize-ulid }
+;
+
+HELP: bytes>ulid
+{ $values
+    { "byte-array" byte-array }
+    { "ulid" string }
+}
+{ $description "Convert a binary ULID to its string representation using the Crockford's base32 " { $link encoding } ". The " { $snippet "byte-array" } " must be exactly 16 bytes long, the resulting " { $snippet "ulid" } " string is always 26 characters long."
+$nl
+"The following errors may be thrown during the conversion:"
+{ $subsections bytes>ulid-bad-length } } ;
+
+HELP: bytes>ulid-bad-length
+{ $values
+    { "n" number }
+}
+{ $description "Throws a " { $link bytes>ulid-bad-length } " error." }
+{ $error-description "This error is thrown if the input array for the " { $link bytes>ulid } " conversion has length " { $snippet "n" } " instead of 16." } ;
+
+HELP: normalize-ulid
+{ $values
+    { "str" string }
+    { "str'" string }
+}
+{ $description "Convert the " { $snippet "str" } " to upper-case and substitute some ambiguous characters according to the Crockford's convention: \"L\" and \"I\" -> \"1\", \"O\" -> \"0\". This may be useful to run on a user-provided string to make sure it was typed in correctly. Subsequent " { $link ulid>bytes } " conversion could be used to make sure the decoded contents constitute a valid ULID." } ;
+
+HELP: ulid
+{ $values
+    { "ulid" string }
+}
+{ $description "Generate a new 128-bit ULID using and return its string representation in the Crockford's base32 " { $link encoding } ". The current system time is encoded in the high 48 bits as the Unix time in milliseconds, the low 80 bits are random."
+$nl
+"At the time of this writing the Factor implementation is not able to produce multiple ULIDs within less than one millisecond of each other, but a provision is made to make sure that if that ever happens in the future, the subsequent ULIDs inside of a millisecond will be an increment of the previous ones to guarentee the sorting order of the identifiers, as per the specification."
+$nl
+"In case an overflow happens during such incrementing, the " { $link ulid-overflow } " error will be thrown." } ;
+
+HELP: ulid-overflow
+{ $description "Throws an " { $link ulid-overflow } " error." }
+{ $error-description "This error is thrown if by chance the 80-bit random number generated by the " { $link ulid } " word matches " { $link 80-bits } ", and a new ULID is requested " { $strong "within the same millisecond." } " In this case the specification requires an error to be thrown, because it was not possible to produce two ULIDs, while guarenteeing their sorting order. The best course of action is to retry ULID generation when the next millisecond is on the system clock." } ;
+
+HELP: ulid>bytes
+{ $values
+    { "ulid" string }
+    { "byte-array" byte-array }
+}
+{ $description "Convert a string " { $snippet "ulid" } " into its binary representation. Some errors may be thrown in the process:" { $subsections ulid>bytes-bad-length ulid>bytes-bad-character ulid>bytes-overflow } } ;
+
+HELP: ulid>bytes-bad-character
+{ $values
+    { "ch" "a character" }
+}
+{ $description "Throws a " { $link ulid>bytes-bad-character } " error." }
+{ $error-description "This error is thrown if during ULID to byte-array conversion a character " { $snippet "ch" } " is encountered in the input string, which is not part of the supported " { $link encoding } ". Try using " { $link normalize-ulid } " before the conversion." } ;
+
+HELP: ulid>bytes-bad-length
+{ $values
+    { "n" number }
+}
+{ $description "Throws a " { $link ulid>bytes-bad-length } " error." }
+{ $error-description "This error is thrown if the input string has length " { $snippet "n" } " instead of 26." } ;
+
+HELP: ulid>bytes-overflow
+{ $description "Throws a " { $link ulid>bytes-overflow } " error." }
+{ $error-description "This error is thrown if the first character of the ULID string is greater than \"7\" in the " { $link encoding } ". This can only mean that the input string is not a valid ULID according to the specification." } ;
--- a/extra/ulid/ulid-tests.factor
+++ b/extra/ulid/ulid-tests.factor
@ -0,0 +1,51 @@
+! Copyright (C) 2019 Alexander Ilin.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors io.binary kernel math namespaces sequences
+strings tools.test ulid ulid.private ;
+IN: ulid.tests
+
+{ "0123456789ABCDEFGH1JK1MN0PQRSTUVWXYZ" }
+[ "0123456789abcdefghijklmnopqrstuvwxyz" normalize-ulid ] unit-test
+
+{ "ABCDEFGH1JK1MN0PQRSTUVWXYZ" }
+[ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" normalize-ulid ] unit-test
+
+[ "aoeu" ulid>bytes ] [
+    [ ulid>bytes-bad-length? ] keep n>> 4 = and
+] must-fail-with
+
+[ "aBCDEFGH1JK1MN0PQRSTUVWXYZ" ulid>bytes ] [
+    [ ulid>bytes-bad-character? ] keep ch>> CHAR: a = and
+] must-fail-with
+
+[ "ABCDEFGH1JK1MN0PQRSTUVWXYZ" ulid>bytes ] [
+    [ ulid>bytes-bad-character? ] keep ch>> CHAR: U = and
+] must-fail-with
+
+[ "ABCDEFGH1JK1MN0PQRST0VWXYZ" ulid>bytes ]
+[ ulid>bytes-overflow? ] must-fail-with
+
+{ B{ 235 99 92 248 68 50 152 105 80 90 248 206 129 190 119 223 } }
+[ "7BCDEFGH1JK1MN0PQRST0VWXYZ" ulid>bytes ] unit-test
+
+{ "7BCDEFGH1JK1MN0PQRST0VWXYZ" }
+[ B{ 235 99 92 248 68 50 152 105 80 90 248 206 129 190 119 223 } bytes>ulid ] unit-test
+
+[ B{ 235 99 92 248 68 50 152 105 80 90 248 206 129 190 119 } bytes>ulid ] [
+    [ bytes>ulid-bad-length? ] keep n>> 15 = and
+] must-fail-with
+
+{ t } [ ulid string? ] unit-test
+{ 26 } [ ulid length ] unit-test
+{ f } [ ulid ulid = ] unit-test
+
+: ulid-less-than-80-bits ( -- ulid )
+    ulid last-random-bits get 80-bits >=
+    [ drop ulid-less-than-80-bits ] when ;
+
+{ t } [
+    ulid-less-than-80-bits t (ulid) [ ulid>bytes be> ] bi@ 1 - =
+] unit-test
+
+[ 80-bits \ last-random-bits set t (ulid) ]
+[ ulid-overflow? ] must-fail-with
--- a/extra/ulid/ulid.factor
+++ b/extra/ulid/ulid.factor
@ -0,0 +1,86 @@
+! Copyright (C) 2018, 2019 Alexander Ilin.
+! See http://factorcode.org/license.txt for BSD license.
+USING: ascii binary-search calendar io.binary kernel make math
+math.bitwise math.order namespaces random sequences splitting
+summary system tr ;
+
+IN: ulid
+
+ERROR: ulid-overflow ;
+M: ulid-overflow summary drop "Too many ULIDs generated per msec" ;
+
+<PRIVATE
+
+CONSTANT: encoding "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
+CONSTANT: base 32
+CONSTANT: 80-bits 0xFFFFFFFFFFFFFFFFFFFF
+
+SYMBOL: last-time-string
+SYMBOL: last-random-bits
+
+: encode-bits ( n chars -- string )
+    [ base /mod encoding nth ] "" replicate-as nip reverse! ;
+
+: encode-random-bits ( n -- string )
+    16 encode-bits ;
+
+: encode-time ( timestamp -- string )
+    timestamp>millis 10 encode-bits ;
+
+: same-msec? ( -- ? )
+    nano-count 1000 /i dup \ same-msec? get =
+    [ drop t ] [ \ same-msec? set f ] if ;
+
+: pack-bits ( seq -- seq' )
+    5 swap [ first ] [ rest ] bi [
+        [ ! can-take-bits overflow-byte elt
+            pick 5 >= [
+                swap 5 shift bitor swap 5 - [ , 0 8 ] when-zero swap
+            ] [
+                3dup rot [ shift ] [ 5 - shift ] bi-curry bi* bitor ,
+                nip 5 rot - [ bits 8 ] keep - swap
+            ] if
+        ] each 2drop
+    ] B{ } make ;
+
+TR: (normalize-ulid) "ILO" "110" ; inline
+
+: (ulid) ( same-msec? -- ulid )
+    [
+        last-time-string get last-random-bits get 1 +
+        dup 80-bits > [ ulid-overflow ] when
+    ] [
+        now encode-time dup last-time-string set
+        80 random-bits
+    ] if dup last-random-bits set encode-random-bits append ;
+
+PRIVATE>
+
+: ulid ( -- ulid )
+    same-msec? (ulid) ;
+
+ERROR: ulid>bytes-bad-length n ;
+M: ulid>bytes-bad-length summary drop "Invalid ULID length" ;
+
+ERROR: ulid>bytes-bad-character ch ;
+M: ulid>bytes-bad-character summary drop "Invalid character in ULID" ;
+
+ERROR: ulid>bytes-overflow ;
+M: ulid>bytes-overflow summary drop "Overflow error in ULID" ;
+
+: ulid>bytes ( ulid -- byte-array )
+    dup length dup 26 = [ drop ] [ ulid>bytes-bad-length ] if
+    [
+        dup [ >=< ] curry encoding swap search pick =
+        [ nip ] [ drop ulid>bytes-bad-character ] if
+    ] B{ } map-as dup first 7 > [ ulid>bytes-overflow ] when pack-bits ;
+
+: normalize-ulid ( str -- str' )
+    >upper (normalize-ulid) ;
+
+ERROR: bytes>ulid-bad-length n ;
+M: bytes>ulid-bad-length summary drop "Invalid ULID byte-array length" ;
+
+: bytes>ulid ( byte-array -- ulid )
+    dup length dup 16 = [ drop ] [ bytes>ulid-bad-length ] if
+    be> 26 encode-bits ;
				`@ -0,0 +1 @@`
				`Universally Unique Lexicographically Sortable Identifier`