diff --git a/extra/toml/authors.txt b/extra/toml/authors.txt new file mode 100644 index 0000000000..e091bb8164 --- /dev/null +++ b/extra/toml/authors.txt @@ -0,0 +1 @@ +John Benediktsson diff --git a/extra/toml/summary.txt b/extra/toml/summary.txt new file mode 100644 index 0000000000..2ab6924450 --- /dev/null +++ b/extra/toml/summary.txt @@ -0,0 +1 @@ +Parsers for Tom's Obvious, Minimal Language (TOML). diff --git a/extra/toml/toml-docs.factor b/extra/toml/toml-docs.factor new file mode 100644 index 0000000000..429cd6ac93 --- /dev/null +++ b/extra/toml/toml-docs.factor @@ -0,0 +1,17 @@ +USING: help.markup help.syntax kernel strings ; +IN: toml + +HELP: toml> +{ $values { "string" string } { "assoc" object } } +{ $description "Decodes a configuration from the TOML format, represented as a " { $link string } "." } ; + +ARTICLE: "toml" "Tom's Obvious Markup Language (TOML)" +"Tom's Obvious Markup Language (TOML) is described further in " +{ $url "https://en.wikipedia.org/wiki/TOML" } "." +$nl +"Decoding support for the TOML protocol:" +{ $subsections + toml> +} ; + +ABOUT: "toml" diff --git a/extra/toml/toml-tests.factor b/extra/toml/toml-tests.factor new file mode 100644 index 0000000000..a82bf99087 --- /dev/null +++ b/extra/toml/toml-tests.factor @@ -0,0 +1,122 @@ +USING: multiline toml tools.test ; + +{ + H{ + { "title" "TOML Example" } + { "hosts" { "alpha" "omega" } } + { + "owner" + H{ + { "name" "Tom Preston-Werner" } + { "organization" "GitHub" } + { + "bio" + "GitHub Cofounder & CEO\nLikes tater tots and beer." + } + { "dob" "1979-05-27T07:32:00Z" } + } + } + { + "database" + H{ + { "server" "192.168.1.1" } + { "ports" { 8001 8001 8002 } } + { "connection_max" 5000 } + { "enabled" t } + } + } + { + "servers" + H{ + { + "alpha" + H{ + { "ip" "10.0.0.1" } + { "dc" "eqdc10" } + } + } + { + "beta" + H{ + { "ip" "10.0.0.2" } + { "dc" "eqdc10" } + { "country" "中国" } + } + } + } + } + { + "clients" + H{ + { "data" { { "gamma" "delta" } { 1 2 } } } + } + } + { + "products" + V{ + H{ + { "name" "Hammer" } + { "sku" 738594937 } + } + H{ + { "name" "Nail" } + { "sku" 284758393 } + { "color" "gray" } + } + } + } + } +} [ + [=[ + +# This is a TOML document. Boom. + +title = "TOML Example" + +[owner] +name = "Tom Preston-Werner" +organization = "GitHub" +bio = "GitHub Cofounder & CEO\nLikes tater tots and beer." +dob = 1979-05-27T07:32:00Z # First class dates? Why not? + +[database] +server = "192.168.1.1" +ports = [ 8001, 8001, 8002 ] +connection_max = 5000 +enabled = true + +[servers] + + # You can indent as you please. Tabs or spaces. TOML don't care. + [servers.alpha] + ip = "10.0.0.1" + dc = "eqdc10" + + [servers.beta] + ip = "10.0.0.2" + dc = "eqdc10" + country = "中国" # This should be parsed as UTF-8 + +[clients] +data = [ ["gamma", "delta"], [1, 2] ] # just an update to make sure parsers support it + +# Line breaks are OK when inside arrays +hosts = [ + "alpha", + "omega" +] + +# Products + + [[products]] + name = "Hammer" + sku = 738594937 + + [[products]] + name = "Nail" + sku = 284758393 + color = "gray" + + ]=] toml> +] unit-test + diff --git a/extra/toml/toml.factor b/extra/toml/toml.factor new file mode 100644 index 0000000000..c2398e5e92 --- /dev/null +++ b/extra/toml/toml.factor @@ -0,0 +1,294 @@ +! Copyright (C) 2019 John Benediktsson +! See http://factorcode.org/license.txt for BSD license + +USING: accessors arrays assocs hashtables kernel locals make +math.parser peg peg.parsers regexp sequences splitting +strings.parser ; + +IN: toml + +ERROR: duplicate-key key ; + +ERROR: unknown-value value ; + + ] action ; + +: decdigit ( -- parser ) + CHAR: 0 CHAR: 9 range ; + +: dec ( -- parser ) + decdigit digits [ dec> ] action ; + +: octdigit ( -- parser ) + CHAR: 0 CHAR: 7 range ; + +: oct ( -- parser ) + "0o" token hide octdigit digits 2seq [ first oct> ] action ; + +: bindigit ( -- parser ) + CHAR: 0 CHAR: 1 range ; + +: bin ( -- parser ) + "0b" token hide bindigit digits 2seq [ first bin> ] action ; + +: integer-parser ( -- parser ) + hex oct bin dec 4choice [ ] action ; + +: float ( -- parser ) + [ + sign optional , + decdigit digits optional , + "." token , + decdigit digits optional , + "e" token "E" token 2choice + sign optional + decdigit digits optional 3seq optional , + ] seq* ; + +: +inf ( -- parser ) + "+" token optional "inf" token 2seq [ drop 1/0. ] action ; + +: -inf ( -- parser ) + "-inf" token [ drop -1/0. ] action ; + +: nan ( -- parser ) + sign optional "nan" token 2seq + [ drop NAN: 8000000000000 ] action ; + +: float-parser ( -- parser ) + float +inf -inf nan 4choice + [ unclip-last append "" concat-as string>number ] action ; + +: escaped ( -- parser ) + "\\" token hide [ "btnfr\"\\" member-eq? ] satisfy 2seq + [ first escape ] action ; + +: unicode ( -- parser ) + "\\u" token hide hexdigit 4 exactly-n 2seq + "\\U" token hide hexdigit 8 exactly-n 2seq + 2choice [ first hex> ] action ; + +: basic-string ( -- parser ) + escaped unicode [ "\"\n" member? not ] satisfy 3choice repeat0 + "\"" dup surrounded-by ; + +: literal-string ( -- parser ) + [ "'\n" member? not ] satisfy repeat0 + "'" dup surrounded-by ; + +: single-string ( -- parser ) + basic-string literal-string 2choice [ "" like ] action ; + +: multi-basic-string ( -- parser ) + escaped unicode [ CHAR: \" = not ] satisfy 3choice repeat0 + "\"\"\"" dup surrounded-by ; + +: multi-literal-string ( -- parser ) + [ CHAR: ' = not ] satisfy repeat0 + "'''" dup surrounded-by ; + +: multi-string ( -- parser ) + multi-basic-string multi-literal-string 2choice [ + "" like "\n" ?head drop + R/ \\[ \t\r\n]*\n[ \t\r\n]*/m "" re-replace + ] action ; + +: string-parser ( -- parser ) + multi-string single-string 2choice ; + +: date-parser ( -- parser ) + [ + decdigit 4 exactly-n , + "-" token , + decdigit 2 exactly-n , + "-" token , + decdigit 2 exactly-n , + ] seq* [ "" concat-as ] action ; + +: time-parser ( -- parser ) + [ + decdigit 2 exactly-n , + ":" token , + decdigit 2 exactly-n , + ":" token , + decdigit 2 exactly-n , + "." token decdigit repeat1 2seq optional , + ] seq* [ "" concat-as ] action ; + +: timezone-parser ( -- parser ) + [ + "Z" token , + "-" token + decdigit 2 exactly-n ":" token + decdigit 2 exactly-n 4seq [ "" concat-as ] action , + ] choice* ; + +: datetime-parser ( -- parser ) + [ + date-parser , + "T" token " " token 2choice , + time-parser , + timezone-parser optional , + ] seq* [ "" concat-as ] action ; + +: space ( -- parser ) + [ " \t" member? ] satisfy repeat0 ; + +: whitespace ( -- parser ) + [ " \t\r\n" member? ] satisfy repeat0 ; + +DEFER: value-parser + +: array-parser ( -- parser ) + [ + "[" token hide , + whitespace hide , + value-parser + whitespace hide "," token whitespace hide 3seq list-of , + whitespace hide , + "]" token hide , + ] seq* [ first { } like ] action ; + +DEFER: key-value-parser + +: inline-table-parser ( -- parser ) + [ + "{" token hide , + whitespace hide , + key-value-parser + whitespace hide "," token whitespace hide 3seq list-of , + whitespace hide , + "}" token hide , + ] seq* [ first >hashtable ] action ; + +: value-parser ( -- parser ) + [ + [ + boolean-parser , + datetime-parser , + date-parser , + time-parser , + float-parser , + integer-parser , + string-parser , + [ array-parser ] box , + [ inline-table-parser ] box , + ] choice* + ] delay ; + +: name-parser ( -- parser ) + [ + CHAR: A CHAR: Z range , + CHAR: a CHAR: z range , + CHAR: 0 CHAR: 9 range , + "_" token [ first ] action , + "-" token [ first ] action , + ] choice* repeat1 [ "" like ] action single-string 2choice ; + +: comment-parser ( -- parser ) + [ + space , + "#" token , + [ CHAR: \n = not ] satisfy repeat0 , + ] seq* [ drop f ] action ; + +: key-parser ( -- parser ) + name-parser "." token list-of [ { } like ] action ; + +: key-value-parser ( -- parser ) + [ + space hide , + key-parser , + space hide , + "=" token hide , + space hide , + value-parser , + comment-parser optional hide , + ] seq* [ first2 entry boa ] action ; + +: line-parser ( -- parser ) + "\n" token "\r\n" token 2choice ; + +:: table-name-parser ( begin end -- parser ) + [ + begin token hide , + space hide , + name-parser + space hide "." token space hide 3seq list-of + [ { } like ] action , + space hide , + end token hide , + comment-parser optional hide , + ] seq* ; + +: table-parser ( -- parser ) + [ + space hide , + "[[" "]]" table-name-parser [ t suffix! ] action + "[" "]" table-name-parser [ f suffix! ] action + 2choice , + whitespace hide , + key-value-parser line-parser list-of optional , + ] seq* [ first2 [ first2 ] dip table boa ] action ; + +: toml-parser ( -- parser ) + [ + whitespace hide , + [ + comment-parser , + table-parser , + key-value-parser , + ] choice* whitespace list-of , + whitespace hide , + ] seq* [ first sift { } like ] action ; + +: check-no-key ( key assoc -- key assoc ) + 2dup at* nip [ over duplicate-key ] when ; + +: deep-at ( keys assoc -- value ) + swap [ + over ?at [ nip ] [ + H{ } clone [ swap rot check-no-key set-at ] keep + ] if + ] each ; + +GENERIC: update-toml ( assoc entry -- assoc ) + +M: entry update-toml + [ key>> unclip-last [ over deep-at ] dip ] [ value>> ] bi + swap rot check-no-key set-at ; + +M: table update-toml + [ name>> unclip-last [ over deep-at ] dip ] + [ entries>> [ H{ } clone ] dip [ update-toml ] each swap rot ] + [ array?>> [ push-at ] [ check-no-key set-at ] if ] tri ; + +PRIVATE> + +: toml> ( string -- assoc ) + [ H{ } clone ] dip toml-parser parse [ update-toml ] each ;