diff --git a/extra/peg/peg-docs.factor b/extra/peg/peg-docs.factor new file mode 100644 index 0000000000..40743132f3 --- /dev/null +++ b/extra/peg/peg-docs.factor @@ -0,0 +1,113 @@ +! Copyright (C) 2007 Chris Double. +! See http://factorcode.org/license.txt for BSD license. +USING: help.markup help.syntax peg ; + +HELP: parse +{ $values + { "string" "a string" } + { "parse" "a parser" } + { "result" "a or f" } +} +{ $description + "Given the input string, parse it using the given parser. The result is a object if " + "the parse was successful, otherwise it is f." } ; + +HELP: token +{ $values + { "string" "a string" } + { "parser" "a parser" } +} +{ $description + "Returns a parser that matches the given string." } ; + +HELP: range +{ $values + { "min" "a character" } + { "max" "a character" } + { "parser" "a parser" } +} +{ $description + "Returns a parser that matches a single character that lies within the range of characters given, inclusive." } +{ $example ": digit ( -- parser ) CHAR: 0 CHAR: 9 range ;" } ; + +HELP: seq +{ $values + { "seq" "a sequence of parsers" } + { "parser" "a parser" } +} +{ $description + "Returns a parser that calls all parsers in the given sequence, in order. The parser succeeds if " + "all the parsers succeed, otherwise it fails. The AST produced is a sequence of the AST produced by " + "the individual parsers." } ; + +HELP: choice +{ $values + { "seq" "a sequence of parsers" } + { "parser" "a parser" } +} +{ $description + "Returns a parser that will try all the parsers in the sequence, in order, until one succeeds. " + "The resulting AST is that produced by the successful parser." } ; + +HELP: repeat0 +{ $values + { "p1" "a parser" } + { "p2" "a parser" } +} +{ $description + "Returns a parser that parses 0 or more instances of the 'p1' parser. The AST produced is " + "an array of the AST produced by the 'p1' parser. An empty array indicates 0 instances were " + "parsed." } ; + +HELP: repeat1 +{ $values + { "p1" "a parser" } + { "p2" "a parser" } +} +{ $description + "Returns a parser that parses 1 or more instances of the 'p1' parser. The AST produced is " + "an array of the AST produced by the 'p1' parser." } ; + +HELP: optional +{ $values + { "p1" "a parser" } + { "p2" "a parser" } +} +{ $description + "Returns a parser that parses 0 or 1 instances of the 'p1' parser. The AST produced is " + "'f' if 0 instances are parsed the AST produced is 'f', otherwise it is the AST produced by 'p1'." } ; + +HELP: ensure +{ $values + { "p1" "a parser" } + { "p2" "a parser" } +} +{ $description + "Returns a parser that succeeds if the 'p1' parser succeeds but does not add anything to the " + "AST and does not move the location in the input string. This can be used for lookahead and " + "disambiguation, along with the " { $link ensure-not } " word." } +{ $example "\"0\" token ensure octal-parser" } ; + +HELP: ensure-not +{ $values + { "p1" "a parser" } + { "p2" "a parser" } +} +{ $description + "Returns a parser that succeeds if the 'p1' parser fails but does not add anything to the " + "AST and does not move the location in the input string. This can be used for lookahead and " + "disambiguation, along with the " { $link ensure } " word." } +{ $example "\"+\" token \"=\" token ensure-not \"+=\" token 3array seq" } ; + +HELP: action +{ $values + { "p1" "a parser" } + { "quot" "a quotation with stack effect ( ast -- ast )" } +} +{ $description + "Returns a parser that calls the 'p1' parser and applies the quotation to the AST resulting " + "from that parse. The result of the quotation is then used as the final AST. This can be used " + "for manipulating the parse tree to produce a AST better suited for the task at hand rather than " + "the default AST." } +{ $example "CHAR: 0 CHAR: 9 range [ to-digit ] action" } ; + diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor new file mode 100644 index 0000000000..7648819a8c --- /dev/null +++ b/extra/peg/peg-tests.factor @@ -0,0 +1,139 @@ +! Copyright (C) 2007 Chris Double. +! See http://factorcode.org/license.txt for BSD license. +! +USING: kernel tools.test strings namespaces arrays sequences peg peg.private ; +IN: temporary + +{ 0 1 2 } [ + 0 next-id set-global get-next-id get-next-id get-next-id +] unit-test + +{ f } [ + "endbegin" "begin" token parse +] unit-test + +{ "begin" "end" } [ + "beginend" "begin" token parse + { parse-result-ast parse-result-remaining } get-slots + >string +] unit-test + +{ f } [ + "" CHAR: a CHAR: z range parse +] unit-test + +{ f } [ + "1bcd" CHAR: a CHAR: z range parse +] unit-test + +{ CHAR: a } [ + "abcd" CHAR: a CHAR: z range parse parse-result-ast +] unit-test + +{ CHAR: z } [ + "zbcd" CHAR: a CHAR: z range parse parse-result-ast +] unit-test + +{ f } [ + "bad" "a" token "b" token 2array seq parse +] unit-test + +{ V{ "g" "o" } } [ + "good" "g" token "o" token 2array seq parse parse-result-ast +] unit-test + +{ "a" } [ + "abcd" "a" token "b" token 2array choice parse parse-result-ast +] unit-test + +{ "b" } [ + "bbcd" "a" token "b" token 2array choice parse parse-result-ast +] unit-test + +{ f } [ + "cbcd" "a" token "b" token 2array choice parse +] unit-test + +{ f } [ + "" "a" token "b" token 2array choice parse +] unit-test + +{ 0 } [ + "" "a" token repeat0 parse parse-result-ast length +] unit-test + +{ 0 } [ + "b" "a" token repeat0 parse parse-result-ast length +] unit-test + +{ V{ "a" "a" "a" } } [ + "aaab" "a" token repeat0 parse parse-result-ast +] unit-test + +{ f } [ + "" "a" token repeat1 parse +] unit-test + +{ f } [ + "b" "a" token repeat1 parse +] unit-test + +{ V{ "a" "a" "a" } } [ + "aaab" "a" token repeat1 parse parse-result-ast +] unit-test + +{ V{ "a" "b" } } [ + "ab" "a" token optional "b" token 2array seq parse parse-result-ast +] unit-test + +{ V{ f "b" } } [ + "b" "a" token optional "b" token 2array seq parse parse-result-ast +] unit-test + +{ f } [ + "cb" "a" token optional "b" token 2array seq parse +] unit-test + +{ V{ CHAR: a CHAR: b } } [ + "ab" "a" token ensure CHAR: a CHAR: z range dup 3array seq parse parse-result-ast +] unit-test + +{ f } [ + "bb" "a" token ensure CHAR: a CHAR: z range 2array seq parse +] unit-test + +{ t } [ + "a+b" + "a" token "+" token dup ensure-not 2array seq "++" token 2array choice "b" token 3array seq + parse [ t ] [ f ] if +] unit-test + +{ t } [ + "a++b" + "a" token "+" token dup ensure-not 2array seq "++" token 2array choice "b" token 3array seq + parse [ t ] [ f ] if +] unit-test + +{ t } [ + "a+b" + "a" token "+" token "++" token 2array choice "b" token 3array seq + parse [ t ] [ f ] if +] unit-test + +{ f } [ + "a++b" + "a" token "+" token "++" token 2array choice "b" token 3array seq + parse [ t ] [ f ] if +] unit-test + +{ 1 } [ + "a" "a" token [ drop 1 ] action parse parse-result-ast +] unit-test + +{ V{ 1 1 } } [ + "aa" "a" token [ drop 1 ] action dup 2array seq parse parse-result-ast +] unit-test + +{ f } [ + "b" "a" token [ drop 1 ] action parse +] unit-test \ No newline at end of file diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor new file mode 100644 index 0000000000..1fb8e7860d --- /dev/null +++ b/extra/peg/peg.factor @@ -0,0 +1,176 @@ +! Copyright (C) 2007 Chris Double. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel sequences strings namespaces math assocs shuffle vectors combinators.lib ; +IN: peg + +TUPLE: parse-result remaining ast ; + +GENERIC: parse ( state parser -- result ) + + ( remaining ast -- parse-result ) + parse-result construct-boa ; + +SYMBOL: next-id + +: get-next-id ( -- number ) + next-id get-global 0 or dup 1+ next-id set-global ; + +TUPLE: parser id ; + +: init-parser ( parser -- parser ) + get-next-id parser construct-boa over set-delegate ; + +TUPLE: token-parser symbol ; + +M: token-parser parse ( state parser -- result ) + token-parser-symbol 2dup head? [ + dup >r length tail-slice r> + ] [ + 2drop f + ] if ; + +TUPLE: range-parser min max ; + +M: range-parser parse ( state parser -- result ) + over empty? [ + 2drop f + ] [ + 0 pick nth dup rot + { range-parser-min range-parser-max } get-slots between? [ + [ 1 tail-slice ] dip + ] [ + 2drop f + ] if + ] if ; + +TUPLE: seq-parser parsers ; + +: do-seq-parser ( result parser -- result ) + [ dup parse-result-remaining ] dip parse [ + [ parse-result-remaining swap set-parse-result-remaining ] 2keep + parse-result-ast dup ignore = [ drop ] [ swap [ parse-result-ast push ] keep ] if + ] [ + drop f + ] if* ; + +: (seq-parser) ( result parsers -- result ) + dup empty? not pick and [ + unclip swap [ do-seq-parser ] dip (seq-parser) + ] [ + drop + ] if ; + +M: seq-parser parse ( state parser -- result ) + seq-parser-parsers [ V{ } clone ] dip (seq-parser) ; + +TUPLE: choice-parser parsers ; + +: (choice-parser) ( state parsers -- result ) + dup empty? [ + 2drop f + ] [ + unclip pick swap parse [ + 2nip + ] [ + (choice-parser) + ] if* + ] if ; + +M: choice-parser parse ( state parser -- result ) + choice-parser-parsers (choice-parser) ; + +TUPLE: repeat0-parser p1 ; + +: (repeat-parser) ( parser result -- result ) + 2dup parse-result-remaining swap parse [ + [ parse-result-remaining swap set-parse-result-remaining ] 2keep + parse-result-ast swap [ parse-result-ast push ] keep + (repeat-parser) + ] [ + nip + ] if* ; + +: clone-result ( result -- result ) + { parse-result-remaining parse-result-ast } + get-slots 1vector ; + +M: repeat0-parser parse ( state parser -- result ) + repeat0-parser-p1 2dup parse [ + nipd clone-result (repeat-parser) + ] [ + drop V{ } clone + ] if* ; + +TUPLE: repeat1-parser p1 ; + +M: repeat1-parser parse ( state parser -- result ) + repeat1-parser-p1 tuck parse dup [ clone-result (repeat-parser) ] [ nip ] if ; + +TUPLE: optional-parser p1 ; + +M: optional-parser parse ( state parser -- result ) + dupd optional-parser-p1 parse swap f or ; + +TUPLE: ensure-parser p1 ; + +M: ensure-parser parse ( state parser -- result ) + dupd ensure-parser-p1 parse [ + ignore + ] [ + drop f + ] if ; + +TUPLE: ensure-not-parser p1 ; + +M: ensure-not-parser parse ( state parser -- result ) + dupd ensure-not-parser-p1 parse [ + drop f + ] [ + ignore + ] if ; + +TUPLE: action-parser p1 quot ; + +M: action-parser parse ( state parser -- result ) + tuck action-parser-p1 parse dup [ + dup parse-result-ast rot action-parser-quot call + swap [ set-parse-result-ast ] keep + ] [ + nip + ] if ; + +PRIVATE> + +: token ( string -- parser ) + token-parser construct-boa init-parser ; + +: range ( min max -- parser ) + range-parser construct-boa init-parser ; + +: seq ( seq -- parser ) + seq-parser construct-boa init-parser ; + +: choice ( seq -- parser ) + choice-parser construct-boa init-parser ; + +: repeat0 ( parser -- parser ) + repeat0-parser construct-boa init-parser ; + +: repeat1 ( parser -- parser ) + repeat1-parser construct-boa init-parser ; + +: optional ( parser -- parser ) + optional-parser construct-boa init-parser ; + +: ensure ( parser -- parser ) + ensure-parser construct-boa init-parser ; + +: ensure-not ( parser -- parser ) + ensure-not-parser construct-boa init-parser ; + +: action ( parser quot -- parser ) + action-parser construct-boa init-parser ;