diff --git a/extra/peg/authors.txt b/extra/peg/authors.txt new file mode 100644 index 0000000000..44b06f94bc --- /dev/null +++ b/extra/peg/authors.txt @@ -0,0 +1 @@ +Chris Double diff --git a/extra/peg/ebnf/authors.txt b/extra/peg/ebnf/authors.txt new file mode 100644 index 0000000000..44b06f94bc --- /dev/null +++ b/extra/peg/ebnf/authors.txt @@ -0,0 +1 @@ +Chris Double diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor new file mode 100644 index 0000000000..f7af6f98d3 --- /dev/null +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -0,0 +1,17 @@ +! Copyright (C) 2007 Chris Double. +! See http://factorcode.org/license.txt for BSD license. +! +USING: kernel tools.test peg peg.ebnf ; +IN: temporary + +{ T{ ebnf-non-terminal f "abc" } } [ + "abc" 'non-terminal' parse parse-result-ast +] unit-test + +{ T{ ebnf-terminal f "55" } } [ + "\"55\"" 'terminal' parse parse-result-ast +] unit-test + +! { } [ +! "digit = \"0\" | \"1\" | \"2\"" 'rule' parse parse-result-ast +! ] unit-test \ No newline at end of file diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor new file mode 100644 index 0000000000..c41e9d31a4 --- /dev/null +++ b/extra/peg/ebnf/ebnf.factor @@ -0,0 +1,65 @@ +! Copyright (C) 2007 Chris Double. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel arrays strings math.parser sequences namespaces peg ; +IN: peg.ebnf + +TUPLE: ebnf-non-terminal symbol ; +TUPLE: ebnf-terminal symbol ; +TUPLE: ebnf-choice options ; + +C: <ebnf-non-terminal> ebnf-non-terminal +C: <ebnf-terminal> ebnf-terminal +C: <ebnf-choice> ebnf-choice + +GENERIC: ebnf-compile ( ast -- quot ) + +M: ebnf-terminal ebnf-compile ( ast -- quot ) + [ + ebnf-terminal-symbol , \ token , + ] [ ] make ; + +M: ebnf-choice ebnf-compile ( ast -- quot ) + [ + [ + ebnf-choice-options [ + ebnf-compile , + ] each + ] { } make , + [ call ] , \ map , + ] [ ] make ; + +DEFER: 'rhs' + +: 'non-terminal' ( -- parser ) + CHAR: a CHAR: z range repeat1 [ >string <ebnf-non-terminal> ] action ; + +: 'terminal' ( -- parser ) + "\"" token hide [ CHAR: " = not ] satisfy repeat1 "\"" token hide 3array seq [ first >string <ebnf-terminal> ] action ; + +: 'element' ( -- parser ) + 'non-terminal' 'terminal' 2array choice ; + +: 'sequence' ( -- parser ) + 'element' sp repeat1 ; + +: 'choice' ( -- parser ) + 'element' sp "|" token sp list-of [ <ebnf-choice> ] action ; + +: 'repeat0' ( -- parser ) + "{" token sp hide + [ 'rhs' sp ] delay + "}" token sp hide + 3array seq ; + +: 'rhs' ( -- parser ) + 'repeat0' + 'choice' + 'sequence' + 'element' + 4array choice ; + +: 'rule' ( -- parser ) + 'non-terminal' + "=" token sp + 'rhs' + 3array seq ; diff --git a/extra/peg/ebnf/summary.txt b/extra/peg/ebnf/summary.txt new file mode 100644 index 0000000000..473cf4f3a2 --- /dev/null +++ b/extra/peg/ebnf/summary.txt @@ -0,0 +1 @@ +Grammar for parsing EBNF diff --git a/extra/peg/peg-docs.factor b/extra/peg/peg-docs.factor index 40743132f3..63b9d44310 100644 --- a/extra/peg/peg-docs.factor +++ b/extra/peg/peg-docs.factor @@ -20,6 +20,15 @@ HELP: token { $description "Returns a parser that matches the given string." } ; +HELP: satisfy +{ $values + { "quot" "a quotation" } + { "parser" "a parser" } +} +{ $description + "Returns a parser that calls the quotation on the first character of the input string, " + "succeeding if that quotation returns true. The AST is the character from the string." } ; + HELP: range { $values { "min" "a character" } @@ -111,3 +120,31 @@ HELP: action "the default AST." } { $example "CHAR: 0 CHAR: 9 range [ to-digit ] action" } ; +HELP: sp +{ $values + { "p1" "a parser" } + { "parser" "a parser" } +} +{ $description + "Returns a parser that calls the original parser 'p1' after stripping any whitespace " + " from the left of the input string." } ; + +HELP: hide +{ $values + { "p1" "a parser" } + { "parser" "a parser" } +} +{ $description + "Returns a parser that succeeds if the original parser succeeds, but does not " + "put any result in the AST. Useful for ignoring 'syntax' in the AST." } +{ $example "\"[\" token hide number \"]\" token hide 3array seq" } ; + +HELP: delay +{ $values + { "quot" "a quotation with stack effect ( -- parser )" } + { "parser" "a parser" } +} +{ $description + "Delays the construction of a parser until it is actually required to parse. This " + "allows for calling a parser that results in a recursive call to itself. The quotation " + "should return the constructed parser." } ; \ No newline at end of file diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor index 7648819a8c..6a8d7429f3 100644 --- a/extra/peg/peg-tests.factor +++ b/extra/peg/peg-tests.factor @@ -136,4 +136,29 @@ IN: temporary { f } [ "b" "a" token [ drop 1 ] action parse -] unit-test \ No newline at end of file +] unit-test + +{ f } [ + "b" [ CHAR: a = ] satisfy parse +] unit-test + +{ CHAR: a } [ + "a" [ CHAR: a = ] satisfy parse parse-result-ast +] unit-test + +{ "a" } [ + " a" "a" token sp parse parse-result-ast +] unit-test + +{ "a" } [ + "a" "a" token sp parse parse-result-ast +] unit-test + +{ V{ "a" } } [ + "[a]" "[" token hide "a" token "]" token hide 3array seq parse parse-result-ast +] unit-test + +{ f } [ + "a]" "[" token hide "a" token "]" token hide 3array seq parse +] unit-test + diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 1fb8e7860d..a9e08f6024 100644 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -33,6 +33,19 @@ M: token-parser parse ( state parser -- result ) 2drop f ] if ; +TUPLE: satisfy-parser quot ; + +M: satisfy-parser parse ( state parser -- result ) + over empty? [ + 2drop f + ] [ + satisfy-parser-quot [ unclip-slice dup ] dip call [ + <parse-result> + ] [ + 2drop f + ] if + ] if ; + TUPLE: range-parser min max ; M: range-parser parse ( state parser -- result ) @@ -143,11 +156,31 @@ M: action-parser parse ( state parser -- result ) nip ] if ; +: left-trim-slice ( string -- string ) + #! Return a new string without any leading whitespace + #! from the original string. + dup empty? [ + dup first blank? [ 1 tail-slice left-trim-slice ] when + ] unless ; + +TUPLE: sp-parser p1 ; + +M: sp-parser parse ( state parser -- result ) + [ left-trim-slice ] dip sp-parser-p1 parse ; + +TUPLE: delay-parser quot ; + +M: delay-parser parse ( state parser -- result ) + delay-parser-quot call parse ; + PRIVATE> : token ( string -- parser ) token-parser construct-boa init-parser ; +: satisfy ( quot -- parser ) + satisfy-parser construct-boa init-parser ; + : range ( min max -- parser ) range-parser construct-boa init-parser ; @@ -174,3 +207,15 @@ PRIVATE> : action ( parser quot -- parser ) action-parser construct-boa init-parser ; + +: sp ( parser -- parser ) + sp-parser construct-boa init-parser ; + +: hide ( parser -- parser ) + [ drop ignore ] action ; + +: delay ( parser -- parser ) + delay-parser construct-boa init-parser ; + +: list-of ( items separator -- parser ) + hide over 2array seq repeat0 [ concat ] action 2array seq [ unclip 1vector swap first append ] action ; diff --git a/extra/peg/pl0/authors.txt b/extra/peg/pl0/authors.txt new file mode 100644 index 0000000000..44b06f94bc --- /dev/null +++ b/extra/peg/pl0/authors.txt @@ -0,0 +1 @@ +Chris Double diff --git a/extra/peg/pl0/pl0-tests.factor b/extra/peg/pl0/pl0-tests.factor new file mode 100644 index 0000000000..e40c984660 --- /dev/null +++ b/extra/peg/pl0/pl0-tests.factor @@ -0,0 +1,13 @@ +! Copyright (C) 2007 Chris Double. +! See http://factorcode.org/license.txt for BSD license. +! +USING: kernel tools.test peg peg.pl0 ; +IN: temporary + +{ "abc" } [ + "abc" 'ident' parse parse-result-ast +] unit-test + +{ 55 } [ + "55abc" 'number' parse parse-result-ast +] unit-test diff --git a/extra/peg/pl0/pl0.factor b/extra/peg/pl0/pl0.factor new file mode 100644 index 0000000000..8a01057bfb --- /dev/null +++ b/extra/peg/pl0/pl0.factor @@ -0,0 +1,58 @@ +! Copyright (C) 2007 Chris Double. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel arrays strings math.parser sequences peg ; +IN: peg.pl0 + +#! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0 + +: 'ident' ( -- parser ) + CHAR: a CHAR: z range + CHAR: A CHAR: Z range 2array choice repeat1 + [ >string ] action ; + +: 'number' ( -- parser ) + CHAR: 0 CHAR: 9 range repeat1 [ string>number ] action ; + +DEFER: 'factor' + +: 'term' ( -- parser ) + 'factor' "*" token "/" token 2array choice sp 'factor' sp 2array seq repeat0 2array seq ; + +: 'expression' ( -- parser ) + [ "+" token "-" token 2array choice sp optional 'term' sp 2dup 2array seq repeat0 3array seq ] delay ; + +: 'factor' ( -- parser ) + 'ident' 'number' "(" token hide 'expression' sp ")" token sp hide 3array seq 3array choice ; + +: 'condition' ( -- parser ) + "odd" token 'expression' sp 2array seq + 'expression' { "=" "#" "<=" "<" ">=" ">" } [ token ] map choice sp 'expression' sp 3array seq + 2array choice ; + +: 'statement' ( -- parser ) + [ + 'ident' ":=" token sp 'expression' sp 3array seq + "call" token 'ident' sp 2array seq + "begin" token 'statement' sp ";" token sp 'statement' sp 2array seq repeat0 "end" token sp 4array seq + "if" token 'condition' sp "then" token sp 'statement' sp 4array seq + 4array choice + "while" token 'condition' sp "do" token sp 'statement' sp 4array seq + 2array choice optional + ] delay ; + +: 'block' ( -- parser ) + [ + "const" token 'ident' sp "=" token sp 'number' sp 4array seq + "," token sp 'ident' sp "=" token sp 'number' sp 4array seq repeat0 + ";" token sp 3array seq optional + + "var" token 'ident' sp "," token sp 'ident' sp 2array seq repeat0 + ";" token sp 4array seq optional + + "procedure" token 'ident' sp ";" token sp 'block' sp 4array seq ";" token sp 2array seq repeat0 'statement' sp 2array seq + + 3array seq + ] delay ; + +: 'program' ( -- parser ) + 'block' "." token sp 2array seq ; diff --git a/extra/peg/pl0/summary.txt b/extra/peg/pl0/summary.txt new file mode 100644 index 0000000000..59a20cf8c4 --- /dev/null +++ b/extra/peg/pl0/summary.txt @@ -0,0 +1 @@ +Grammar for PL/0 Language diff --git a/extra/peg/summary.txt b/extra/peg/summary.txt new file mode 100644 index 0000000000..324a544036 --- /dev/null +++ b/extra/peg/summary.txt @@ -0,0 +1 @@ +Parsing Expression Grammar and Packrat Parser