From 1eed006a299117f268955ef253779f5cc48fdaa4 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Tue, 27 Nov 2007 12:36:14 +1300 Subject: [PATCH 1/8] Add author information to peg --- extra/peg/authors.txt | 1 + extra/peg/summary.txt | 1 + 2 files changed, 2 insertions(+) create mode 100644 extra/peg/authors.txt create mode 100644 extra/peg/summary.txt diff --git a/extra/peg/authors.txt b/extra/peg/authors.txt new file mode 100644 index 0000000000..44b06f94bc --- /dev/null +++ b/extra/peg/authors.txt @@ -0,0 +1 @@ +Chris Double diff --git a/extra/peg/summary.txt b/extra/peg/summary.txt new file mode 100644 index 0000000000..324a544036 --- /dev/null +++ b/extra/peg/summary.txt @@ -0,0 +1 @@ +Parsing Expression Grammar and Packrat Parser From e6b6bb8a5c069349fb31e07fce0ff95bf526bbee Mon Sep 17 00:00:00 2001 From: Chris Double Date: Tue, 27 Nov 2007 15:08:16 +1300 Subject: [PATCH 2/8] Add satisfy parser in peg --- extra/peg/peg-docs.factor | 9 +++++++++ extra/peg/peg-tests.factor | 8 ++++++++ extra/peg/peg.factor | 16 ++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/extra/peg/peg-docs.factor b/extra/peg/peg-docs.factor index 40743132f3..1ba30fe06e 100644 --- a/extra/peg/peg-docs.factor +++ b/extra/peg/peg-docs.factor @@ -20,6 +20,15 @@ HELP: token { $description "Returns a parser that matches the given string." } ; +HELP: satisfy +{ $values + { "quot" "a quotation" } + { "parser" "a parser" } +} +{ $description + "Returns a parser that calls the quotation on the first character of the input string, " + "succeeding if that quotation returns true. The AST is the character from the string." 
} ; + HELP: range { $values { "min" "a character" } diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor index 7648819a8c..cea8dc6d3f 100644 --- a/extra/peg/peg-tests.factor +++ b/extra/peg/peg-tests.factor @@ -136,4 +136,12 @@ IN: temporary { f } [ "b" "a" token [ drop 1 ] action parse +] unit-test + +{ f } [ + "b" [ CHAR: a = ] satisfy parse +] unit-test + +{ CHAR: a } [ + "a" [ CHAR: a = ] satisfy parse parse-result-ast ] unit-test \ No newline at end of file diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 1fb8e7860d..6dd3700291 100644 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -33,6 +33,19 @@ M: token-parser parse ( state parser -- result ) 2drop f ] if ; +TUPLE: satisfy-parser quot ; + +M: satisfy-parser parse ( state parser -- result ) + over empty? [ + 2drop f + ] [ + satisfy-parser-quot [ unclip-slice dup ] dip call [ + <parse-result> + ] [ + 2drop f + ] if + ] if ; + TUPLE: range-parser min max ; M: range-parser parse ( state parser -- result ) @@ -148,6 +161,9 @@ PRIVATE> : token ( string -- parser ) token-parser construct-boa init-parser ; +: satisfy ( quot -- parser ) + satisfy-parser construct-boa init-parser ; + : range ( min max -- parser ) range-parser construct-boa init-parser ; From 055276ca25f896936719ff4e7ba01e0a3c63e01b Mon Sep 17 00:00:00 2001 From: Chris Double Date: Tue, 27 Nov 2007 15:36:26 +1300 Subject: [PATCH 3/8] Add 'sp' parser to skip whitespace --- extra/peg/peg-docs.factor | 8 ++++++++ extra/peg/peg.factor | 15 +++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/extra/peg/peg-docs.factor b/extra/peg/peg-docs.factor index 1ba30fe06e..bb610dce18 100644 --- a/extra/peg/peg-docs.factor +++ b/extra/peg/peg-docs.factor @@ -120,3 +120,11 @@ HELP: action "the default AST."
} { $example "CHAR: 0 CHAR: 9 range [ to-digit ] action" } ; +HELP: sp +{ $values + { "p1" "a parser" } + { "parser" "a parser" } +} +{ $description + "Returns a parser that calls the original parser 'p1' after stripping any whitespace " + " from the left of the input string." } ; diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 6dd3700291..df492ddcf2 100644 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -156,6 +156,18 @@ M: action-parser parse ( state parser -- result ) nip ] if ; +: left-trim-slice ( string -- string ) + #! Return a new string without any leading whitespace + #! from the original string. + dup empty? [ + dup first blank? [ 1 tail-slice left-trim-slice ] when + ] unless ; + +TUPLE: sp-parser p1 ; + +M: sp-parser parse ( state parser -- result ) + [ left-trim-slice ] dip sp-parser-p1 parse ; + PRIVATE> : token ( string -- parser ) @@ -190,3 +202,6 @@ PRIVATE> : action ( parser quot -- parser ) action-parser construct-boa init-parser ; + +: sp ( parser -- parser ) + sp-parser construct-boa init-parser ; From 5fb6af754b1fc451e929afa7342704d0a9f96660 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Tue, 27 Nov 2007 15:45:00 +1300 Subject: [PATCH 4/8] Add hide combinator --- extra/peg/peg-docs.factor | 10 ++++++++++ extra/peg/peg-tests.factor | 19 ++++++++++++++++++- extra/peg/peg.factor | 3 +++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/extra/peg/peg-docs.factor b/extra/peg/peg-docs.factor index bb610dce18..9034b1c8fd 100644 --- a/extra/peg/peg-docs.factor +++ b/extra/peg/peg-docs.factor @@ -128,3 +128,13 @@ HELP: sp { $description "Returns a parser that calls the original parser 'p1' after stripping any whitespace " " from the left of the input string." } ; + +HELP: hide +{ $values + { "p1" "a parser" } + { "parser" "a parser" } +} +{ $description + "Returns a parser that succeeds if the original parser succeeds, but does not " + "put any result in the AST. Useful for ignoring 'syntax' in the AST." 
} +{ $example "\"[\" token hide number \"]\" token hide 3array seq" } ; diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor index cea8dc6d3f..6a8d7429f3 100644 --- a/extra/peg/peg-tests.factor +++ b/extra/peg/peg-tests.factor @@ -144,4 +144,21 @@ IN: temporary { CHAR: a } [ "a" [ CHAR: a = ] satisfy parse parse-result-ast -] unit-test \ No newline at end of file +] unit-test + +{ "a" } [ + " a" "a" token sp parse parse-result-ast +] unit-test + +{ "a" } [ + "a" "a" token sp parse parse-result-ast +] unit-test + +{ V{ "a" } } [ + "[a]" "[" token hide "a" token "]" token hide 3array seq parse parse-result-ast +] unit-test + +{ f } [ + "a]" "[" token hide "a" token "]" token hide 3array seq parse +] unit-test + diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index df492ddcf2..fe2b551f78 100644 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -205,3 +205,6 @@ PRIVATE> : sp ( parser -- parser ) sp-parser construct-boa init-parser ; + +: hide ( parser -- parser ) + [ drop ignore ] action ; From ea2d4ea261e7c0b2cf08bd386ccdbf4ebdb19f2b Mon Sep 17 00:00:00 2001 From: Chris Double Date: Tue, 27 Nov 2007 15:56:26 +1300 Subject: [PATCH 5/8] Work on PL/0 Grammar as a PEG example --- extra/peg/pl0/authors.txt | 1 + extra/peg/pl0/pl0-tests.factor | 13 +++++++++++++ extra/peg/pl0/pl0.factor | 14 ++++++++++++++ extra/peg/pl0/summary.txt | 1 + 4 files changed, 29 insertions(+) create mode 100644 extra/peg/pl0/authors.txt create mode 100644 extra/peg/pl0/pl0-tests.factor create mode 100644 extra/peg/pl0/pl0.factor create mode 100644 extra/peg/pl0/summary.txt diff --git a/extra/peg/pl0/authors.txt b/extra/peg/pl0/authors.txt new file mode 100644 index 0000000000..44b06f94bc --- /dev/null +++ b/extra/peg/pl0/authors.txt @@ -0,0 +1 @@ +Chris Double diff --git a/extra/peg/pl0/pl0-tests.factor b/extra/peg/pl0/pl0-tests.factor new file mode 100644 index 0000000000..e40c984660 --- /dev/null +++ b/extra/peg/pl0/pl0-tests.factor @@ -0,0 +1,13 @@ +! 
Copyright (C) 2007 Chris Double. +! See http://factorcode.org/license.txt for BSD license. +! +USING: kernel tools.test peg peg.pl0 ; +IN: temporary + +{ "abc" } [ + "abc" 'ident' parse parse-result-ast +] unit-test + +{ 55 } [ + "55abc" 'number' parse parse-result-ast +] unit-test diff --git a/extra/peg/pl0/pl0.factor b/extra/peg/pl0/pl0.factor new file mode 100644 index 0000000000..3e33bbb959 --- /dev/null +++ b/extra/peg/pl0/pl0.factor @@ -0,0 +1,14 @@ +! Copyright (C) 2007 Chris Double. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel arrays strings math.parser peg ; +IN: peg.pl0 + +#! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0 + +: 'ident' ( -- parser ) + CHAR: a CHAR: z range + CHAR: A CHAR: Z range 2array choice repeat1 + [ >string ] action ; + +: 'number' ( -- parser ) + CHAR: 0 CHAR: 9 range repeat1 [ string>number ] action ; diff --git a/extra/peg/pl0/summary.txt b/extra/peg/pl0/summary.txt new file mode 100644 index 0000000000..59a20cf8c4 --- /dev/null +++ b/extra/peg/pl0/summary.txt @@ -0,0 +1 @@ +Grammar for PL/0 Language From e49d84ce97d4822b051367ebd04b4e2c7444d2ce Mon Sep 17 00:00:00 2001 From: Chris Double Date: Tue, 27 Nov 2007 16:16:21 +1300 Subject: [PATCH 6/8] Add 'delay' parser to peg --- extra/peg/peg-docs.factor | 10 ++++++++++ extra/peg/peg.factor | 8 ++++++++ 2 files changed, 18 insertions(+) diff --git a/extra/peg/peg-docs.factor b/extra/peg/peg-docs.factor index 9034b1c8fd..63b9d44310 100644 --- a/extra/peg/peg-docs.factor +++ b/extra/peg/peg-docs.factor @@ -138,3 +138,13 @@ HELP: hide "Returns a parser that succeeds if the original parser succeeds, but does not " "put any result in the AST. Useful for ignoring 'syntax' in the AST." 
} { $example "\"[\" token hide number \"]\" token hide 3array seq" } ; + +HELP: delay +{ $values + { "quot" "a quotation with stack effect ( -- parser )" } + { "parser" "a parser" } +} +{ $description + "Delays the construction of a parser until it is actually required to parse. This " + "allows for calling a parser that results in a recursive call to itself. The quotation " + "should return the constructed parser." } ; \ No newline at end of file diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index fe2b551f78..7cc5aec845 100644 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -168,6 +168,11 @@ TUPLE: sp-parser p1 ; M: sp-parser parse ( state parser -- result ) [ left-trim-slice ] dip sp-parser-p1 parse ; +TUPLE: delay-parser quot ; + +M: delay-parser parse ( state parser -- result ) + delay-parser-quot call parse ; + PRIVATE> : token ( string -- parser ) @@ -208,3 +213,6 @@ PRIVATE> : hide ( parser -- parser ) [ drop ignore ] action ; + +: delay ( parser -- parser ) + delay-parser construct-boa init-parser ; From 9f2f45cd71d8a5023a756a72cc7764f376f8a8e5 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Tue, 27 Nov 2007 16:45:32 +1300 Subject: [PATCH 7/8] More peg.pl0 additions This parser is currently really ugly. The goal is to tidy up peg so this parser looks more like the EBNF. --- extra/peg/pl0/pl0.factor | 46 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/extra/peg/pl0/pl0.factor b/extra/peg/pl0/pl0.factor index 3e33bbb959..8a01057bfb 100644 --- a/extra/peg/pl0/pl0.factor +++ b/extra/peg/pl0/pl0.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel arrays strings math.parser peg ; +USING: kernel arrays strings math.parser sequences peg ; IN: peg.pl0 #! 
Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0 @@ -12,3 +12,47 @@ IN: peg.pl0 : 'number' ( -- parser ) CHAR: 0 CHAR: 9 range repeat1 [ string>number ] action ; + +DEFER: 'factor' + +: 'term' ( -- parser ) + 'factor' "*" token "/" token 2array choice sp 'factor' sp 2array seq repeat0 2array seq ; + +: 'expression' ( -- parser ) + [ "+" token "-" token 2array choice sp optional 'term' sp 2dup 2array seq repeat0 3array seq ] delay ; + +: 'factor' ( -- parser ) + 'ident' 'number' "(" token hide 'expression' sp ")" token sp hide 3array seq 3array choice ; + +: 'condition' ( -- parser ) + "odd" token 'expression' sp 2array seq + 'expression' { "=" "#" "<=" "<" ">=" ">" } [ token ] map choice sp 'expression' sp 3array seq + 2array choice ; + +: 'statement' ( -- parser ) + [ + 'ident' ":=" token sp 'expression' sp 3array seq + "call" token 'ident' sp 2array seq + "begin" token 'statement' sp ";" token sp 'statement' sp 2array seq repeat0 "end" token sp 4array seq + "if" token 'condition' sp "then" token sp 'statement' sp 4array seq + 4array choice + "while" token 'condition' sp "do" token sp 'statement' sp 4array seq + 2array choice optional + ] delay ; + +: 'block' ( -- parser ) + [ + "const" token 'ident' sp "=" token sp 'number' sp 4array seq + "," token sp 'ident' sp "=" token sp 'number' sp 4array seq repeat0 + ";" token sp 3array seq optional + + "var" token 'ident' sp "," token sp 'ident' sp 2array seq repeat0 + ";" token sp 4array seq optional + + "procedure" token 'ident' sp ";" token sp 'block' sp 4array seq ";" token sp 2array seq repeat0 'statement' sp 2array seq + + 3array seq + ] delay ; + +: 'program' ( -- parser ) + 'block' "." 
token sp 2array seq ; From 31d57422dacf41d9597978d4ae60f830b8930144 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Tue, 27 Nov 2007 18:13:36 +1300 Subject: [PATCH 8/8] Start of EBNF parser --- extra/peg/ebnf/authors.txt | 1 + extra/peg/ebnf/ebnf-tests.factor | 17 +++++++++ extra/peg/ebnf/ebnf.factor | 65 ++++++++++++++++++++++++++++++++ extra/peg/ebnf/summary.txt | 1 + extra/peg/peg.factor | 3 ++ 5 files changed, 87 insertions(+) create mode 100644 extra/peg/ebnf/authors.txt create mode 100644 extra/peg/ebnf/ebnf-tests.factor create mode 100644 extra/peg/ebnf/ebnf.factor create mode 100644 extra/peg/ebnf/summary.txt diff --git a/extra/peg/ebnf/authors.txt b/extra/peg/ebnf/authors.txt new file mode 100644 index 0000000000..44b06f94bc --- /dev/null +++ b/extra/peg/ebnf/authors.txt @@ -0,0 +1 @@ +Chris Double diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor new file mode 100644 index 0000000000..f7af6f98d3 --- /dev/null +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -0,0 +1,17 @@ +! Copyright (C) 2007 Chris Double. +! See http://factorcode.org/license.txt for BSD license. +! +USING: kernel tools.test peg peg.ebnf ; +IN: temporary + +{ T{ ebnf-non-terminal f "abc" } } [ + "abc" 'non-terminal' parse parse-result-ast +] unit-test + +{ T{ ebnf-terminal f "55" } } [ + "\"55\"" 'terminal' parse parse-result-ast +] unit-test + +! { } [ +! "digit = \"0\" | \"1\" | \"2\"" 'rule' parse parse-result-ast +! ] unit-test \ No newline at end of file diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor new file mode 100644 index 0000000000..c41e9d31a4 --- /dev/null +++ b/extra/peg/ebnf/ebnf.factor @@ -0,0 +1,65 @@ +! Copyright (C) 2007 Chris Double. +! See http://factorcode.org/license.txt for BSD license. 
+USING: kernel arrays strings math.parser sequences namespaces peg ; +IN: peg.ebnf + +TUPLE: ebnf-non-terminal symbol ; +TUPLE: ebnf-terminal symbol ; +TUPLE: ebnf-choice options ; + +C: <ebnf-non-terminal> ebnf-non-terminal +C: <ebnf-terminal> ebnf-terminal +C: <ebnf-choice> ebnf-choice + +GENERIC: ebnf-compile ( ast -- quot ) + +M: ebnf-terminal ebnf-compile ( ast -- quot ) + [ + ebnf-terminal-symbol , \ token , + ] [ ] make ; + +M: ebnf-choice ebnf-compile ( ast -- quot ) + [ + [ + ebnf-choice-options [ + ebnf-compile , + ] each + ] { } make , + [ call ] , \ map , + ] [ ] make ; + +DEFER: 'rhs' + +: 'non-terminal' ( -- parser ) + CHAR: a CHAR: z range repeat1 [ >string <ebnf-non-terminal> ] action ; + +: 'terminal' ( -- parser ) + "\"" token hide [ CHAR: " = not ] satisfy repeat1 "\"" token hide 3array seq [ first >string <ebnf-terminal> ] action ; + +: 'element' ( -- parser ) + 'non-terminal' 'terminal' 2array choice ; + +: 'sequence' ( -- parser ) + 'element' sp repeat1 ; + +: 'choice' ( -- parser ) + 'element' sp "|" token sp list-of [ <ebnf-choice> ] action ; + +: 'repeat0' ( -- parser ) + "{" token sp hide + [ 'rhs' sp ] delay + "}" token sp hide + 3array seq ; + +: 'rhs' ( -- parser ) + 'repeat0' + 'choice' + 'sequence' + 'element' + 4array choice ; + +: 'rule' ( -- parser ) + 'non-terminal' + "=" token sp + 'rhs' + 3array seq ; diff --git a/extra/peg/ebnf/summary.txt b/extra/peg/ebnf/summary.txt new file mode 100644 index 0000000000..473cf4f3a2 --- /dev/null +++ b/extra/peg/ebnf/summary.txt @@ -0,0 +1 @@ +Grammar for parsing EBNF diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 7cc5aec845..a9e08f6024 100644 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -216,3 +216,6 @@ PRIVATE> : delay ( parser -- parser ) delay-parser construct-boa init-parser ; + +: list-of ( items separator -- parser ) + hide over 2array seq repeat0 [ concat ] action 2array seq [ unclip 1vector swap first append ] action ;