From 4b37c9098ef8be2b9471d80d889af7bbe1d61d81 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Wed, 19 Mar 2008 16:54:42 +1300 Subject: [PATCH 01/13] Use multiline for parsing EBNF string --- extra/peg/ebnf/ebnf.factor | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 5d7d7297ef..4563783ab0 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: kernel parser words arrays strings math.parser sequences quotations vectors namespaces math assocs continuations peg - peg.parsers unicode.categories ; + peg.parsers unicode.categories multiline ; IN: peg.ebnf TUPLE: ebnf-non-terminal symbol ; @@ -182,4 +182,4 @@ DEFER: 'choice' f ] if* ; -: <EBNF "EBNF>" parse-tokens " " join ebnf>quot call ; parsing +: <EBNF "EBNF>" parse-multiline-string ebnf>quot call ; parsing From cc9a17b551980b43b016bdc7154bcf7c65d12ccf Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Wed, 19 Mar 2008 17:00:53 +1300 Subject: [PATCH 02/13] Use choice* and seq* in ebnf --- extra/peg/ebnf/ebnf.factor | 70 ++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 4563783ab0..81fc215bd9 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -100,33 +100,46 @@ M: ebnf (generate-parser) ( ast -- id ) DEFER: 'rhs' : 'non-terminal' ( -- parser ) - CHAR: a CHAR: z range "-" token [ first ] action 2array choice repeat1 [ >string <ebnf-non-terminal> ] action ; + [ + CHAR: a CHAR: z range , + "-" token [ first ] action , + ] choice* repeat1 [ >string <ebnf-non-terminal> ] action ; : 'terminal' ( -- parser ) - "'" token hide [ CHAR: ' = not ] satisfy repeat1 "'" token hide 3array seq [ first >string <ebnf-terminal> ] action ; + [ + "'" token hide , + [ CHAR: ' = not ] satisfy repeat1 , + "'" token hide , + ] seq* [ first >string <ebnf-terminal> ] action ; : 'element' ( -- parser ) - 'non-terminal' 'terminal' 2array choice ; + [ + 'non-terminal' , + 'terminal' , + ] choice* ; DEFER: 'choice' : 'group' ( -- parser ) - "(" token sp hide - [ 'choice' sp ] delay - ")" token sp hide - 3array seq [ first ] action ; + [ + "(" token sp hide , + [ 'choice' sp ] delay , + ")" token sp hide , + ] seq* [ first ] action ; : 'repeat0' ( -- parser ) - "{" token sp hide - [ 'choice' sp ] delay - "}" token sp hide - 3array seq [ first <ebnf-repeat0> ] action ; + [ + "{" token sp hide , + [ 'choice' sp ] delay , + "}" token sp hide , + ] seq* [ first <ebnf-repeat0> ] action ; : 'optional' ( -- parser ) - "[" token sp hide - [ 'choice' sp ] delay - "]" token sp hide - 3array seq [ first <ebnf-optional> ] action ; + [ + "[" token sp hide , + [ 'choice' sp ] delay , + "]" token sp hide , + ] seq* [ first <ebnf-optional> ] action ; : 'sequence' ( -- parser ) [ @@ -134,8 +147,7 @@ DEFER: 'choice' 'group' sp , 'repeat0' sp , 'optional' sp , - ] { } make choice - repeat1 [ + ] choice* repeat1 [ dup length 1 = [ first ] [ <ebnf-sequence> ] if ] action ; @@ -145,18 +157,26 @@ DEFER: 'choice' ] action ; : 'action' ( -- parser ) - "=>" token hide - [ blank? ] satisfy ensure-not [ drop t ] satisfy 2array seq [ first ] action repeat1 [ >string ] action sp - 2array seq [ first <ebnf-action> ] action ; + [ + "=>" token hide , + [ + [ blank? ] satisfy ensure-not , + [ drop t ] satisfy , + ] seq* [ first ] action repeat1 [ >string ] action sp , + ] seq* [ first <ebnf-action> ] action ; : 'rhs' ( -- parser ) - 'choice' 'action' sp optional 2array seq ; + [ + 'choice' , + 'action' sp optional , + ] seq* ; : 'rule' ( -- parser ) - 'non-terminal' [ ebnf-non-terminal-symbol ] action - "=" token sp hide - 'rhs' - 3array seq [ first2 <ebnf-rule> ] action ; + [ + 'non-terminal' [ ebnf-non-terminal-symbol ] action , + "=" token sp hide , + 'rhs' , + ] seq* [ first2 <ebnf-rule> ] action ; : 'ebnf' ( -- parser ) 'rule' sp "." token sp hide list-of [ <ebnf> ] action ; From 757853812271dbeb31c97f5d33d2f4bf14f9f55f Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Wed, 19 Mar 2008 17:34:28 +1300 Subject: [PATCH 03/13] Minor tidyup of ebnf --- extra/peg/ebnf/ebnf-tests.factor | 17 +++++++++++++ extra/peg/ebnf/ebnf.factor | 42 ++++++++++++++++++++++++-------- 2 files changed, 49 insertions(+), 10 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 452da8df05..156f8e9389 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -97,3 +97,20 @@ IN: peg.ebnf.tests } [ "one [ two ] three" 'choice' parse parse-result-ast ] unit-test + +{ "foo" } [ + "\"foo\"" 'identifier' parse parse-result-ast +] unit-test + +{ "foo" } [ + "'foo'" 'identifier' parse parse-result-ast +] unit-test + +{ "foo" } [ + "foo" 'non-terminal' parse parse-result-ast ebnf-non-terminal-symbol +] unit-test + +{ "foo" } [ + "foo]" 'non-terminal' parse parse-result-ast ebnf-non-terminal-symbol +] unit-test + diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 81fc215bd9..9a3b70fa1c 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: kernel parser words arrays strings math.parser sequences quotations vectors namespaces math assocs continuations peg - peg.parsers unicode.categories multiline ; + peg.parsers unicode.categories multiline combinators.lib ; IN: peg.ebnf TUPLE: ebnf-non-terminal symbol ; @@ -99,18 +99,40 @@ M: ebnf (generate-parser) ( ast -- id ) DEFER: 'rhs' +: 'identifier' ( -- parser ) + #! Return a parser that parses an identifer delimited by + #! a quotation character. The quotation can be single + #! or double quotes. The AST produced is the identifier + #! between the quotes. + [ + [ CHAR: " = not ] satisfy repeat1 "\"" "\"" surrounded-by , + [ CHAR: ' = not ] satisfy repeat1 "'" "'" surrounded-by , + ] choice* [ >string ] action ; + : 'non-terminal' ( -- parser ) - [ - CHAR: a CHAR: z range , - "-" token [ first ] action , - ] choice* repeat1 [ >string <ebnf-non-terminal> ] action ; + #! A non-terminal is the name of another rule. It can + #! be any non-blank character except for characters used + #! in the EBNF syntax itself. + [ + { + [ dup blank? ] + [ dup CHAR: " = ] + [ dup CHAR: ' = ] + [ dup CHAR: | = ] + [ dup CHAR: { = ] + [ dup CHAR: } = ] + [ dup CHAR: = = ] + [ dup CHAR: ) = ] + [ dup CHAR: ( = ] + [ dup CHAR: ] = ] + [ dup CHAR: [ = ] + } || not nip + ] satisfy repeat1 [ >string <ebnf-non-terminal> ] action ; : 'terminal' ( -- parser ) - [ - "'" token hide , - [ CHAR: ' = not ] satisfy repeat1 , - "'" token hide , - ] seq* [ first >string <ebnf-terminal> ] action ; + #! A terminal is an identifier enclosed in quotations + #! and it represents the literal value of the identifier. + 'identifier' [ <ebnf-terminal> ] action ; : 'element' ( -- parser ) [ From 708d55fb8ef4777cb3464b498d794d04a7f96a3a Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Wed, 19 Mar 2008 17:37:08 +1300 Subject: [PATCH 04/13] Add syntax word for ebnf --- extra/peg/ebnf/ebnf.factor | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 9a3b70fa1c..e2977a28fb 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -99,6 +99,11 @@ M: ebnf (generate-parser) ( ast -- id ) DEFER: 'rhs' +: syntax ( string -- parser ) + #! Parses the string, ignoring white space, and + #! does not put the result in the AST. + token sp hide ; + : 'identifier' ( -- parser ) #! Return a parser that parses an identifer delimited by #! a quotation character. The quotation can be single @@ -144,23 +149,23 @@ DEFER: 'choice' : 'group' ( -- parser ) [ - "(" token sp hide , + "(" syntax , [ 'choice' sp ] delay , - ")" token sp hide , + ")" syntax , ] seq* [ first ] action ; : 'repeat0' ( -- parser ) [ - "{" token sp hide , + "{" syntax , [ 'choice' sp ] delay , - "}" token sp hide , + "}" syntax , ] seq* [ first <ebnf-repeat0> ] action ; : 'optional' ( -- parser ) [ - "[" token sp hide , + "[" syntax , [ 'choice' sp ] delay , - "]" token sp hide , + "]" syntax , ] seq* [ first <ebnf-optional> ] action ; : 'sequence' ( -- parser ) @@ -196,12 +201,12 @@ DEFER: 'choice' : 'rule' ( -- parser ) [ 'non-terminal' [ ebnf-non-terminal-symbol ] action , - "=" token sp hide , + "=" syntax , 'rhs' , ] seq* [ first2 <ebnf-rule> ] action ; : 'ebnf' ( -- parser ) - 'rule' sp "." token sp hide list-of [ <ebnf> ] action ; + 'rule' sp "." syntax list-of [ <ebnf> ] action ; : ebnf>quot ( string -- quot ) 'ebnf' parse [ From 9403d97e22c1e0e59ce4285b033b4db5e4f18b2b Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Wed, 19 Mar 2008 17:52:22 +1300 Subject: [PATCH 05/13] Add syntax-pack and grouped to ebnf refactoring --- extra/peg/ebnf/ebnf.factor | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index e2977a28fb..fce7a8d3bd 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -104,6 +104,11 @@ DEFER: 'rhs' #! does not put the result in the AST. token sp hide ; +: syntax-pack ( begin parser end -- parser ) + #! Parse 'parser' surrounded by syntax elements + #! begin and end. + [ syntax ] dipd syntax pack ; + : 'identifier' ( -- parser ) #! Return a parser that parses an identifer delimited by #! a quotation character. The quotation can be single @@ -147,26 +152,20 @@ DEFER: 'rhs' DEFER: 'choice' +: grouped ( begin quot end -- parser ) + #! Parse a group of choices, where the delimiter for the + #! group is specified by 'begin' and 'end'. The quotation + #! should produce the AST to be the result of the parser. + [ [ 'choice' sp ] delay swap action ] dip syntax-pack ; + : 'group' ( -- parser ) - [ - "(" syntax , - [ 'choice' sp ] delay , - ")" syntax , - ] seq* [ first ] action ; + "(" [ ] ")" grouped ; : 'repeat0' ( -- parser ) - [ - "{" syntax , - [ 'choice' sp ] delay , - "}" syntax , - ] seq* [ first <ebnf-repeat0> ] action ; + "{" [ <ebnf-repeat0> ] "}" grouped ; : 'optional' ( -- parser ) - [ - "[" syntax , - [ 'choice' sp ] delay , - "]" syntax , - ] seq* [ first <ebnf-optional> ] action ; + "[" [ <ebnf-optional> ] "]" grouped ; : 'sequence' ( -- parser ) [ @@ -174,14 +173,14 @@ DEFER: 'choice' 'group' sp , 'repeat0' sp , 'optional' sp , - ] choice* repeat1 [ + ] choice* repeat1 [ dup length 1 = [ first ] [ <ebnf-sequence> ] if - ] action ; + ] action ; : 'choice' ( -- parser ) 'sequence' sp "|" token sp list-of [ dup length 1 = [ first ] [ <ebnf-choice> ] if - ] action ; + ] action ; : 'action' ( -- parser ) [ From eef6ae782730ba22a779997023c20d71730abcae Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Wed, 19 Mar 2008 18:07:25 +1300 Subject: [PATCH 06/13] Remove need for '.' to terminate rule lines in EBNF --- extra/peg/ebnf/ebnf.factor | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index fce7a8d3bd..e95fc4f9d4 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -145,10 +145,17 @@ DEFER: 'rhs' 'identifier' [ <ebnf-terminal> ] action ; : 'element' ( -- parser ) - [ - 'non-terminal' , - 'terminal' , - ] choice* ; + #! An element of a rule. It can be a terminal or a + #! non-terminal but must not be followed by a "=". + #! The latter indicates that it is the beginning of a + #! new rule. + [ + [ + 'non-terminal' , + 'terminal' , + ] choice* , + "=" syntax ensure-not , + ] seq* [ first ] action ; DEFER: 'choice' @@ -168,6 +175,8 @@ DEFER: 'choice' "[" [ <ebnf-optional> ] "]" grouped ; : 'sequence' ( -- parser ) + #! A sequence of terminals and non-terminals, including + #! groupings of those. [ 'element' sp , 'group' sp , @@ -205,7 +214,7 @@ DEFER: 'choice' ] seq* [ first2 <ebnf-rule> ] action ; : 'ebnf' ( -- parser ) - 'rule' sp "." syntax list-of [ <ebnf> ] action ; + 'rule' sp repeat1 [ <ebnf> ] action ; : ebnf>quot ( string -- quot ) 'ebnf' parse [ From 208c88c44949f72f62d9cd6ffbf700d301232963 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Wed, 19 Mar 2008 18:35:45 +1300 Subject: [PATCH 07/13] Update pl0 for ebnf changes, and add more tests --- extra/peg/pl0/pl0-tests.factor | 88 +++++++++++++++++++++++++++++++++- extra/peg/pl0/pl0.factor | 35 +++++++------- 2 files changed, 105 insertions(+), 18 deletions(-) diff --git a/extra/peg/pl0/pl0-tests.factor b/extra/peg/pl0/pl0-tests.factor index fa8ac89f57..bf321d54e9 100644 --- a/extra/peg/pl0/pl0-tests.factor +++ b/extra/peg/pl0/pl0-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test peg peg.pl0 ; +USING: kernel tools.test peg peg.pl0 multiline sequences ; IN: peg.pl0.tests { "abc" } [ @@ -11,3 +11,89 @@ IN: peg.pl0.tests { 55 } [ "55abc" number parse parse-result-ast ] unit-test + +{ t } [ + <" +VAR x, squ; + +PROCEDURE square; +BEGIN + squ := x * x +END; + +BEGIN + x := 1; + WHILE x <= 10 DO + BEGIN + CALL square; + x := x + 1; + END +END. +"> program parse parse-result-remaining empty? +] unit-test + +{ f } [ + <" +CONST + m = 7, + n = 85; + +VAR + x, y, z, q, r; + +PROCEDURE multiply; +VAR a, b; + +BEGIN + a := x; + b := y; + z := 0; + WHILE b > 0 DO BEGIN + IF ODD b THEN z := z + a; + a := 2 * a; + b := b / 2; + END +END; + +PROCEDURE divide; +VAR w; +BEGIN + r := x; + q := 0; + w := y; + WHILE w <= r DO w := 2 * w; + WHILE w > y DO BEGIN + q := 2 * q; + w := w / 2; + IF w <= r THEN BEGIN + r := r - w; + q := q + 1 + END + END +END; + +PROCEDURE gcd; +VAR f, g; +BEGIN + f := x; + g := y; + WHILE f # g DO BEGIN + IF f < g THEN g := g - f; + IF g < f THEN f := f - g; + END; + z := f +END; + +BEGIN + x := m; + y := n; + CALL multiply; + x := 25; + y := 3; + CALL divide; + x := 84; + y := 36; + CALL gcd; +END. + "> program parse parse-result-remaining empty? +] unit-test \ No newline at end of file diff --git a/extra/peg/pl0/pl0.factor b/extra/peg/pl0/pl0.factor index 6844eb44dc..1ef7a23b41 100644 --- a/extra/peg/pl0/pl0.factor +++ b/extra/peg/pl0/pl0.factor @@ -1,30 +1,31 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. USING: kernel arrays strings math.parser sequences -peg peg.ebnf peg.parsers memoize ; +peg peg.ebnf peg.parsers memoize namespaces ; IN: peg.pl0 #! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0 MEMO: ident ( -- parser ) - CHAR: a CHAR: z range - CHAR: A CHAR: Z range 2array choice repeat1 - [ >string ] action ; + [ + CHAR: a CHAR: z range , + CHAR: A CHAR: Z range , + ] choice* repeat1 [ >string ] action ; MEMO: number ( -- parser ) CHAR: 0 CHAR: 9 range repeat1 [ string>number ] action ; <EBNF -program = block '.' . -block = [ 'const' ident '=' number { ',' ident '=' number } ';' ] - [ 'var' ident { ',' ident } ';' ] - { 'procedure' ident ';' [ block ';' ] } statement . -statement = [ ident ':=' expression | 'call' ident | - 'begin' statement {';' statement } 'end' | - 'if' condition 'then' statement | - 'while' condition 'do' statement ] . -condition = 'odd' expression | - expression ('=' | '#' | '<=' | '<' | '>=' | '>') expression . -expression = ['+' | '-'] term {('+' | '-') term } . -term = factor {('*' | '/') factor } . -factor = ident | number | '(' expression ')' +program = block "." +block = [ "CONST" ident "=" number { "," ident "=" number } ";" ] + [ "VAR" ident { "," ident } ";" ] + { "PROCEDURE" ident ";" [ block ";" ] } statement +statement = [ ident ":=" expression | "CALL" ident | + "BEGIN" statement {";" statement } "END" | + "IF" condition "THEN" statement | + "WHILE" condition "DO" statement ] +condition = "ODD" expression | + expression ("=" | "#" | "<=" | "<" | ">=" | ">") expression +expression = ["+" | "-"] term {("+" | "-") term } +term = factor {("*" | "/") factor } +factor = ident | number | "(" expression ")" EBNF> From 64135b73e1b029c49af511a9d32307b5c473b52a Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Wed, 19 Mar 2008 19:15:52 +1300 Subject: [PATCH 08/13] Add support for ensure-not and parsing any single character to EBNF This allows, for example: foo = {!("_" | "-") .} This will match zero or more of any character, except for _ or - --- extra/peg/ebnf/ebnf-tests.factor | 1 + extra/peg/ebnf/ebnf.factor | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 156f8e9389..86a7a454ed 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -114,3 +114,4 @@ IN: peg.ebnf.tests "foo]" 'non-terminal' parse parse-result-ast ebnf-non-terminal-symbol ] unit-test + diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index e95fc4f9d4..4dc096ecbd 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -7,6 +7,8 @@ IN: peg.ebnf TUPLE: ebnf-non-terminal symbol ; TUPLE: ebnf-terminal symbol ; +TUPLE: ebnf-any-character ; +TUPLE: ebnf-ensure-not group ; TUPLE: ebnf-choice options ; TUPLE: ebnf-sequence elements ; TUPLE: ebnf-repeat0 group ; @@ -17,6 +19,8 @@ TUPLE: ebnf rules ; C: <ebnf-non-terminal> ebnf-non-terminal C: <ebnf-terminal> ebnf-terminal +C: <ebnf-any-character> ebnf-any-character +C: <ebnf-ensure-not> ebnf-ensure-not C: <ebnf-choice> ebnf-choice C: <ebnf-sequence> ebnf-sequence C: <ebnf-repeat0> ebnf-repeat0 @@ -61,6 +65,9 @@ M: ebnf-non-terminal (generate-parser) ( ast -- id ) parsers get , \ nth , [ search ] [ 2drop f ] recover , \ or , ] [ ] make delay sp store-parser ; +M: ebnf-any-character (generate-parser) ( ast -- id ) + drop [ drop t ] satisfy store-parser ; + M: ebnf-choice (generate-parser) ( ast -- id ) ebnf-choice-options [ generate-parser get-parser @@ -71,6 +78,9 @@ M: ebnf-sequence (generate-parser) ( ast -- id ) generate-parser get-parser ] map seq store-parser ; +M: ebnf-ensure-not (generate-parser) ( ast -- id ) + ebnf-ensure-not-group generate-parser get-parser ensure-not store-parser ; + M: ebnf-repeat0 (generate-parser) ( ast -- id ) ebnf-repeat0-group generate-parser get-parser repeat0 store-parser ; @@ -136,6 +146,8 @@ DEFER: 'rhs' [ dup CHAR: ( = ] [ dup CHAR: ] = ] [ dup CHAR: [ = ] + [ dup CHAR: . = ] + [ dup CHAR: ! = ] } || not nip ] satisfy repeat1 [ >string <ebnf-non-terminal> ] action ; @@ -144,6 +156,10 @@ DEFER: 'rhs' #! and it represents the literal value of the identifier. 'identifier' [ <ebnf-terminal> ] action ; +: 'any-character' ( -- parser ) + #! A parser to match the symbol for any character match. + [ CHAR: . = ] satisfy [ drop <ebnf-any-character> ] action ; + : 'element' ( -- parser ) #! An element of a rule. It can be a terminal or a #! non-terminal but must not be followed by a "=". @@ -153,6 +169,7 @@ DEFER: 'rhs' [ 'non-terminal' , 'terminal' , + 'any-character' , ] choice* , "=" syntax ensure-not , ] seq* [ first ] action ; @@ -174,10 +191,20 @@ DEFER: 'choice' : 'optional' ( -- parser ) "[" [ <ebnf-optional> ] "]" grouped ; +: 'ensure-not' ( -- parser ) + #! Parses the '!' syntax to ensure that + #! something that matches the following elements do + #! not exist in the parse stream. + [ + "!" syntax , + 'group' sp , + ] seq* [ first <ebnf-ensure-not> ] action ; + : 'sequence' ( -- parser ) #! A sequence of terminals and non-terminals, including #! groupings of those. [ + 'ensure-not' sp , 'element' sp , 'group' sp , 'repeat0' sp , From 82d54d37769a30663face16e7bbd6c800bee8171 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Thu, 20 Mar 2008 01:18:09 +1300 Subject: [PATCH 09/13] EBNF syntax change [ ... ] is now ( ... )? { ... } is now ( ... )* Added ( ... )+ --- extra/peg/ebnf/ebnf-tests.factor | 4 ++-- extra/peg/ebnf/ebnf.factor | 34 +++++++++++++++++++++++--------- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 86a7a454ed..6838bf3eca 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -83,7 +83,7 @@ IN: peg.ebnf.tests } } } [ - "one {(two | three) four}" 'choice' parse parse-result-ast + "one ((two | three) four)*" 'choice' parse parse-result-ast ] unit-test { @@ -95,7 +95,7 @@ IN: peg.ebnf.tests } } } [ - "one [ two ] three" 'choice' parse parse-result-ast + "one ( two )? three" 'choice' parse parse-result-ast ] unit-test { "foo" } [ diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 4dc096ecbd..59695998ce 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -12,6 +12,7 @@ TUPLE: ebnf-ensure-not group ; TUPLE: ebnf-choice options ; TUPLE: ebnf-sequence elements ; TUPLE: ebnf-repeat0 group ; +TUPLE: ebnf-repeat1 group ; TUPLE: ebnf-optional elements ; TUPLE: ebnf-rule symbol elements ; TUPLE: ebnf-action word ; @@ -24,6 +25,7 @@ C: <ebnf-ensure-not> ebnf-ensure-not C: <ebnf-choice> ebnf-choice C: <ebnf-sequence> ebnf-sequence C: <ebnf-repeat0> ebnf-repeat0 +C: <ebnf-repeat1> ebnf-repeat1 C: <ebnf-optional> ebnf-optional C: <ebnf-rule> ebnf-rule C: <ebnf-action> ebnf-action @@ -84,6 +86,9 @@ M: ebnf-ensure-not (generate-parser) ( ast -- id ) M: ebnf-repeat0 (generate-parser) ( ast -- id ) ebnf-repeat0-group generate-parser get-parser repeat0 store-parser ; +M: ebnf-repeat1 (generate-parser) ( ast -- id ) + ebnf-repeat1-group generate-parser get-parser repeat1 store-parser ; + M: ebnf-optional (generate-parser) ( ast -- id ) ebnf-optional-elements generate-parser get-parser optional store-parser ; @@ -176,20 +181,30 @@ DEFER: 'rhs' DEFER: 'choice' -: grouped ( begin quot end -- parser ) - #! Parse a group of choices, where the delimiter for the - #! group is specified by 'begin' and 'end'. The quotation - #! should produce the AST to be the result of the parser. - [ [ 'choice' sp ] delay swap action ] dip syntax-pack ; - +: grouped ( quot suffix -- parser ) + #! Parse a group of choices, with a suffix indicating + #! the type of group (repeat0, repeat1, etc) and + #! an quot that is the action that produces the AST. + "(" [ 'choice' sp ] delay ")" syntax-pack + swap 2seq + [ first ] rot compose action ; + : 'group' ( -- parser ) - "(" [ ] ")" grouped ; + #! A grouping with no suffix. Used for precedence. + [ ] [ + "*" token sp ensure-not , + "+" token sp ensure-not , + "?" token sp ensure-not , + ] seq* hide grouped ; : 'repeat0' ( -- parser ) - "{" [ <ebnf-repeat0> ] "}" grouped ; + [ <ebnf-repeat0> ] "*" syntax grouped ; + +: 'repeat1' ( -- parser ) + [ <ebnf-repeat1> ] "+" syntax grouped ; : 'optional' ( -- parser ) - "[" [ <ebnf-optional> ] "]" grouped ; + [ <ebnf-optional> ] "?" syntax grouped ; : 'ensure-not' ( -- parser ) #! Parses the '!' syntax to ensure that @@ -208,6 +223,7 @@ DEFER: 'choice' 'element' sp , 'group' sp , 'repeat0' sp , + 'repeat1' sp , 'optional' sp , ] choice* repeat1 [ dup length 1 = [ first ] [ <ebnf-sequence> ] if From c0b7bdf823001f4389e7f13df86d05a16dba0822 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Thu, 20 Mar 2008 01:25:57 +1300 Subject: [PATCH 10/13] Add *, + and ? to list of non-allowed ebnf identifier characteres --- extra/peg/ebnf/ebnf.factor | 3 +++ 1 file changed, 3 insertions(+) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 59695998ce..b500d82e98 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -153,6 +153,9 @@ DEFER: 'rhs' [ dup CHAR: [ = ] [ dup CHAR: . = ] [ dup CHAR: ! = ] + [ dup CHAR: * = ] + [ dup CHAR: + = ] + [ dup CHAR: ? = ] } || not nip ] satisfy repeat1 [ >string <ebnf-non-terminal> ] action ; From 65fabeec11956cf7d2d7ddacd50b33b7d6e10823 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Thu, 20 Mar 2008 02:16:30 +1300 Subject: [PATCH 11/13] remove => action and replace it with [[ code ]] in EBNF Previously the action had to be a factor word and could only appear at the end of a rule: : aword ( ast -- ast ) drop V{ 1 2 } ; <EBNF foo = "a" "b" => aword EBNF> Now actions can appear anywhere after an element, and can be any factor code between [[ ... ]] delimiters: <EBNF foo = "a" "b" [[ drop V{ 1 2 } ]] EBNF> <EBNF foo = "a" [[ drop 1 ]] "b" [[ drop 2 ]] EBNF> Unfortunately since this means the ebnf>quot code uses the equivalent of eval, it no longer compiles nicely since it can't be inferred. The generated parsers however do compile. --- extra/peg/ebnf/ebnf-tests.factor | 12 +++++++++++- extra/peg/ebnf/ebnf.factor | 25 ++++++++++++++----------- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 6838bf3eca..63cec2f120 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test peg peg.ebnf ; +USING: kernel tools.test peg peg.ebnf compiler.units ; IN: peg.ebnf.tests { T{ ebnf-non-terminal f "abc" } } [ @@ -114,4 +114,14 @@ IN: peg.ebnf.tests "foo]" 'non-terminal' parse parse-result-ast ebnf-non-terminal-symbol ] unit-test +{ V{ "a" "b" } } [ + "foo='a' 'b'" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast +] unit-test +{ V{ 1 "b" } } [ + "foo='a' [[ drop 1 ]] 'b'" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast +] unit-test + +{ V{ 1 2 } } [ + "foo='a' [[ drop 1 ]] 'b' [[ drop 2 ]]" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast +] unit-test \ No newline at end of file diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index b500d82e98..2e0740663a 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -2,7 +2,8 @@ ! See http://factorcode.org/license.txt for BSD license. USING: kernel parser words arrays strings math.parser sequences quotations vectors namespaces math assocs continuations peg - peg.parsers unicode.categories multiline combinators.lib ; + peg.parsers unicode.categories multiline combinators.lib + splitting ; IN: peg.ebnf TUPLE: ebnf-non-terminal symbol ; @@ -15,7 +16,7 @@ TUPLE: ebnf-repeat0 group ; TUPLE: ebnf-repeat1 group ; TUPLE: ebnf-optional elements ; TUPLE: ebnf-rule symbol elements ; -TUPLE: ebnf-action word ; +TUPLE: ebnf-action code ; TUPLE: ebnf rules ; C: <ebnf-non-terminal> ebnf-non-terminal @@ -98,7 +99,7 @@ M: ebnf-rule (generate-parser) ( ast -- id ) swap [ parsers get set-nth ] keep ; M: ebnf-action (generate-parser) ( ast -- id ) - ebnf-action-word search 1quotation + ebnf-action-code string-lines parse-lines last-parser get get-parser swap action store-parser ; M: vector (generate-parser) ( ast -- id ) @@ -237,20 +238,22 @@ DEFER: 'choice' dup length 1 = [ first ] [ <ebnf-choice> ] if ] action ; -: 'action' ( -- parser ) +: 'factor-code' ( -- parser ) [ - "=>" token hide , - [ - [ blank? ] satisfy ensure-not , - [ drop t ] satisfy , - ] seq* [ first ] action repeat1 [ >string ] action sp , - ] seq* [ first <ebnf-action> ] action ; + "]]" token ensure-not , + [ drop t ] satisfy , + ] seq* [ first ] action repeat0 [ >string ] action ; + +: 'action' ( -- parser ) + "[[" 'factor-code' "]]" syntax-pack [ <ebnf-action> ] action ; : 'rhs' ( -- parser ) [ 'choice' , 'action' sp optional , - ] seq* ; + ] seq* repeat1 [ + dup length 1 = [ first ] [ <ebnf-sequence> ] if + ] action ; : 'rule' ( -- parser ) [ From 92d8140d87cff4015eb9d396296db0d015d7e0dd Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Thu, 20 Mar 2008 03:05:40 +1300 Subject: [PATCH 12/13] Change ebnf-action to properly nest with attached parser This allows removal of last-parser hack. Syntax of EBNF changes though. Now an action must attach to a group: <EBNF foo = (a b c) [[ ...act on group... ]] EBNF> --- extra/peg/ebnf/ebnf-tests.factor | 18 ++++-------- extra/peg/ebnf/ebnf.factor | 49 ++++++++++++++------------------ 2 files changed, 27 insertions(+), 40 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 63cec2f120..8846a9c94c 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -15,11 +15,8 @@ IN: peg.ebnf.tests { T{ ebnf-rule f "digit" - V{ - T{ ebnf-choice f - V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } } - } - f + T{ ebnf-choice f + V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } } } } } [ @@ -29,11 +26,8 @@ IN: peg.ebnf.tests { T{ ebnf-rule f "digit" - V{ - T{ ebnf-sequence f - V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } } - } - f + T{ ebnf-sequence f + V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } } } } } [ @@ -119,9 +113,9 @@ IN: peg.ebnf.tests ] unit-test { V{ 1 "b" } } [ - "foo='a' [[ drop 1 ]] 'b'" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast + "foo=('a')[[ drop 1 ]] 'b'" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast ] unit-test { V{ 1 2 } } [ - "foo='a' [[ drop 1 ]] 'b' [[ drop 2 ]]" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast + "foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]]" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast ] unit-test \ No newline at end of file diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 2e0740663a..e2c2dd5006 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -16,7 +16,7 @@ TUPLE: ebnf-repeat0 group ; TUPLE: ebnf-repeat1 group ; TUPLE: ebnf-optional elements ; TUPLE: ebnf-rule symbol elements ; -TUPLE: ebnf-action code ; +TUPLE: ebnf-action parser code ; TUPLE: ebnf rules ; C: <ebnf-non-terminal> ebnf-non-terminal @@ -34,12 +34,10 @@ C: <ebnf> ebnf SYMBOL: parsers SYMBOL: non-terminals -SYMBOL: last-parser : reset-parser-generation ( -- ) V{ } clone parsers set - H{ } clone non-terminals set - f last-parser set ; + H{ } clone non-terminals set ; : store-parser ( parser -- number ) parsers get [ push ] keep length 1- ; @@ -57,7 +55,7 @@ SYMBOL: last-parser GENERIC: (generate-parser) ( ast -- id ) : generate-parser ( ast -- id ) - (generate-parser) dup last-parser set ; + (generate-parser) ; M: ebnf-terminal (generate-parser) ( ast -- id ) ebnf-terminal-symbol token sp store-parser ; @@ -99,15 +97,12 @@ M: ebnf-rule (generate-parser) ( ast -- id ) swap [ parsers get set-nth ] keep ; M: ebnf-action (generate-parser) ( ast -- id ) - ebnf-action-code string-lines parse-lines - last-parser get get-parser swap action store-parser ; + [ ebnf-action-parser generate-parser get-parser ] keep + ebnf-action-code string-lines parse-lines action store-parser ; M: vector (generate-parser) ( ast -- id ) [ generate-parser ] map peek ; -M: f (generate-parser) ( ast -- id ) - drop last-parser get ; - M: ebnf (generate-parser) ( ast -- id ) ebnf-rules [ generate-parser @@ -199,6 +194,7 @@ DEFER: 'choice' "*" token sp ensure-not , "+" token sp ensure-not , "?" token sp ensure-not , + "[[" token sp ensure-not , ] seq* hide grouped ; : 'repeat0' ( -- parser ) @@ -210,6 +206,19 @@ DEFER: 'choice' : 'optional' ( -- parser ) [ <ebnf-optional> ] "?" syntax grouped ; +: 'factor-code' ( -- parser ) + [ + "]]" token ensure-not , + [ drop t ] satisfy , + ] seq* [ first ] action repeat0 [ >string ] action ; + +: 'action' ( -- parser ) + [ + "(" [ 'choice' sp ] delay ")" syntax-pack , + "[[" 'factor-code' "]]" syntax-pack , + ] seq* [ first2 <ebnf-action> ] action ; + + : 'ensure-not' ( -- parser ) #! Parses the '!' syntax to ensure that #! something that matches the following elements do @@ -229,6 +238,7 @@ DEFER: 'choice' 'repeat0' sp , 'repeat1' sp , 'optional' sp , + 'action' sp , ] choice* repeat1 [ dup length 1 = [ first ] [ <ebnf-sequence> ] if ] action ; @@ -237,29 +247,12 @@ DEFER: 'choice' 'sequence' sp "|" token sp list-of [ dup length 1 = [ first ] [ <ebnf-choice> ] if ] action ; - -: 'factor-code' ( -- parser ) - [ - "]]" token ensure-not , - [ drop t ] satisfy , - ] seq* [ first ] action repeat0 [ >string ] action ; - -: 'action' ( -- parser ) - "[[" 'factor-code' "]]" syntax-pack [ <ebnf-action> ] action ; - -: 'rhs' ( -- parser ) - [ - 'choice' , - 'action' sp optional , - ] seq* repeat1 [ - dup length 1 = [ first ] [ <ebnf-sequence> ] if - ] action ; : 'rule' ( -- parser ) [ 'non-terminal' [ ebnf-non-terminal-symbol ] action , "=" syntax , - 'rhs' , + 'choice' , ] seq* [ first2 <ebnf-rule> ] action ; : 'ebnf' ( -- parser ) From 97b58580a7a0bb633d88c1f7855ba3ad7a2cbf03 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Thu, 20 Mar 2008 03:30:53 +1300 Subject: [PATCH 13/13] Add expression evaluator example for EBNF --- extra/peg/expr/authors.txt | 1 + extra/peg/expr/expr.factor | 30 ++++++++++++++++++++++++++++++ extra/peg/expr/summary.txt | 1 + extra/peg/expr/tags.txt | 1 + 4 files changed, 33 insertions(+) create mode 100644 extra/peg/expr/authors.txt create mode 100644 extra/peg/expr/expr.factor create mode 100644 extra/peg/expr/summary.txt create mode 100644 extra/peg/expr/tags.txt diff --git a/extra/peg/expr/authors.txt b/extra/peg/expr/authors.txt new file mode 100644 index 0000000000..44b06f94bc --- /dev/null +++ b/extra/peg/expr/authors.txt @@ -0,0 +1 @@ +Chris Double diff --git a/extra/peg/expr/expr.factor b/extra/peg/expr/expr.factor new file mode 100644 index 0000000000..ed13ac0e50 --- /dev/null +++ b/extra/peg/expr/expr.factor @@ -0,0 +1,30 @@ +! Copyright (C) 2008 Chris Double. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel arrays strings math.parser sequences +peg peg.ebnf peg.parsers memoize math ; +IN: peg.expr + +: operator-fold ( lhs seq -- value ) + #! Perform a fold of a lhs, followed by a sequence of pairs being + #! { operator rhs } in to a tree structure of the correct precedence. + swap [ first2 swap call ] reduce ; + +<EBNF + +times = ("*") [[ drop [ * ] ]] +divide = ("/") [[ drop [ / ] ]] +add = ("+") [[ drop [ + ] ]] +subtract = ("-") [[ drop [ - ] ]] + +digit = "0" | "1" | "2" | "3" | "4" | + "5" | "6" | "7" | "8" | "9" +number = ((digit)+) [[ concat string>number ]] + +value = number | ("(" expr ")") [[ second ]] +product = (value ((times | divide) value)*) [[ first2 operator-fold ]] +sum = (product ((add | subtract) product)*) [[ first2 operator-fold ]] +expr = sum +EBNF> + +: eval-expr ( string -- number ) + expr parse parse-result-ast ; \ No newline at end of file diff --git a/extra/peg/expr/summary.txt b/extra/peg/expr/summary.txt new file mode 100644 index 0000000000..6c3c140b2b --- /dev/null +++ b/extra/peg/expr/summary.txt @@ -0,0 +1 @@ +Simple expression evaluator using EBNF diff --git a/extra/peg/expr/tags.txt b/extra/peg/expr/tags.txt new file mode 100644 index 0000000000..9da56880c0 --- /dev/null +++ b/extra/peg/expr/tags.txt @@ -0,0 +1 @@ +parsing