From 4b37c9098ef8be2b9471d80d889af7bbe1d61d81 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 19 Mar 2008 16:54:42 +1300 Subject: [PATCH 01/20] Use multiline for parsing EBNF string --- extra/peg/ebnf/ebnf.factor | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 5d7d7297ef..4563783ab0 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: kernel parser words arrays strings math.parser sequences quotations vectors namespaces math assocs continuations peg - peg.parsers unicode.categories ; + peg.parsers unicode.categories multiline ; IN: peg.ebnf TUPLE: ebnf-non-terminal symbol ; @@ -182,4 +182,4 @@ DEFER: 'choice' f ] if* ; -: " parse-tokens " " join ebnf>quot call ; parsing +: " parse-multiline-string ebnf>quot call ; parsing From cc9a17b551980b43b016bdc7154bcf7c65d12ccf Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 19 Mar 2008 17:00:53 +1300 Subject: [PATCH 02/20] Use choice* and seq* in ebnf --- extra/peg/ebnf/ebnf.factor | 70 ++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 4563783ab0..81fc215bd9 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -100,33 +100,46 @@ M: ebnf (generate-parser) ( ast -- id ) DEFER: 'rhs' : 'non-terminal' ( -- parser ) - CHAR: a CHAR: z range "-" token [ first ] action 2array choice repeat1 [ >string ] action ; + [ + CHAR: a CHAR: z range , + "-" token [ first ] action , + ] choice* repeat1 [ >string ] action ; : 'terminal' ( -- parser ) - "'" token hide [ CHAR: ' = not ] satisfy repeat1 "'" token hide 3array seq [ first >string ] action ; + [ + "'" token hide , + [ CHAR: ' = not ] satisfy repeat1 , + "'" token hide , + ] seq* [ first >string ] action ; : 'element' ( -- parser ) - 'non-terminal' 'terminal' 2array choice ; + [ + 'non-terminal' , + 'terminal' , + ] choice* ; DEFER: 'choice' : 'group' ( -- parser ) - "(" token sp hide - [ 'choice' sp ] delay - ")" token sp hide - 3array seq [ first ] action ; + [ + "(" token sp hide , + [ 'choice' sp ] delay , + ")" token sp hide , + ] seq* [ first ] action ; : 'repeat0' ( -- parser ) - "{" token sp hide - [ 'choice' sp ] delay - "}" token sp hide - 3array seq [ first ] action ; + [ + "{" token sp hide , + [ 'choice' sp ] delay , + "}" token sp hide , + ] seq* [ first ] action ; : 'optional' ( -- parser ) - "[" token sp hide - [ 'choice' sp ] delay - "]" token sp hide - 3array seq [ first ] action ; + [ + "[" token sp hide , + [ 'choice' sp ] delay , + "]" token sp hide , + ] seq* [ first ] action ; : 'sequence' ( -- parser ) [ @@ -134,8 +147,7 @@ DEFER: 'choice' 'group' sp , 'repeat0' sp , 'optional' sp , - ] { } make choice - repeat1 [ + ] choice* repeat1 [ dup length 1 = [ first ] [ ] if ] action ; @@ -145,18 +157,26 @@ DEFER: 'choice' ] action ; : 'action' ( -- parser ) - "=>" token hide - [ blank? ] satisfy ensure-not [ drop t ] satisfy 2array seq [ first ] action repeat1 [ >string ] action sp - 2array seq [ first ] action ; + [ + "=>" token hide , + [ + [ blank? ] satisfy ensure-not , + [ drop t ] satisfy , + ] seq* [ first ] action repeat1 [ >string ] action sp , + ] seq* [ first ] action ; : 'rhs' ( -- parser ) - 'choice' 'action' sp optional 2array seq ; + [ + 'choice' , + 'action' sp optional , + ] seq* ; : 'rule' ( -- parser ) - 'non-terminal' [ ebnf-non-terminal-symbol ] action - "=" token sp hide - 'rhs' - 3array seq [ first2 ] action ; + [ + 'non-terminal' [ ebnf-non-terminal-symbol ] action , + "=" token sp hide , + 'rhs' , + ] seq* [ first2 ] action ; : 'ebnf' ( -- parser ) 'rule' sp "." token sp hide list-of [ ] action ; From 757853812271dbeb31c97f5d33d2f4bf14f9f55f Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 19 Mar 2008 17:34:28 +1300 Subject: [PATCH 03/20] Minor tidyup of ebnf --- extra/peg/ebnf/ebnf-tests.factor | 17 +++++++++++++ extra/peg/ebnf/ebnf.factor | 42 ++++++++++++++++++++++++-------- 2 files changed, 49 insertions(+), 10 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 452da8df05..156f8e9389 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -97,3 +97,20 @@ IN: peg.ebnf.tests } [ "one [ two ] three" 'choice' parse parse-result-ast ] unit-test + +{ "foo" } [ + "\"foo\"" 'identifier' parse parse-result-ast +] unit-test + +{ "foo" } [ + "'foo'" 'identifier' parse parse-result-ast +] unit-test + +{ "foo" } [ + "foo" 'non-terminal' parse parse-result-ast ebnf-non-terminal-symbol +] unit-test + +{ "foo" } [ + "foo]" 'non-terminal' parse parse-result-ast ebnf-non-terminal-symbol +] unit-test + diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 81fc215bd9..9a3b70fa1c 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: kernel parser words arrays strings math.parser sequences quotations vectors namespaces math assocs continuations peg - peg.parsers unicode.categories multiline ; + peg.parsers unicode.categories multiline combinators.lib ; IN: peg.ebnf TUPLE: ebnf-non-terminal symbol ; @@ -99,18 +99,40 @@ M: ebnf (generate-parser) ( ast -- id ) DEFER: 'rhs' +: 'identifier' ( -- parser ) + #! Return a parser that parses an identifer delimited by + #! a quotation character. The quotation can be single + #! or double quotes. The AST produced is the identifier + #! between the quotes. + [ + [ CHAR: " = not ] satisfy repeat1 "\"" "\"" surrounded-by , + [ CHAR: ' = not ] satisfy repeat1 "'" "'" surrounded-by , + ] choice* [ >string ] action ; + : 'non-terminal' ( -- parser ) - [ - CHAR: a CHAR: z range , - "-" token [ first ] action , - ] choice* repeat1 [ >string ] action ; + #! A non-terminal is the name of another rule. It can + #! be any non-blank character except for characters used + #! in the EBNF syntax itself. + [ + { + [ dup blank? ] + [ dup CHAR: " = ] + [ dup CHAR: ' = ] + [ dup CHAR: | = ] + [ dup CHAR: { = ] + [ dup CHAR: } = ] + [ dup CHAR: = = ] + [ dup CHAR: ) = ] + [ dup CHAR: ( = ] + [ dup CHAR: ] = ] + [ dup CHAR: [ = ] + } || not nip + ] satisfy repeat1 [ >string ] action ; : 'terminal' ( -- parser ) - [ - "'" token hide , - [ CHAR: ' = not ] satisfy repeat1 , - "'" token hide , - ] seq* [ first >string ] action ; + #! A terminal is an identifier enclosed in quotations + #! and it represents the literal value of the identifier. + 'identifier' [ ] action ; : 'element' ( -- parser ) [ From 708d55fb8ef4777cb3464b498d794d04a7f96a3a Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 19 Mar 2008 17:37:08 +1300 Subject: [PATCH 04/20] Add syntax word for ebnf --- extra/peg/ebnf/ebnf.factor | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 9a3b70fa1c..e2977a28fb 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -99,6 +99,11 @@ M: ebnf (generate-parser) ( ast -- id ) DEFER: 'rhs' +: syntax ( string -- parser ) + #! Parses the string, ignoring white space, and + #! does not put the result in the AST. + token sp hide ; + : 'identifier' ( -- parser ) #! Return a parser that parses an identifer delimited by #! a quotation character. The quotation can be single @@ -144,23 +149,23 @@ DEFER: 'choice' : 'group' ( -- parser ) [ - "(" token sp hide , + "(" syntax , [ 'choice' sp ] delay , - ")" token sp hide , + ")" syntax , ] seq* [ first ] action ; : 'repeat0' ( -- parser ) [ - "{" token sp hide , + "{" syntax , [ 'choice' sp ] delay , - "}" token sp hide , + "}" syntax , ] seq* [ first ] action ; : 'optional' ( -- parser ) [ - "[" token sp hide , + "[" syntax , [ 'choice' sp ] delay , - "]" token sp hide , + "]" syntax , ] seq* [ first ] action ; : 'sequence' ( -- parser ) @@ -196,12 +201,12 @@ DEFER: 'choice' : 'rule' ( -- parser ) [ 'non-terminal' [ ebnf-non-terminal-symbol ] action , - "=" token sp hide , + "=" syntax , 'rhs' , ] seq* [ first2 ] action ; : 'ebnf' ( -- parser ) - 'rule' sp "." token sp hide list-of [ ] action ; + 'rule' sp "." syntax list-of [ ] action ; : ebnf>quot ( string -- quot ) 'ebnf' parse [ From 9403d97e22c1e0e59ce4285b033b4db5e4f18b2b Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 19 Mar 2008 17:52:22 +1300 Subject: [PATCH 05/20] Add syntax-pack and grouped to ebnf refactoring --- extra/peg/ebnf/ebnf.factor | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index e2977a28fb..fce7a8d3bd 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -104,6 +104,11 @@ DEFER: 'rhs' #! does not put the result in the AST. token sp hide ; +: syntax-pack ( begin parser end -- parser ) + #! Parse 'parser' surrounded by syntax elements + #! begin and end. + [ syntax ] dipd syntax pack ; + : 'identifier' ( -- parser ) #! Return a parser that parses an identifer delimited by #! a quotation character. The quotation can be single @@ -147,26 +152,20 @@ DEFER: 'rhs' DEFER: 'choice' +: grouped ( begin quot end -- parser ) + #! Parse a group of choices, where the delimiter for the + #! group is specified by 'begin' and 'end'. The quotation + #! should produce the AST to be the result of the parser. + [ [ 'choice' sp ] delay swap action ] dip syntax-pack ; + : 'group' ( -- parser ) - [ - "(" syntax , - [ 'choice' sp ] delay , - ")" syntax , - ] seq* [ first ] action ; + "(" [ ] ")" grouped ; : 'repeat0' ( -- parser ) - [ - "{" syntax , - [ 'choice' sp ] delay , - "}" syntax , - ] seq* [ first ] action ; + "{" [ ] "}" grouped ; : 'optional' ( -- parser ) - [ - "[" syntax , - [ 'choice' sp ] delay , - "]" syntax , - ] seq* [ first ] action ; + "[" [ ] "]" grouped ; : 'sequence' ( -- parser ) [ @@ -174,14 +173,14 @@ DEFER: 'choice' 'group' sp , 'repeat0' sp , 'optional' sp , - ] choice* repeat1 [ + ] choice* repeat1 [ dup length 1 = [ first ] [ ] if - ] action ; + ] action ; : 'choice' ( -- parser ) 'sequence' sp "|" token sp list-of [ dup length 1 = [ first ] [ ] if - ] action ; + ] action ; : 'action' ( -- parser ) [ From eef6ae782730ba22a779997023c20d71730abcae Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 19 Mar 2008 18:07:25 +1300 Subject: [PATCH 06/20] Remove need for '.' to terminate rule lines in EBNF --- extra/peg/ebnf/ebnf.factor | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index fce7a8d3bd..e95fc4f9d4 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -145,10 +145,17 @@ DEFER: 'rhs' 'identifier' [ ] action ; : 'element' ( -- parser ) - [ - 'non-terminal' , - 'terminal' , - ] choice* ; + #! An element of a rule. It can be a terminal or a + #! non-terminal but must not be followed by a "=". + #! The latter indicates that it is the beginning of a + #! new rule. + [ + [ + 'non-terminal' , + 'terminal' , + ] choice* , + "=" syntax ensure-not , + ] seq* [ first ] action ; DEFER: 'choice' @@ -168,6 +175,8 @@ DEFER: 'choice' "[" [ ] "]" grouped ; : 'sequence' ( -- parser ) + #! A sequence of terminals and non-terminals, including + #! groupings of those. [ 'element' sp , 'group' sp , @@ -205,7 +214,7 @@ DEFER: 'choice' ] seq* [ first2 ] action ; : 'ebnf' ( -- parser ) - 'rule' sp "." syntax list-of [ ] action ; + 'rule' sp repeat1 [ ] action ; : ebnf>quot ( string -- quot ) 'ebnf' parse [ From 208c88c44949f72f62d9cd6ffbf700d301232963 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 19 Mar 2008 18:35:45 +1300 Subject: [PATCH 07/20] Update pl0 for ebnf changes, and add more tests --- extra/peg/pl0/pl0-tests.factor | 88 +++++++++++++++++++++++++++++++++- extra/peg/pl0/pl0.factor | 35 +++++++------- 2 files changed, 105 insertions(+), 18 deletions(-) diff --git a/extra/peg/pl0/pl0-tests.factor b/extra/peg/pl0/pl0-tests.factor index fa8ac89f57..bf321d54e9 100644 --- a/extra/peg/pl0/pl0-tests.factor +++ b/extra/peg/pl0/pl0-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test peg peg.pl0 ; +USING: kernel tools.test peg peg.pl0 multiline sequences ; IN: peg.pl0.tests { "abc" } [ @@ -11,3 +11,89 @@ IN: peg.pl0.tests { 55 } [ "55abc" number parse parse-result-ast ] unit-test + +{ t } [ + <" +VAR x, squ; + +PROCEDURE square; +BEGIN + squ := x * x +END; + +BEGIN + x := 1; + WHILE x <= 10 DO + BEGIN + CALL square; + x := x + 1; + END +END. +"> program parse parse-result-remaining empty? +] unit-test + +{ f } [ + <" +CONST + m = 7, + n = 85; + +VAR + x, y, z, q, r; + +PROCEDURE multiply; +VAR a, b; + +BEGIN + a := x; + b := y; + z := 0; + WHILE b > 0 DO BEGIN + IF ODD b THEN z := z + a; + a := 2 * a; + b := b / 2; + END +END; + +PROCEDURE divide; +VAR w; +BEGIN + r := x; + q := 0; + w := y; + WHILE w <= r DO w := 2 * w; + WHILE w > y DO BEGIN + q := 2 * q; + w := w / 2; + IF w <= r THEN BEGIN + r := r - w; + q := q + 1 + END + END +END; + +PROCEDURE gcd; +VAR f, g; +BEGIN + f := x; + g := y; + WHILE f # g DO BEGIN + IF f < g THEN g := g - f; + IF g < f THEN f := f - g; + END; + z := f +END; + +BEGIN + x := m; + y := n; + CALL multiply; + x := 25; + y := 3; + CALL divide; + x := 84; + y := 36; + CALL gcd; +END. + "> program parse parse-result-remaining empty? +] unit-test \ No newline at end of file diff --git a/extra/peg/pl0/pl0.factor b/extra/peg/pl0/pl0.factor index 6844eb44dc..1ef7a23b41 100644 --- a/extra/peg/pl0/pl0.factor +++ b/extra/peg/pl0/pl0.factor @@ -1,30 +1,31 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. USING: kernel arrays strings math.parser sequences -peg peg.ebnf peg.parsers memoize ; +peg peg.ebnf peg.parsers memoize namespaces ; IN: peg.pl0 #! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0 MEMO: ident ( -- parser ) - CHAR: a CHAR: z range - CHAR: A CHAR: Z range 2array choice repeat1 - [ >string ] action ; + [ + CHAR: a CHAR: z range , + CHAR: A CHAR: Z range , + ] choice* repeat1 [ >string ] action ; MEMO: number ( -- parser ) CHAR: 0 CHAR: 9 range repeat1 [ string>number ] action ; =' | '>') expression . -expression = ['+' | '-'] term {('+' | '-') term } . -term = factor {('*' | '/') factor } . -factor = ident | number | '(' expression ')' +program = block "." +block = [ "CONST" ident "=" number { "," ident "=" number } ";" ] + [ "VAR" ident { "," ident } ";" ] + { "PROCEDURE" ident ";" [ block ";" ] } statement +statement = [ ident ":=" expression | "CALL" ident | + "BEGIN" statement {";" statement } "END" | + "IF" condition "THEN" statement | + "WHILE" condition "DO" statement ] +condition = "ODD" expression | + expression ("=" | "#" | "<=" | "<" | ">=" | ">") expression +expression = ["+" | "-"] term {("+" | "-") term } +term = factor {("*" | "/") factor } +factor = ident | number | "(" expression ")" EBNF> From 64135b73e1b029c49af511a9d32307b5c473b52a Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 19 Mar 2008 19:15:52 +1300 Subject: [PATCH 08/20] Add support for ensure-not and parsing any single character to EBNF This allows, for example: foo = {!("_" | "-") .} This will match zero or more of any character, except for _ or - --- extra/peg/ebnf/ebnf-tests.factor | 1 + extra/peg/ebnf/ebnf.factor | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 156f8e9389..86a7a454ed 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -114,3 +114,4 @@ IN: peg.ebnf.tests "foo]" 'non-terminal' parse parse-result-ast ebnf-non-terminal-symbol ] unit-test + diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index e95fc4f9d4..4dc096ecbd 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -7,6 +7,8 @@ IN: peg.ebnf TUPLE: ebnf-non-terminal symbol ; TUPLE: ebnf-terminal symbol ; +TUPLE: ebnf-any-character ; +TUPLE: ebnf-ensure-not group ; TUPLE: ebnf-choice options ; TUPLE: ebnf-sequence elements ; TUPLE: ebnf-repeat0 group ; @@ -17,6 +19,8 @@ TUPLE: ebnf rules ; C: ebnf-non-terminal C: ebnf-terminal +C: ebnf-any-character +C: ebnf-ensure-not C: ebnf-choice C: ebnf-sequence C: ebnf-repeat0 @@ -61,6 +65,9 @@ M: ebnf-non-terminal (generate-parser) ( ast -- id ) parsers get , \ nth , [ search ] [ 2drop f ] recover , \ or , ] [ ] make delay sp store-parser ; +M: ebnf-any-character (generate-parser) ( ast -- id ) + drop [ drop t ] satisfy store-parser ; + M: ebnf-choice (generate-parser) ( ast -- id ) ebnf-choice-options [ generate-parser get-parser @@ -71,6 +78,9 @@ M: ebnf-sequence (generate-parser) ( ast -- id ) generate-parser get-parser ] map seq store-parser ; +M: ebnf-ensure-not (generate-parser) ( ast -- id ) + ebnf-ensure-not-group generate-parser get-parser ensure-not store-parser ; + M: ebnf-repeat0 (generate-parser) ( ast -- id ) ebnf-repeat0-group generate-parser get-parser repeat0 store-parser ; @@ -136,6 +146,8 @@ DEFER: 'rhs' [ dup CHAR: ( = ] [ dup CHAR: ] = ] [ dup CHAR: [ = ] + [ dup CHAR: . = ] + [ dup CHAR: ! = ] } || not nip ] satisfy repeat1 [ >string ] action ; @@ -144,6 +156,10 @@ DEFER: 'rhs' #! and it represents the literal value of the identifier. 'identifier' [ ] action ; +: 'any-character' ( -- parser ) + #! A parser to match the symbol for any character match. + [ CHAR: . = ] satisfy [ drop ] action ; + : 'element' ( -- parser ) #! An element of a rule. It can be a terminal or a #! non-terminal but must not be followed by a "=". @@ -153,6 +169,7 @@ DEFER: 'rhs' [ 'non-terminal' , 'terminal' , + 'any-character' , ] choice* , "=" syntax ensure-not , ] seq* [ first ] action ; @@ -174,10 +191,20 @@ DEFER: 'choice' : 'optional' ( -- parser ) "[" [ ] "]" grouped ; +: 'ensure-not' ( -- parser ) + #! Parses the '!' syntax to ensure that + #! something that matches the following elements do + #! not exist in the parse stream. + [ + "!" syntax , + 'group' sp , + ] seq* [ first ] action ; + : 'sequence' ( -- parser ) #! A sequence of terminals and non-terminals, including #! groupings of those. [ + 'ensure-not' sp , 'element' sp , 'group' sp , 'repeat0' sp , From 82d54d37769a30663face16e7bbd6c800bee8171 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Thu, 20 Mar 2008 01:18:09 +1300 Subject: [PATCH 09/20] EBNF syntax change [ ... ] is now ( ... )? { ... } is now ( ... )* Added ( ... )+ --- extra/peg/ebnf/ebnf-tests.factor | 4 ++-- extra/peg/ebnf/ebnf.factor | 34 +++++++++++++++++++++++--------- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 86a7a454ed..6838bf3eca 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -83,7 +83,7 @@ IN: peg.ebnf.tests } } } [ - "one {(two | three) four}" 'choice' parse parse-result-ast + "one ((two | three) four)*" 'choice' parse parse-result-ast ] unit-test { @@ -95,7 +95,7 @@ IN: peg.ebnf.tests } } } [ - "one [ two ] three" 'choice' parse parse-result-ast + "one ( two )? three" 'choice' parse parse-result-ast ] unit-test { "foo" } [ diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 4dc096ecbd..59695998ce 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -12,6 +12,7 @@ TUPLE: ebnf-ensure-not group ; TUPLE: ebnf-choice options ; TUPLE: ebnf-sequence elements ; TUPLE: ebnf-repeat0 group ; +TUPLE: ebnf-repeat1 group ; TUPLE: ebnf-optional elements ; TUPLE: ebnf-rule symbol elements ; TUPLE: ebnf-action word ; @@ -24,6 +25,7 @@ C: ebnf-ensure-not C: ebnf-choice C: ebnf-sequence C: ebnf-repeat0 +C: ebnf-repeat1 C: ebnf-optional C: ebnf-rule C: ebnf-action @@ -84,6 +86,9 @@ M: ebnf-ensure-not (generate-parser) ( ast -- id ) M: ebnf-repeat0 (generate-parser) ( ast -- id ) ebnf-repeat0-group generate-parser get-parser repeat0 store-parser ; +M: ebnf-repeat1 (generate-parser) ( ast -- id ) + ebnf-repeat1-group generate-parser get-parser repeat1 store-parser ; + M: ebnf-optional (generate-parser) ( ast -- id ) ebnf-optional-elements generate-parser get-parser optional store-parser ; @@ -176,20 +181,30 @@ DEFER: 'rhs' DEFER: 'choice' -: grouped ( begin quot end -- parser ) - #! Parse a group of choices, where the delimiter for the - #! group is specified by 'begin' and 'end'. The quotation - #! should produce the AST to be the result of the parser. - [ [ 'choice' sp ] delay swap action ] dip syntax-pack ; - +: grouped ( quot suffix -- parser ) + #! Parse a group of choices, with a suffix indicating + #! the type of group (repeat0, repeat1, etc) and + #! an quot that is the action that produces the AST. + "(" [ 'choice' sp ] delay ")" syntax-pack + swap 2seq + [ first ] rot compose action ; + : 'group' ( -- parser ) - "(" [ ] ")" grouped ; + #! A grouping with no suffix. Used for precedence. + [ ] [ + "*" token sp ensure-not , + "+" token sp ensure-not , + "?" token sp ensure-not , + ] seq* hide grouped ; : 'repeat0' ( -- parser ) - "{" [ ] "}" grouped ; + [ ] "*" syntax grouped ; + +: 'repeat1' ( -- parser ) + [ ] "+" syntax grouped ; : 'optional' ( -- parser ) - "[" [ ] "]" grouped ; + [ ] "?" syntax grouped ; : 'ensure-not' ( -- parser ) #! Parses the '!' syntax to ensure that @@ -208,6 +223,7 @@ DEFER: 'choice' 'element' sp , 'group' sp , 'repeat0' sp , + 'repeat1' sp , 'optional' sp , ] choice* repeat1 [ dup length 1 = [ first ] [ ] if From c0b7bdf823001f4389e7f13df86d05a16dba0822 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Thu, 20 Mar 2008 01:25:57 +1300 Subject: [PATCH 10/20] Add *, + and ? to list of non-allowed ebnf identifier characteres --- extra/peg/ebnf/ebnf.factor | 3 +++ 1 file changed, 3 insertions(+) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 59695998ce..b500d82e98 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -153,6 +153,9 @@ DEFER: 'rhs' [ dup CHAR: [ = ] [ dup CHAR: . = ] [ dup CHAR: ! = ] + [ dup CHAR: * = ] + [ dup CHAR: + = ] + [ dup CHAR: ? = ] } || not nip ] satisfy repeat1 [ >string ] action ; From 65fabeec11956cf7d2d7ddacd50b33b7d6e10823 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Thu, 20 Mar 2008 02:16:30 +1300 Subject: [PATCH 11/20] remove => action and replace it with [[ code ]] in EBNF Previously the action had to be a factor word and could only appear at the end of a rule: : aword ( ast -- ast ) drop V{ 1 2 } ; aword EBNF> Now actions can appear anywhere after an element, and can be any factor code between [[ ... ]] delimiters: Unfortunately since this means the ebnf>quot code uses the equivalent of eval, it no longer compiles nicely since it can't be inferred. The generated parsers however do compile. --- extra/peg/ebnf/ebnf-tests.factor | 12 +++++++++++- extra/peg/ebnf/ebnf.factor | 25 ++++++++++++++----------- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 6838bf3eca..63cec2f120 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test peg peg.ebnf ; +USING: kernel tools.test peg peg.ebnf compiler.units ; IN: peg.ebnf.tests { T{ ebnf-non-terminal f "abc" } } [ @@ -114,4 +114,14 @@ IN: peg.ebnf.tests "foo]" 'non-terminal' parse parse-result-ast ebnf-non-terminal-symbol ] unit-test +{ V{ "a" "b" } } [ + "foo='a' 'b'" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast +] unit-test +{ V{ 1 "b" } } [ + "foo='a' [[ drop 1 ]] 'b'" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast +] unit-test + +{ V{ 1 2 } } [ + "foo='a' [[ drop 1 ]] 'b' [[ drop 2 ]]" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast +] unit-test \ No newline at end of file diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index b500d82e98..2e0740663a 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -2,7 +2,8 @@ ! See http://factorcode.org/license.txt for BSD license. USING: kernel parser words arrays strings math.parser sequences quotations vectors namespaces math assocs continuations peg - peg.parsers unicode.categories multiline combinators.lib ; + peg.parsers unicode.categories multiline combinators.lib + splitting ; IN: peg.ebnf TUPLE: ebnf-non-terminal symbol ; @@ -15,7 +16,7 @@ TUPLE: ebnf-repeat0 group ; TUPLE: ebnf-repeat1 group ; TUPLE: ebnf-optional elements ; TUPLE: ebnf-rule symbol elements ; -TUPLE: ebnf-action word ; +TUPLE: ebnf-action code ; TUPLE: ebnf rules ; C: ebnf-non-terminal @@ -98,7 +99,7 @@ M: ebnf-rule (generate-parser) ( ast -- id ) swap [ parsers get set-nth ] keep ; M: ebnf-action (generate-parser) ( ast -- id ) - ebnf-action-word search 1quotation + ebnf-action-code string-lines parse-lines last-parser get get-parser swap action store-parser ; M: vector (generate-parser) ( ast -- id ) @@ -237,20 +238,22 @@ DEFER: 'choice' dup length 1 = [ first ] [ ] if ] action ; -: 'action' ( -- parser ) +: 'factor-code' ( -- parser ) [ - "=>" token hide , - [ - [ blank? ] satisfy ensure-not , - [ drop t ] satisfy , - ] seq* [ first ] action repeat1 [ >string ] action sp , - ] seq* [ first ] action ; + "]]" token ensure-not , + [ drop t ] satisfy , + ] seq* [ first ] action repeat0 [ >string ] action ; + +: 'action' ( -- parser ) + "[[" 'factor-code' "]]" syntax-pack [ ] action ; : 'rhs' ( -- parser ) [ 'choice' , 'action' sp optional , - ] seq* ; + ] seq* repeat1 [ + dup length 1 = [ first ] [ ] if + ] action ; : 'rule' ( -- parser ) [ From 92d8140d87cff4015eb9d396296db0d015d7e0dd Mon Sep 17 00:00:00 2001 From: Chris Double Date: Thu, 20 Mar 2008 03:05:40 +1300 Subject: [PATCH 12/20] Change ebnf-action to properly nest with attached parser This allows removal of last-parser hack. Syntax of EBNF changes though. Now an action must attach to a group: --- extra/peg/ebnf/ebnf-tests.factor | 18 ++++-------- extra/peg/ebnf/ebnf.factor | 49 ++++++++++++++------------------ 2 files changed, 27 insertions(+), 40 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 63cec2f120..8846a9c94c 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -15,11 +15,8 @@ IN: peg.ebnf.tests { T{ ebnf-rule f "digit" - V{ - T{ ebnf-choice f - V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } } - } - f + T{ ebnf-choice f + V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } } } } } [ @@ -29,11 +26,8 @@ IN: peg.ebnf.tests { T{ ebnf-rule f "digit" - V{ - T{ ebnf-sequence f - V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } } - } - f + T{ ebnf-sequence f + V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } } } } } [ @@ -119,9 +113,9 @@ IN: peg.ebnf.tests ] unit-test { V{ 1 "b" } } [ - "foo='a' [[ drop 1 ]] 'b'" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast + "foo=('a')[[ drop 1 ]] 'b'" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast ] unit-test { V{ 1 2 } } [ - "foo='a' [[ drop 1 ]] 'b' [[ drop 2 ]]" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast + "foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]]" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast ] unit-test \ No newline at end of file diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 2e0740663a..e2c2dd5006 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -16,7 +16,7 @@ TUPLE: ebnf-repeat0 group ; TUPLE: ebnf-repeat1 group ; TUPLE: ebnf-optional elements ; TUPLE: ebnf-rule symbol elements ; -TUPLE: ebnf-action code ; +TUPLE: ebnf-action parser code ; TUPLE: ebnf rules ; C: ebnf-non-terminal @@ -34,12 +34,10 @@ C: ebnf SYMBOL: parsers SYMBOL: non-terminals -SYMBOL: last-parser : reset-parser-generation ( -- ) V{ } clone parsers set - H{ } clone non-terminals set - f last-parser set ; + H{ } clone non-terminals set ; : store-parser ( parser -- number ) parsers get [ push ] keep length 1- ; @@ -57,7 +55,7 @@ SYMBOL: last-parser GENERIC: (generate-parser) ( ast -- id ) : generate-parser ( ast -- id ) - (generate-parser) dup last-parser set ; + (generate-parser) ; M: ebnf-terminal (generate-parser) ( ast -- id ) ebnf-terminal-symbol token sp store-parser ; @@ -99,15 +97,12 @@ M: ebnf-rule (generate-parser) ( ast -- id ) swap [ parsers get set-nth ] keep ; M: ebnf-action (generate-parser) ( ast -- id ) - ebnf-action-code string-lines parse-lines - last-parser get get-parser swap action store-parser ; + [ ebnf-action-parser generate-parser get-parser ] keep + ebnf-action-code string-lines parse-lines action store-parser ; M: vector (generate-parser) ( ast -- id ) [ generate-parser ] map peek ; -M: f (generate-parser) ( ast -- id ) - drop last-parser get ; - M: ebnf (generate-parser) ( ast -- id ) ebnf-rules [ generate-parser @@ -199,6 +194,7 @@ DEFER: 'choice' "*" token sp ensure-not , "+" token sp ensure-not , "?" token sp ensure-not , + "[[" token sp ensure-not , ] seq* hide grouped ; : 'repeat0' ( -- parser ) @@ -210,6 +206,19 @@ DEFER: 'choice' : 'optional' ( -- parser ) [ ] "?" syntax grouped ; +: 'factor-code' ( -- parser ) + [ + "]]" token ensure-not , + [ drop t ] satisfy , + ] seq* [ first ] action repeat0 [ >string ] action ; + +: 'action' ( -- parser ) + [ + "(" [ 'choice' sp ] delay ")" syntax-pack , + "[[" 'factor-code' "]]" syntax-pack , + ] seq* [ first2 ] action ; + + : 'ensure-not' ( -- parser ) #! Parses the '!' syntax to ensure that #! something that matches the following elements do @@ -229,6 +238,7 @@ DEFER: 'choice' 'repeat0' sp , 'repeat1' sp , 'optional' sp , + 'action' sp , ] choice* repeat1 [ dup length 1 = [ first ] [ ] if ] action ; @@ -237,29 +247,12 @@ DEFER: 'choice' 'sequence' sp "|" token sp list-of [ dup length 1 = [ first ] [ ] if ] action ; - -: 'factor-code' ( -- parser ) - [ - "]]" token ensure-not , - [ drop t ] satisfy , - ] seq* [ first ] action repeat0 [ >string ] action ; - -: 'action' ( -- parser ) - "[[" 'factor-code' "]]" syntax-pack [ ] action ; - -: 'rhs' ( -- parser ) - [ - 'choice' , - 'action' sp optional , - ] seq* repeat1 [ - dup length 1 = [ first ] [ ] if - ] action ; : 'rule' ( -- parser ) [ 'non-terminal' [ ebnf-non-terminal-symbol ] action , "=" syntax , - 'rhs' , + 'choice' , ] seq* [ first2 ] action ; : 'ebnf' ( -- parser ) From 97b58580a7a0bb633d88c1f7855ba3ad7a2cbf03 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Thu, 20 Mar 2008 03:30:53 +1300 Subject: [PATCH 13/20] Add expression evaluator example for EBNF --- extra/peg/expr/authors.txt | 1 + extra/peg/expr/expr.factor | 30 ++++++++++++++++++++++++++++++ extra/peg/expr/summary.txt | 1 + extra/peg/expr/tags.txt | 1 + 4 files changed, 33 insertions(+) create mode 100644 extra/peg/expr/authors.txt create mode 100644 extra/peg/expr/expr.factor create mode 100644 extra/peg/expr/summary.txt create mode 100644 extra/peg/expr/tags.txt diff --git a/extra/peg/expr/authors.txt b/extra/peg/expr/authors.txt new file mode 100644 index 0000000000..44b06f94bc --- /dev/null +++ b/extra/peg/expr/authors.txt @@ -0,0 +1 @@ +Chris Double diff --git a/extra/peg/expr/expr.factor b/extra/peg/expr/expr.factor new file mode 100644 index 0000000000..ed13ac0e50 --- /dev/null +++ b/extra/peg/expr/expr.factor @@ -0,0 +1,30 @@ +! Copyright (C) 2008 Chris Double. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel arrays strings math.parser sequences +peg peg.ebnf peg.parsers memoize math ; +IN: peg.expr + +: operator-fold ( lhs seq -- value ) + #! Perform a fold of a lhs, followed by a sequence of pairs being + #! { operator rhs } in to a tree structure of the correct precedence. + swap [ first2 swap call ] reduce ; + +number ]] + +value = number | ("(" expr ")") [[ second ]] +product = (value ((times | divide) value)*) [[ first2 operator-fold ]] +sum = (product ((add | subtract) product)*) [[ first2 operator-fold ]] +expr = sum +EBNF> + +: eval-expr ( string -- number ) + expr parse parse-result-ast ; \ No newline at end of file diff --git a/extra/peg/expr/summary.txt b/extra/peg/expr/summary.txt new file mode 100644 index 0000000000..6c3c140b2b --- /dev/null +++ b/extra/peg/expr/summary.txt @@ -0,0 +1 @@ +Simple expression evaluator using EBNF diff --git a/extra/peg/expr/tags.txt b/extra/peg/expr/tags.txt new file mode 100644 index 0000000000..9da56880c0 --- /dev/null +++ b/extra/peg/expr/tags.txt @@ -0,0 +1 @@ +parsing From 264284d0c4dac5d6b70232fc1ff35b1bba0573c8 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Thu, 20 Mar 2008 13:40:22 +1300 Subject: [PATCH 14/20] Add range-pattern parser --- extra/peg/parsers/parsers-docs.factor | 18 ++++++++++++++++ extra/peg/parsers/parsers.factor | 30 ++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/extra/peg/parsers/parsers-docs.factor b/extra/peg/parsers/parsers-docs.factor index 1991cba0eb..d49f1158dd 100755 --- a/extra/peg/parsers/parsers-docs.factor +++ b/extra/peg/parsers/parsers-docs.factor @@ -159,3 +159,21 @@ HELP: 'string' } { $description "Returns a parser that matches an string composed of a \", anything that is not \", and another \"." } { $see-also 'integer' } ; + +HELP: range-pattern +{ $values + { "pattern" "a string" } + { "parser" "a parser" } +} { $description +"Returns a parser that matches a single character based on the set " +"of characters in the pattern string." +"Any single character in the pattern matches that character. " +"If the pattern begins with a ^ then the set is negated " +"(the element matches any character not in the set). Any pair " +"of characters separated with a dash (-) represents the " +"range of characters from the first to the second, inclusive." +{ $examples + { $example "USING: peg peg.parsers prettyprint ;" "\"a\" \"_a-zA-Z\" range-pattern parse parse-result-ast 1string ." "\"a\"" } + { $example "USING: peg peg.parsers prettyprint ;" "\"0\" \"^0-9\" range-pattern parse ." "f" } +} +} ; diff --git a/extra/peg/parsers/parsers.factor b/extra/peg/parsers/parsers.factor index 87306e1469..63e9e9a336 100755 --- a/extra/peg/parsers/parsers.factor +++ b/extra/peg/parsers/parsers.factor @@ -2,7 +2,8 @@ ! See http://factorcode.org/license.txt for BSD license. USING: kernel sequences strings namespaces math assocs shuffle vectors arrays combinators.lib memoize math.parser match - unicode.categories sequences.deep peg peg.private ; + unicode.categories sequences.deep peg peg.private + peg.search math.ranges ; IN: peg.parsers TUPLE: just-parser p1 ; @@ -83,3 +84,30 @@ MEMO: 'string' ( -- parser ) [ CHAR: " = not ] satisfy repeat0 , [ CHAR: " = ] satisfy hide , ] { } make seq [ first >string ] action ; + +: (range-pattern) ( pattern -- string ) + #! Given a range pattern, produce a string containing + #! all characters within that range. + [ + any-char , + [ CHAR: - = ] satisfy hide , + any-char , + ] seq* [ + first2 [a,b] >string + ] action + replace ; + +MEMO: range-pattern ( pattern -- parser ) + #! 'pattern' is a set of characters describing the + #! parser to be produced. Any single character in + #! the pattern matches that character. If the pattern + #! begins with a ^ then the set is negated (the element + #! matches any character not in the set). Any pair of + #! characters separated with a dash (-) represents the + #! range of characters from the first to the second, + #! inclusive. + dup first CHAR: ^ = [ + 1 tail (range-pattern) [ member? not ] curry satisfy + ] [ + (range-pattern) [ member? ] curry satisfy + ] if ; From 795ef0ae3b0a5031b329c84d555a1c64bfeae758 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Thu, 20 Mar 2008 13:55:19 +1300 Subject: [PATCH 15/20] Add ranges to EBNF syntax This works: and --- extra/peg/ebnf/ebnf-tests.factor | 24 ++++++++++++++++++++++++ extra/peg/ebnf/ebnf.factor | 14 ++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 8846a9c94c..458c68e0d4 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -118,4 +118,28 @@ IN: peg.ebnf.tests { V{ 1 2 } } [ "foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]]" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast +] unit-test + +{ CHAR: A } [ + "foo=[A-Z]" ebnf>quot with-compilation-unit "A" foo parse parse-result-ast +] unit-test + +{ CHAR: Z } [ + "foo=[A-Z]" ebnf>quot with-compilation-unit "Z" foo parse parse-result-ast +] unit-test + +{ f } [ + "foo=[A-Z]" ebnf>quot with-compilation-unit "0" foo parse +] unit-test + +{ CHAR: 0 } [ + "foo=[^A-Z]" ebnf>quot with-compilation-unit "0" foo parse parse-result-ast +] unit-test + +{ f } [ + "foo=[^A-Z]" ebnf>quot with-compilation-unit "A" foo parse +] unit-test + +{ f } [ + "foo=[^A-Z]" ebnf>quot with-compilation-unit "Z" foo parse ] unit-test \ No newline at end of file diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index e2c2dd5006..03f36c5f28 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -9,6 +9,7 @@ IN: peg.ebnf TUPLE: ebnf-non-terminal symbol ; TUPLE: ebnf-terminal symbol ; TUPLE: ebnf-any-character ; +TUPLE: ebnf-range pattern ; TUPLE: ebnf-ensure-not group ; TUPLE: ebnf-choice options ; TUPLE: ebnf-sequence elements ; @@ -22,6 +23,7 @@ TUPLE: ebnf rules ; C: ebnf-non-terminal C: ebnf-terminal C: ebnf-any-character +C: ebnf-range C: ebnf-ensure-not C: ebnf-choice C: ebnf-sequence @@ -69,6 +71,9 @@ M: ebnf-non-terminal (generate-parser) ( ast -- id ) M: ebnf-any-character (generate-parser) ( ast -- id ) drop [ drop t ] satisfy store-parser ; +M: ebnf-range (generate-parser) ( ast -- id ) + ebnf-range-pattern range-pattern store-parser ; + M: ebnf-choice (generate-parser) ( ast -- id ) ebnf-choice-options [ generate-parser get-parser @@ -163,6 +168,14 @@ DEFER: 'rhs' : 'any-character' ( -- parser ) #! A parser to match the symbol for any character match. [ CHAR: . = ] satisfy [ drop ] action ; + +: 'range-parser' ( -- parser ) + #! Match the syntax for declaring character ranges + [ + "[" syntax , + [ CHAR: ] = not ] satisfy repeat1 , + "]" syntax , + ] seq* [ first >string ] action ; : 'element' ( -- parser ) #! An element of a rule. It can be a terminal or a @@ -173,6 +186,7 @@ DEFER: 'rhs' [ 'non-terminal' , 'terminal' , + 'range-parser' , 'any-character' , ] choice* , "=" syntax ensure-not , From ec4f964e4f770f912cc9e1674bd790abcebc7f53 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Thu, 20 Mar 2008 14:06:21 +1300 Subject: [PATCH 16/20] Fix pl0 for EBNF syntax changes --- extra/peg/pl0/pl0.factor | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/extra/peg/pl0/pl0.factor b/extra/peg/pl0/pl0.factor index 1ef7a23b41..b30f6bfe70 100644 --- a/extra/peg/pl0/pl0.factor +++ b/extra/peg/pl0/pl0.factor @@ -16,16 +16,16 @@ MEMO: number ( -- parser ) =" | ">") expression -expression = ["+" | "-"] term {("+" | "-") term } -term = factor {("*" | "/") factor } +expression = ("+" | "-")? term (("+" | "-") term )* +term = factor (("*" | "/") factor )* factor = ident | number | "(" expression ")" EBNF> From 68388fbed90e0765925491d2ccc6ff3354bf7c0b Mon Sep 17 00:00:00 2001 From: Chris Double Date: Thu, 20 Mar 2008 14:15:06 +1300 Subject: [PATCH 17/20] Updated peg.expr to use range-pattern for digits --- extra/peg/expr/expr-tests.factor | 25 +++++++++++++++++++++++++ extra/peg/expr/expr.factor | 5 ++--- 2 files changed, 27 insertions(+), 3 deletions(-) create mode 100644 extra/peg/expr/expr-tests.factor diff --git a/extra/peg/expr/expr-tests.factor b/extra/peg/expr/expr-tests.factor new file mode 100644 index 0000000000..0ed05765cd --- /dev/null +++ b/extra/peg/expr/expr-tests.factor @@ -0,0 +1,25 @@ +! Copyright (C) 2007 Chris Double. +! See http://factorcode.org/license.txt for BSD license. +! +USING: kernel tools.test peg.expr multiline sequences ; +IN: temporary + +{ 5 } [ + "2+3" eval-expr +] unit-test + +{ 6 } [ + "2*3" eval-expr +] unit-test + +{ 14 } [ + "2+3*4" eval-expr +] unit-test + +{ 17 } [ + "2+3*4+3" eval-expr +] unit-test + +{ 23 } [ + "2+3*(4+3)" eval-expr +] unit-test diff --git a/extra/peg/expr/expr.factor b/extra/peg/expr/expr.factor index ed13ac0e50..26ae76c0b0 100644 --- a/extra/peg/expr/expr.factor +++ b/extra/peg/expr/expr.factor @@ -16,9 +16,8 @@ divide = ("/") [[ drop [ / ] ]] add = ("+") [[ drop [ + ] ]] subtract = ("-") [[ drop [ - ] ]] -digit = "0" | "1" | "2" | "3" | "4" | - "5" | "6" | "7" | "8" | "9" -number = ((digit)+) [[ concat string>number ]] +digit = ([0-9]) [[ digit> ]] +number = ((digit)+) [[ unclip [ swap 10 * + ] reduce ]] value = number | ("(" expr ")") [[ second ]] product = (value ((times | divide) value)*) [[ first2 operator-fold ]] From 39c228db6d14ae9229d712abb716489248c3dca8 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Thu, 20 Mar 2008 14:22:14 +1300 Subject: [PATCH 18/20] Update peg.pl0 to use range pattern syntax This allows removing the words for ident and number, replacing them with EBNF expressions. --- extra/peg/pl0/pl0.factor | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/extra/peg/pl0/pl0.factor b/extra/peg/pl0/pl0.factor index b30f6bfe70..34973e6a52 100644 --- a/extra/peg/pl0/pl0.factor +++ b/extra/peg/pl0/pl0.factor @@ -1,18 +1,10 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. USING: kernel arrays strings math.parser sequences -peg peg.ebnf peg.parsers memoize namespaces ; +peg peg.ebnf peg.parsers memoize namespaces math ; IN: peg.pl0 #! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0 -MEMO: ident ( -- parser ) - [ - CHAR: a CHAR: z range , - CHAR: A CHAR: Z range , - ] choice* repeat1 [ >string ] action ; - -MEMO: number ( -- parser ) - CHAR: 0 CHAR: 9 range repeat1 [ string>number ] action ; string ]] +digit = ([0-9]) [[ digit> ]] +number = ((digit)+) [[ unclip [ swap 10 * + ] reduce ]] EBNF> From c1f69f01beb2c6a183e42bd13b81a40374039baf Mon Sep 17 00:00:00 2001 From: Chris Double Date: Thu, 20 Mar 2008 14:57:12 +1300 Subject: [PATCH 19/20] Change ordering of [[ ... ]] --- extra/peg/ebnf/ebnf-tests.factor | 20 ++++++++++---------- extra/peg/ebnf/ebnf.factor | 28 +++++++++++++++------------- extra/peg/expr/expr-tests.factor | 2 +- extra/peg/expr/expr.factor | 3 ++- 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 458c68e0d4..0989e4beb5 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test peg peg.ebnf compiler.units ; +USING: kernel parser words tools.test peg peg.ebnf compiler.units ; IN: peg.ebnf.tests { T{ ebnf-non-terminal f "abc" } } [ @@ -109,37 +109,37 @@ IN: peg.ebnf.tests ] unit-test { V{ "a" "b" } } [ - "foo='a' 'b'" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast + "foo='a' 'b'" ebnf>quot with-compilation-unit "ab" "foo" search execute parse parse-result-ast ] unit-test { V{ 1 "b" } } [ - "foo=('a')[[ drop 1 ]] 'b'" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast + "foo=('a')[[ drop 1 ]] 'b'" ebnf>quot with-compilation-unit "ab" "foo" search execute parse parse-result-ast ] unit-test { V{ 1 2 } } [ - "foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]]" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast + "foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]]" ebnf>quot with-compilation-unit "ab" "foo" search execute parse parse-result-ast ] unit-test { CHAR: A } [ - "foo=[A-Z]" ebnf>quot with-compilation-unit "A" foo parse parse-result-ast + "foo=[A-Z]" ebnf>quot with-compilation-unit "A" "foo" search execute parse parse-result-ast ] unit-test { CHAR: Z } [ - "foo=[A-Z]" ebnf>quot with-compilation-unit "Z" foo parse parse-result-ast + "foo=[A-Z]" ebnf>quot with-compilation-unit "Z" "foo" search execute parse parse-result-ast ] unit-test { f } [ - "foo=[A-Z]" ebnf>quot with-compilation-unit "0" foo parse + "foo=[A-Z]" ebnf>quot with-compilation-unit "0" "foo" search execute parse ] unit-test { CHAR: 0 } [ - "foo=[^A-Z]" ebnf>quot with-compilation-unit "0" foo parse parse-result-ast + "foo=[^A-Z]" ebnf>quot with-compilation-unit "0" "foo" search execute parse parse-result-ast ] unit-test { f } [ - "foo=[^A-Z]" ebnf>quot with-compilation-unit "A" foo parse + "foo=[^A-Z]" ebnf>quot with-compilation-unit "A" "foo" search execute parse ] unit-test { f } [ - "foo=[^A-Z]" ebnf>quot with-compilation-unit "Z" foo parse + "foo=[^A-Z]" ebnf>quot with-compilation-unit "Z" "foo" search execute parse ] unit-test \ No newline at end of file diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 03f36c5f28..7d298a709d 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -172,7 +172,7 @@ DEFER: 'rhs' : 'range-parser' ( -- parser ) #! Match the syntax for declaring character ranges [ - "[" syntax , + [ "[" syntax , "[" token ensure-not , ] seq* hide , [ CHAR: ] = not ] satisfy repeat1 , "]" syntax , ] seq* [ first >string ] action ; @@ -208,7 +208,6 @@ DEFER: 'choice' "*" token sp ensure-not , "+" token sp ensure-not , "?" token sp ensure-not , - "[[" token sp ensure-not , ] seq* hide grouped ; : 'repeat0' ( -- parser ) @@ -226,13 +225,6 @@ DEFER: 'choice' [ drop t ] satisfy , ] seq* [ first ] action repeat0 [ >string ] action ; -: 'action' ( -- parser ) - [ - "(" [ 'choice' sp ] delay ")" syntax-pack , - "[[" 'factor-code' "]]" syntax-pack , - ] seq* [ first2 ] action ; - - : 'ensure-not' ( -- parser ) #! Parses the '!' syntax to ensure that #! something that matches the following elements do @@ -242,7 +234,7 @@ DEFER: 'choice' 'group' sp , ] seq* [ first ] action ; -: 'sequence' ( -- parser ) +: ('sequence') ( -- parser ) #! A sequence of terminals and non-terminals, including #! groupings of those. [ @@ -252,11 +244,21 @@ DEFER: 'choice' 'repeat0' sp , 'repeat1' sp , 'optional' sp , - 'action' sp , + ] choice* ; + +: 'sequence' ( -- parser ) + #! A sequence of terminals and non-terminals, including + #! groupings of those. + [ + [ + ('sequence') , + "[[" 'factor-code' "]]" syntax-pack , + ] seq* [ first2 ] action , + ('sequence') , ] choice* repeat1 [ dup length 1 = [ first ] [ ] if - ] action ; - + ] action ; + : 'choice' ( -- parser ) 'sequence' sp "|" token sp list-of [ dup length 1 = [ first ] [ ] if diff --git a/extra/peg/expr/expr-tests.factor b/extra/peg/expr/expr-tests.factor index 0ed05765cd..20da5cd16a 100644 --- a/extra/peg/expr/expr-tests.factor +++ b/extra/peg/expr/expr-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test peg.expr multiline sequences ; +USING: kernel tools.test peg peg.expr multiline sequences ; IN: temporary { 5 } [ diff --git a/extra/peg/expr/expr.factor b/extra/peg/expr/expr.factor index 26ae76c0b0..62ef4ea88f 100644 --- a/extra/peg/expr/expr.factor +++ b/extra/peg/expr/expr.factor @@ -26,4 +26,5 @@ expr = sum EBNF> : eval-expr ( string -- number ) - expr parse parse-result-ast ; \ No newline at end of file + expr parse parse-result-ast ; + From 7dc772db2647ebeb78c74dfa10d98b3963b5a94d Mon Sep 17 00:00:00 2001 From: Chris Double Date: Thu, 20 Mar 2008 15:42:21 +1300 Subject: [PATCH 20/20] Refactor ebnf parser generation --- extra/peg/ebnf/ebnf.factor | 52 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 7d298a709d..c7a007bfc8 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -34,6 +34,55 @@ C: ebnf-rule C: ebnf-action C: ebnf +GENERIC: (transform) ( ast -- parser ) + +: transform ( ast -- object ) + H{ } clone dup dup [ "parser" set swap (transform) "main" set ] bind ; + +M: ebnf (transform) ( ast -- parser ) + ebnf-rules [ (transform) ] map peek ; + +M: ebnf-rule (transform) ( ast -- parser ) + dup ebnf-rule-elements (transform) [ + swap ebnf-rule-symbol set + ] keep ; + +M: ebnf-sequence (transform) ( ast -- parser ) + ebnf-sequence-elements [ (transform) ] map seq ; + +M: ebnf-choice (transform) ( ast -- parser ) + ebnf-choice-options [ (transform) ] map choice ; + +M: ebnf-any-character (transform) ( ast -- parser ) + drop any-char ; + +M: ebnf-range (transform) ( ast -- parser ) + ebnf-range-pattern range-pattern ; + +M: ebnf-ensure-not (transform) ( ast -- parser ) + ebnf-ensure-not-group (transform) ensure-not ; + +M: ebnf-repeat0 (transform) ( ast -- parser ) + ebnf-repeat0-group (transform) repeat0 ; + +M: ebnf-repeat1 (transform) ( ast -- parser ) + ebnf-repeat1-group (transform) repeat1 ; + +M: ebnf-optional (transform) ( ast -- parser ) + ebnf-optional-elements (transform) optional ; + +M: ebnf-action (transform) ( ast -- parser ) + [ ebnf-action-parser (transform) ] keep + ebnf-action-code string-lines parse-lines action ; + +M: ebnf-terminal (transform) ( ast -- parser ) + ebnf-terminal-symbol token sp ; + +M: ebnf-non-terminal (transform) ( ast -- parser ) + ebnf-non-terminal-symbol [ + , "parser" get , \ at , + ] [ ] make delay ; + SYMBOL: parsers SYMBOL: non-terminals @@ -295,4 +344,7 @@ DEFER: 'choice' f ] if* ; +: transform-ebnf ( string -- object ) + 'ebnf' parse parse-result-ast transform ; + : " parse-multiline-string ebnf>quot call ; parsing