diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 452da8df05..8846a9c94c 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test peg peg.ebnf ; +USING: kernel tools.test peg peg.ebnf compiler.units ; IN: peg.ebnf.tests { T{ ebnf-non-terminal f "abc" } } [ @@ -15,11 +15,8 @@ IN: peg.ebnf.tests { T{ ebnf-rule f "digit" - V{ - T{ ebnf-choice f - V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } } - } - f + T{ ebnf-choice f + V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } } } } } [ @@ -29,11 +26,8 @@ IN: peg.ebnf.tests { T{ ebnf-rule f "digit" - V{ - T{ ebnf-sequence f - V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } } - } - f + T{ ebnf-sequence f + V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } } } } } [ @@ -83,7 +77,7 @@ IN: peg.ebnf.tests } } } [ - "one {(two | three) four}" 'choice' parse parse-result-ast + "one ((two | three) four)*" 'choice' parse parse-result-ast ] unit-test { @@ -95,5 +89,33 @@ IN: peg.ebnf.tests } } } [ - "one [ two ] three" 'choice' parse parse-result-ast + "one ( two )? 
three" 'choice' parse parse-result-ast ] unit-test + +{ "foo" } [ + "\"foo\"" 'identifier' parse parse-result-ast +] unit-test + +{ "foo" } [ + "'foo'" 'identifier' parse parse-result-ast +] unit-test + +{ "foo" } [ + "foo" 'non-terminal' parse parse-result-ast ebnf-non-terminal-symbol +] unit-test + +{ "foo" } [ + "foo]" 'non-terminal' parse parse-result-ast ebnf-non-terminal-symbol +] unit-test + +{ V{ "a" "b" } } [ + "foo='a' 'b'" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast +] unit-test + +{ V{ 1 "b" } } [ + "foo=('a')[[ drop 1 ]] 'b'" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast +] unit-test + +{ V{ 1 2 } } [ + "foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]]" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast +] unit-test \ No newline at end of file diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 5d7d7297ef..e2c2dd5006 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -2,24 +2,31 @@ ! See http://factorcode.org/license.txt for BSD license. 
USING: kernel parser words arrays strings math.parser sequences quotations vectors namespaces math assocs continuations peg - peg.parsers unicode.categories ; + peg.parsers unicode.categories multiline combinators.lib + splitting ; IN: peg.ebnf TUPLE: ebnf-non-terminal symbol ; TUPLE: ebnf-terminal symbol ; +TUPLE: ebnf-any-character ; +TUPLE: ebnf-ensure-not group ; TUPLE: ebnf-choice options ; TUPLE: ebnf-sequence elements ; TUPLE: ebnf-repeat0 group ; +TUPLE: ebnf-repeat1 group ; TUPLE: ebnf-optional elements ; TUPLE: ebnf-rule symbol elements ; -TUPLE: ebnf-action word ; +TUPLE: ebnf-action parser code ; TUPLE: ebnf rules ; C: <ebnf-non-terminal> ebnf-non-terminal C: <ebnf-terminal> ebnf-terminal +C: <ebnf-any-character> ebnf-any-character +C: <ebnf-ensure-not> ebnf-ensure-not C: <ebnf-choice> ebnf-choice C: <ebnf-sequence> ebnf-sequence C: <ebnf-repeat0> ebnf-repeat0 +C: <ebnf-repeat1> ebnf-repeat1 C: <ebnf-optional> ebnf-optional C: <ebnf-rule> ebnf-rule C: <ebnf-action> ebnf-action @@ -27,12 +34,10 @@ C: <ebnf> ebnf SYMBOL: parsers SYMBOL: non-terminals -SYMBOL: last-parser : reset-parser-generation ( -- ) V{ } clone parsers set - H{ } clone non-terminals set - f last-parser set ; + H{ } clone non-terminals set ; : store-parser ( parser -- number ) parsers get [ push ] keep length 1- ; @@ -50,7 +55,7 @@ SYMBOL: last-parser GENERIC: (generate-parser) ( ast -- id ) : generate-parser ( ast -- id ) - (generate-parser) dup last-parser set ; + (generate-parser) ; M: ebnf-terminal (generate-parser) ( ast -- id ) ebnf-terminal-symbol token sp store-parser ; @@ -61,6 +66,9 @@ M: ebnf-non-terminal (generate-parser) ( ast -- id ) parsers get , \ nth , [ search ] [ 2drop f ] recover , \ or , ] [ ] make delay sp store-parser ; +M: ebnf-any-character (generate-parser) ( ast -- id ) + drop [ drop t ] satisfy store-parser ; + M: ebnf-choice (generate-parser) ( ast -- id ) ebnf-choice-options [ generate-parser get-parser @@ -71,9 +79,15 @@ M: ebnf-sequence (generate-parser) 
( ast -- id ) generate-parser get-parser ] map seq store-parser ; +M: ebnf-ensure-not (generate-parser) ( ast -- id ) + ebnf-ensure-not-group generate-parser get-parser ensure-not store-parser ; + M: ebnf-repeat0 (generate-parser) ( ast -- id ) ebnf-repeat0-group generate-parser get-parser repeat0 store-parser ; +M: ebnf-repeat1 (generate-parser) ( ast -- id ) + ebnf-repeat1-group generate-parser get-parser repeat1 store-parser ; + M: ebnf-optional (generate-parser) ( ast -- id ) ebnf-optional-elements generate-parser get-parser optional store-parser ; @@ -83,15 +97,12 @@ M: ebnf-rule (generate-parser) ( ast -- id ) swap [ parsers get set-nth ] keep ; M: ebnf-action (generate-parser) ( ast -- id ) - ebnf-action-word search 1quotation - last-parser get get-parser swap action store-parser ; + [ ebnf-action-parser generate-parser get-parser ] keep + ebnf-action-code string-lines parse-lines action store-parser ; M: vector (generate-parser) ( ast -- id ) [ generate-parser ] map peek ; -M: f (generate-parser) ( ast -- id ) - drop last-parser get ; - M: ebnf (generate-parser) ( ast -- id ) ebnf-rules [ generate-parser @@ -99,67 +110,153 @@ M: ebnf (generate-parser) ( ast -- id ) DEFER: 'rhs' +: syntax ( string -- parser ) + #! Parses the string, ignoring white space, and + #! does not put the result in the AST. + token sp hide ; + +: syntax-pack ( begin parser end -- parser ) + #! Parse 'parser' surrounded by syntax elements + #! begin and end. + [ syntax ] dipd syntax pack ; + +: 'identifier' ( -- parser ) + #! Return a parser that parses an identifier delimited by + #! a quotation character. The quotation can be single + #! or double quotes. The AST produced is the identifier + #! between the quotes. 
+ [ + [ CHAR: " = not ] satisfy repeat1 "\"" "\"" surrounded-by , + [ CHAR: ' = not ] satisfy repeat1 "'" "'" surrounded-by , + ] choice* [ >string ] action ; + : 'non-terminal' ( -- parser ) - CHAR: a CHAR: z range "-" token [ first ] action 2array choice repeat1 [ >string <ebnf-non-terminal> ] action ; + #! A non-terminal is the name of another rule. It can + #! be any non-blank character except for characters used + #! in the EBNF syntax itself. + [ + { + [ dup blank? ] + [ dup CHAR: " = ] + [ dup CHAR: ' = ] + [ dup CHAR: | = ] + [ dup CHAR: { = ] + [ dup CHAR: } = ] + [ dup CHAR: = = ] + [ dup CHAR: ) = ] + [ dup CHAR: ( = ] + [ dup CHAR: ] = ] + [ dup CHAR: [ = ] + [ dup CHAR: . = ] + [ dup CHAR: ! = ] + [ dup CHAR: * = ] + [ dup CHAR: + = ] + [ dup CHAR: ? = ] + } || not nip + ] satisfy repeat1 [ >string <ebnf-non-terminal> ] action ; : 'terminal' ( -- parser ) - "'" token hide [ CHAR: ' = not ] satisfy repeat1 "'" token hide 3array seq [ first >string <ebnf-terminal> ] action ; + #! A terminal is an identifier enclosed in quotations + #! and it represents the literal value of the identifier. + 'identifier' [ <ebnf-terminal> ] action ; +: 'any-character' ( -- parser ) + #! A parser to match the symbol for any character match. + [ CHAR: . = ] satisfy [ drop <ebnf-any-character> ] action ; + : 'element' ( -- parser ) - 'non-terminal' 'terminal' 2array choice ; + #! An element of a rule. It can be a terminal or a + #! non-terminal but must not be followed by a "=". + #! The latter indicates that it is the beginning of a + #! new rule. + [ + [ + 'non-terminal' , + 'terminal' , + 'any-character' , + ] choice* , + "=" syntax ensure-not , + ] seq* [ first ] action ; DEFER: 'choice' +: grouped ( quot suffix -- parser ) + #! Parse a group of choices, with a suffix indicating + #! the type of group (repeat0, repeat1, etc) and + #! a quot that is the action that produces the AST. 
+ "(" [ 'choice' sp ] delay ")" syntax-pack + swap 2seq + [ first ] rot compose action ; + : 'group' ( -- parser ) - "(" token sp hide - [ 'choice' sp ] delay - ")" token sp hide - 3array seq [ first ] action ; + #! A grouping with no suffix. Used for precedence. + [ ] [ + "*" token sp ensure-not , + "+" token sp ensure-not , + "?" token sp ensure-not , + "[[" token sp ensure-not , + ] seq* hide grouped ; : 'repeat0' ( -- parser ) - "{" token sp hide - [ 'choice' sp ] delay - "}" token sp hide - 3array seq [ first <ebnf-repeat0> ] action ; + [ <ebnf-repeat0> ] "*" syntax grouped ; + +: 'repeat1' ( -- parser ) + [ <ebnf-repeat1> ] "+" syntax grouped ; : 'optional' ( -- parser ) - "[" token sp hide - [ 'choice' sp ] delay - "]" token sp hide - 3array seq [ first <ebnf-optional> ] action ; + [ <ebnf-optional> ] "?" syntax grouped ; + +: 'factor-code' ( -- parser ) + [ + "]]" token ensure-not , + [ drop t ] satisfy , + ] seq* [ first ] action repeat0 [ >string ] action ; + +: 'action' ( -- parser ) + [ + "(" [ 'choice' sp ] delay ")" syntax-pack , + "[[" 'factor-code' "]]" syntax-pack , + ] seq* [ first2 <ebnf-action> ] action ; + + +: 'ensure-not' ( -- parser ) + #! Parses the '!' syntax to ensure that + #! something that matches the following elements does + #! not exist in the parse stream. + [ + "!" syntax , + 'group' sp , + ] seq* [ first <ebnf-ensure-not> ] action ; : 'sequence' ( -- parser ) + #! A sequence of terminals and non-terminals, including + #! groupings of those. [ + 'ensure-not' sp , 'element' sp , 'group' sp , 'repeat0' sp , + 'repeat1' sp , 'optional' sp , - ] { } make choice - repeat1 [ + 'action' sp , + ] choice* repeat1 [ dup length 1 = [ first ] [ <ebnf-sequence> ] if - ] action ; + ] action ; : 'choice' ( -- parser ) 'sequence' sp "|" token sp list-of [ dup length 1 = [ first ] [ <ebnf-choice> ] if - ] action ; - -: 'action' ( -- parser ) - "=>" token hide - [ blank? 
] satisfy ensure-not [ drop t ] satisfy 2array seq [ first ] action repeat1 [ >string ] action sp - 2array seq [ first <ebnf-action> ] action ; - -: 'rhs' ( -- parser ) - 'choice' 'action' sp optional 2array seq ; + ] action ; : 'rule' ( -- parser ) - 'non-terminal' [ ebnf-non-terminal-symbol ] action - "=" token sp hide - 'rhs' - 3array seq [ first2 <ebnf-rule> ] action ; + [ + 'non-terminal' [ ebnf-non-terminal-symbol ] action , + "=" syntax , + 'choice' , + ] seq* [ first2 <ebnf-rule> ] action ; : 'ebnf' ( -- parser ) - 'rule' sp "." token sp hide list-of [ <ebnf> ] action ; + 'rule' sp repeat1 [ <ebnf> ] action ; : ebnf>quot ( string -- quot ) 'ebnf' parse [ @@ -182,4 +279,4 @@ DEFER: 'choice' f ] if* ; -: <EBNF "EBNF>" parse-tokens " " join ebnf>quot call ; parsing +: <EBNF "EBNF>" parse-multiline-string ebnf>quot call ; parsing diff --git a/extra/peg/expr/authors.txt b/extra/peg/expr/authors.txt new file mode 100644 index 0000000000..44b06f94bc --- /dev/null +++ b/extra/peg/expr/authors.txt @@ -0,0 +1 @@ +Chris Double diff --git a/extra/peg/expr/expr.factor b/extra/peg/expr/expr.factor new file mode 100644 index 0000000000..ed13ac0e50 --- /dev/null +++ b/extra/peg/expr/expr.factor @@ -0,0 +1,30 @@ +! Copyright (C) 2008 Chris Double. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel arrays strings math.parser sequences +peg peg.ebnf peg.parsers memoize math ; +IN: peg.expr + +: operator-fold ( lhs seq -- value ) + #! Perform a fold of a lhs, followed by a sequence of pairs being + #! { operator rhs } in to a tree structure of the correct precedence. 
+ swap [ first2 swap call ] reduce ; + +<EBNF + +times = ("*") [[ drop [ * ] ]] +divide = ("/") [[ drop [ / ] ]] +add = ("+") [[ drop [ + ] ]] +subtract = ("-") [[ drop [ - ] ]] + +digit = "0" | "1" | "2" | "3" | "4" | + "5" | "6" | "7" | "8" | "9" +number = ((digit)+) [[ concat string>number ]] + +value = number | ("(" expr ")") [[ second ]] +product = (value ((times | divide) value)*) [[ first2 operator-fold ]] +sum = (product ((add | subtract) product)*) [[ first2 operator-fold ]] +expr = sum +EBNF> + +: eval-expr ( string -- number ) + expr parse parse-result-ast ; \ No newline at end of file diff --git a/extra/peg/expr/summary.txt b/extra/peg/expr/summary.txt new file mode 100644 index 0000000000..6c3c140b2b --- /dev/null +++ b/extra/peg/expr/summary.txt @@ -0,0 +1 @@ +Simple expression evaluator using EBNF diff --git a/extra/peg/expr/tags.txt b/extra/peg/expr/tags.txt new file mode 100644 index 0000000000..9da56880c0 --- /dev/null +++ b/extra/peg/expr/tags.txt @@ -0,0 +1 @@ +parsing diff --git a/extra/peg/pl0/pl0-tests.factor b/extra/peg/pl0/pl0-tests.factor index fa8ac89f57..bf321d54e9 100644 --- a/extra/peg/pl0/pl0-tests.factor +++ b/extra/peg/pl0/pl0-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test peg peg.pl0 ; +USING: kernel tools.test peg peg.pl0 multiline sequences ; IN: peg.pl0.tests { "abc" } [ @@ -11,3 +11,89 @@ IN: peg.pl0.tests { 55 } [ "55abc" number parse parse-result-ast ] unit-test + +{ t } [ + <" +VAR x, squ; + +PROCEDURE square; +BEGIN + squ := x * x +END; + +BEGIN + x := 1; + WHILE x <= 10 DO + BEGIN + CALL square; + x := x + 1; + END +END. +"> program parse parse-result-remaining empty? 
+] unit-test + +{ f } [ + <" +CONST + m = 7, + n = 85; + +VAR + x, y, z, q, r; + +PROCEDURE multiply; +VAR a, b; + +BEGIN + a := x; + b := y; + z := 0; + WHILE b > 0 DO BEGIN + IF ODD b THEN z := z + a; + a := 2 * a; + b := b / 2; + END +END; + +PROCEDURE divide; +VAR w; +BEGIN + r := x; + q := 0; + w := y; + WHILE w <= r DO w := 2 * w; + WHILE w > y DO BEGIN + q := 2 * q; + w := w / 2; + IF w <= r THEN BEGIN + r := r - w; + q := q + 1 + END + END +END; + +PROCEDURE gcd; +VAR f, g; +BEGIN + f := x; + g := y; + WHILE f # g DO BEGIN + IF f < g THEN g := g - f; + IF g < f THEN f := f - g; + END; + z := f +END; + +BEGIN + x := m; + y := n; + CALL multiply; + x := 25; + y := 3; + CALL divide; + x := 84; + y := 36; + CALL gcd; +END. + "> program parse parse-result-remaining empty? +] unit-test \ No newline at end of file diff --git a/extra/peg/pl0/pl0.factor b/extra/peg/pl0/pl0.factor index 6844eb44dc..1ef7a23b41 100644 --- a/extra/peg/pl0/pl0.factor +++ b/extra/peg/pl0/pl0.factor @@ -1,30 +1,31 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. USING: kernel arrays strings math.parser sequences -peg peg.ebnf peg.parsers memoize ; +peg peg.ebnf peg.parsers memoize namespaces ; IN: peg.pl0 #! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0 MEMO: ident ( -- parser ) - CHAR: a CHAR: z range - CHAR: A CHAR: Z range 2array choice repeat1 - [ >string ] action ; + [ + CHAR: a CHAR: z range , + CHAR: A CHAR: Z range , + ] choice* repeat1 [ >string ] action ; MEMO: number ( -- parser ) CHAR: 0 CHAR: 9 range repeat1 [ string>number ] action ; <EBNF -program = block '.' . -block = [ 'const' ident '=' number { ',' ident '=' number } ';' ] - [ 'var' ident { ',' ident } ';' ] - { 'procedure' ident ';' [ block ';' ] } statement . -statement = [ ident ':=' expression | 'call' ident | - 'begin' statement {';' statement } 'end' | - 'if' condition 'then' statement | - 'while' condition 'do' statement ] . 
-condition = 'odd' expression | - expression ('=' | '#' | '<=' | '<' | '>=' | '>') expression . -expression = ['+' | '-'] term {('+' | '-') term } . -term = factor {('*' | '/') factor } . -factor = ident | number | '(' expression ')' +program = block "." +block = [ "CONST" ident "=" number { "," ident "=" number } ";" ] + [ "VAR" ident { "," ident } ";" ] + { "PROCEDURE" ident ";" [ block ";" ] } statement +statement = [ ident ":=" expression | "CALL" ident | + "BEGIN" statement {";" statement } "END" | + "IF" condition "THEN" statement | + "WHILE" condition "DO" statement ] +condition = "ODD" expression | + expression ("=" | "#" | "<=" | "<" | ">=" | ">") expression +expression = ["+" | "-"] term {("+" | "-") term } +term = factor {("*" | "/") factor } +factor = ident | number | "(" expression ")" EBNF>