From e7980ebc616579df199cef126e11f33d42a243ec Mon Sep 17 00:00:00 2001 From: Chris Double Date: Thu, 20 Mar 2008 17:11:09 +1300 Subject: [PATCH] More refactoring of EBNF now produces a quotation that when called does the parsing EBNF: foo ... ;EBNF creates a 'foo' word with stack effect (string -- result) when called it parses the string and returns the result. --- extra/peg/ebnf/ebnf-tests.factor | 20 +-- extra/peg/ebnf/ebnf.factor | 227 ++++++++++--------------------- extra/peg/expr/expr.factor | 7 +- extra/peg/pl0/pl0-tests.factor | 12 +- extra/peg/pl0/pl0.factor | 6 +- 5 files changed, 93 insertions(+), 179 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 0989e4beb5..6606fa9ffc 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel parser words tools.test peg peg.ebnf compiler.units ; +USING: kernel tools.test peg peg.ebnf ; IN: peg.ebnf.tests { T{ ebnf-non-terminal f "abc" } } [ @@ -109,37 +109,37 @@ IN: peg.ebnf.tests ] unit-test { V{ "a" "b" } } [ - "foo='a' 'b'" ebnf>quot with-compilation-unit "ab" "foo" search execute parse parse-result-ast + "ab" call parse-result-ast ] unit-test { V{ 1 "b" } } [ - "foo=('a')[[ drop 1 ]] 'b'" ebnf>quot with-compilation-unit "ab" "foo" search execute parse parse-result-ast + "ab" call parse-result-ast ] unit-test { V{ 1 2 } } [ - "foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]]" ebnf>quot with-compilation-unit "ab" "foo" search execute parse parse-result-ast + "ab" call parse-result-ast ] unit-test { CHAR: A } [ - "foo=[A-Z]" ebnf>quot with-compilation-unit "A" "foo" search execute parse parse-result-ast + "A" call parse-result-ast ] unit-test { CHAR: Z } [ - "foo=[A-Z]" ebnf>quot with-compilation-unit "Z" "foo" search execute parse parse-result-ast + "Z" call parse-result-ast ] unit-test { f } [ - "foo=[A-Z]" ebnf>quot with-compilation-unit "0" "foo" search execute parse + "0" call ] unit-test { CHAR: 0 } [ - "foo=[^A-Z]" ebnf>quot with-compilation-unit "0" "foo" search execute parse parse-result-ast + "0" call parse-result-ast ] unit-test { f } [ - "foo=[^A-Z]" ebnf>quot with-compilation-unit "A" "foo" search execute parse + "A" call ] unit-test { f } [ - "foo=[^A-Z]" ebnf>quot with-compilation-unit "Z" "foo" search execute parse + "Z" call ] unit-test \ No newline at end of file diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index c7a007bfc8..b9f88f5f24 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel parser words arrays strings math.parser sequences +USING: kernel compiler.units parser words arrays strings math.parser sequences quotations vectors namespaces math assocs continuations peg peg.parsers unicode.categories multiline combinators.lib splitting ; @@ -34,136 +34,6 @@ C: ebnf-rule C: ebnf-action C: ebnf -GENERIC: (transform) ( ast -- parser ) - -: transform ( ast -- object ) - H{ } clone dup dup [ "parser" set swap (transform) "main" set ] bind ; - -M: ebnf (transform) ( ast -- parser ) - ebnf-rules [ (transform) ] map peek ; - -M: ebnf-rule (transform) ( ast -- parser ) - dup ebnf-rule-elements (transform) [ - swap ebnf-rule-symbol set - ] keep ; - -M: ebnf-sequence (transform) ( ast -- parser ) - ebnf-sequence-elements [ (transform) ] map seq ; - -M: ebnf-choice (transform) ( ast -- parser ) - ebnf-choice-options [ (transform) ] map choice ; - -M: ebnf-any-character (transform) ( ast -- parser ) - drop any-char ; - -M: ebnf-range (transform) ( ast -- parser ) - ebnf-range-pattern range-pattern ; - -M: ebnf-ensure-not (transform) ( ast -- parser ) - ebnf-ensure-not-group (transform) ensure-not ; - -M: ebnf-repeat0 (transform) ( ast -- parser ) - ebnf-repeat0-group (transform) repeat0 ; - -M: ebnf-repeat1 (transform) ( ast -- parser ) - ebnf-repeat1-group (transform) repeat1 ; - -M: ebnf-optional (transform) ( ast -- parser ) - ebnf-optional-elements (transform) optional ; - -M: ebnf-action (transform) ( ast -- parser ) - [ ebnf-action-parser (transform) ] keep - ebnf-action-code string-lines parse-lines action ; - -M: ebnf-terminal (transform) ( ast -- parser ) - ebnf-terminal-symbol token sp ; - -M: ebnf-non-terminal (transform) ( ast -- parser ) - ebnf-non-terminal-symbol [ - , "parser" get , \ at , - ] [ ] make delay ; - -SYMBOL: parsers -SYMBOL: non-terminals - -: reset-parser-generation ( -- ) - V{ } clone parsers set - H{ } clone non-terminals set ; - -: store-parser ( parser -- number ) - parsers get [ push ] keep length 1- ; - -: get-parser ( index -- parser ) - parsers get nth ; - -: non-terminal-index ( name -- number ) - dup non-terminals get at [ - nip - ] [ - f store-parser [ swap non-terminals get set-at ] keep - ] if* ; - -GENERIC: (generate-parser) ( ast -- id ) - -: generate-parser ( ast -- id ) - (generate-parser) ; - -M: ebnf-terminal (generate-parser) ( ast -- id ) - ebnf-terminal-symbol token sp store-parser ; - -M: ebnf-non-terminal (generate-parser) ( ast -- id ) - [ - ebnf-non-terminal-symbol dup non-terminal-index , - parsers get , \ nth , [ search ] [ 2drop f ] recover , \ or , - ] [ ] make delay sp store-parser ; - -M: ebnf-any-character (generate-parser) ( ast -- id ) - drop [ drop t ] satisfy store-parser ; - -M: ebnf-range (generate-parser) ( ast -- id ) - ebnf-range-pattern range-pattern store-parser ; - -M: ebnf-choice (generate-parser) ( ast -- id ) - ebnf-choice-options [ - generate-parser get-parser - ] map choice store-parser ; - -M: ebnf-sequence (generate-parser) ( ast -- id ) - ebnf-sequence-elements [ - generate-parser get-parser - ] map seq store-parser ; - -M: ebnf-ensure-not (generate-parser) ( ast -- id ) - ebnf-ensure-not-group generate-parser get-parser ensure-not store-parser ; - -M: ebnf-repeat0 (generate-parser) ( ast -- id ) - ebnf-repeat0-group generate-parser get-parser repeat0 store-parser ; - -M: ebnf-repeat1 (generate-parser) ( ast -- id ) - ebnf-repeat1-group generate-parser get-parser repeat1 store-parser ; - -M: ebnf-optional (generate-parser) ( ast -- id ) - ebnf-optional-elements generate-parser get-parser optional store-parser ; - -M: ebnf-rule (generate-parser) ( ast -- id ) - dup ebnf-rule-symbol non-terminal-index swap - ebnf-rule-elements generate-parser get-parser ! nt-id body - swap [ parsers get set-nth ] keep ; - -M: ebnf-action (generate-parser) ( ast -- id ) - [ ebnf-action-parser generate-parser get-parser ] keep - ebnf-action-code string-lines parse-lines action store-parser ; - -M: vector (generate-parser) ( ast -- id ) - [ generate-parser ] map peek ; - -M: ebnf (generate-parser) ( ast -- id ) - ebnf-rules [ - generate-parser - ] map peek ; - -DEFER: 'rhs' - : syntax ( string -- parser ) #! Parses the string, ignoring white space, and #! does not put the result in the AST. @@ -323,28 +193,81 @@ DEFER: 'choice' : 'ebnf' ( -- parser ) 'rule' sp repeat1 [ ] action ; -: ebnf>quot ( string -- quot ) - 'ebnf' parse [ - parse-result-ast [ - reset-parser-generation - generate-parser drop - [ - non-terminals get - [ - get-parser [ - swap , \ in , \ get , \ create , - 1quotation , \ define , - ] [ - drop - ] if* - ] assoc-each - ] [ ] make - ] with-scope - ] [ - f - ] if* ; +GENERIC: (transform) ( ast -- parser ) + +SYMBOL: parser +SYMBOL: main + +: transform ( ast -- object ) + H{ } clone dup dup [ parser set swap (transform) main set ] bind ; + +M: ebnf (transform) ( ast -- parser ) + ebnf-rules [ (transform) ] map peek ; + +M: ebnf-rule (transform) ( ast -- parser ) + dup ebnf-rule-elements (transform) [ + swap ebnf-rule-symbol set + ] keep ; + +M: ebnf-sequence (transform) ( ast -- parser ) + ebnf-sequence-elements [ (transform) ] map seq ; + +M: ebnf-choice (transform) ( ast -- parser ) + ebnf-choice-options [ (transform) ] map choice ; + +M: ebnf-any-character (transform) ( ast -- parser ) + drop any-char ; + +M: ebnf-range (transform) ( ast -- parser ) + ebnf-range-pattern range-pattern ; + +M: ebnf-ensure-not (transform) ( ast -- parser ) + ebnf-ensure-not-group (transform) ensure-not ; + +M: ebnf-repeat0 (transform) ( ast -- parser ) + ebnf-repeat0-group (transform) repeat0 ; + +M: ebnf-repeat1 (transform) ( ast -- parser ) + ebnf-repeat1-group (transform) repeat1 ; + +M: ebnf-optional (transform) ( ast -- parser ) + ebnf-optional-elements (transform) optional ; + +M: ebnf-action (transform) ( ast -- parser ) + [ ebnf-action-parser (transform) ] keep + ebnf-action-code string-lines [ parse-lines ] with-compilation-unit action ; + +M: ebnf-terminal (transform) ( ast -- parser ) + ebnf-terminal-symbol token sp ; + +M: ebnf-non-terminal (transform) ( ast -- parser ) + ebnf-non-terminal-symbol [ + , parser get , \ at , + ] [ ] make delay sp ; : transform-ebnf ( string -- object ) 'ebnf' parse parse-result-ast transform ; -: " parse-multiline-string ebnf>quot call ; parsing +: check-parse-result ( result -- result ) + dup [ + dup parse-result-remaining empty? [ + [ + "Unable to fully parse EBNF. Left to parse was: " % + parse-result-remaining % + ] "" make throw + ] unless + ] [ + "Could not parse EBNF" throw + ] if ; + +: ebnf>quot ( string -- hashtable quot ) + 'ebnf' parse check-parse-result + parse-result-ast transform dup main swap at compile ; + +: " parse-multiline-string ebnf>quot nip parsed ; parsing + +: EBNF: + CREATE-WORD dup + ";EBNF" parse-multiline-string + ebnf>quot swapd define "ebnf-parser" set-word-prop ; parsing + diff --git a/extra/peg/expr/expr.factor b/extra/peg/expr/expr.factor index 62ef4ea88f..14f0e7c14e 100644 --- a/extra/peg/expr/expr.factor +++ b/extra/peg/expr/expr.factor @@ -9,8 +9,7 @@ IN: peg.expr #! { operator rhs } in to a tree structure of the correct precedence. swap [ first2 swap call ] reduce ; - +;EBNF : eval-expr ( string -- number ) - expr parse parse-result-ast ; + expr parse-result-ast ; diff --git a/extra/peg/pl0/pl0-tests.factor b/extra/peg/pl0/pl0-tests.factor index bf321d54e9..b3d2135da7 100644 --- a/extra/peg/pl0/pl0-tests.factor +++ b/extra/peg/pl0/pl0-tests.factor @@ -4,14 +4,6 @@ USING: kernel tools.test peg peg.pl0 multiline sequences ; IN: peg.pl0.tests -{ "abc" } [ - "abc" ident parse parse-result-ast -] unit-test - -{ 55 } [ - "55abc" number parse parse-result-ast -] unit-test - { t } [ <" VAR x, squ; @@ -29,7 +21,7 @@ BEGIN x := x + 1; END END. -"> program parse parse-result-remaining empty? +"> pl0 parse-result-remaining empty? ] unit-test { f } [ @@ -95,5 +87,5 @@ BEGIN y := 36; CALL gcd; END. - "> program parse parse-result-remaining empty? + "> pl0 parse-result-remaining empty? ] unit-test \ No newline at end of file diff --git a/extra/peg/pl0/pl0.factor b/extra/peg/pl0/pl0.factor index 34973e6a52..f7eb3cad23 100644 --- a/extra/peg/pl0/pl0.factor +++ b/extra/peg/pl0/pl0.factor @@ -6,8 +6,7 @@ IN: peg.pl0 #! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0 -string ]] digit = ([0-9]) [[ digit> ]] number = ((digit)+) [[ unclip [ swap 10 * + ] reduce ]] -EBNF> +program = block "." +;EBNF