From 78633e03a0d9951407e33c01c8e33eac0205657e Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Sun, 30 Mar 2008 19:01:47 +1300 Subject: [PATCH 1/9] Allow var names in ebnf but ignore them for now --- extra/peg/ebnf/ebnf.factor | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index af61c3aae0..0ae1430c8c 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -19,6 +19,7 @@ TUPLE: ebnf-repeat1 group ; TUPLE: ebnf-optional group ; TUPLE: ebnf-rule symbol elements ; TUPLE: ebnf-action parser code ; +TUPLE: ebnf-var parser name ; TUPLE: ebnf rules ; C: <ebnf-non-terminal> ebnf-non-terminal @@ -34,6 +35,7 @@ C: <ebnf-repeat1> ebnf-repeat1 C: <ebnf-optional> ebnf-optional C: <ebnf-rule> ebnf-rule C: <ebnf-action> ebnf-action +C: <ebnf-var> ebnf-var C: <ebnf> ebnf : syntax ( string -- parser ) @@ -79,6 +81,7 @@ C: <ebnf> ebnf [ dup CHAR: * = ] [ dup CHAR: + = ] [ dup CHAR: ? = ] + [ dup CHAR: : = ] } || not nip ] satisfy repeat1 [ >string <ebnf-non-terminal> ] action ; @@ -200,6 +203,7 @@ DEFER: 'choice' : 'actioned-sequence' ( -- parser ) [ [ 'sequence' , "=>" syntax , 'action' , ] seq* [ first2 <ebnf-action> ] action , + [ 'sequence' , ":" syntax , "a-zA-Z" range-pattern repeat1 [ >string ] action , ] seq* [ first2 <ebnf-var> ] action , 'sequence' , ] choice* ; @@ -270,6 +274,9 @@ M: ebnf-action (transform) ( ast -- parser ) [ parser>> (transform) ] keep code>> string-lines [ parse-lines ] with-compilation-unit action ; +M: ebnf-var (transform) ( ast -- parser ) + parser>> (transform) ; + M: ebnf-terminal (transform) ( ast -- parser ) symbol>> token sp ; From bb8198d3d0163e0cacc701e21588c16e858d2b08 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Sun, 30 Mar 2008 23:24:02 +1300 Subject: [PATCH 2/9] Declare stack effects for compiled parsers --- extra/peg/ebnf/ebnf.factor | 4 ++-- extra/peg/peg.factor | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 0ae1430c8c..41b5a1b655 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -3,7 +3,7 @@ USING: kernel compiler.units parser words arrays strings math.parser sequences quotations vectors namespaces math assocs continuations peg peg.parsers unicode.categories multiline combinators.lib - splitting accessors ; + splitting accessors effects ; IN: peg.ebnf TUPLE: ebnf-non-terminal symbol ; @@ -310,5 +310,5 @@ M: ebnf-non-terminal (transform) ( ast -- parser ) : EBNF: CREATE-WORD dup ";EBNF" parse-multiline-string - ebnf>quot swapd define "ebnf-parser" set-word-prop ; parsing + ebnf>quot swapd 1 1 <effect> define-declared "ebnf-parser" set-word-prop ; parsing diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 8621b43a7f..a09962783b 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -3,7 +3,7 @@ USING: kernel sequences strings namespaces math assocs shuffle vectors arrays combinators.lib math.parser match unicode.categories sequences.lib compiler.units parser - words quotations effects memoize accessors locals ; + words quotations effects memoize accessors locals effects ; IN: peg USE: prettyprint @@ -206,7 +206,7 @@ GENERIC: (compile) ( parser -- quot ) :: parser-body ( parser -- quot ) #! Return the body of the word that is the compiled version #! of the parser. - [let* | rule [ parser (compile) define-temp dup parser "peg" set-word-prop ] + [let* | rule [ gensym dup parser (compile) 0 1 <effect> define-declared dup parser "peg" set-word-prop ] | [ rule pos get apply-rule dup fail = [ @@ -216,7 +216,7 @@ GENERIC: (compile) ( parser -- quot ) ] if ] ] ; - + : compiled-parser ( parser -- word ) #! Look to see if the given parser has been compiled. #! If not, compile it to a temporary word, cache it, @@ -227,7 +227,7 @@ GENERIC: (compile) ( parser -- quot ) dup compiled>> [ nip ] [ - gensym tuck >>compiled 2dup parser-body define dupd "peg" set-word-prop + gensym tuck >>compiled 2dup parser-body 0 1 <effect> define-declared dupd "peg" set-word-prop ] if* ; : compile ( parser -- word ) From 5989680a7b992b392dbb57ca99f3909140f2b879 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Mon, 31 Mar 2008 00:53:33 +1300 Subject: [PATCH 3/9] Ensure box parsers are never cached --- extra/peg/peg.factor | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index a09962783b..e07942a3cd 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -488,8 +488,11 @@ M: box-parser (compile) ( parser -- quot ) #! Calls the quotation at compile time #! to produce the parser to be compiled. #! This differs from 'delay' which calls - #! it at run time. - quot>> call compiled-parser 1quotation ; + #! it at run time. Due to using the runtime + #! environment at compile time, this parser + #! must not be cached, so we clear out the + #! delgates cache. + f >>compiled quot>> call compiled-parser 1quotation ; PRIVATE> @@ -560,7 +563,12 @@ PRIVATE> delay-parser construct-boa init-parser ; : box ( quot -- parser ) - box-parser construct-boa init-parser ; + #! because a box has its quotation run at compile time + #! it must always have a new parser delgate created, + #! not a cached one. This is because the same box, + #! compiled twice can have a different compiled word + #! due to running at compile time. + box-parser construct-boa next-id f <parser> over set-delegate ; : PEG: (:) [ From 55a69392faadff0988a49696f734562491e484d0 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Mon, 31 Mar 2008 13:52:42 +1300 Subject: [PATCH 4/9] First cut at variables in ebnf --- extra/peg/ebnf/ebnf.factor | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 41b5a1b655..e9ec0dc4e2 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -3,7 +3,7 @@ USING: kernel compiler.units parser words arrays strings math.parser sequences quotations vectors namespaces math assocs continuations peg peg.parsers unicode.categories multiline combinators.lib - splitting accessors effects ; + splitting accessors effects sequences.deep ; IN: peg.ebnf TUPLE: ebnf-non-terminal symbol ; @@ -227,15 +227,17 @@ GENERIC: (transform) ( ast -- parser ) SYMBOL: parser SYMBOL: main +SYMBOL: vars : transform ( ast -- object ) - H{ } clone dup dup [ parser set swap (transform) main set ] bind ; + H{ } clone dup dup [ parser set V{ } vars set swap (transform) main set ] bind ; M: ebnf (transform) ( ast -- parser ) rules>> [ (transform) ] map peek ; M: ebnf-rule (transform) ( ast -- parser ) - dup elements>> (transform) [ + dup elements>> + vars get clone vars [ (transform) ] with-variable [ swap symbol>> set ] keep ; @@ -270,12 +272,26 @@ M: ebnf-repeat1 (transform) ( ast -- parser ) M: ebnf-optional (transform) ( ast -- parser ) transform-group optional ; +: build-locals ( string vars -- string ) + dup empty? [ + drop + ] [ + [ + "[let* | " % + [ dup % " [ \"" % % "\" get ] " % ] each + " | " % + % + " ] with-locals" % + ] "" make + ] if ; + M: ebnf-action (transform) ( ast -- parser ) [ parser>> (transform) ] keep - code>> string-lines [ parse-lines ] with-compilation-unit action ; + code>> vars get build-locals string-lines [ parse-lines ] with-compilation-unit action ; M: ebnf-var (transform) ( ast -- parser ) - parser>> (transform) ; + [ parser>> (transform) ] [ name>> ] bi + dup vars get push [ dupd set ] curry action ; M: ebnf-terminal (transform) ( ast -- parser ) symbol>> token sp ; @@ -303,7 +319,7 @@ M: ebnf-non-terminal (transform) ( ast -- parser ) : ebnf>quot ( string -- hashtable quot ) 'ebnf' parse check-parse-result parse-result-ast transform dup dup parser [ main swap at compile ] with-variable - [ compiled-parse ] curry ; + [ compiled-parse ] curry [ with-scope ] curry ; : [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing From ee2194d1dc1eb4df9072dae9ce50a9bb13353b98 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Mon, 31 Mar 2008 14:03:16 +1300 Subject: [PATCH 5/9] Allow variable names on elements --- extra/peg/ebnf/ebnf.factor | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index e9ec0dc4e2..f98b08093a 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -102,7 +102,7 @@ C: <ebnf> ebnf "]" syntax , ] seq* [ first >string <ebnf-range> ] action ; -: 'element' ( -- parser ) +: ('element') ( -- parser ) #! An element of a rule. It can be a terminal or a #! non-terminal but must not be followed by a "=". #! The latter indicates that it is the beginning of a @@ -120,6 +120,12 @@ C: <ebnf> ebnf ] choice* , ] seq* [ first ] action ; +: 'element' ( -- parser ) + [ + [ ('element') , ":" syntax , "a-zA-Z" range-pattern repeat1 [ >string ] action , ] seq* [ first2 <ebnf-var> ] action , + ('element') , + ] choice* ; + DEFER: 'choice' : grouped ( quot suffix -- parser ) From 729ac1d6dc18ddfd26aebae44d27c6ea62eec767 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Mon, 31 Mar 2008 14:59:22 +1300 Subject: [PATCH 6/9] Some ebnf tweaks and tests to do with variables --- extra/peg/ebnf/ebnf-tests.factor | 9 ++++++++- extra/peg/ebnf/ebnf.factor | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 7aa61e84da..cf16fad2cd 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test peg peg.ebnf words ; +USING: kernel tools.test peg peg.ebnf words math math.parser ; IN: peg.ebnf.tests { T{ ebnf-non-terminal f "abc" } } [ @@ -160,6 +160,13 @@ IN: peg.ebnf.tests "1-1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF] call parse-result-ast ] unit-test +{ 6 } [ + "4+2" [EBNF num=[0-9] => [[ digit> ]] foo=num:x '+' num:y => [[ drop x y + ]] EBNF] call parse-result-ast +] unit-test + +{ 6 } [ + "4+2" [EBNF foo=[0-9]:x '+' [0-9]:y => [[ drop x digit> y digit> + ]] EBNF] call parse-result-ast +] unit-test { V{ V{ 49 } "+" V{ 49 } } } [ #! Test direct left recursion. diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index f98b08093a..74b3e3540d 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -283,7 +283,7 @@ M: ebnf-optional (transform) ( ast -- parser ) drop ] [ [ - "[let* | " % + "USING: locals namespaces ; [let* | " % [ dup % " [ \"" % % "\" get ] " % ] each " | " % % From c45eba68987e41ad14e0cc817079801e713af1b8 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Mon, 31 Mar 2008 16:34:59 +1300 Subject: [PATCH 7/9] Add semantic parser --- extra/peg/peg-docs.factor | 13 +++++++++++++ extra/peg/peg.factor | 20 ++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/extra/peg/peg-docs.factor b/extra/peg/peg-docs.factor index e7bd255569..c54a39b7b0 100644 --- a/extra/peg/peg-docs.factor +++ b/extra/peg/peg-docs.factor @@ -95,6 +95,19 @@ HELP: optional "Returns a parser that parses 0 or 1 instances of the 'p1' parser. The AST produced is " "'f' if 0 instances are parsed the AST produced is 'f', otherwise it is the AST produced by 'p1'." } ; +HELP: semantic +{ $values + { "parser" "a parser" } + { "quot" "a quotation with stack effect ( object -- bool )" } +} +{ $description + "Returns a parser that succeeds if the 'p1' parser succeeds and the quotation called with " + "the AST produced by 'p1' on the stack returns true." } +{ $examples + { $example "\"A\" [ drop t ] satisfy [ 66 > ] semantic parse" "f" } + { $example "\"C\" [ drop t ] satisfy [ 66 > ] semantic parse parse-result-ast " "67" } +} ; + HELP: ensure { $values { "parser" "a parser" } diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 43eb9e8d9e..9e35c5b9be 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -414,6 +414,23 @@ TUPLE: optional-parser p1 ; M: optional-parser (compile) ( parser -- quot ) p1>> compiled-parser \ ?quot optional-pattern match-replace ; +TUPLE: semantic-parser p1 quot ; + +MATCH-VARS: ?parser ; + +: semantic-pattern ( -- quot ) + [ + ?parser [ + dup parse-result-ast ?quot call [ drop f ] unless + ] [ + f + ] if* + ] ; + +M: semantic-parser (compile) ( parser -- quot ) + [ p1>> compiled-parser ] [ quot>> ] bi + 2array { ?parser ?quot } semantic-pattern match-replace ; + TUPLE: ensure-parser p1 ; : ensure-pattern ( -- quot ) @@ -546,6 +563,9 @@ PRIVATE> : optional ( parser -- parser ) optional-parser construct-boa init-parser ; +: semantic ( parser quot -- parser ) + semantic-parser construct-boa init-parser ; + : ensure ( parser -- parser ) ensure-parser construct-boa init-parser ; From f66774e87564aa5f6d66f80dd00c72b2db456700 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Mon, 31 Mar 2008 16:50:05 +1300 Subject: [PATCH 8/9] Add tests for semantic and add syntax for it to ebnf Syntax is ?[ ...]? For example: [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] { 1 2 3 4 5 6 } swap call . --- extra/peg/ebnf/ebnf-tests.factor | 12 ++++++++++++ extra/peg/ebnf/ebnf.factor | 16 ++++++++++++---- extra/peg/peg-tests.factor | 13 +++++++++++-- 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index cf16fad2cd..4f802c5207 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -168,6 +168,18 @@ IN: peg.ebnf.tests "4+2" [EBNF foo=[0-9]:x '+' [0-9]:y => [[ drop x digit> y digit> + ]] EBNF] call parse-result-ast ] unit-test +{ 10 } [ + { 1 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call parse-result-ast +] unit-test + +{ f } [ + { "a" 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call +] unit-test + +{ 3 } [ + { 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call parse-result-ast +] unit-test + { V{ V{ 49 } "+" V{ 49 } } } [ #! Test direct left recursion. #! Using packrat, so first part of expr fails, causing 2nd choice to be used diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 74b3e3540d..4f00edbd3c 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -20,6 +20,7 @@ TUPLE: ebnf-optional group ; TUPLE: ebnf-rule symbol elements ; TUPLE: ebnf-action parser code ; TUPLE: ebnf-var parser name ; +TUPLE: ebnf-semantic parser code ; TUPLE: ebnf rules ; C: <ebnf-non-terminal> ebnf-non-terminal @@ -36,6 +37,7 @@ C: <ebnf-optional> ebnf-optional C: <ebnf-rule> ebnf-rule C: <ebnf-action> ebnf-action C: <ebnf-var> ebnf-var +C: <ebnf-semantic> ebnf-semantic C: <ebnf> ebnf : syntax ( string -- parser ) @@ -156,6 +158,7 @@ DEFER: 'choice' : 'factor-code' ( -- parser ) [ "]]" token ensure-not , + "]?" token ensure-not , [ drop t ] satisfy , ] seq* [ first ] action repeat0 [ >string ] action ; @@ -193,14 +196,15 @@ DEFER: 'choice' : 'action' ( -- parser ) "[[" 'factor-code' "]]" syntax-pack ; +: 'semantic' ( -- parser ) + "?[" 'factor-code' "]?" syntax-pack ; + : 'sequence' ( -- parser ) #! A sequence of terminals and non-terminals, including #! groupings of those. [ - [ - ('sequence') , - 'action' , - ] seq* [ first2 <ebnf-action> ] action , + [ ('sequence') , 'action' , ] seq* [ first2 <ebnf-action> ] action , + [ ('sequence') , 'semantic' , ] seq* [ first2 <ebnf-semantic> ] action , ('sequence') , ] choice* repeat1 [ dup length 1 = [ first ] [ <ebnf-sequence> ] if @@ -295,6 +299,10 @@ M: ebnf-action (transform) ( ast -- parser ) [ parser>> (transform) ] keep code>> vars get build-locals string-lines [ parse-lines ] with-compilation-unit action ; +M: ebnf-semantic (transform) ( ast -- parser ) + [ parser>> (transform) ] keep + code>> vars get build-locals string-lines [ parse-lines ] with-compilation-unit semantic ; + M: ebnf-var (transform) ( ast -- parser ) [ parser>> (transform) ] [ name>> ] bi dup vars get push [ dupd set ] curry action ; diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor index f57fe83220..fcec33f7c2 100644 --- a/extra/peg/peg-tests.factor +++ b/extra/peg/peg-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test strings namespaces arrays sequences peg peg.private accessors words ; +USING: kernel tools.test strings namespaces arrays sequences peg peg.private accessors words math ; IN: peg.tests { f } [ @@ -182,4 +182,13 @@ IN: peg.tests [ f , "a" token , ] seq* dup parsers>> dupd 0 swap set-nth compile word? -] unit-test \ No newline at end of file +] unit-test + +{ f } [ + "A" [ drop t ] satisfy [ 66 >= ] semantic parse +] unit-test + +{ CHAR: B } [ + "B" [ drop t ] satisfy [ 66 >= ] semantic parse parse-result-ast +] unit-test + From a41f8ef7338d565329ca8d0cb646e3746032ccd2 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Mon, 31 Mar 2008 17:26:42 +1300 Subject: [PATCH 9/9] Mention how to fail from action in pegs --- extra/peg/peg-docs.factor | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extra/peg/peg-docs.factor b/extra/peg/peg-docs.factor index c54a39b7b0..5f200be78e 100644 --- a/extra/peg/peg-docs.factor +++ b/extra/peg/peg-docs.factor @@ -137,7 +137,7 @@ HELP: action "Returns a parser that calls the 'p1' parser and applies the quotation to the AST resulting " "from that parse. The result of the quotation is then used as the final AST. This can be used " "for manipulating the parse tree to produce a AST better suited for the task at hand rather than " - "the default AST." } + "the default AST. If the quotation returns " { $link fail } " then the parser fails." } { $code "CHAR: 0 CHAR: 9 range [ to-digit ] action" } ; HELP: sp