diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 7aa61e84da..4f802c5207 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test peg peg.ebnf words ; +USING: kernel tools.test peg peg.ebnf words math math.parser ; IN: peg.ebnf.tests { T{ ebnf-non-terminal f "abc" } } [ @@ -160,6 +160,25 @@ IN: peg.ebnf.tests "1-1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF] call parse-result-ast ] unit-test +{ 6 } [ + "4+2" [EBNF num=[0-9] => [[ digit> ]] foo=num:x '+' num:y => [[ drop x y + ]] EBNF] call parse-result-ast +] unit-test + +{ 6 } [ + "4+2" [EBNF foo=[0-9]:x '+' [0-9]:y => [[ drop x digit> y digit> + ]] EBNF] call parse-result-ast +] unit-test + +{ 10 } [ + { 1 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call parse-result-ast +] unit-test + +{ f } [ + { "a" 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call +] unit-test + +{ 3 } [ + { 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call parse-result-ast +] unit-test { V{ V{ 49 } "+" V{ 49 } } } [ #! Test direct left recursion. diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index af61c3aae0..4f00edbd3c 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -3,7 +3,7 @@ USING: kernel compiler.units parser words arrays strings math.parser sequences quotations vectors namespaces math assocs continuations peg peg.parsers unicode.categories multiline combinators.lib - splitting accessors ; + splitting accessors effects sequences.deep ; IN: peg.ebnf TUPLE: ebnf-non-terminal symbol ; @@ -19,6 +19,8 @@ TUPLE: ebnf-repeat1 group ; TUPLE: ebnf-optional group ; TUPLE: ebnf-rule symbol elements ; TUPLE: ebnf-action parser code ; +TUPLE: ebnf-var parser name ; +TUPLE: ebnf-semantic parser code ; TUPLE: ebnf rules ; C: <ebnf-non-terminal> ebnf-non-terminal @@ -34,6 +36,8 @@ C: <ebnf-repeat1> ebnf-repeat1 C: <ebnf-optional> ebnf-optional C: <ebnf-rule> ebnf-rule C: <ebnf-action> ebnf-action +C: <ebnf-var> ebnf-var +C: <ebnf-semantic> ebnf-semantic C: <ebnf> ebnf : syntax ( string -- parser ) @@ -79,6 +83,7 @@ C: <ebnf> ebnf [ dup CHAR: * = ] [ dup CHAR: + = ] [ dup CHAR: ? = ] + [ dup CHAR: : = ] } || not nip ] satisfy repeat1 [ >string <ebnf-non-terminal> ] action ; @@ -99,7 +104,7 @@ C: <ebnf> ebnf "]" syntax , ] seq* [ first >string <ebnf-range> ] action ; -: 'element' ( -- parser ) +: ('element') ( -- parser ) #! An element of a rule. It can be a terminal or a #! non-terminal but must not be followed by a "=". #! The latter indicates that it is the beginning of a @@ -117,6 +122,12 @@ C: <ebnf> ebnf ] choice* , ] seq* [ first ] action ; +: 'element' ( -- parser ) + [ + [ ('element') , ":" syntax , "a-zA-Z" range-pattern repeat1 [ >string ] action , ] seq* [ first2 <ebnf-var> ] action , + ('element') , + ] choice* ; + DEFER: 'choice' : grouped ( quot suffix -- parser ) @@ -147,6 +158,7 @@ DEFER: 'choice' : 'factor-code' ( -- parser ) [ "]]" token ensure-not , + "]?" token ensure-not , [ drop t ] satisfy , ] seq* [ first ] action repeat0 [ >string ] action ; @@ -184,14 +196,15 @@ DEFER: 'choice' : 'action' ( -- parser ) "[[" 'factor-code' "]]" syntax-pack ; +: 'semantic' ( -- parser ) + "?[" 'factor-code' "]?" syntax-pack ; + : 'sequence' ( -- parser ) #! A sequence of terminals and non-terminals, including #! groupings of those. [ - [ - ('sequence') , - 'action' , - ] seq* [ first2 <ebnf-action> ] action , + [ ('sequence') , 'action' , ] seq* [ first2 <ebnf-action> ] action , + [ ('sequence') , 'semantic' , ] seq* [ first2 <ebnf-semantic> ] action , ('sequence') , ] choice* repeat1 [ dup length 1 = [ first ] [ <ebnf-sequence> ] if @@ -200,6 +213,7 @@ DEFER: 'choice' : 'actioned-sequence' ( -- parser ) [ [ 'sequence' , "=>" syntax , 'action' , ] seq* [ first2 <ebnf-action> ] action , + [ 'sequence' , ":" syntax , "a-zA-Z" range-pattern repeat1 [ >string ] action , ] seq* [ first2 <ebnf-var> ] action , 'sequence' , ] choice* ; @@ -223,15 +237,17 @@ GENERIC: (transform) ( ast -- parser ) SYMBOL: parser SYMBOL: main +SYMBOL: vars : transform ( ast -- object ) - H{ } clone dup dup [ parser set swap (transform) main set ] bind ; + H{ } clone dup dup [ parser set V{ } vars set swap (transform) main set ] bind ; M: ebnf (transform) ( ast -- parser ) rules>> [ (transform) ] map peek ; M: ebnf-rule (transform) ( ast -- parser ) - dup elements>> (transform) [ + dup elements>> + vars get clone vars [ (transform) ] with-variable [ swap symbol>> set ] keep ; @@ -266,9 +282,30 @@ M: ebnf-repeat1 (transform) ( ast -- parser ) M: ebnf-optional (transform) ( ast -- parser ) transform-group optional ; +: build-locals ( string vars -- string ) + dup empty? [ + drop + ] [ + [ + "USING: locals namespaces ; [let* | " % + [ dup % " [ \"" % % "\" get ] " % ] each + " | " % + % + " ] with-locals" % + ] "" make + ] if ; + M: ebnf-action (transform) ( ast -- parser ) [ parser>> (transform) ] keep - code>> string-lines [ parse-lines ] with-compilation-unit action ; + code>> vars get build-locals string-lines [ parse-lines ] with-compilation-unit action ; + +M: ebnf-semantic (transform) ( ast -- parser ) + [ parser>> (transform) ] keep + code>> vars get build-locals string-lines [ parse-lines ] with-compilation-unit semantic ; + +M: ebnf-var (transform) ( ast -- parser ) + [ parser>> (transform) ] [ name>> ] bi + dup vars get push [ dupd set ] curry action ; M: ebnf-terminal (transform) ( ast -- parser ) symbol>> token sp ; @@ -296,12 +333,12 @@ M: ebnf-non-terminal (transform) ( ast -- parser ) : ebnf>quot ( string -- hashtable quot ) 'ebnf' parse check-parse-result parse-result-ast transform dup dup parser [ main swap at compile ] with-variable - [ compiled-parse ] curry ; + [ compiled-parse ] curry [ with-scope ] curry ; : [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing : EBNF: CREATE-WORD dup ";EBNF" parse-multiline-string - ebnf>quot swapd define "ebnf-parser" set-word-prop ; parsing + ebnf>quot swapd 1 1 <effect> define-declared "ebnf-parser" set-word-prop ; parsing diff --git a/extra/peg/peg-docs.factor b/extra/peg/peg-docs.factor index e7bd255569..5f200be78e 100644 --- a/extra/peg/peg-docs.factor +++ b/extra/peg/peg-docs.factor @@ -95,6 +95,19 @@ HELP: optional "Returns a parser that parses 0 or 1 instances of the 'p1' parser. The AST produced is " "'f' if 0 instances are parsed the AST produced is 'f', otherwise it is the AST produced by 'p1'." } ; +HELP: semantic +{ $values + { "parser" "a parser" } + { "quot" "a quotation with stack effect ( object -- bool )" } +} +{ $description + "Returns a parser that succeeds if the 'p1' parser succeeds and the quotation called with " + "the AST produced by 'p1' on the stack returns true." } +{ $examples + { $example "\"A\" [ drop t ] satisfy [ 66 > ] semantic parse" "f" } + { $example "\"C\" [ drop t ] satisfy [ 66 > ] semantic parse parse-result-ast " "67" } +} ; + HELP: ensure { $values { "parser" "a parser" } @@ -124,7 +137,7 @@ HELP: action "Returns a parser that calls the 'p1' parser and applies the quotation to the AST resulting " "from that parse. The result of the quotation is then used as the final AST. This can be used " "for manipulating the parse tree to produce a AST better suited for the task at hand rather than " - "the default AST." } + "the default AST. If the quotation returns " { $link fail } " then the parser fails." } { $code "CHAR: 0 CHAR: 9 range [ to-digit ] action" } ; HELP: sp diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor index f57fe83220..fcec33f7c2 100644 --- a/extra/peg/peg-tests.factor +++ b/extra/peg/peg-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test strings namespaces arrays sequences peg peg.private accessors words ; +USING: kernel tools.test strings namespaces arrays sequences peg peg.private accessors words math ; IN: peg.tests { f } [ @@ -182,4 +182,13 @@ IN: peg.tests [ f , "a" token , ] seq* dup parsers>> dupd 0 swap set-nth compile word? -] unit-test \ No newline at end of file +] unit-test + +{ f } [ + "A" [ drop t ] satisfy [ 66 >= ] semantic parse +] unit-test + +{ CHAR: B } [ + "B" [ drop t ] satisfy [ 66 >= ] semantic parse parse-result-ast +] unit-test + diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 68aab7d820..9e35c5b9be 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -3,7 +3,7 @@ USING: kernel sequences strings namespaces math assocs shuffle vectors arrays combinators.lib math.parser match unicode.categories sequences.lib compiler.units parser - words quotations effects memoize accessors locals ; + words quotations effects memoize accessors locals effects ; IN: peg USE: prettyprint @@ -208,7 +208,7 @@ GENERIC: (compile) ( parser -- quot ) :: parser-body ( parser -- quot ) #! Return the body of the word that is the compiled version #! of the parser. - [let* | rule [ parser (compile) define-temp dup parser "peg" set-word-prop ] + [let* | rule [ gensym dup parser (compile) 0 1 <effect> define-declared dup parser "peg" set-word-prop ] | [ rule pos get apply-rule dup fail = [ @@ -218,7 +218,7 @@ GENERIC: (compile) ( parser -- quot ) ] if ] ] ; - + : compiled-parser ( parser -- word ) #! Look to see if the given parser has been compiled. #! If not, compile it to a temporary word, cache it, @@ -229,7 +229,7 @@ GENERIC: (compile) ( parser -- quot ) dup compiled>> [ nip ] [ - gensym tuck >>compiled 2dup parser-body define dupd "peg" set-word-prop + gensym tuck >>compiled 2dup parser-body 0 1 <effect> define-declared dupd "peg" set-word-prop ] if* ; : compile ( parser -- word ) @@ -414,6 +414,23 @@ TUPLE: optional-parser p1 ; M: optional-parser (compile) ( parser -- quot ) p1>> compiled-parser \ ?quot optional-pattern match-replace ; +TUPLE: semantic-parser p1 quot ; + +MATCH-VARS: ?parser ; + +: semantic-pattern ( -- quot ) + [ + ?parser [ + dup parse-result-ast ?quot call [ drop f ] unless + ] [ + f + ] if* + ] ; + +M: semantic-parser (compile) ( parser -- quot ) + [ p1>> compiled-parser ] [ quot>> ] bi + 2array { ?parser ?quot } semantic-pattern match-replace ; + TUPLE: ensure-parser p1 ; : ensure-pattern ( -- quot ) @@ -490,8 +507,11 @@ M: box-parser (compile) ( parser -- quot ) #! Calls the quotation at compile time #! to produce the parser to be compiled. #! This differs from 'delay' which calls - #! it at run time. - quot>> call compiled-parser 1quotation ; + #! it at run time. Due to using the runtime + #! environment at compile time, this parser + #! must not be cached, so we clear out the + #! delgates cache. + f >>compiled quot>> call compiled-parser 1quotation ; PRIVATE> @@ -543,6 +563,9 @@ PRIVATE> : optional ( parser -- parser ) optional-parser construct-boa init-parser ; +: semantic ( parser quot -- parser ) + semantic-parser construct-boa init-parser ; + : ensure ( parser -- parser ) ensure-parser construct-boa init-parser ; @@ -562,7 +585,12 @@ PRIVATE> delay-parser construct-boa init-parser ; : box ( quot -- parser ) - box-parser construct-boa init-parser ; + #! because a box has its quotation run at compile time + #! it must always have a new parser delgate created, + #! not a cached one. This is because the same box, + #! compiled twice can have a different compiled word + #! due to running at compile time. + box-parser construct-boa next-id f <parser> over set-delegate ; : PEG: (:) [