Various peg/ebnf fixes

- Box parsers were broken when involved in left recursion detection
- ebnf no longer implicitly ignores white space between terminals/non-terminals
- ebnf now handles the \t and \n escape sequences in grammars, so productions that detect white space work
- reset-delegates is now reset-pegs
db4
Chris Double 2008-04-02 12:59:12 +13:00
parent 122fd50d4a
commit 6b454eed36
3 changed files with 74 additions and 16 deletions

View File

@ -180,6 +180,55 @@ IN: peg.ebnf.tests
{ 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call parse-result-ast { 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call parse-result-ast
] unit-test ] unit-test
{ f } [
"ab" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call
] unit-test
{ V{ "a" " " "b" } } [
"a b" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "\t" "b" } } [
"a\tb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "\n" "b" } } [
"a\nb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" f "b" } } [
"ab" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" " " "b" } } [
"a b" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "\t" "b" } } [
"a\tb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "\n" "b" } } [
"a\nb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "b" } } [
"ab" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "b" } } [
"a\tb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "b" } } [
"a\nb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ f } [
"axb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call
] unit-test
{ V{ V{ 49 } "+" V{ 49 } } } [ { V{ V{ 49 } "+" V{ 49 } } } [
#! Test direct left recursion. #! Test direct left recursion.
#! Using packrat, so first part of expr fails, causing 2nd choice to be used #! Using packrat, so first part of expr fails, causing 2nd choice to be used
@ -200,7 +249,7 @@ IN: peg.ebnf.tests
EBNF: primary EBNF: primary
Primary = PrimaryNoNewArray Primary = PrimaryNoNewArray
PrimaryNoNewArray = ClassInstanceCreationExpression PrimaryNoNewArray = ClassInstanceCreationExpression
| MethodInvocation | MethodInvocation
| FieldAccess | FieldAccess
| ArrayAccess | ArrayAccess
@ -211,7 +260,7 @@ MethodInvocation = Primary "." MethodName "(" ")"
| MethodName "(" ")" | MethodName "(" ")"
FieldAccess = Primary "." Identifier FieldAccess = Primary "." Identifier
| "super" "." Identifier | "super" "." Identifier
ArrayAccess = Primary "[" Expression "]" ArrayAccess = Primary "[" Expression "]"
| ExpressionName "[" Expression "]" | ExpressionName "[" Expression "]"
ClassOrInterfaceType = ClassName | InterfaceTypeName ClassOrInterfaceType = ClassName | InterfaceTypeName
ClassName = "C" | "D" ClassName = "C" | "D"

View File

@ -3,7 +3,7 @@
USING: kernel compiler.units parser words arrays strings math.parser sequences USING: kernel compiler.units parser words arrays strings math.parser sequences
quotations vectors namespaces math assocs continuations peg quotations vectors namespaces math assocs continuations peg
peg.parsers unicode.categories multiline combinators.lib peg.parsers unicode.categories multiline combinators.lib
splitting accessors effects sequences.deep ; splitting accessors effects sequences.deep peg.search ;
IN: peg.ebnf IN: peg.ebnf
TUPLE: ebnf-non-terminal symbol ; TUPLE: ebnf-non-terminal symbol ;
@ -308,7 +308,7 @@ M: ebnf-var (transform) ( ast -- parser )
dup vars get push [ dupd set ] curry action ; dup vars get push [ dupd set ] curry action ;
M: ebnf-terminal (transform) ( ast -- parser ) M: ebnf-terminal (transform) ( ast -- parser )
symbol>> token sp ; symbol>> token ;
: parser-not-found ( name -- * ) : parser-not-found ( name -- * )
[ [
@ -317,7 +317,7 @@ M: ebnf-terminal (transform) ( ast -- parser )
M: ebnf-non-terminal (transform) ( ast -- parser ) M: ebnf-non-terminal (transform) ( ast -- parser )
symbol>> [ symbol>> [
, \ dup , parser get , \ at , [ parser-not-found ] , \ unless* , \ sp , \ nip , , \ dup , parser get , \ at , [ parser-not-found ] , \ unless* , \ nip ,
] [ ] make box ; ] [ ] make box ;
: transform-ebnf ( string -- object ) : transform-ebnf ( string -- object )
@ -340,10 +340,13 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
parse-result-ast transform dup dup parser [ main swap at compile ] with-variable parse-result-ast transform dup dup parser [ main swap at compile ] with-variable
[ compiled-parse ] curry [ with-scope ] curry ; [ compiled-parse ] curry [ with-scope ] curry ;
: [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing : replace-escapes ( string -- string )
"\\t" token [ drop "\t" ] action "\\n" token [ drop "\n" ] action 2choice replace ;
: [EBNF "EBNF]" parse-multiline-string replace-escapes ebnf>quot nip parsed ; parsing
: EBNF: : EBNF:
CREATE-WORD dup CREATE-WORD dup
";EBNF" parse-multiline-string ";EBNF" parse-multiline-string replace-escapes
ebnf>quot swapd 1 1 <effect> define-declared "ebnf-parser" set-word-prop ; parsing ebnf>quot swapd 1 1 <effect> define-declared "ebnf-parser" set-word-prop ; parsing

View File

@ -30,6 +30,14 @@ SYMBOL: fail
SYMBOL: lrstack SYMBOL: lrstack
SYMBOL: heads SYMBOL: heads
: delegates ( -- cache )
\ delegates get-global [ H{ } clone dup \ delegates set-global ] unless* ;
: reset-pegs ( -- )
H{ } clone \ delegates set-global ;
reset-pegs
TUPLE: memo-entry ans pos ; TUPLE: memo-entry ans pos ;
C: <memo-entry> memo-entry C: <memo-entry> memo-entry
@ -253,14 +261,6 @@ SYMBOL: id
1 id set-global 0 1 id set-global 0
] if* ; ] if* ;
: delegates ( -- cache )
\ delegates get-global [ H{ } clone dup \ delegates set-global ] unless* ;
: reset-delegates ( -- )
H{ } clone \ delegates set-global ;
reset-delegates
: init-parser ( parser -- parser ) : init-parser ( parser -- parser )
#! Set the delegate for the parser. Equivalent parsers #! Set the delegate for the parser. Equivalent parsers
#! get a delegate with the same id. #! get a delegate with the same id.
@ -590,7 +590,13 @@ PRIVATE>
#! not a cached one. This is because the same box, #! not a cached one. This is because the same box,
#! compiled twice can have a different compiled word #! compiled twice can have a different compiled word
#! due to running at compile time. #! due to running at compile time.
box-parser construct-boa next-id f <parser> over set-delegate ; #! Why the [ ] action at the end? Box parsers don't get
#! memoized during parsing due to all box parsers being
#! unique. This breaks left recursion detection during the
#! parse. The action adds an indirection with a parser type
#! that gets memoized and fixes this. Need to rethink how
#! to fix boxes so this isn't needed...
box-parser construct-boa next-id f <parser> over set-delegate [ ] action ;
: PEG: : PEG:
(:) [ (:) [