Various peg/ebnf fixes

- Box parsers were broken when involved in left recursion detection
- ebnf no longer implicitly ignores white space between terminals/non-terminals
- ebnf now handles \t and \n in grammars so productions to detect white space work
- reset-delegates is now reset-pegs
db4
Chris Double 2008-04-02 12:59:12 +13:00
parent 122fd50d4a
commit 6b454eed36
3 changed files with 74 additions and 16 deletions

View File

@ -180,6 +180,55 @@ IN: peg.ebnf.tests
{ 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call parse-result-ast
] unit-test
#! White space is no longer skipped implicitly between terminals.
#! In the tests below the rule named '-' matches one white space
#! character (space, tab or newline) and must be placed explicitly
#! in the 'foo' rule.
#! Group 1: '-' is mandatory between "a" and "b". Input without
#! white space gives f; otherwise the matched white space
#! appears in the resulting AST.
{ f } [
"ab" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call
] unit-test
{ V{ "a" " " "b" } } [
"a b" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "\t" "b" } } [
"a\tb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "\n" "b" } } [
"a\nb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
] unit-test
#! Group 2: '(-)?' makes the white space optional. When it is
#! absent the AST holds f in its place.
{ V{ "a" f "b" } } [
"ab" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" " " "b" } } [
"a b" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "\t" "b" } } [
"a\tb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "\n" "b" } } [
"a\nb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
] unit-test
#! Group 3: the '-' rule itself is optional and its action
#! returns 'ignore', so nothing is left in the AST for it.
#! A non white space character between "a" and "b" still
#! makes the parse return f.
{ V{ "a" "b" } } [
"ab" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "b" } } [
"a\tb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "b" } } [
"a\nb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ f } [
"axb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call
] unit-test
{ V{ V{ 49 } "+" V{ 49 } } } [
#! Test direct left recursion.
#! Using packrat, so first part of expr fails, causing 2nd choice to be used
@ -200,7 +249,7 @@ IN: peg.ebnf.tests
EBNF: primary
Primary = PrimaryNoNewArray
PrimaryNoNewArray = ClassInstanceCreationExpression
PrimaryNoNewArray = ClassInstanceCreationExpression
| MethodInvocation
| FieldAccess
| ArrayAccess
@ -211,7 +260,7 @@ MethodInvocation = Primary "." MethodName "(" ")"
| MethodName "(" ")"
FieldAccess = Primary "." Identifier
| "super" "." Identifier
ArrayAccess = Primary "[" Expression "]"
ArrayAccess = Primary "[" Expression "]"
| ExpressionName "[" Expression "]"
ClassOrInterfaceType = ClassName | InterfaceTypeName
ClassName = "C" | "D"

View File

@ -3,7 +3,7 @@
USING: kernel compiler.units parser words arrays strings math.parser sequences
quotations vectors namespaces math assocs continuations peg
peg.parsers unicode.categories multiline combinators.lib
splitting accessors effects sequences.deep ;
splitting accessors effects sequences.deep peg.search ;
IN: peg.ebnf
TUPLE: ebnf-non-terminal symbol ;
@ -308,7 +308,7 @@ M: ebnf-var (transform) ( ast -- parser )
dup vars get push [ dupd set ] curry action ;
M: ebnf-terminal (transform) ( ast -- parser )
symbol>> token sp ;
symbol>> token ;
: parser-not-found ( name -- * )
[
@ -317,7 +317,7 @@ M: ebnf-terminal (transform) ( ast -- parser )
M: ebnf-non-terminal (transform) ( ast -- parser )
symbol>> [
, \ dup , parser get , \ at , [ parser-not-found ] , \ unless* , \ sp , \ nip ,
, \ dup , parser get , \ at , [ parser-not-found ] , \ unless* , \ nip ,
] [ ] make box ;
: transform-ebnf ( string -- object )
@ -340,10 +340,13 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
parse-result-ast transform dup dup parser [ main swap at compile ] with-variable
[ compiled-parse ] curry [ with-scope ] curry ;
: [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing
: replace-escapes ( string -- string )
#! Turn the two character sequences backslash-t and
#! backslash-n in the input string into a literal tab and a
#! literal newline. Two token parsers, each with an action
#! producing the replacement character, are combined with
#! 2choice and handed to 'replace'.
#! NOTE(review): 'replace' presumably comes from peg.search
#! and substitutes every match in the string - confirm.
"\\t" token [ drop "\t" ] action "\\n" token [ drop "\n" ] action 2choice replace ;
: [EBNF "EBNF]" parse-multiline-string replace-escapes ebnf>quot nip parsed ; parsing
: EBNF:
CREATE-WORD dup
";EBNF" parse-multiline-string
";EBNF" parse-multiline-string replace-escapes
ebnf>quot swapd 1 1 <effect> define-declared "ebnf-parser" set-word-prop ; parsing

View File

@ -30,6 +30,14 @@ SYMBOL: fail
SYMBOL: lrstack
SYMBOL: heads
: delegates ( -- cache )
#! Return the value of the global 'delegates' variable. If
#! the global is not set (f), a fresh empty hashtable is
#! created, stored in the global and returned instead.
\ delegates get-global [ H{ } clone dup \ delegates set-global ] unless* ;
: reset-pegs ( -- )
#! Store a fresh empty hashtable in the global 'delegates'
#! variable, replacing whatever was held there before.
H{ } clone \ delegates set-global ;
#! Top-level call: runs when this source file is loaded, so
#! the global table starts out empty.
reset-pegs
TUPLE: memo-entry ans pos ;
C: <memo-entry> memo-entry
@ -253,14 +261,6 @@ SYMBOL: id
1 id set-global 0
] if* ;
: delegates ( -- cache )
\ delegates get-global [ H{ } clone dup \ delegates set-global ] unless* ;
: reset-delegates ( -- )
H{ } clone \ delegates set-global ;
reset-delegates
: init-parser ( parser -- parser )
#! Set the delegate for the parser. Equivalent parsers
#! get a delegate with the same id.
@ -590,7 +590,13 @@ PRIVATE>
#! not a cached one. This is because the same box,
#! compiled twice can have a different compiled word
#! due to running at compile time.
box-parser construct-boa next-id f <parser> over set-delegate ;
#! Why the [ ] action at the end? Box parsers don't get
#! memoized during parsing due to all box parsers being
#! unique. This breaks left recursion detection during the
#! parse. The action adds an indirection with a parser type
#! that gets memoized and fixes this. Need to rethink how
#! to fix boxes so this isn't needed...
box-parser construct-boa next-id f <parser> over set-delegate [ ] action ;
: PEG:
(:) [