Various peg/ebnf fixes
- Box parsers were broken when involved in left recursion detection
- ebnf no longer implicitly ignores white space between terminals/non-terminals
- ebnf now handles \t and \n in grammars so productions to detect white space work
- reset-delegates is now reset-pegs
db4
parent
122fd50d4a
commit
6b454eed36
|
@ -180,6 +180,55 @@ IN: peg.ebnf.tests
|
|||
{ 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ f } [
|
||||
"ab" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call
|
||||
] unit-test
|
||||
|
||||
{ V{ "a" " " "b" } } [
|
||||
"a b" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ V{ "a" "\t" "b" } } [
|
||||
"a\tb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ V{ "a" "\n" "b" } } [
|
||||
"a\nb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ V{ "a" f "b" } } [
|
||||
"ab" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ V{ "a" " " "b" } } [
|
||||
"a b" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
|
||||
{ V{ "a" "\t" "b" } } [
|
||||
"a\tb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ V{ "a" "\n" "b" } } [
|
||||
"a\nb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ V{ "a" "b" } } [
|
||||
"ab" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ V{ "a" "b" } } [
|
||||
"a\tb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ V{ "a" "b" } } [
|
||||
"a\nb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ f } [
|
||||
"axb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call
|
||||
] unit-test
|
||||
|
||||
{ V{ V{ 49 } "+" V{ 49 } } } [
|
||||
#! Test direct left recursion.
|
||||
#! Using packrat, so first part of expr fails, causing 2nd choice to be used
|
||||
|
@ -200,7 +249,7 @@ IN: peg.ebnf.tests
|
|||
|
||||
EBNF: primary
|
||||
Primary = PrimaryNoNewArray
|
||||
PrimaryNoNewArray = ClassInstanceCreationExpression
|
||||
PrimaryNoNewArray = ClassInstanceCreationExpression
|
||||
| MethodInvocation
|
||||
| FieldAccess
|
||||
| ArrayAccess
|
||||
|
@ -211,7 +260,7 @@ MethodInvocation = Primary "." MethodName "(" ")"
|
|||
| MethodName "(" ")"
|
||||
FieldAccess = Primary "." Identifier
|
||||
| "super" "." Identifier
|
||||
ArrayAccess = Primary "[" Expression "]"
|
||||
ArrayAccess = Primary "[" Expression "]"
|
||||
| ExpressionName "[" Expression "]"
|
||||
ClassOrInterfaceType = ClassName | InterfaceTypeName
|
||||
ClassName = "C" | "D"
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
USING: kernel compiler.units parser words arrays strings math.parser sequences
|
||||
quotations vectors namespaces math assocs continuations peg
|
||||
peg.parsers unicode.categories multiline combinators.lib
|
||||
splitting accessors effects sequences.deep ;
|
||||
splitting accessors effects sequences.deep peg.search ;
|
||||
IN: peg.ebnf
|
||||
|
||||
TUPLE: ebnf-non-terminal symbol ;
|
||||
|
@ -308,7 +308,7 @@ M: ebnf-var (transform) ( ast -- parser )
|
|||
dup vars get push [ dupd set ] curry action ;
|
||||
|
||||
M: ebnf-terminal (transform) ( ast -- parser )
|
||||
symbol>> token sp ;
|
||||
symbol>> token ;
|
||||
|
||||
: parser-not-found ( name -- * )
|
||||
[
|
||||
|
@ -317,7 +317,7 @@ M: ebnf-terminal (transform) ( ast -- parser )
|
|||
|
||||
M: ebnf-non-terminal (transform) ( ast -- parser )
|
||||
symbol>> [
|
||||
, \ dup , parser get , \ at , [ parser-not-found ] , \ unless* , \ sp , \ nip ,
|
||||
, \ dup , parser get , \ at , [ parser-not-found ] , \ unless* , \ nip ,
|
||||
] [ ] make box ;
|
||||
|
||||
: transform-ebnf ( string -- object )
|
||||
|
@ -340,10 +340,13 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
|
|||
parse-result-ast transform dup dup parser [ main swap at compile ] with-variable
|
||||
[ compiled-parse ] curry [ with-scope ] curry ;
|
||||
|
||||
: [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing
|
||||
: replace-escapes ( string -- string )
|
||||
"\\t" token [ drop "\t" ] action "\\n" token [ drop "\n" ] action 2choice replace ;
|
||||
|
||||
: [EBNF "EBNF]" parse-multiline-string replace-escapes ebnf>quot nip parsed ; parsing
|
||||
|
||||
: EBNF:
|
||||
CREATE-WORD dup
|
||||
";EBNF" parse-multiline-string
|
||||
";EBNF" parse-multiline-string replace-escapes
|
||||
ebnf>quot swapd 1 1 <effect> define-declared "ebnf-parser" set-word-prop ; parsing
|
||||
|
||||
|
|
|
@ -30,6 +30,14 @@ SYMBOL: fail
|
|||
SYMBOL: lrstack
|
||||
SYMBOL: heads
|
||||
|
||||
: delegates ( -- cache )
|
||||
\ delegates get-global [ H{ } clone dup \ delegates set-global ] unless* ;
|
||||
|
||||
: reset-pegs ( -- )
|
||||
H{ } clone \ delegates set-global ;
|
||||
|
||||
reset-pegs
|
||||
|
||||
TUPLE: memo-entry ans pos ;
|
||||
C: <memo-entry> memo-entry
|
||||
|
||||
|
@ -253,14 +261,6 @@ SYMBOL: id
|
|||
1 id set-global 0
|
||||
] if* ;
|
||||
|
||||
: delegates ( -- cache )
|
||||
\ delegates get-global [ H{ } clone dup \ delegates set-global ] unless* ;
|
||||
|
||||
: reset-delegates ( -- )
|
||||
H{ } clone \ delegates set-global ;
|
||||
|
||||
reset-delegates
|
||||
|
||||
: init-parser ( parser -- parser )
|
||||
#! Set the delegate for the parser. Equivalent parsers
|
||||
#! get a delegate with the same id.
|
||||
|
@ -590,7 +590,13 @@ PRIVATE>
|
|||
#! not a cached one. This is because the same box,
|
||||
#! compiled twice can have a different compiled word
|
||||
#! due to running at compile time.
|
||||
box-parser construct-boa next-id f <parser> over set-delegate ;
|
||||
#! Why the [ ] action at the end? Box parsers don't get
|
||||
#! memoized during parsing due to all box parsers being
|
||||
#! unique. This breaks left recursion detection during the
|
||||
#! parse. The action adds an indirection with a parser type
|
||||
#! that gets memoized and fixes this. Need to rethink how
|
||||
#! to fix boxes so this isn't needed...
|
||||
box-parser construct-boa next-id f <parser> over set-delegate [ ] action ;
|
||||
|
||||
: PEG:
|
||||
(:) [
|
||||
|
|
Loading…
Reference in New Issue