Various peg/ebnf fixes
- Box parsers were broken when involved in left recursion detection
- ebnf no longer implicitly ignores white space between terminals/non-terminals
- ebnf now handles \t and \n in grammars so productions to detect white space work
- reset-delegates is now reset-pegs
db4
parent
122fd50d4a
commit
6b454eed36
|
@ -180,6 +180,55 @@ IN: peg.ebnf.tests
|
||||||
{ 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call parse-result-ast
|
{ 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call parse-result-ast
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
|
{ f } [
|
||||||
|
"ab" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ V{ "a" " " "b" } } [
|
||||||
|
"a b" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ V{ "a" "\t" "b" } } [
|
||||||
|
"a\tb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ V{ "a" "\n" "b" } } [
|
||||||
|
"a\nb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ V{ "a" f "b" } } [
|
||||||
|
"ab" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ V{ "a" " " "b" } } [
|
||||||
|
"a b" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
|
||||||
|
{ V{ "a" "\t" "b" } } [
|
||||||
|
"a\tb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ V{ "a" "\n" "b" } } [
|
||||||
|
"a\nb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ V{ "a" "b" } } [
|
||||||
|
"ab" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ V{ "a" "b" } } [
|
||||||
|
"a\tb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ V{ "a" "b" } } [
|
||||||
|
"a\nb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ f } [
|
||||||
|
"axb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call
|
||||||
|
] unit-test
|
||||||
|
|
||||||
{ V{ V{ 49 } "+" V{ 49 } } } [
|
{ V{ V{ 49 } "+" V{ 49 } } } [
|
||||||
#! Test direct left recursion.
|
#! Test direct left recursion.
|
||||||
#! Using packrat, so first part of expr fails, causing 2nd choice to be used
|
#! Using packrat, so first part of expr fails, causing 2nd choice to be used
|
||||||
|
@ -200,7 +249,7 @@ IN: peg.ebnf.tests
|
||||||
|
|
||||||
EBNF: primary
|
EBNF: primary
|
||||||
Primary = PrimaryNoNewArray
|
Primary = PrimaryNoNewArray
|
||||||
PrimaryNoNewArray = ClassInstanceCreationExpression
|
PrimaryNoNewArray = ClassInstanceCreationExpression
|
||||||
| MethodInvocation
|
| MethodInvocation
|
||||||
| FieldAccess
|
| FieldAccess
|
||||||
| ArrayAccess
|
| ArrayAccess
|
||||||
|
@ -211,7 +260,7 @@ MethodInvocation = Primary "." MethodName "(" ")"
|
||||||
| MethodName "(" ")"
|
| MethodName "(" ")"
|
||||||
FieldAccess = Primary "." Identifier
|
FieldAccess = Primary "." Identifier
|
||||||
| "super" "." Identifier
|
| "super" "." Identifier
|
||||||
ArrayAccess = Primary "[" Expression "]"
|
ArrayAccess = Primary "[" Expression "]"
|
||||||
| ExpressionName "[" Expression "]"
|
| ExpressionName "[" Expression "]"
|
||||||
ClassOrInterfaceType = ClassName | InterfaceTypeName
|
ClassOrInterfaceType = ClassName | InterfaceTypeName
|
||||||
ClassName = "C" | "D"
|
ClassName = "C" | "D"
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
USING: kernel compiler.units parser words arrays strings math.parser sequences
|
USING: kernel compiler.units parser words arrays strings math.parser sequences
|
||||||
quotations vectors namespaces math assocs continuations peg
|
quotations vectors namespaces math assocs continuations peg
|
||||||
peg.parsers unicode.categories multiline combinators.lib
|
peg.parsers unicode.categories multiline combinators.lib
|
||||||
splitting accessors effects sequences.deep ;
|
splitting accessors effects sequences.deep peg.search ;
|
||||||
IN: peg.ebnf
|
IN: peg.ebnf
|
||||||
|
|
||||||
TUPLE: ebnf-non-terminal symbol ;
|
TUPLE: ebnf-non-terminal symbol ;
|
||||||
|
@ -308,7 +308,7 @@ M: ebnf-var (transform) ( ast -- parser )
|
||||||
dup vars get push [ dupd set ] curry action ;
|
dup vars get push [ dupd set ] curry action ;
|
||||||
|
|
||||||
M: ebnf-terminal (transform) ( ast -- parser )
|
M: ebnf-terminal (transform) ( ast -- parser )
|
||||||
symbol>> token sp ;
|
symbol>> token ;
|
||||||
|
|
||||||
: parser-not-found ( name -- * )
|
: parser-not-found ( name -- * )
|
||||||
[
|
[
|
||||||
|
@ -317,7 +317,7 @@ M: ebnf-terminal (transform) ( ast -- parser )
|
||||||
|
|
||||||
M: ebnf-non-terminal (transform) ( ast -- parser )
|
M: ebnf-non-terminal (transform) ( ast -- parser )
|
||||||
symbol>> [
|
symbol>> [
|
||||||
, \ dup , parser get , \ at , [ parser-not-found ] , \ unless* , \ sp , \ nip ,
|
, \ dup , parser get , \ at , [ parser-not-found ] , \ unless* , \ nip ,
|
||||||
] [ ] make box ;
|
] [ ] make box ;
|
||||||
|
|
||||||
: transform-ebnf ( string -- object )
|
: transform-ebnf ( string -- object )
|
||||||
|
@ -340,10 +340,13 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
|
||||||
parse-result-ast transform dup dup parser [ main swap at compile ] with-variable
|
parse-result-ast transform dup dup parser [ main swap at compile ] with-variable
|
||||||
[ compiled-parse ] curry [ with-scope ] curry ;
|
[ compiled-parse ] curry [ with-scope ] curry ;
|
||||||
|
|
||||||
: [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing
|
: replace-escapes ( string -- string )
#! Rewrite the two-character escape sequences \t and \n found in
#! the input string into a real tab and a real newline character.
#! Each escape is matched with a token parser whose action drops
#! the matched text and substitutes the actual character; the
#! two alternatives are combined with 2choice and applied across
#! the string with replace.
|
||||||
|
"\\t" token [ drop "\t" ] action "\\n" token [ drop "\n" ] action 2choice replace ;
|
||||||
|
|
||||||
|
: [EBNF "EBNF]" parse-multiline-string replace-escapes ebnf>quot nip parsed ; parsing
|
||||||
|
|
||||||
: EBNF:
|
: EBNF:
|
||||||
CREATE-WORD dup
|
CREATE-WORD dup
|
||||||
";EBNF" parse-multiline-string
|
";EBNF" parse-multiline-string replace-escapes
|
||||||
ebnf>quot swapd 1 1 <effect> define-declared "ebnf-parser" set-word-prop ; parsing
|
ebnf>quot swapd 1 1 <effect> define-declared "ebnf-parser" set-word-prop ; parsing
|
||||||
|
|
||||||
|
|
|
@ -30,6 +30,14 @@ SYMBOL: fail
|
||||||
SYMBOL: lrstack
|
SYMBOL: lrstack
|
||||||
SYMBOL: heads
|
SYMBOL: heads
|
||||||
|
|
||||||
|
: delegates ( -- cache )
#! Fetch the value stored globally under the word delegates.
#! When nothing is stored yet, clone an empty hashtable, store
#! it globally under the same word and return it.
|
||||||
|
\ delegates get-global [ H{ } clone dup \ delegates set-global ] unless* ;
|
||||||
|
|
||||||
|
: reset-pegs ( -- )
#! Throw away whatever is currently stored globally under the
#! word delegates by replacing it with a fresh, empty hashtable.
|
||||||
|
H{ } clone \ delegates set-global ;
|
||||||
|
|
||||||
|
reset-pegs
|
||||||
|
|
||||||
TUPLE: memo-entry ans pos ;
|
TUPLE: memo-entry ans pos ;
|
||||||
C: <memo-entry> memo-entry
|
C: <memo-entry> memo-entry
|
||||||
|
|
||||||
|
@ -253,14 +261,6 @@ SYMBOL: id
|
||||||
1 id set-global 0
|
1 id set-global 0
|
||||||
] if* ;
|
] if* ;
|
||||||
|
|
||||||
: delegates ( -- cache )
|
|
||||||
\ delegates get-global [ H{ } clone dup \ delegates set-global ] unless* ;
|
|
||||||
|
|
||||||
: reset-delegates ( -- )
|
|
||||||
H{ } clone \ delegates set-global ;
|
|
||||||
|
|
||||||
reset-delegates
|
|
||||||
|
|
||||||
: init-parser ( parser -- parser )
|
: init-parser ( parser -- parser )
|
||||||
#! Set the delegate for the parser. Equivalent parsers
|
#! Set the delegate for the parser. Equivalent parsers
|
||||||
#! get a delegate with the same id.
|
#! get a delegate with the same id.
|
||||||
|
@ -590,7 +590,13 @@ PRIVATE>
|
||||||
#! not a cached one. This is because the same box,
|
#! not a cached one. This is because the same box,
|
||||||
#! compiled twice can have a different compiled word
|
#! compiled twice can have a different compiled word
|
||||||
#! due to running at compile time.
|
#! due to running at compile time.
|
||||||
box-parser construct-boa next-id f <parser> over set-delegate ;
|
#! Why the [ ] action at the end? Box parsers don't get
|
||||||
|
#! memoized during parsing due to all box parsers being
|
||||||
|
#! unique. This breaks left recursion detection during the
|
||||||
|
#! parse. The action adds an indirection with a parser type
|
||||||
|
#! that gets memoized and fixes this. Need to rethink how
|
||||||
|
#! to fix boxes so this isn't needed...
|
||||||
|
box-parser construct-boa next-id f <parser> over set-delegate [ ] action ;
|
||||||
|
|
||||||
: PEG:
|
: PEG:
|
||||||
(:) [
|
(:) [
|
||||||
|
|
Loading…
Reference in New Issue