Add support for ensure-not and parsing any single character to EBNF

This allows, for example:
  foo = {!("_" | "-") .}

This will match zero or more characters, stopping at the first _ or -.
db4
Chris Double 2008-03-19 19:15:52 +13:00
parent 208c88c449
commit 64135b73e1
2 changed files with 28 additions and 0 deletions

View File

@ -114,3 +114,4 @@ IN: peg.ebnf.tests
"foo]" 'non-terminal' parse parse-result-ast ebnf-non-terminal-symbol "foo]" 'non-terminal' parse parse-result-ast ebnf-non-terminal-symbol
] unit-test ] unit-test

View File

@ -7,6 +7,8 @@ IN: peg.ebnf
TUPLE: ebnf-non-terminal symbol ; TUPLE: ebnf-non-terminal symbol ;
TUPLE: ebnf-terminal symbol ; TUPLE: ebnf-terminal symbol ;
#! AST node for the "." syntax (match any single character). It has no slots.
TUPLE: ebnf-any-character ;
#! AST node for the "!" syntax. The group slot holds the AST of the
#! element that must not match.
TUPLE: ebnf-ensure-not group ;
TUPLE: ebnf-choice options ; TUPLE: ebnf-choice options ;
TUPLE: ebnf-sequence elements ; TUPLE: ebnf-sequence elements ;
TUPLE: ebnf-repeat0 group ; TUPLE: ebnf-repeat0 group ;
@ -17,6 +19,8 @@ TUPLE: ebnf rules ;
C: <ebnf-non-terminal> ebnf-non-terminal C: <ebnf-non-terminal> ebnf-non-terminal
C: <ebnf-terminal> ebnf-terminal C: <ebnf-terminal> ebnf-terminal
#! Constructors for the any-character and ensure-not AST nodes.
C: <ebnf-any-character> ebnf-any-character
C: <ebnf-ensure-not> ebnf-ensure-not
C: <ebnf-choice> ebnf-choice C: <ebnf-choice> ebnf-choice
C: <ebnf-sequence> ebnf-sequence C: <ebnf-sequence> ebnf-sequence
C: <ebnf-repeat0> ebnf-repeat0 C: <ebnf-repeat0> ebnf-repeat0
@ -61,6 +65,9 @@ M: ebnf-non-terminal (generate-parser) ( ast -- id )
parsers get , \ nth , [ search ] [ 2drop f ] recover , \ or , parsers get , \ nth , [ search ] [ 2drop f ] recover , \ or ,
] [ ] make delay sp store-parser ; ] [ ] make delay sp store-parser ;
M: ebnf-any-character (generate-parser) ( ast -- id )
#! Generate the parser for the "." element. The AST node carries
#! no data, so it is dropped. The predicate handed to satisfy
#! ignores its input and always answers t, so any one character
#! is accepted. The resulting parser is passed to store-parser.
drop [ drop t ] satisfy store-parser ;
M: ebnf-choice (generate-parser) ( ast -- id ) M: ebnf-choice (generate-parser) ( ast -- id )
ebnf-choice-options [ ebnf-choice-options [
generate-parser get-parser generate-parser get-parser
@ -71,6 +78,9 @@ M: ebnf-sequence (generate-parser) ( ast -- id )
generate-parser get-parser generate-parser get-parser
] map seq store-parser ; ] map seq store-parser ;
M: ebnf-ensure-not (generate-parser) ( ast -- id )
#! Generate the parser for a "!" element: build the parser for
#! the wrapped group, fetch it with get-parser, then wrap it with
#! ensure-not so that the group must not match at this point in
#! the input. The wrapped parser is passed to store-parser.
ebnf-ensure-not-group generate-parser get-parser ensure-not store-parser ;
M: ebnf-repeat0 (generate-parser) ( ast -- id ) M: ebnf-repeat0 (generate-parser) ( ast -- id )
ebnf-repeat0-group generate-parser get-parser repeat0 store-parser ; ebnf-repeat0-group generate-parser get-parser repeat0 store-parser ;
@ -136,6 +146,8 @@ DEFER: 'rhs'
[ dup CHAR: ( = ] [ dup CHAR: ( = ]
[ dup CHAR: ] = ] [ dup CHAR: ] = ]
[ dup CHAR: [ = ] [ dup CHAR: [ = ]
[ dup CHAR: . = ]
[ dup CHAR: ! = ]
} || not nip } || not nip
] satisfy repeat1 [ >string <ebnf-non-terminal> ] action ; ] satisfy repeat1 [ >string <ebnf-non-terminal> ] action ;
@ -144,6 +156,10 @@ DEFER: 'rhs'
#! and it represents the literal value of the identifier. #! and it represents the literal value of the identifier.
'identifier' [ <ebnf-terminal> ] action ; 'identifier' [ <ebnf-terminal> ] action ;
: 'any-character' ( -- parser )
#! A parser to match the symbol for any character match: a
#! single "." in the grammar text. The matched character is
#! discarded and an ebnf-any-character AST node is produced
#! in its place.
[ CHAR: . = ] satisfy [ drop <ebnf-any-character> ] action ;
: 'element' ( -- parser ) : 'element' ( -- parser )
#! An element of a rule. It can be a terminal or a #! An element of a rule. It can be a terminal or a
#! non-terminal but must not be followed by a "=". #! non-terminal but must not be followed by a "=".
@ -153,6 +169,7 @@ DEFER: 'rhs'
[ [
'non-terminal' , 'non-terminal' ,
'terminal' , 'terminal' ,
'any-character' ,
] choice* , ] choice* ,
"=" syntax ensure-not , "=" syntax ensure-not ,
] seq* [ first ] action ; ] seq* [ first ] action ;
@ -174,10 +191,20 @@ DEFER: 'choice'
: 'optional' ( -- parser ) : 'optional' ( -- parser )
"[" [ <ebnf-optional> ] "]" grouped ; "[" [ <ebnf-optional> ] "]" grouped ;
: 'ensure-not' ( -- parser )
#! Parses the '!' syntax, which ensures that something
#! matching the following group does not exist at the
#! current position in the parse stream. The result is an
#! ebnf-ensure-not AST node wrapping the group's AST.
#! NOTE(review): 'first' assumes the "!" token contributes
#! nothing to the sequence result, so the group AST is the
#! first element -- confirm against the definition of 'syntax'.
[
"!" syntax ,
'group' sp ,
] seq* [ first <ebnf-ensure-not> ] action ;
: 'sequence' ( -- parser ) : 'sequence' ( -- parser )
#! A sequence of terminals and non-terminals, including #! A sequence of terminals and non-terminals, including
#! groupings of those. #! groupings of those.
[ [
'ensure-not' sp ,
'element' sp , 'element' sp ,
'group' sp , 'group' sp ,
'repeat0' sp , 'repeat0' sp ,