peg: some cleanup.
parent
f562bc3385
commit
e576fb5fec
|
@ -2,7 +2,6 @@ USING: accessors debugger io kernel math.parser peg prettyprint
|
|||
sequences ;
|
||||
IN: peg.debugger
|
||||
|
||||
|
||||
M: parse-error error.
|
||||
[
|
||||
"Peg parsing error at character position " write
|
||||
|
|
|
@ -1,47 +1,47 @@
|
|||
! Copyright (C) 2007 Chris Double.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel words arrays strings math.parser
|
||||
sequences quotations vectors namespaces make math assocs
|
||||
continuations peg peg.parsers unicode.categories multiline
|
||||
splitting accessors effects sequences.deep peg.search
|
||||
combinators.short-circuit lexer io.streams.string stack-checker
|
||||
io combinators parser summary ;
|
||||
USING: accessors arrays assocs combinators
|
||||
combinators.short-circuit effects io.streams.string kernel make
|
||||
math.parser multiline namespaces parser peg peg.parsers
|
||||
peg.search quotations sequences splitting stack-checker strings
|
||||
summary unicode.categories words ;
|
||||
FROM: compiler.units => with-compilation-unit ;
|
||||
FROM: vocabs.parser => search ;
|
||||
FROM: peg.search => replace ;
|
||||
IN: peg.ebnf
|
||||
|
||||
: rule ( name word -- parser )
|
||||
#! Given an EBNF word produced from EBNF: return the EBNF rule
|
||||
"ebnf-parser" word-prop at ;
|
||||
#! Given an EBNF word produced from EBNF: return the EBNF rule
|
||||
"ebnf-parser" word-prop at ;
|
||||
|
||||
ERROR: no-rule rule parser ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: lookup-rule ( rule parser -- rule' )
|
||||
2dup rule [ 2nip ] [ no-rule ] if* ;
|
||||
2dup rule [ 2nip ] [ no-rule ] if* ;
|
||||
|
||||
TUPLE: tokenizer-tuple any one many ;
|
||||
|
||||
: default-tokenizer ( -- tokenizer )
|
||||
T{ tokenizer-tuple f
|
||||
[ any-char ]
|
||||
[ token ]
|
||||
[ [ = ] curry any-char swap semantic ]
|
||||
} ;
|
||||
T{ tokenizer-tuple f
|
||||
[ any-char ]
|
||||
[ token ]
|
||||
[ [ = ] curry any-char swap semantic ]
|
||||
} ;
|
||||
|
||||
: parser-tokenizer ( parser -- tokenizer )
|
||||
[ 1quotation ] keep
|
||||
[ swap [ = ] curry semantic ] curry dup \ tokenizer-tuple boa ;
|
||||
[ 1quotation ] keep
|
||||
[ swap [ = ] curry semantic ] curry dup \ tokenizer-tuple boa ;
|
||||
|
||||
: rule-tokenizer ( name word -- tokenizer )
|
||||
rule parser-tokenizer ;
|
||||
rule parser-tokenizer ;
|
||||
|
||||
: tokenizer ( -- word )
|
||||
\ tokenizer get-global [ default-tokenizer ] unless* ;
|
||||
\ tokenizer get-global [ default-tokenizer ] unless* ;
|
||||
|
||||
: reset-tokenizer ( -- )
|
||||
default-tokenizer \ tokenizer set-global ;
|
||||
default-tokenizer \ tokenizer set-global ;
|
||||
|
||||
ERROR: no-tokenizer name ;
|
||||
|
||||
|
@ -49,8 +49,8 @@ M: no-tokenizer summary
|
|||
drop "Tokenizer not found" ;
|
||||
|
||||
SYNTAX: TOKENIZER:
|
||||
scan-word-name dup search [ nip ] [ no-tokenizer ] if*
|
||||
execute( -- tokenizer ) \ tokenizer set-global ;
|
||||
scan-word-name dup search [ nip ] [ no-tokenizer ] if*
|
||||
execute( -- tokenizer ) \ tokenizer set-global ;
|
||||
|
||||
TUPLE: ebnf-non-terminal symbol ;
|
||||
TUPLE: ebnf-terminal symbol ;
|
||||
|
@ -95,279 +95,286 @@ C: <ebnf-semantic> ebnf-semantic
|
|||
C: <ebnf> ebnf
|
||||
|
||||
: filter-hidden ( seq -- seq )
|
||||
#! Remove elements that produce no AST from sequence
|
||||
[ ebnf-ensure-not? not ] filter [ ebnf-ensure? not ] filter ;
|
||||
#! Remove elements that produce no AST from sequence
|
||||
[ ebnf-ensure-not? not ] filter [ ebnf-ensure? not ] filter ;
|
||||
|
||||
: syntax ( string -- parser )
|
||||
#! Parses the string, ignoring white space, and
|
||||
#! does not put the result in the AST.
|
||||
token sp hide ;
|
||||
#! Parses the string, ignoring white space, and
|
||||
#! does not put the result in the AST.
|
||||
token sp hide ;
|
||||
|
||||
: syntax-pack ( begin parser end -- parser )
|
||||
#! Parse 'parser' surrounded by syntax elements
|
||||
#! begin and end.
|
||||
[ syntax ] 2dip syntax pack ;
|
||||
#! Parse 'parser' surrounded by syntax elements
|
||||
#! begin and end.
|
||||
[ syntax ] 2dip syntax pack ;
|
||||
|
||||
#! Don't want to use 'replace' in an action since replace doesn't infer.
|
||||
#! Do the compilation of the peg at parse time and call (replace).
|
||||
PEG: escaper ( string -- ast )
|
||||
[
|
||||
"\\t" token [ drop "\t" ] action ,
|
||||
"\\n" token [ drop "\n" ] action ,
|
||||
"\\r" token [ drop "\r" ] action ,
|
||||
"\\\\" token [ drop "\\" ] action ,
|
||||
] choice* any-char-parser 2array choice repeat0 ;
|
||||
[
|
||||
"\\t" token [ drop "\t" ] action ,
|
||||
"\\n" token [ drop "\n" ] action ,
|
||||
"\\r" token [ drop "\r" ] action ,
|
||||
"\\\\" token [ drop "\\" ] action ,
|
||||
] choice* any-char-parser 2array choice repeat0 ;
|
||||
|
||||
: replace-escapes ( string -- string )
|
||||
escaper sift [ [ tree-write ] each ] with-string-writer ;
|
||||
escaper sift [ [ tree-write ] each ] with-string-writer ;
|
||||
|
||||
: insert-escapes ( string -- string )
|
||||
[
|
||||
"\t" token [ drop "\\t" ] action ,
|
||||
"\n" token [ drop "\\n" ] action ,
|
||||
"\r" token [ drop "\\r" ] action ,
|
||||
] choice* replace ;
|
||||
[
|
||||
"\t" token [ drop "\\t" ] action ,
|
||||
"\n" token [ drop "\\n" ] action ,
|
||||
"\r" token [ drop "\\r" ] action ,
|
||||
] choice* replace ;
|
||||
|
||||
: 'identifier' ( -- parser )
|
||||
#! Return a parser that parses an identifer delimited by
|
||||
#! a quotation character. The quotation can be single
|
||||
#! or double quotes. The AST produced is the identifier
|
||||
#! between the quotes.
|
||||
[
|
||||
[ CHAR: " = not ] satisfy repeat1 "\"" "\"" surrounded-by ,
|
||||
[ CHAR: ' = not ] satisfy repeat1 "'" "'" surrounded-by ,
|
||||
] choice* [ >string replace-escapes ] action ;
|
||||
#! Return a parser that parses an identifer delimited by
|
||||
#! a quotation character. The quotation can be single
|
||||
#! or double quotes. The AST produced is the identifier
|
||||
#! between the quotes.
|
||||
[
|
||||
[ CHAR: " = not ] satisfy repeat1 "\"" "\"" surrounded-by ,
|
||||
[ CHAR: ' = not ] satisfy repeat1 "'" "'" surrounded-by ,
|
||||
] choice* [ >string replace-escapes ] action ;
|
||||
|
||||
: 'non-terminal' ( -- parser )
|
||||
#! A non-terminal is the name of another rule. It can
|
||||
#! be any non-blank character except for characters used
|
||||
#! in the EBNF syntax itself.
|
||||
[
|
||||
{
|
||||
[ blank? ]
|
||||
[ CHAR: " = ]
|
||||
[ CHAR: ' = ]
|
||||
[ CHAR: | = ]
|
||||
[ CHAR: { = ]
|
||||
[ CHAR: } = ]
|
||||
[ CHAR: = = ]
|
||||
[ CHAR: ) = ]
|
||||
[ CHAR: ( = ]
|
||||
[ CHAR: ] = ]
|
||||
[ CHAR: [ = ]
|
||||
[ CHAR: . = ]
|
||||
[ CHAR: ! = ]
|
||||
[ CHAR: & = ]
|
||||
[ CHAR: * = ]
|
||||
[ CHAR: + = ]
|
||||
[ CHAR: ? = ]
|
||||
[ CHAR: : = ]
|
||||
[ CHAR: ~ = ]
|
||||
[ CHAR: < = ]
|
||||
[ CHAR: > = ]
|
||||
} 1|| not
|
||||
] satisfy repeat1 [ >string <ebnf-non-terminal> ] action ;
|
||||
#! A non-terminal is the name of another rule. It can
|
||||
#! be any non-blank character except for characters used
|
||||
#! in the EBNF syntax itself.
|
||||
[
|
||||
{
|
||||
[ blank? ]
|
||||
[ CHAR: " = ]
|
||||
[ CHAR: ' = ]
|
||||
[ CHAR: | = ]
|
||||
[ CHAR: { = ]
|
||||
[ CHAR: } = ]
|
||||
[ CHAR: = = ]
|
||||
[ CHAR: ) = ]
|
||||
[ CHAR: ( = ]
|
||||
[ CHAR: ] = ]
|
||||
[ CHAR: [ = ]
|
||||
[ CHAR: . = ]
|
||||
[ CHAR: ! = ]
|
||||
[ CHAR: & = ]
|
||||
[ CHAR: * = ]
|
||||
[ CHAR: + = ]
|
||||
[ CHAR: ? = ]
|
||||
[ CHAR: : = ]
|
||||
[ CHAR: ~ = ]
|
||||
[ CHAR: < = ]
|
||||
[ CHAR: > = ]
|
||||
} 1|| not
|
||||
] satisfy repeat1 [ >string <ebnf-non-terminal> ] action ;
|
||||
|
||||
: 'terminal' ( -- parser )
|
||||
#! A terminal is an identifier enclosed in quotations
|
||||
#! and it represents the literal value of the identifier.
|
||||
'identifier' [ <ebnf-terminal> ] action ;
|
||||
#! A terminal is an identifier enclosed in quotations
|
||||
#! and it represents the literal value of the identifier.
|
||||
'identifier' [ <ebnf-terminal> ] action ;
|
||||
|
||||
: 'foreign-name' ( -- parser )
|
||||
#! Parse a valid foreign parser name
|
||||
[
|
||||
{
|
||||
[ blank? ]
|
||||
[ CHAR: > = ]
|
||||
} 1|| not
|
||||
] satisfy repeat1 [ >string ] action ;
|
||||
#! Parse a valid foreign parser name
|
||||
[
|
||||
{
|
||||
[ blank? ]
|
||||
[ CHAR: > = ]
|
||||
} 1|| not
|
||||
] satisfy repeat1 [ >string ] action ;
|
||||
|
||||
: 'foreign' ( -- parser )
|
||||
#! A foreign call is a call to a rule in another ebnf grammar
|
||||
[
|
||||
"<foreign" syntax ,
|
||||
'foreign-name' sp ,
|
||||
'foreign-name' sp optional ,
|
||||
">" syntax ,
|
||||
] seq* [ first2 <ebnf-foreign> ] action ;
|
||||
#! A foreign call is a call to a rule in another ebnf grammar
|
||||
[
|
||||
"<foreign" syntax ,
|
||||
'foreign-name' sp ,
|
||||
'foreign-name' sp optional ,
|
||||
">" syntax ,
|
||||
] seq* [ first2 <ebnf-foreign> ] action ;
|
||||
|
||||
: 'any-character' ( -- parser )
|
||||
#! A parser to match the symbol for any character match.
|
||||
[ CHAR: . = ] satisfy [ drop <ebnf-any-character> ] action ;
|
||||
#! A parser to match the symbol for any character match.
|
||||
[ CHAR: . = ] satisfy [ drop <ebnf-any-character> ] action ;
|
||||
|
||||
: 'range-parser' ( -- parser )
|
||||
#! Match the syntax for declaring character ranges
|
||||
[
|
||||
[ "[" syntax , "[" token ensure-not , ] seq* hide ,
|
||||
[ CHAR: ] = not ] satisfy repeat1 ,
|
||||
"]" syntax ,
|
||||
] seq* [ first >string <ebnf-range> ] action ;
|
||||
#! Match the syntax for declaring character ranges
|
||||
[
|
||||
[ "[" syntax , "[" token ensure-not , ] seq* hide ,
|
||||
[ CHAR: ] = not ] satisfy repeat1 ,
|
||||
"]" syntax ,
|
||||
] seq* [ first >string <ebnf-range> ] action ;
|
||||
|
||||
: ('element') ( -- parser )
|
||||
#! An element of a rule. It can be a terminal or a
|
||||
#! non-terminal but must not be followed by a "=".
|
||||
#! The latter indicates that it is the beginning of a
|
||||
#! new rule.
|
||||
[
|
||||
#! An element of a rule. It can be a terminal or a
|
||||
#! non-terminal but must not be followed by a "=".
|
||||
#! The latter indicates that it is the beginning of a
|
||||
#! new rule.
|
||||
[
|
||||
[
|
||||
'non-terminal' ,
|
||||
'terminal' ,
|
||||
'foreign' ,
|
||||
'range-parser' ,
|
||||
'any-character' ,
|
||||
] choice*
|
||||
[ dup , "~" token hide , ] seq* [ first <ebnf-ignore> ] action ,
|
||||
[ dup , "*" token hide , ] seq* [ first <ebnf-repeat0> ] action ,
|
||||
[ dup , "+" token hide , ] seq* [ first <ebnf-repeat1> ] action ,
|
||||
[ dup , "?[" token ensure-not , "?" token hide , ] seq* [ first <ebnf-optional> ] action ,
|
||||
,
|
||||
] choice* ,
|
||||
[
|
||||
"=" syntax ensure-not ,
|
||||
"=>" syntax ensure ,
|
||||
] choice* ,
|
||||
] seq* [ first ] action ;
|
||||
[
|
||||
[
|
||||
'non-terminal' ,
|
||||
'terminal' ,
|
||||
'foreign' ,
|
||||
'range-parser' ,
|
||||
'any-character' ,
|
||||
] choice*
|
||||
[ dup , "~" token hide , ] seq* [ first <ebnf-ignore> ] action ,
|
||||
[ dup , "*" token hide , ] seq* [ first <ebnf-repeat0> ] action ,
|
||||
[ dup , "+" token hide , ] seq* [ first <ebnf-repeat1> ] action ,
|
||||
[ dup , "?[" token ensure-not , "?" token hide , ] seq* [ first <ebnf-optional> ] action ,
|
||||
,
|
||||
] choice* ,
|
||||
[
|
||||
"=" syntax ensure-not ,
|
||||
"=>" syntax ensure ,
|
||||
] choice* ,
|
||||
] seq* [ first ] action ;
|
||||
|
||||
DEFER: 'action'
|
||||
|
||||
: 'element' ( -- parser )
|
||||
[
|
||||
[
|
||||
('element') , ":" syntax ,
|
||||
"a-zA-Z_" range-pattern
|
||||
"a-zA-Z0-9_-" range-pattern repeat1 2seq [ first2 swap prefix >string ] action ,
|
||||
] seq* [ first2 <ebnf-var> ] action ,
|
||||
('element') ,
|
||||
] choice* ;
|
||||
[
|
||||
('element') , ":" syntax ,
|
||||
"a-zA-Z_" range-pattern
|
||||
"a-zA-Z0-9_-" range-pattern repeat1 2seq [ first2 swap prefix >string ] action ,
|
||||
] seq* [ first2 <ebnf-var> ] action ,
|
||||
('element') ,
|
||||
] choice* ;
|
||||
|
||||
DEFER: 'choice'
|
||||
|
||||
: grouped ( quot suffix -- parser )
|
||||
#! Parse a group of choices, with a suffix indicating
|
||||
#! the type of group (repeat0, repeat1, etc) and
|
||||
#! an quot that is the action that produces the AST.
|
||||
2dup
|
||||
[
|
||||
"(" [ 'choice' sp ] delay ")" syntax-pack
|
||||
swap 2seq
|
||||
[ first ] rot compose action ,
|
||||
"{" [ 'choice' sp ] delay "}" syntax-pack
|
||||
swap 2seq
|
||||
[ first <ebnf-whitespace> ] rot compose action ,
|
||||
] choice* ;
|
||||
: grouped ( quot suffix -- parser )
|
||||
#! Parse a group of choices, with a suffix indicating
|
||||
#! the type of group (repeat0, repeat1, etc) and
|
||||
#! an quot that is the action that produces the AST.
|
||||
2dup
|
||||
[
|
||||
"(" [ 'choice' sp ] delay ")" syntax-pack
|
||||
swap 2seq
|
||||
[ first ] rot compose action ,
|
||||
"{" [ 'choice' sp ] delay "}" syntax-pack
|
||||
swap 2seq
|
||||
[ first <ebnf-whitespace> ] rot compose action ,
|
||||
] choice* ;
|
||||
|
||||
: 'group' ( -- parser )
|
||||
#! A grouping with no suffix. Used for precedence.
|
||||
[ ] [
|
||||
"~" token sp ensure-not ,
|
||||
"*" token sp ensure-not ,
|
||||
"+" token sp ensure-not ,
|
||||
"?" token sp ensure-not ,
|
||||
] seq* hide grouped ;
|
||||
#! A grouping with no suffix. Used for precedence.
|
||||
[ ] [
|
||||
"~" token sp ensure-not ,
|
||||
"*" token sp ensure-not ,
|
||||
"+" token sp ensure-not ,
|
||||
"?" token sp ensure-not ,
|
||||
] seq* hide grouped ;
|
||||
|
||||
: 'ignore' ( -- parser )
|
||||
[ <ebnf-ignore> ] "~" syntax grouped ;
|
||||
[ <ebnf-ignore> ] "~" syntax grouped ;
|
||||
|
||||
: 'repeat0' ( -- parser )
|
||||
[ <ebnf-repeat0> ] "*" syntax grouped ;
|
||||
[ <ebnf-repeat0> ] "*" syntax grouped ;
|
||||
|
||||
: 'repeat1' ( -- parser )
|
||||
[ <ebnf-repeat1> ] "+" syntax grouped ;
|
||||
[ <ebnf-repeat1> ] "+" syntax grouped ;
|
||||
|
||||
: 'optional' ( -- parser )
|
||||
[ <ebnf-optional> ] "?" syntax grouped ;
|
||||
[ <ebnf-optional> ] "?" syntax grouped ;
|
||||
|
||||
: 'factor-code' ( -- parser )
|
||||
[
|
||||
"]]" token ensure-not ,
|
||||
"]?" token ensure-not ,
|
||||
[ drop t ] satisfy ,
|
||||
] seq* repeat0 [ "" concat-as ] action ;
|
||||
[
|
||||
"]]" token ensure-not ,
|
||||
"]?" token ensure-not ,
|
||||
[ drop t ] satisfy ,
|
||||
] seq* repeat0 [ "" concat-as ] action ;
|
||||
|
||||
: 'ensure-not' ( -- parser )
|
||||
#! Parses the '!' syntax to ensure that
|
||||
#! something that matches the following elements do
|
||||
#! not exist in the parse stream.
|
||||
[
|
||||
"!" syntax ,
|
||||
'group' sp ,
|
||||
] seq* [ first <ebnf-ensure-not> ] action ;
|
||||
#! Parses the '!' syntax to ensure that
|
||||
#! something that matches the following elements do
|
||||
#! not exist in the parse stream.
|
||||
[
|
||||
"!" syntax ,
|
||||
'group' sp ,
|
||||
] seq* [ first <ebnf-ensure-not> ] action ;
|
||||
|
||||
: 'ensure' ( -- parser )
|
||||
#! Parses the '&' syntax to ensure that
|
||||
#! something that matches the following elements does
|
||||
#! exist in the parse stream.
|
||||
[
|
||||
"&" syntax ,
|
||||
'group' sp ,
|
||||
] seq* [ first <ebnf-ensure> ] action ;
|
||||
#! Parses the '&' syntax to ensure that
|
||||
#! something that matches the following elements does
|
||||
#! exist in the parse stream.
|
||||
[
|
||||
"&" syntax ,
|
||||
'group' sp ,
|
||||
] seq* [ first <ebnf-ensure> ] action ;
|
||||
|
||||
: ('sequence') ( -- parser )
|
||||
#! A sequence of terminals and non-terminals, including
|
||||
#! groupings of those.
|
||||
[
|
||||
#! A sequence of terminals and non-terminals, including
|
||||
#! groupings of those.
|
||||
[
|
||||
'ensure-not' sp ,
|
||||
'ensure' sp ,
|
||||
'element' sp ,
|
||||
'group' sp ,
|
||||
'ignore' sp ,
|
||||
'repeat0' sp ,
|
||||
'repeat1' sp ,
|
||||
'optional' sp ,
|
||||
] choice*
|
||||
[ dup , ":" syntax , "a-zA-Z" range-pattern repeat1 [ >string ] action , ] seq* [ first2 <ebnf-var> ] action ,
|
||||
,
|
||||
] choice* ;
|
||||
[
|
||||
'ensure-not' sp ,
|
||||
'ensure' sp ,
|
||||
'element' sp ,
|
||||
'group' sp ,
|
||||
'ignore' sp ,
|
||||
'repeat0' sp ,
|
||||
'repeat1' sp ,
|
||||
'optional' sp ,
|
||||
] choice*
|
||||
[ dup , ":" syntax , "a-zA-Z" range-pattern repeat1 [ >string ] action , ] seq* [ first2 <ebnf-var> ] action ,
|
||||
,
|
||||
] choice* ;
|
||||
|
||||
: 'action' ( -- parser )
|
||||
"[[" 'factor-code' "]]" syntax-pack ;
|
||||
"[[" 'factor-code' "]]" syntax-pack ;
|
||||
|
||||
: 'semantic' ( -- parser )
|
||||
"?[" 'factor-code' "]?" syntax-pack ;
|
||||
"?[" 'factor-code' "]?" syntax-pack ;
|
||||
|
||||
: 'sequence' ( -- parser )
|
||||
#! A sequence of terminals and non-terminals, including
|
||||
#! groupings of those.
|
||||
[
|
||||
[ ('sequence') , 'action' , ] seq* [ first2 <ebnf-action> ] action ,
|
||||
[ ('sequence') , 'semantic' , ] seq* [ first2 <ebnf-semantic> ] action ,
|
||||
('sequence') ,
|
||||
] choice* repeat1 [
|
||||
dup length 1 = [ first ] [ <ebnf-sequence> ] if
|
||||
] action ;
|
||||
#! A sequence of terminals and non-terminals, including
|
||||
#! groupings of those.
|
||||
[
|
||||
[ ('sequence') , 'action' , ] seq*
|
||||
[ first2 <ebnf-action> ] action ,
|
||||
|
||||
[ ('sequence') , 'semantic' , ] seq*
|
||||
[ first2 <ebnf-semantic> ] action ,
|
||||
|
||||
('sequence') ,
|
||||
] choice* repeat1 [
|
||||
dup length 1 = [ first ] [ <ebnf-sequence> ] if
|
||||
] action ;
|
||||
|
||||
: 'actioned-sequence' ( -- parser )
|
||||
[
|
||||
[ 'sequence' , "=>" syntax , 'action' , ] seq* [ first2 <ebnf-action> ] action ,
|
||||
'sequence' ,
|
||||
] choice* ;
|
||||
[
|
||||
[ 'sequence' , "=>" syntax , 'action' , ] seq*
|
||||
[ first2 <ebnf-action> ] action ,
|
||||
'sequence' ,
|
||||
] choice* ;
|
||||
|
||||
: 'choice' ( -- parser )
|
||||
'actioned-sequence' sp repeat1 [ dup length 1 = [ first ] [ <ebnf-sequence> ] if ] action "|" token sp list-of [
|
||||
dup length 1 = [ first ] [ <ebnf-choice> ] if
|
||||
] action ;
|
||||
'actioned-sequence' sp repeat1 [
|
||||
dup length 1 = [ first ] [ <ebnf-sequence> ] if
|
||||
] action "|" token sp list-of [
|
||||
dup length 1 = [ first ] [ <ebnf-choice> ] if
|
||||
] action ;
|
||||
|
||||
: 'tokenizer' ( -- parser )
|
||||
[
|
||||
"tokenizer" syntax ,
|
||||
"=" syntax ,
|
||||
">" token ensure-not ,
|
||||
[ "default" token sp , 'choice' , ] choice* ,
|
||||
] seq* [ first <ebnf-tokenizer> ] action ;
|
||||
[
|
||||
"tokenizer" syntax ,
|
||||
"=" syntax ,
|
||||
">" token ensure-not ,
|
||||
[ "default" token sp , 'choice' , ] choice* ,
|
||||
] seq* [ first <ebnf-tokenizer> ] action ;
|
||||
|
||||
: 'rule' ( -- parser )
|
||||
[
|
||||
"tokenizer" token ensure-not ,
|
||||
'non-terminal' [ symbol>> ] action ,
|
||||
"=" syntax ,
|
||||
">" token ensure-not ,
|
||||
'choice' ,
|
||||
] seq* [ first2 <ebnf-rule> ] action ;
|
||||
[
|
||||
"tokenizer" token ensure-not ,
|
||||
'non-terminal' [ symbol>> ] action ,
|
||||
"=" syntax ,
|
||||
">" token ensure-not ,
|
||||
'choice' ,
|
||||
] seq* [ first2 <ebnf-rule> ] action ;
|
||||
|
||||
: 'ebnf' ( -- parser )
|
||||
[ 'tokenizer' sp , 'rule' sp , ] choice* repeat1 [ <ebnf> ] action ;
|
||||
[ 'tokenizer' sp , 'rule' sp , ] choice* repeat1 [ <ebnf> ] action ;
|
||||
|
||||
GENERIC: (transform) ( ast -- parser )
|
||||
|
||||
|
@ -376,202 +383,203 @@ SYMBOL: main
|
|||
SYMBOL: ignore-ws
|
||||
|
||||
: transform ( ast -- object )
|
||||
H{ } clone dup dup [
|
||||
f ignore-ws set
|
||||
parser set
|
||||
swap (transform)
|
||||
main set
|
||||
] with-variables ;
|
||||
H{ } clone dup dup [
|
||||
f ignore-ws set
|
||||
parser set
|
||||
swap (transform)
|
||||
main set
|
||||
] with-variables ;
|
||||
|
||||
M: ebnf (transform) ( ast -- parser )
|
||||
rules>> [ (transform) ] map last ;
|
||||
rules>> [ (transform) ] map last ;
|
||||
|
||||
M: ebnf-tokenizer (transform) ( ast -- parser )
|
||||
elements>> dup "default" = [
|
||||
drop default-tokenizer \ tokenizer set-global any-char
|
||||
] [
|
||||
(transform)
|
||||
dup parser-tokenizer \ tokenizer set-global
|
||||
] if ;
|
||||
elements>> dup "default" = [
|
||||
drop default-tokenizer \ tokenizer set-global any-char
|
||||
] [
|
||||
(transform)
|
||||
dup parser-tokenizer \ tokenizer set-global
|
||||
] if ;
|
||||
|
||||
ERROR: redefined-rule name ;
|
||||
|
||||
M: redefined-rule summary
|
||||
name>> "Rule '" "' defined more than once" surround ;
|
||||
name>> "Rule '" "' defined more than once" surround ;
|
||||
|
||||
M: ebnf-rule (transform) ( ast -- parser )
|
||||
dup elements>>
|
||||
(transform) [
|
||||
swap symbol>> dup get parser? [ redefined-rule ] [ set ] if
|
||||
] keep ;
|
||||
dup elements>>
|
||||
(transform) [
|
||||
swap symbol>> dup get parser? [ redefined-rule ] [ set ] if
|
||||
] keep ;
|
||||
|
||||
M: ebnf-sequence (transform) ( ast -- parser )
|
||||
#! If ignore-ws is set then each element of the sequence
|
||||
#! ignores leading whitespace. This is not inherited by
|
||||
#! subelements of the sequence.
|
||||
elements>> [
|
||||
f ignore-ws [ (transform) ] with-variable
|
||||
ignore-ws get [ sp ] when
|
||||
] map seq [ dup length 1 = [ first ] when ] action ;
|
||||
#! If ignore-ws is set then each element of the sequence
|
||||
#! ignores leading whitespace. This is not inherited by
|
||||
#! subelements of the sequence.
|
||||
elements>> [
|
||||
f ignore-ws [ (transform) ] with-variable
|
||||
ignore-ws get [ sp ] when
|
||||
] map seq [ dup length 1 = [ first ] when ] action ;
|
||||
|
||||
M: ebnf-choice (transform) ( ast -- parser )
|
||||
options>> [ (transform) ] map choice ;
|
||||
options>> [ (transform) ] map choice ;
|
||||
|
||||
M: ebnf-any-character (transform) ( ast -- parser )
|
||||
drop tokenizer any>> call( -- parser ) ;
|
||||
drop tokenizer any>> call( -- parser ) ;
|
||||
|
||||
M: ebnf-range (transform) ( ast -- parser )
|
||||
pattern>> range-pattern ;
|
||||
pattern>> range-pattern ;
|
||||
|
||||
: transform-group ( ast -- parser )
|
||||
#! convert a ast node with groups to a parser for that group
|
||||
group>> (transform) ;
|
||||
#! convert a ast node with groups to a parser for that group
|
||||
group>> (transform) ;
|
||||
|
||||
M: ebnf-ensure (transform) ( ast -- parser )
|
||||
transform-group ensure ;
|
||||
transform-group ensure ;
|
||||
|
||||
M: ebnf-ensure-not (transform) ( ast -- parser )
|
||||
transform-group ensure-not ;
|
||||
transform-group ensure-not ;
|
||||
|
||||
M: ebnf-ignore (transform) ( ast -- parser )
|
||||
transform-group [ drop ignore ] action ;
|
||||
transform-group [ drop ignore ] action ;
|
||||
|
||||
M: ebnf-repeat0 (transform) ( ast -- parser )
|
||||
transform-group repeat0 ;
|
||||
transform-group repeat0 ;
|
||||
|
||||
M: ebnf-repeat1 (transform) ( ast -- parser )
|
||||
transform-group repeat1 ;
|
||||
transform-group repeat1 ;
|
||||
|
||||
M: ebnf-optional (transform) ( ast -- parser )
|
||||
transform-group optional ;
|
||||
transform-group optional ;
|
||||
|
||||
M: ebnf-whitespace (transform) ( ast -- parser )
|
||||
t ignore-ws [ transform-group ] with-variable ;
|
||||
t ignore-ws [ transform-group ] with-variable ;
|
||||
|
||||
GENERIC: build-locals ( code ast -- code )
|
||||
|
||||
M: ebnf-sequence build-locals ( code ast -- code )
|
||||
#! Note the need to filter out this ebnf items that
|
||||
#! leave nothing in the AST
|
||||
elements>> filter-hidden dup length 1 = [
|
||||
first build-locals
|
||||
] [
|
||||
dup [ ebnf-var? ] any? not [
|
||||
drop
|
||||
#! Note the need to filter out this ebnf items that
|
||||
#! leave nothing in the AST
|
||||
elements>> filter-hidden dup length 1 = [
|
||||
first build-locals
|
||||
] [
|
||||
[
|
||||
"FROM: locals => [let :> ; FROM: sequences => nth ; FROM: kernel => nip over ; [let " %
|
||||
[
|
||||
over ebnf-var? [
|
||||
" " % # " over nth :> " %
|
||||
name>> %
|
||||
] [
|
||||
2drop
|
||||
] if
|
||||
] each-index
|
||||
" " %
|
||||
%
|
||||
" nip ]" %
|
||||
] "" make
|
||||
] if
|
||||
] if ;
|
||||
dup [ ebnf-var? ] any? not [
|
||||
drop
|
||||
] [
|
||||
[
|
||||
"FROM: locals => [let :> ; FROM: sequences => nth ; FROM: kernel => nip over ; [let " %
|
||||
[
|
||||
over ebnf-var? [
|
||||
" " % # " over nth :> " %
|
||||
name>> %
|
||||
] [
|
||||
2drop
|
||||
] if
|
||||
] each-index
|
||||
" " %
|
||||
%
|
||||
" nip ]" %
|
||||
] "" make
|
||||
] if
|
||||
] if ;
|
||||
|
||||
M: ebnf-var build-locals ( code ast -- code )
|
||||
[
|
||||
"FROM: locals => [let :> ; FROM: kernel => dup nip ; [let " %
|
||||
" dup :> " % name>> %
|
||||
" " %
|
||||
%
|
||||
" nip ]" %
|
||||
] "" make ;
|
||||
[
|
||||
"FROM: locals => [let :> ; FROM: kernel => dup nip ; [let " %
|
||||
" dup :> " % name>> %
|
||||
" " %
|
||||
%
|
||||
" nip ]" %
|
||||
] "" make ;
|
||||
|
||||
M: object build-locals ( code ast -- code )
|
||||
drop ;
|
||||
drop ;
|
||||
|
||||
ERROR: bad-effect quot effect ;
|
||||
|
||||
: check-action-effect ( quot -- quot )
|
||||
dup infer {
|
||||
{ [ dup ( a -- b ) effect<= ] [ drop ] }
|
||||
{ [ dup ( -- b ) effect<= ] [ drop [ drop ] prepose ] }
|
||||
[ bad-effect ]
|
||||
} cond ;
|
||||
dup infer {
|
||||
{ [ dup ( a -- b ) effect<= ] [ drop ] }
|
||||
{ [ dup ( -- b ) effect<= ] [ drop [ drop ] prepose ] }
|
||||
[ bad-effect ]
|
||||
} cond ;
|
||||
|
||||
: ebnf-transform ( ast -- parser quot )
|
||||
[ parser>> (transform) ]
|
||||
[ code>> insert-escapes ]
|
||||
[ parser>> ] tri build-locals
|
||||
[ string-lines parse-lines ] call( string -- quot ) ;
|
||||
[ parser>> (transform) ]
|
||||
[ code>> insert-escapes ]
|
||||
[ parser>> ] tri build-locals
|
||||
string-lines parse-lines ;
|
||||
|
||||
M: ebnf-action (transform) ( ast -- parser )
|
||||
ebnf-transform check-action-effect action ;
|
||||
ebnf-transform check-action-effect action ;
|
||||
|
||||
M: ebnf-semantic (transform) ( ast -- parser )
|
||||
ebnf-transform semantic ;
|
||||
ebnf-transform semantic ;
|
||||
|
||||
M: ebnf-var (transform) ( ast -- parser )
|
||||
parser>> (transform) ;
|
||||
parser>> (transform) ;
|
||||
|
||||
M: ebnf-terminal (transform) ( ast -- parser )
|
||||
symbol>> tokenizer one>> call( symbol -- parser ) ;
|
||||
symbol>> tokenizer one>> call( symbol -- parser ) ;
|
||||
|
||||
ERROR: ebnf-foreign-not-found name ;
|
||||
|
||||
M: ebnf-foreign-not-found summary
|
||||
name>> "Foreign word '" "' not found" surround ;
|
||||
name>> "Foreign word '" "' not found" surround ;
|
||||
|
||||
M: ebnf-foreign (transform) ( ast -- parser )
|
||||
dup word>> search [ word>> ebnf-foreign-not-found ] unless*
|
||||
swap rule>> [ main ] unless* over rule [
|
||||
nip
|
||||
] [
|
||||
execute( -- parser )
|
||||
] if* ;
|
||||
dup word>> search [ word>> ebnf-foreign-not-found ] unless*
|
||||
swap rule>> [ main ] unless* over rule [
|
||||
nip
|
||||
] [
|
||||
execute( -- parser )
|
||||
] if* ;
|
||||
|
||||
ERROR: parser-not-found name ;
|
||||
|
||||
M: ebnf-non-terminal (transform) ( ast -- parser )
|
||||
symbol>> [
|
||||
, \ dup , parser get , \ at , [ parser-not-found ] , \ unless* , \ nip ,
|
||||
] [ ] make box ;
|
||||
symbol>> [
|
||||
, \ dup , parser get , \ at ,
|
||||
[ parser-not-found ] , \ unless* , \ nip ,
|
||||
] [ ] make box ;
|
||||
|
||||
: transform-ebnf ( string -- object )
|
||||
'ebnf' parse transform ;
|
||||
'ebnf' parse transform ;
|
||||
|
||||
ERROR: unable-to-fully-parse-ebnf remaining ;
|
||||
|
||||
ERROR: could-not-parse-ebnf ;
|
||||
|
||||
: check-parse-result ( result -- result )
|
||||
[
|
||||
dup remaining>> [ blank? ] trim [
|
||||
unable-to-fully-parse-ebnf
|
||||
] unless-empty
|
||||
] [
|
||||
could-not-parse-ebnf
|
||||
] if* ;
|
||||
[
|
||||
dup remaining>> [ blank? ] trim [
|
||||
unable-to-fully-parse-ebnf
|
||||
] unless-empty
|
||||
] [
|
||||
could-not-parse-ebnf
|
||||
] if* ;
|
||||
|
||||
: parse-ebnf ( string -- hashtable )
|
||||
'ebnf' (parse) check-parse-result ast>> transform ;
|
||||
'ebnf' (parse) check-parse-result ast>> transform ;
|
||||
|
||||
: ebnf>quot ( string -- hashtable quot )
|
||||
parse-ebnf dup dup parser [ main of compile ] with-variable
|
||||
[ compiled-parse ] curry [ with-scope ast>> ] curry ;
|
||||
parse-ebnf dup dup parser [ main of compile ] with-variable
|
||||
[ compiled-parse ] curry [ with-scope ast>> ] curry ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
SYNTAX: <EBNF
|
||||
"EBNF>"
|
||||
reset-tokenizer parse-multiline-string parse-ebnf main of
|
||||
suffix! reset-tokenizer ;
|
||||
"EBNF>"
|
||||
reset-tokenizer parse-multiline-string parse-ebnf main of
|
||||
suffix! reset-tokenizer ;
|
||||
|
||||
SYNTAX: [EBNF
|
||||
"EBNF]"
|
||||
reset-tokenizer parse-multiline-string ebnf>quot nip
|
||||
suffix! \ call suffix! reset-tokenizer ;
|
||||
"EBNF]"
|
||||
reset-tokenizer parse-multiline-string ebnf>quot nip
|
||||
suffix! \ call suffix! reset-tokenizer ;
|
||||
|
||||
SYNTAX: EBNF:
|
||||
reset-tokenizer scan-new-word dup ";EBNF" parse-multiline-string
|
||||
ebnf>quot swapd
|
||||
( input -- ast ) define-declared "ebnf-parser" set-word-prop
|
||||
reset-tokenizer ;
|
||||
reset-tokenizer scan-new-word dup ";EBNF" parse-multiline-string
|
||||
ebnf>quot swapd
|
||||
( input -- ast ) define-declared "ebnf-parser" set-word-prop
|
||||
reset-tokenizer ;
|
||||
|
|
|
@ -7,31 +7,30 @@ IN: peg.parsers
|
|||
|
||||
TUPLE: just-parser p1 ;
|
||||
|
||||
CONSTANT: just-pattern
|
||||
[
|
||||
CONSTANT: just-pattern [
|
||||
dup [
|
||||
dup remaining>> empty? [ drop f ] unless
|
||||
dup remaining>> empty? [ drop f ] unless
|
||||
] when
|
||||
]
|
||||
|
||||
]
|
||||
|
||||
M: just-parser (compile) ( parser -- quot )
|
||||
p1>> compile-parser-quot just-pattern compose ;
|
||||
p1>> compile-parser-quot just-pattern compose ;
|
||||
|
||||
: just ( parser -- parser )
|
||||
just-parser boa wrap-peg ;
|
||||
just-parser boa wrap-peg ;
|
||||
|
||||
: 1token ( ch -- parser ) 1string token ;
|
||||
|
||||
: (list-of) ( items separator repeat1? -- parser )
|
||||
[ over 2seq ] dip [ repeat1 ] [ repeat0 ] if [ concat ] action 2seq
|
||||
[ unclip 1vector swap first append ] action ;
|
||||
[ over 2seq ] dip [ repeat1 ] [ repeat0 ] if
|
||||
[ concat ] action 2seq
|
||||
[ unclip 1vector swap first append ] action ;
|
||||
|
||||
: list-of ( items separator -- parser )
|
||||
hide f (list-of) ;
|
||||
hide f (list-of) ;
|
||||
|
||||
: list-of-many ( items separator -- parser )
|
||||
hide t (list-of) ;
|
||||
hide t (list-of) ;
|
||||
|
||||
: epsilon ( -- parser ) V{ } token ;
|
||||
|
||||
|
@ -40,70 +39,70 @@ M: just-parser (compile) ( parser -- quot )
|
|||
<PRIVATE
|
||||
|
||||
: flatten-vectors ( pair -- vector )
|
||||
first2 append! ;
|
||||
first2 append! ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: exactly-n ( parser n -- parser' )
|
||||
swap <repetition> seq ;
|
||||
swap <repetition> seq ;
|
||||
|
||||
: at-most-n ( parser n -- parser' )
|
||||
[
|
||||
drop epsilon
|
||||
] [
|
||||
[ exactly-n ] [ 1 - at-most-n ] 2bi 2choice
|
||||
] if-zero ;
|
||||
[
|
||||
drop epsilon
|
||||
] [
|
||||
[ exactly-n ] [ 1 - at-most-n ] 2bi 2choice
|
||||
] if-zero ;
|
||||
|
||||
: at-least-n ( parser n -- parser' )
|
||||
dupd exactly-n swap repeat0 2seq
|
||||
[ flatten-vectors ] action ;
|
||||
dupd exactly-n swap repeat0 2seq
|
||||
[ flatten-vectors ] action ;
|
||||
|
||||
: from-m-to-n ( parser m n -- parser' )
|
||||
[ [ exactly-n ] 2keep ] dip swap - at-most-n 2seq
|
||||
[ flatten-vectors ] action ;
|
||||
[ [ exactly-n ] 2keep ] dip swap - at-most-n 2seq
|
||||
[ flatten-vectors ] action ;
|
||||
|
||||
: pack ( begin body end -- parser )
|
||||
[ hide ] 2dip hide 3seq [ first ] action ;
|
||||
[ hide ] 2dip hide 3seq [ first ] action ;
|
||||
|
||||
: surrounded-by ( parser begin end -- parser' )
|
||||
[ token ] bi@ swapd pack ;
|
||||
[ token ] bi@ swapd pack ;
|
||||
|
||||
: 'digit' ( -- parser )
|
||||
[ digit? ] satisfy [ digit> ] action ;
|
||||
[ digit? ] satisfy [ digit> ] action ;
|
||||
|
||||
: 'integer' ( -- parser )
|
||||
'digit' repeat1 [ 10 digits>integer ] action ;
|
||||
'digit' repeat1 [ 10 digits>integer ] action ;
|
||||
|
||||
: 'string' ( -- parser )
|
||||
[
|
||||
[ CHAR: " = ] satisfy hide ,
|
||||
[ CHAR: " = not ] satisfy repeat0 ,
|
||||
[ CHAR: " = ] satisfy hide ,
|
||||
] seq* [ first >string ] action ;
|
||||
[
|
||||
[ CHAR: " = ] satisfy hide ,
|
||||
[ CHAR: " = not ] satisfy repeat0 ,
|
||||
[ CHAR: " = ] satisfy hide ,
|
||||
] seq* [ first >string ] action ;
|
||||
|
||||
: (range-pattern) ( pattern -- string )
|
||||
#! Given a range pattern, produce a string containing
|
||||
#! all characters within that range.
|
||||
[
|
||||
any-char ,
|
||||
[ CHAR: - = ] satisfy hide ,
|
||||
any-char ,
|
||||
] seq* [
|
||||
first2 [a,b] >string
|
||||
] action
|
||||
replace ;
|
||||
#! Given a range pattern, produce a string containing
|
||||
#! all characters within that range.
|
||||
[
|
||||
any-char ,
|
||||
[ CHAR: - = ] satisfy hide ,
|
||||
any-char ,
|
||||
] seq* [
|
||||
first2 [a,b] >string
|
||||
] action
|
||||
replace ;
|
||||
|
||||
: range-pattern ( pattern -- parser )
|
||||
#! 'pattern' is a set of characters describing the
|
||||
#! parser to be produced. Any single character in
|
||||
#! the pattern matches that character. If the pattern
|
||||
#! begins with a ^ then the set is negated (the element
|
||||
#! matches any character not in the set). Any pair of
|
||||
#! characters separated with a dash (-) represents the
|
||||
#! range of characters from the first to the second,
|
||||
#! inclusive.
|
||||
dup first CHAR: ^ = [
|
||||
rest (range-pattern) [ member? not ] curry satisfy
|
||||
] [
|
||||
(range-pattern) [ member? ] curry satisfy
|
||||
] if ;
|
||||
#! 'pattern' is a set of characters describing the
|
||||
#! parser to be produced. Any single character in
|
||||
#! the pattern matches that character. If the pattern
|
||||
#! begins with a ^ then the set is negated (the element
|
||||
#! matches any character not in the set). Any pair of
|
||||
#! characters separated with a dash (-) represents the
|
||||
#! range of characters from the first to the second,
|
||||
#! inclusive.
|
||||
dup first CHAR: ^ = [
|
||||
rest (range-pattern) [ member? not ] curry satisfy
|
||||
] [
|
||||
(range-pattern) [ member? ] curry satisfy
|
||||
] if ;
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
! Copyright (C) 2007, 2008 Chris Double.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel sequences strings fry namespaces make math assocs
|
||||
io vectors arrays math.parser math.order combinators classes
|
||||
sets unicode.categories compiler.units parser effects.parser
|
||||
words quotations memoize accessors locals splitting
|
||||
combinators.short-circuit generalizations ;
|
||||
USING: accessors arrays assocs classes combinators
|
||||
combinators.short-circuit compiler.units effects.parser fry
|
||||
generalizations kernel locals make math math.order namespaces
|
||||
quotations sequences sets splitting unicode.categories vectors
|
||||
words ;
|
||||
FROM: namespaces => set ;
|
||||
IN: peg
|
||||
|
||||
|
|
Loading…
Reference in New Issue