2008-01-13 04:31:31 -05:00
|
|
|
! Copyright (C) 2007 Chris Double.
|
|
|
|
! See http://factorcode.org/license.txt for BSD license.
|
|
|
|
USING: kernel parser words arrays strings math.parser sequences
|
2008-02-01 19:26:32 -05:00
|
|
|
quotations vectors namespaces math assocs continuations peg
|
2008-03-19 00:34:28 -04:00
|
|
|
peg.parsers unicode.categories multiline combinators.lib ;
|
2008-01-13 04:31:31 -05:00
|
|
|
IN: peg.ebnf
|
|
|
|
|
|
|
|
! AST node types produced by the EBNF grammar parser in this file.

! A reference to another rule, by name.
TUPLE: ebnf-non-terminal symbol ;

! A quoted literal string.
TUPLE: ebnf-terminal symbol ;

! Alternatives separated by "|".
TUPLE: ebnf-choice options ;

! Elements that appear one after another.
TUPLE: ebnf-sequence elements ;

! A "{ ... }" group.
TUPLE: ebnf-repeat0 group ;

! A "[ ... ]" group.
TUPLE: ebnf-optional elements ;

! A "name = rhs" definition.
TUPLE: ebnf-rule symbol elements ;

! A "=> word" suffix naming a word to apply to a rule's result.
TUPLE: ebnf-action word ;

! The whole grammar: the collection of parsed rules.
TUPLE: ebnf rules ;
|
|
|
|
|
|
|
|
! Constructors for the AST tuples, one per node type.
C: <ebnf-non-terminal> ebnf-non-terminal
C: <ebnf-terminal>     ebnf-terminal
C: <ebnf-choice>       ebnf-choice
C: <ebnf-sequence>     ebnf-sequence
C: <ebnf-repeat0>      ebnf-repeat0
C: <ebnf-optional>     ebnf-optional
C: <ebnf-rule>         ebnf-rule
C: <ebnf-action>       ebnf-action
C: <ebnf>              ebnf
|
|
|
|
|
|
|
|
! State used while turning an AST into parsers.

! Vector of generated parsers; a parser's id is its index here.
SYMBOL: parsers

! Hashtable mapping a rule name to its index in the parsers vector.
SYMBOL: non-terminals

! Id of the parser most recently returned by generate-parser, or f.
SYMBOL: last-parser
|
|
|
|
|
|
|
|
: reset-parser-generation ( -- )
    #! Start a fresh generation run: no last parser, an empty
    #! name table and an empty parser vector.
    last-parser off
    H{ } clone non-terminals set
    V{ } clone parsers set ;
|
|
|
|
|
|
|
|
: store-parser ( parser -- number )
    #! Append the parser to the parsers vector and return the
    #! index it was stored at.
    parsers get tuck push   ! vector
    length 1- ;
|
|
|
|
|
|
|
|
: get-parser ( index -- parser )
    #! Fetch the entry stored at the given index of the parsers
    #! vector. Entries reserved by non-terminal-index hold f until
    #! the matching rule has been generated.
    parsers get
    nth ;
|
|
|
|
|
|
|
|
: non-terminal-index ( name -- number )
    #! Return the parsers index assigned to the named rule. On
    #! first sight of a name, reserve a slot holding f and record
    #! the name -> index mapping in non-terminals.
    dup non-terminals get at dup [
        ! name index
        nip
    ] [
        ! name f
        drop f store-parser          ! name index
        tuck swap                    ! index index name
        non-terminals get set-at     ! index
    ] if ;
|
|
|
|
|
|
|
|
! Generic worker that turns one AST node into a parser and returns
! the parser's id. Callers in this file go through generate-parser,
! which also records the id in last-parser.
GENERIC: (generate-parser) ( ast -- id )
|
|
|
|
|
|
|
|
: generate-parser ( ast -- id )
    #! Generate the parser for an AST node and remember the
    #! resulting id in last-parser before returning it.
    (generate-parser)
    [ last-parser set ] keep ;
|
|
|
|
|
|
|
|
M: ebnf-terminal (generate-parser) ( ast -- id )
    #! A terminal becomes a token parser for its literal text,
    #! wrapped with sp, and is stored in the parsers vector.
    ebnf-terminal-symbol
    token sp
    store-parser ;
|
|
|
|
|
|
|
|
M: ebnf-non-terminal (generate-parser) ( ast -- id )
    #! Build a quotation of the shape
    #!   index parsers-vector nth word-or-f or
    #! and hand it to delay. The index is the rule's slot in the
    #! parsers vector; word-or-f is whatever search finds for the
    #! rule name (f if the lookup throws).
    #! NOTE(review): presumably delay postpones the slot lookup
    #! until the rule body has been stored - confirm against peg.
    ebnf-non-terminal-symbol
    [
        dup non-terminal-index ,
        parsers get ,
        \ nth ,
        [ search ] [ 2drop f ] recover ,
        \ or ,
    ] [ ] make
    delay sp store-parser ;
|
|
|
|
|
|
|
|
M: ebnf-choice (generate-parser) ( ast -- id )
    #! Generate a parser for every alternative, combine them
    #! with choice and store the combined parser.
    ebnf-choice-options
    [ generate-parser get-parser ] map
    choice store-parser ;
|
|
|
|
|
|
|
|
M: ebnf-sequence (generate-parser) ( ast -- id )
    #! Generate a parser for every element, combine them with
    #! seq and store the combined parser.
    ebnf-sequence-elements
    [ generate-parser get-parser ] map
    seq store-parser ;
|
|
|
|
|
|
|
|
M: ebnf-repeat0 (generate-parser) ( ast -- id )
    #! Generate the parser for the repeated group, wrap it in
    #! repeat0 and store the result.
    ebnf-repeat0-group
    generate-parser get-parser
    repeat0 store-parser ;
|
|
|
|
|
|
|
|
M: ebnf-optional (generate-parser) ( ast -- id )
    #! Generate the parser for the bracketed contents, wrap it
    #! in optional and store the result.
    ebnf-optional-elements
    generate-parser get-parser
    optional store-parser ;
|
|
|
|
|
|
|
|
M: ebnf-rule (generate-parser) ( ast -- id )
    #! Reserve (or look up) the slot for the rule's name, then
    #! generate the rule body and write its parser into that
    #! slot. The slot index is the id returned.
    dup ebnf-rule-symbol non-terminal-index           ! ast nt-id
    swap ebnf-rule-elements generate-parser get-parser ! nt-id body
    over parsers get set-nth ;                        ! nt-id
|
|
|
|
|
|
|
|
M: ebnf-action (generate-parser) ( ast -- id )
    #! Look up the named word, wrap it in a one-element
    #! quotation and attach it as an action to the parser
    #! recorded in last-parser. The new parser is stored.
    #! NOTE(review): if search yields f the quotation is [ f ];
    #! confirm that this is the intended result for unknown words.
    ebnf-action-word search 1quotation >r
    last-parser get get-parser
    r> action store-parser ;
|
|
|
|
|
|
|
|
M: vector (generate-parser) ( ast -- id )
    #! Generate each element in order and return the id that
    #! the last element produced.
    [ generate-parser ] map
    peek ;
|
|
|
|
|
|
|
|
M: f (generate-parser) ( ast -- id )
    #! An absent node generates nothing; the id already held in
    #! last-parser is returned unchanged.
    drop
    last-parser get ;
|
|
|
|
|
|
|
|
M: ebnf (generate-parser) ( ast -- id )
    #! Generate every rule of the grammar in order and return
    #! the id produced by the last rule.
    ebnf-rules
    [ generate-parser ] map
    peek ;
|
|
|
|
|
|
|
|
! Forward declaration; 'rhs' is defined further down in this file.
DEFER: 'rhs'
|
|
|
|
|
2008-03-19 00:34:28 -04:00
|
|
|
: 'identifier' ( -- parser )
    #! Return a parser for an identifier delimited by quotation
    #! characters, either single or double quotes. The AST
    #! produced is the text between the quotes, as a string.
    [ CHAR: " = not ] satisfy repeat1 "\"" "\"" surrounded-by
    [ CHAR: ' = not ] satisfy repeat1 "'" "'" surrounded-by
    2array choice
    [ >string ] action ;
|
|
|
|
|
2008-01-13 04:31:31 -05:00
|
|
|
: 'non-terminal' ( -- parser )
    #! A non-terminal is the name of another rule. It is one or
    #! more characters, none of which is blank or one of the
    #! characters used by the EBNF syntax itself:
    #!   " ' | { } = ) ( ] [
    #! The AST produced is an ebnf-non-terminal holding the name.
    [
        dup blank? [
            drop f
        ] [
            "\"'|{}=)(][" member? not
        ] if
    ] satisfy repeat1
    [ >string <ebnf-non-terminal> ] action ;
|
2008-01-13 04:31:31 -05:00
|
|
|
|
|
|
|
: 'terminal' ( -- parser )
    #! A terminal is a quoted identifier; it stands for the
    #! literal text between the quotes. The AST produced is an
    #! ebnf-terminal holding that text.
    'identifier'
    [ <ebnf-terminal> ] action ;
|
2008-01-13 04:31:31 -05:00
|
|
|
|
|
|
|
: 'element' ( -- parser )
    #! An element is a non-terminal or, failing that, a
    #! terminal; the alternatives are tried in that order.
    'non-terminal' 'terminal' 2array choice ;
|
2008-01-13 04:31:31 -05:00
|
|
|
|
|
|
|
! Forward declaration: 'group', 'repeat0' and 'optional' refer to
! 'choice' before its definition further down in this file.
DEFER: 'choice'
|
|
|
|
|
|
|
|
: 'group' ( -- parser )
    #! A parenthesised sub-expression: "(" choice ")". The
    #! action takes the first item of the sequence result as
    #! the AST; both bracket tokens are marked with hide.
    #! 'choice' is reached through delay because the grammar is
    #! recursive.
    "(" token sp hide
    [ 'choice' sp ] delay
    ")" token sp hide
    3array seq
    [ first ] action ;
|
2008-01-13 04:31:31 -05:00
|
|
|
|
|
|
|
: 'repeat0' ( -- parser )
    #! A braced sub-expression: "{" choice "}". The action wraps
    #! the first item of the sequence result in an ebnf-repeat0;
    #! both brace tokens are marked with hide.
    "{" token sp hide
    [ 'choice' sp ] delay
    "}" token sp hide
    3array seq
    [ first <ebnf-repeat0> ] action ;
|
2008-01-13 04:31:31 -05:00
|
|
|
|
|
|
|
: 'optional' ( -- parser )
    #! A bracketed sub-expression: "[" choice "]". The action
    #! wraps the first item of the sequence result in an
    #! ebnf-optional; both bracket tokens are marked with hide.
    "[" token sp hide
    [ 'choice' sp ] delay
    "]" token sp hide
    3array seq
    [ first <ebnf-optional> ] action ;
|
2008-01-13 04:31:31 -05:00
|
|
|
|
|
|
|
: 'sequence' ( -- parser )
    #! One or more items, each an element, group, repeat0 or
    #! optional (tried in that order). A single item is returned
    #! as-is; several items are wrapped in an ebnf-sequence.
    'element' sp
    'group' sp
    'repeat0' sp
    'optional' sp
    4array choice repeat1
    [
        dup length 1 =
        [ first ] [ <ebnf-sequence> ] if
    ] action ;
|
|
|
|
|
|
|
|
: 'choice' ( -- parser )
    #! A list of sequences separated by "|". A single sequence
    #! is returned as-is; several are wrapped in an ebnf-choice.
    'sequence' sp
    "|" token sp
    list-of
    [
        dup length 1 =
        [ first ] [ <ebnf-choice> ] if
    ] action ;
|
|
|
|
|
|
|
|
: 'action' ( -- parser )
    #! An action suffix: "=>" followed by a word name. The name
    #! is one or more characters, each accepted only when
    #! ensure-not of the blank? test allows it. The AST produced
    #! is an ebnf-action holding the name as a string.
    "=>" token hide
    [ blank? ] satisfy ensure-not
    [ drop t ] satisfy
    2array seq [ first ] action      ! one name character
    repeat1 [ >string ] action sp    ! the whole name
    2array seq
    [ first <ebnf-action> ] action ;
|
2008-01-13 04:31:31 -05:00
|
|
|
|
|
|
|
: 'rhs' ( -- parser )
    #! The right-hand side of a rule: a choice followed by an
    #! optional action suffix, combined with seq.
    'choice'
    'action' sp optional
    2array seq ;
|
2008-01-13 04:31:31 -05:00
|
|
|
|
|
|
|
: 'rule' ( -- parser )
    #! A rule: name "=" rhs. The name is parsed as a
    #! non-terminal and reduced to its symbol; the "=" token is
    #! marked with hide. The AST produced is an ebnf-rule built
    #! from the first two items of the sequence result.
    'non-terminal' [ ebnf-non-terminal-symbol ] action
    "=" token sp hide
    'rhs'
    3array seq
    [ first2 <ebnf-rule> ] action ;
|
2008-01-13 04:31:31 -05:00
|
|
|
|
|
|
|
: 'ebnf' ( -- parser )
    #! A whole grammar: a list of rules separated by "." tokens.
    #! The AST produced is an ebnf tuple holding the rules.
    'rule' sp
    "." token sp hide
    list-of
    [ <ebnf> ] action ;
|
|
|
|
|
|
|
|
: ebnf>quot ( string -- quot )
    #! Parse an EBNF grammar string. If the parse yields f the
    #! result is f. Otherwise generate parsers for the AST in a
    #! fresh scope and build a quotation that, for every rule
    #! name whose parser slot is filled, contains
    #!   "name" in get create [ parser ] define
    #! Names whose slot is still f are skipped.
    'ebnf' parse dup [
        parse-result-ast [
            reset-parser-generation
            generate-parser drop
            [
                non-terminals get [
                    ! name index
                    get-parser dup [
                        ! name parser
                        swap ,
                        [ in get create ] %
                        1quotation , \ define ,
                    ] [
                        ! name f
                        2drop
                    ] if
                ] assoc-each
            ] [ ] make
        ] with-scope
    ] when ;
|
|
|
|
|
2008-03-18 23:54:42 -04:00
|
|
|
! Parsing word: read the text up to the closing "EBNF>" marker,
! convert it with ebnf>quot and call the resulting quotation.
! NOTE(review): ebnf>quot returns f when the grammar fails to
! parse; confirm how call treats f in that case.
: <EBNF
    "EBNF>" parse-multiline-string
    ebnf>quot call ; parsing
|