Merge commit 'doublec/master'

release
Slava Pestov 2007-11-28 23:50:40 -05:00
commit 31102fc37c
5 changed files with 266 additions and 135 deletions

View File

@ -9,27 +9,91 @@ IN: temporary
] unit-test
{ T{ ebnf-terminal f "55" } } [
"\"55\"" 'terminal' parse parse-result-ast
"'55'" 'terminal' parse parse-result-ast
] unit-test
{
T{ ebnf-rule f
"digit"
V{
T{ ebnf-choice f
V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } }
}
f
}
}
} [
"digit = '1' | '2'" 'rule' parse parse-result-ast
] unit-test
{
T{ ebnf-rule f
"digit"
T{ ebnf-choice f
V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } }
V{
T{ ebnf-sequence f
V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } }
}
f
}
}
}
} [
"digit = \"1\" | \"2\"" 'rule' parse parse-result-ast
"digit = '1' '2'" 'rule' parse parse-result-ast
] unit-test
{
T{ ebnf-rule f
"digit"
T{ ebnf-sequence f
V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } }
T{ ebnf-choice f
V{
T{ ebnf-sequence f
V{ T{ ebnf-non-terminal f "one" } T{ ebnf-non-terminal f "two" } }
}
T{ ebnf-non-terminal f "three" }
}
}
} [
"digit = \"1\" \"2\"" 'rule' parse parse-result-ast
] unit-test
"one two | three" 'choice' parse parse-result-ast
] unit-test
{
T{ ebnf-sequence f
V{
T{ ebnf-non-terminal f "one" }
T{ ebnf-choice f
V{ T{ ebnf-non-terminal f "two" } T{ ebnf-non-terminal f "three" } }
}
}
}
} [
"one (two | three)" 'choice' parse parse-result-ast
] unit-test
{
T{ ebnf-sequence f
V{
T{ ebnf-non-terminal f "one" }
T{ ebnf-repeat0 f
T{ ebnf-sequence f
V{
T{ ebnf-choice f
V{ T{ ebnf-non-terminal f "two" } T{ ebnf-non-terminal f "three" } }
}
T{ ebnf-non-terminal f "four" }
}
}
}
}
}
} [
"one {(two | three) four}" 'choice' parse parse-result-ast
] unit-test
{
T{ ebnf-sequence f
V{
T{ ebnf-non-terminal f "one" }
T{ ebnf-optional f T{ ebnf-non-terminal f "two" } }
T{ ebnf-non-terminal f "three" }
}
}
} [
"one [ two ] three" 'choice' parse parse-result-ast
] unit-test

View File

@ -1,6 +1,7 @@
! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel parser words arrays strings math.parser sequences namespaces peg ;
USING: kernel parser words arrays strings math.parser sequences
quotations vectors namespaces math assocs continuations peg ;
IN: peg.ebnf
TUPLE: ebnf-non-terminal symbol ;
@ -8,7 +9,9 @@ TUPLE: ebnf-terminal symbol ;
TUPLE: ebnf-choice options ;
TUPLE: ebnf-sequence elements ;
TUPLE: ebnf-repeat0 group ;
TUPLE: ebnf-optional elements ;
TUPLE: ebnf-rule symbol elements ;
TUPLE: ebnf-action word ;
TUPLE: ebnf rules ;
C: <ebnf-non-terminal> ebnf-non-terminal
@ -16,58 +19,82 @@ C: <ebnf-terminal> ebnf-terminal
C: <ebnf-choice> ebnf-choice
C: <ebnf-sequence> ebnf-sequence
C: <ebnf-repeat0> ebnf-repeat0
C: <ebnf-optional> ebnf-optional
C: <ebnf-rule> ebnf-rule
C: <ebnf-action> ebnf-action
C: <ebnf> ebnf
GENERIC: ebnf-compile ( ast -- quot )
SYMBOL: parsers
SYMBOL: non-terminals
SYMBOL: last-parser
M: ebnf-terminal ebnf-compile ( ast -- quot )
[
ebnf-terminal-symbol , \ token ,
] [ ] make ;
: reset-parser-generation ( -- )
V{ } clone parsers set
H{ } clone non-terminals set
f last-parser set ;
M: ebnf-non-terminal ebnf-compile ( ast -- quot )
[
ebnf-non-terminal-symbol , \ in , \ get , \ lookup , \ execute ,
] [ ] make ;
: store-parser ( parser -- number )
parsers get [ push ] keep length 1- ;
M: ebnf-choice ebnf-compile ( ast -- quot )
[
[
ebnf-choice-options [
ebnf-compile ,
] each
] { } make ,
[ call ] , \ map , \ choice ,
] [ ] make ;
: get-parser ( index -- parser )
parsers get nth ;
: non-terminal-index ( name -- number )
dup non-terminals get at [
nip
] [
f store-parser [ swap non-terminals get set-at ] keep
] if* ;
M: ebnf-sequence ebnf-compile ( ast -- quot )
[
[
ebnf-sequence-elements [
ebnf-compile ,
] each
] { } make ,
[ call ] , \ map , \ seq ,
] [ ] make ;
GENERIC: (generate-parser) ( ast -- id )
M: ebnf-repeat0 ebnf-compile ( ast -- quot )
[
ebnf-repeat0-group ebnf-compile % \ repeat0 ,
] [ ] make ;
: generate-parser ( ast -- id )
(generate-parser) dup last-parser set ;
M: ebnf-rule ebnf-compile ( ast -- quot )
[
dup ebnf-rule-symbol , \ in , \ get , \ create ,
ebnf-rule-elements ebnf-compile , \ define-compound ,
] [ ] make ;
M: ebnf-terminal (generate-parser) ( ast -- id )
ebnf-terminal-symbol token sp store-parser ;
M: ebnf ebnf-compile ( ast -- quot )
M: ebnf-non-terminal (generate-parser) ( ast -- id )
[
ebnf-rules [
ebnf-compile %
] each
] [ ] make ;
ebnf-non-terminal-symbol dup non-terminal-index ,
parsers get , \ nth , [ search ] [ 2drop f ] recover , \ or ,
] [ ] make delay sp store-parser ;
M: ebnf-choice (generate-parser) ( ast -- id )
ebnf-choice-options [
generate-parser get-parser
] map choice store-parser ;
M: ebnf-sequence (generate-parser) ( ast -- id )
ebnf-sequence-elements [
generate-parser get-parser
] map seq store-parser ;
M: ebnf-repeat0 (generate-parser) ( ast -- id )
ebnf-repeat0-group generate-parser get-parser repeat0 store-parser ;
M: ebnf-optional (generate-parser) ( ast -- id )
ebnf-optional-elements generate-parser get-parser optional store-parser ;
M: ebnf-rule (generate-parser) ( ast -- id )
dup ebnf-rule-symbol non-terminal-index swap
ebnf-rule-elements generate-parser get-parser ! nt-id body
swap [ parsers get set-nth ] keep ;
M: ebnf-action (generate-parser) ( ast -- id )
ebnf-action-word search 1quotation
last-parser get get-parser swap action store-parser ;
M: vector (generate-parser) ( ast -- id )
[ generate-parser ] map peek ;
M: f (generate-parser) ( ast -- id )
drop last-parser get ;
M: ebnf (generate-parser) ( ast -- id )
ebnf-rules [
generate-parser
] map peek ;
DEFER: 'rhs'
@ -75,32 +102,55 @@ DEFER: 'rhs'
CHAR: a CHAR: z range repeat1 [ >string <ebnf-non-terminal> ] action ;
: 'terminal' ( -- parser )
"\"" token hide [ CHAR: " = not ] satisfy repeat1 "\"" token hide 3array seq [ first >string <ebnf-terminal> ] action ;
"'" token hide [ CHAR: ' = not ] satisfy repeat1 "'" token hide 3array seq [ first >string <ebnf-terminal> ] action ;
: 'element' ( -- parser )
'non-terminal' 'terminal' 2array choice ;
: 'sequence' ( -- parser )
'element' sp
"|" token sp ensure-not 2array seq [ first ] action
repeat1 [ <ebnf-sequence> ] action ;
: 'choice' ( -- parser )
'element' sp "|" token sp list-of [ <ebnf-choice> ] action ;
DEFER: 'choice'
: 'group' ( -- parser )
"(" token sp hide
[ 'choice' sp ] delay
")" token sp hide
3array seq [ first ] action ;
: 'repeat0' ( -- parser )
"{" token sp hide
[ 'rhs' sp ] delay
[ 'choice' sp ] delay
"}" token sp hide
3array seq [ first <ebnf-repeat0> ] action ;
: 'rhs' ( -- parser )
'repeat0'
'sequence'
'choice'
'element'
4array choice ;
: 'optional' ( -- parser )
"[" token sp hide
[ 'choice' sp ] delay
"]" token sp hide
3array seq [ first <ebnf-optional> ] action ;
: 'sequence' ( -- parser )
[
'element' sp ,
'group' sp ,
'repeat0' sp ,
'optional' sp ,
] { } make choice
repeat1 [
dup length 1 = [ first ] [ <ebnf-sequence> ] if
] action ;
: 'choice' ( -- parser )
'sequence' sp "|" token sp list-of [
dup length 1 = [ first ] [ <ebnf-choice> ] if
] action ;
: 'action' ( -- parser )
"=>" token hide
[ blank? ] satisfy ensure-not [ drop t ] satisfy 2array seq [ first ] action repeat1 [ >string ] action sp
2array seq [ first <ebnf-action> ] action ;
: 'rhs' ( -- parser )
'choice' 'action' sp optional 2array seq ;
: 'rule' ( -- parser )
'non-terminal' [ ebnf-non-terminal-symbol ] action
"=" token sp hide
@ -112,9 +162,23 @@ DEFER: 'rhs'
: ebnf>quot ( string -- quot )
'ebnf' parse [
parse-result-ast ebnf-compile
parse-result-ast [
reset-parser-generation
generate-parser drop
[
non-terminals get
[
get-parser [
swap , \ in , \ get , \ create ,
1quotation , \ define-compound ,
] [
drop
] if*
] assoc-each
] [ ] make
] with-scope
] [
f
] if* ;
: <EBNF "EBNF>" parse-tokens "" join ebnf>quot call ; parsing
: <EBNF "EBNF>" parse-tokens " " join ebnf>quot call ; parsing

View File

@ -1,14 +1,16 @@
! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences strings namespaces math assocs shuffle vectors combinators.lib ;
USING: kernel sequences strings namespaces math assocs shuffle
vectors arrays combinators.lib ;
IN: peg
TUPLE: parse-result remaining ast ;
GENERIC: parse ( state parser -- result )
GENERIC: (parse) ( state parser -- result )
<PRIVATE
SYMBOL: packrat-cache
SYMBOL: ignore
: <parse-result> ( remaining ast -- parse-result )
@ -24,18 +26,48 @@ TUPLE: parser id ;
: init-parser ( parser -- parser )
get-next-id parser construct-boa over set-delegate ;
: from ( slice-or-string -- index )
dup slice? [ slice-from ] [ drop 0 ] if ;
: get-cached ( input parser -- result )
[ from ] dip parser-id packrat-cache get at at ;
: put-cached ( result input parser -- )
parser-id dup packrat-cache get at [
nip
] [
H{ } clone dup >r swap packrat-cache get set-at r>
] if*
[ from ] dip set-at ;
PRIVATE>
: parse ( input parser -- result )
packrat-cache get [
2dup get-cached [
[ (parse) dup ] 2keep put-cached
] unless*
] [
(parse)
] if ;
: packrat-parse ( input parser -- result )
H{ } clone packrat-cache [ parse ] with-variable ;
<PRIVATE
TUPLE: token-parser symbol ;
M: token-parser parse ( state parser -- result )
M: token-parser (parse) ( input parser -- result )
token-parser-symbol 2dup head? [
dup >r length tail-slice r> <parse-result>
] [
2drop f
] if ;
TUPLE: satisfy-parser quot ;
M: satisfy-parser parse ( state parser -- result )
M: satisfy-parser (parse) ( state parser -- result )
over empty? [
2drop f
] [
@ -48,7 +80,7 @@ M: satisfy-parser parse ( state parser -- result )
TUPLE: range-parser min max ;
M: range-parser parse ( state parser -- result )
M: range-parser (parse) ( state parser -- result )
over empty? [
2drop f
] [
@ -77,7 +109,7 @@ TUPLE: seq-parser parsers ;
drop
] if ;
M: seq-parser parse ( state parser -- result )
M: seq-parser (parse) ( state parser -- result )
seq-parser-parsers [ V{ } clone <parse-result> ] dip (seq-parser) ;
TUPLE: choice-parser parsers ;
@ -93,7 +125,7 @@ TUPLE: choice-parser parsers ;
] if*
] if ;
M: choice-parser parse ( state parser -- result )
M: choice-parser (parse) ( state parser -- result )
choice-parser-parsers (choice-parser) ;
TUPLE: repeat0-parser p1 ;
@ -111,7 +143,7 @@ TUPLE: repeat0-parser p1 ;
{ parse-result-remaining parse-result-ast }
get-slots 1vector <parse-result> ;
M: repeat0-parser parse ( state parser -- result )
M: repeat0-parser (parse) ( state parser -- result )
repeat0-parser-p1 2dup parse [
nipd clone-result (repeat-parser)
] [
@ -120,17 +152,17 @@ M: repeat0-parser parse ( state parser -- result )
TUPLE: repeat1-parser p1 ;
M: repeat1-parser parse ( state parser -- result )
M: repeat1-parser (parse) ( state parser -- result )
repeat1-parser-p1 tuck parse dup [ clone-result (repeat-parser) ] [ nip ] if ;
TUPLE: optional-parser p1 ;
M: optional-parser parse ( state parser -- result )
M: optional-parser (parse) ( state parser -- result )
dupd optional-parser-p1 parse swap f <parse-result> or ;
TUPLE: ensure-parser p1 ;
M: ensure-parser parse ( state parser -- result )
M: ensure-parser (parse) ( state parser -- result )
dupd ensure-parser-p1 parse [
ignore <parse-result>
] [
@ -139,7 +171,7 @@ M: ensure-parser parse ( state parser -- result )
TUPLE: ensure-not-parser p1 ;
M: ensure-not-parser parse ( state parser -- result )
M: ensure-not-parser (parse) ( state parser -- result )
dupd ensure-not-parser-p1 parse [
drop f
] [
@ -148,7 +180,7 @@ M: ensure-not-parser parse ( state parser -- result )
TUPLE: action-parser p1 quot ;
M: action-parser parse ( state parser -- result )
M: action-parser (parse) ( state parser -- result )
tuck action-parser-p1 parse dup [
dup parse-result-ast rot action-parser-quot call
swap [ set-parse-result-ast ] keep
@ -165,12 +197,12 @@ M: action-parser parse ( state parser -- result )
TUPLE: sp-parser p1 ;
M: sp-parser parse ( state parser -- result )
M: sp-parser (parse) ( state parser -- result )
[ left-trim-slice ] dip sp-parser-p1 parse ;
TUPLE: delay-parser quot ;
M: delay-parser parse ( state parser -- result )
M: delay-parser (parse) ( state parser -- result )
delay-parser-quot call parse ;
PRIVATE>

View File

@ -5,9 +5,9 @@ USING: kernel tools.test peg peg.pl0 ;
IN: temporary
{ "abc" } [
"abc" 'ident' parse parse-result-ast
"abc" ident parse parse-result-ast
] unit-test
{ 55 } [
"55abc" 'number' parse parse-result-ast
"55abc" number parse parse-result-ast
] unit-test

View File

@ -1,58 +1,29 @@
! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel arrays strings math.parser sequences peg ;
USING: kernel arrays strings math.parser sequences peg peg.ebnf ;
IN: peg.pl0
#! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0
: 'ident' ( -- parser )
: ident ( -- parser )
CHAR: a CHAR: z range
CHAR: A CHAR: Z range 2array choice repeat1
[ >string ] action ;
: 'number' ( -- parser )
: number ( -- parser )
CHAR: 0 CHAR: 9 range repeat1 [ string>number ] action ;
DEFER: 'factor'
: 'term' ( -- parser )
'factor' "*" token "/" token 2array choice sp 'factor' sp 2array seq repeat0 2array seq ;
: 'expression' ( -- parser )
[ "+" token "-" token 2array choice sp optional 'term' sp 2dup 2array seq repeat0 3array seq ] delay ;
: 'factor' ( -- parser )
'ident' 'number' "(" token hide 'expression' sp ")" token sp hide 3array seq 3array choice ;
: 'condition' ( -- parser )
"odd" token 'expression' sp 2array seq
'expression' { "=" "#" "<=" "<" ">=" ">" } [ token ] map choice sp 'expression' sp 3array seq
2array choice ;
: 'statement' ( -- parser )
[
'ident' ":=" token sp 'expression' sp 3array seq
"call" token 'ident' sp 2array seq
"begin" token 'statement' sp ";" token sp 'statement' sp 2array seq repeat0 "end" token sp 4array seq
"if" token 'condition' sp "then" token sp 'statement' sp 4array seq
4array choice
"while" token 'condition' sp "do" token sp 'statement' sp 4array seq
2array choice optional
] delay ;
: 'block' ( -- parser )
[
"const" token 'ident' sp "=" token sp 'number' sp 4array seq
"," token sp 'ident' sp "=" token sp 'number' sp 4array seq repeat0
";" token sp 3array seq optional
"var" token 'ident' sp "," token sp 'ident' sp 2array seq repeat0
";" token sp 4array seq optional
"procedure" token 'ident' sp ";" token sp 'block' sp 4array seq ";" token sp 2array seq repeat0 'statement' sp 2array seq
3array seq
] delay ;
: 'program' ( -- parser )
'block' "." token sp 2array seq ;
<EBNF
program = block '.' .
block = [ 'const' ident '=' number { ',' ident '=' number } ';' ]
[ 'var' ident { ',' ident } ';' ]
{ 'procedure' ident ';' [ block ';' ] } statement .
statement = [ ident ':=' expression | 'call' ident |
'begin' statement {';' statement } 'end' |
'if' condition 'then' statement |
'while' condition 'do' statement ] .
condition = 'odd' expression |
expression ('=' | '#' | '<=' | '<' | '>=' | '>') expression .
expression = ['+' | '-'] term {('+' | '-') term } .
term = factor {('*' | '/') factor } .
factor = ident | number | '(' expression ')'
EBNF>