Merge branch 'master' of git://factorcode.org/git/factor

db4
erg 2008-04-03 18:10:29 -05:00
commit bc4892e740
5 changed files with 212 additions and 54 deletions

View File

@ -1,7 +1,7 @@
! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license.
!
USING: kernel tools.test peg peg.ebnf words math math.parser ;
USING: kernel tools.test peg peg.ebnf words math math.parser sequences ;
IN: peg.ebnf.tests
{ T{ ebnf-non-terminal f "abc" } } [
@ -180,6 +180,55 @@ IN: peg.ebnf.tests
{ 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call parse-result-ast
] unit-test
{ f } [
"ab" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call
] unit-test
{ V{ "a" " " "b" } } [
"a b" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "\t" "b" } } [
"a\tb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "\n" "b" } } [
"a\nb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" f "b" } } [
"ab" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" " " "b" } } [
"a b" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "\t" "b" } } [
"a\tb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "\n" "b" } } [
"a\nb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "b" } } [
"ab" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "b" } } [
"a\tb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "b" } } [
"a\nb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ f } [
"axb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call
] unit-test
{ V{ V{ 49 } "+" V{ 49 } } } [
#! Test direct left recursion.
#! Using packrat, so first part of expr fails, causing 2nd choice to be used
@ -198,9 +247,13 @@ IN: peg.ebnf.tests
"1+1+1" [EBNF num=([0-9])+ x=expr expr=x "+" num | num EBNF] call parse-result-ast
] unit-test
{ t } [
"abcd='9' | ('8'):x => [[ drop x ]]" 'ebnf' parse parse-result-remaining empty?
] unit-test
EBNF: primary
Primary = PrimaryNoNewArray
PrimaryNoNewArray = ClassInstanceCreationExpression
PrimaryNoNewArray = ClassInstanceCreationExpression
| MethodInvocation
| FieldAccess
| ArrayAccess
@ -211,7 +264,7 @@ MethodInvocation = Primary "." MethodName "(" ")"
| MethodName "(" ")"
FieldAccess = Primary "." Identifier
| "super" "." Identifier
ArrayAccess = Primary "[" Expression "]"
ArrayAccess = Primary "[" Expression "]"
| ExpressionName "[" Expression "]"
ClassOrInterfaceType = ClassName | InterfaceTypeName
ClassName = "C" | "D"

View File

@ -3,7 +3,7 @@
USING: kernel compiler.units parser words arrays strings math.parser sequences
quotations vectors namespaces math assocs continuations peg
peg.parsers unicode.categories multiline combinators.lib
splitting accessors effects sequences.deep ;
splitting accessors effects sequences.deep peg.search ;
IN: peg.ebnf
TUPLE: ebnf-non-terminal symbol ;
@ -213,6 +213,7 @@ DEFER: 'choice'
: 'actioned-sequence' ( -- parser )
[
[ 'sequence' , "=>" syntax , 'action' , ] seq* [ first2 <ebnf-action> ] action ,
[ 'sequence' , ":" syntax , "a-zA-Z" range-pattern repeat1 [ >string ] action , "=>" syntax , 'action' , ] seq* [ first3 >r <ebnf-var> r> <ebnf-action> ] action ,
[ 'sequence' , ":" syntax , "a-zA-Z" range-pattern repeat1 [ >string ] action , ] seq* [ first2 <ebnf-var> ] action ,
'sequence' ,
] choice* ;
@ -237,22 +238,21 @@ GENERIC: (transform) ( ast -- parser )
SYMBOL: parser
SYMBOL: main
SYMBOL: vars
: transform ( ast -- object )
H{ } clone dup dup [ parser set V{ } vars set swap (transform) main set ] bind ;
H{ } clone dup dup [ parser set swap (transform) main set ] bind ;
M: ebnf (transform) ( ast -- parser )
rules>> [ (transform) ] map peek ;
M: ebnf-rule (transform) ( ast -- parser )
dup elements>>
vars get clone vars [ (transform) ] with-variable [
(transform) [
swap symbol>> set
] keep ;
M: ebnf-sequence (transform) ( ast -- parser )
elements>> [ (transform) ] map seq ;
elements>> [ (transform) ] map seq [ dup length 1 = [ first ] when ] action ;
M: ebnf-choice (transform) ( ast -- parser )
options>> [ (transform) ] map choice ;
@ -282,37 +282,62 @@ M: ebnf-repeat1 (transform) ( ast -- parser )
M: ebnf-optional (transform) ( ast -- parser )
transform-group optional ;
: build-locals ( string vars -- string )
dup empty? [
drop
] [
GENERIC: build-locals ( code ast -- code )
M: ebnf-sequence build-locals ( code ast -- code )
elements>> dup [ ebnf-var? ] subset empty? [
drop
] [
[
"USING: locals namespaces ; [let* | " %
[ dup % " [ \"" % % "\" get ] " % ] each
" | " %
%
" ] with-locals" %
"USING: locals sequences ; [let* | " %
dup length swap [
dup ebnf-var? [
name>> %
" [ " % # " over nth ] " %
] [
2drop
] if
] 2each
" | " %
%
" ] with-locals" %
] "" make
] if ;
M: ebnf-var build-locals ( code ast -- )
[
"USING: locals kernel ; [let* | " %
name>> % " [ dup ] " %
" | " %
%
" ] with-locals" %
] "" make ;
M: object build-locals ( code ast -- )
drop ;
M: ebnf-action (transform) ( ast -- parser )
[ parser>> (transform) ] keep
code>> vars get build-locals string-lines [ parse-lines ] with-compilation-unit action ;
[ parser>> (transform) ] [ code>> ] [ parser>> ] tri build-locals
string-lines [ parse-lines ] with-compilation-unit action ;
M: ebnf-semantic (transform) ( ast -- parser )
[ parser>> (transform) ] keep
code>> vars get build-locals string-lines [ parse-lines ] with-compilation-unit semantic ;
[ parser>> (transform) ] [ code>> ] [ parser>> ] tri build-locals
string-lines [ parse-lines ] with-compilation-unit semantic ;
M: ebnf-var (transform) ( ast -- parser )
[ parser>> (transform) ] [ name>> ] bi
dup vars get push [ dupd set ] curry action ;
parser>> (transform) ;
M: ebnf-terminal (transform) ( ast -- parser )
symbol>> token sp ;
symbol>> token ;
: parser-not-found ( name -- * )
[
"Parser " % % " not found." %
] "" make throw ;
M: ebnf-non-terminal (transform) ( ast -- parser )
symbol>> [
, parser get , \ at , \ sp ,
, \ dup , parser get , \ at , [ parser-not-found ] , \ unless* , \ nip ,
] [ ] make box ;
: transform-ebnf ( string -- object )
@ -320,7 +345,7 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
: check-parse-result ( result -- result )
dup [
dup parse-result-remaining empty? [
dup parse-result-remaining [ blank? ] trim empty? [
[
"Unable to fully parse EBNF. Left to parse was: " %
parse-result-remaining %
@ -335,10 +360,16 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
parse-result-ast transform dup dup parser [ main swap at compile ] with-variable
[ compiled-parse ] curry [ with-scope ] curry ;
: [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing
: replace-escapes ( string -- string )
"\\t" token [ drop "\t" ] action "\\n" token [ drop "\n" ] action 2choice replace ;
: [EBNF "EBNF]" parse-multiline-string replace-escapes ebnf>quot nip parsed ; parsing
: EBNF:
CREATE-WORD dup
";EBNF" parse-multiline-string
";EBNF" parse-multiline-string replace-escapes
ebnf>quot swapd 1 1 <effect> define-declared "ebnf-parser" set-word-prop ; parsing
: rule ( name word -- parser )
#! Given an EBNF word produced from EBNF: return the EBNF rule
"ebnf-parser" word-prop at ;

View File

@ -30,6 +30,14 @@ SYMBOL: fail
SYMBOL: lrstack
SYMBOL: heads
: delegates ( -- cache )
\ delegates get-global [ H{ } clone dup \ delegates set-global ] unless* ;
: reset-pegs ( -- )
H{ } clone \ delegates set-global ;
reset-pegs
TUPLE: memo-entry ans pos ;
C: <memo-entry> memo-entry
@ -253,14 +261,6 @@ SYMBOL: id
1 id set-global 0
] if* ;
: delegates ( -- cache )
\ delegates get-global [ H{ } clone dup \ delegates set-global ] unless* ;
: reset-delegates ( -- )
H{ } clone \ delegates set-global ;
reset-delegates
: init-parser ( parser -- parser )
#! Set the delegate for the parser. Equivalent parsers
#! get a delegate with the same id.
@ -590,7 +590,13 @@ PRIVATE>
#! not a cached one. This is because the same box,
#! compiled twice can have a different compiled word
#! due to running at compile time.
box-parser construct-boa next-id f <parser> over set-delegate ;
#! Why the [ ] action at the end? Box parsers don't get
#! memoized during parsing due to all box parsers being
#! unique. This breaks left recursion detection during the
#! parse. The action adds an indirection with a parser type
#! that gets memoized and fixes this. Need to rethink how
#! to fix boxes so this isn't needed...
box-parser construct-boa next-id f <parser> over set-delegate [ ] action ;
: PEG:
(:) [

View File

@ -1,9 +1,45 @@
! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license.
!
USING: kernel tools.test peg peg.pl0 multiline sequences ;
USING: kernel tools.test peg peg.ebnf peg.pl0 multiline sequences ;
IN: peg.pl0.tests
{ t } [
"CONST foo = 1;" "block" \ pl0 rule parse parse-result-remaining empty?
] unit-test
{ t } [
"VAR foo;" "block" \ pl0 rule parse parse-result-remaining empty?
] unit-test
{ t } [
"VAR foo,bar , baz;" "block" \ pl0 rule parse parse-result-remaining empty?
] unit-test
{ t } [
"foo := 5" "statement" \ pl0 rule parse parse-result-remaining empty?
] unit-test
{ t } [
"BEGIN foo := 5 END" "statement" \ pl0 rule parse parse-result-remaining empty?
] unit-test
{ t } [
"IF 1=1 THEN foo := 5" "statement" \ pl0 rule parse parse-result-remaining empty?
] unit-test
{ t } [
"WHILE 1=1 DO foo := 5" "statement" \ pl0 rule parse parse-result-remaining empty?
] unit-test
{ t } [
"WHILE ODD 1 DO foo := 5" "statement" \ pl0 rule parse parse-result-remaining empty?
] unit-test
{ t } [
"PROCEDURE square; BEGIN squ:=x*x END" "block" \ pl0 rule parse parse-result-remaining empty?
] unit-test
{ t } [
<"
VAR x, squ;

View File

@ -7,20 +7,52 @@ IN: peg.pl0
#! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0
EBNF: pl0
block = ( "CONST" ident "=" number ( "," ident "=" number )* ";" )?
( "VAR" ident ( "," ident )* ";" )?
( "PROCEDURE" ident ";" ( block ";" )? )* statement
statement = ( ident ":=" expression | "CALL" ident |
"BEGIN" statement (";" statement )* "END" |
"IF" condition "THEN" statement |
"WHILE" condition "DO" statement )?
condition = "ODD" expression |
expression ("=" | "#" | "<=" | "<" | ">=" | ">") expression
expression = ("+" | "-")? term (("+" | "-") term )*
term = factor (("*" | "/") factor )*
factor = ident | number | "(" expression ")"
ident = (([a-zA-Z])+) [[ >string ]]
digit = ([0-9]) [[ digit> ]]
number = ((digit)+) [[ unclip [ swap 10 * + ] reduce ]]
program = block "."
_ = (" " | "\t" | "\n")* => [[ drop ignore ]]
BEGIN = "BEGIN" _
CALL = "CALL" _
CONST = "CONST" _
DO = "DO" _
END = "END" _
IF = "IF" _
THEN = "THEN" _
ODD = "ODD" _
PROCEDURE = "PROCEDURE" _
VAR = "VAR" _
WHILE = "WHILE" _
EQ = "=" _
LTEQ = "<=" _
LT = "<" _
GT = ">" _
GTEQ = ">=" _
NEQ = "#" _
COMMA = "," _
SEMICOLON = ";" _
ASSIGN = ":=" _
ADD = "+" _
SUBTRACT = "-" _
MULTIPLY = "*" _
DIVIDE = "/" _
LPAREN = "(" _
RPAREN = ")" _
block = ( CONST ident EQ number ( COMMA ident EQ number )* SEMICOLON )?
( VAR ident ( COMMA ident )* SEMICOLON )?
( PROCEDURE ident SEMICOLON ( block SEMICOLON )? )* statement
statement = ( ident ASSIGN expression
| CALL ident
| BEGIN statement ( SEMICOLON statement )* END
| IF condition THEN statement
| WHILE condition DO statement )?
condition = ODD expression
| expression (EQ | NEQ | LTEQ | LT | GTEQ | GT) expression
expression = (ADD | SUBTRACT)? term ( (ADD | SUBTRACT) term )* _
term = factor ( (MULTIPLY | DIVIDE) factor )*
factor = ident | number | LPAREN expression RPAREN
ident = (([a-zA-Z])+) _ => [[ >string ]]
digit = ([0-9]) => [[ digit> ]]
number = ((digit)+) _ => [[ 10 digits>integer ]]
program = _ block "."
;EBNF