Merge branch 'master' of git://factorcode.org/git/factor

db4
erg 2008-04-03 18:10:29 -05:00
commit bc4892e740
5 changed files with 212 additions and 54 deletions

View File

@ -1,7 +1,7 @@
! Copyright (C) 2007 Chris Double. ! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
! !
USING: kernel tools.test peg peg.ebnf words math math.parser ; USING: kernel tools.test peg peg.ebnf words math math.parser sequences ;
IN: peg.ebnf.tests IN: peg.ebnf.tests
{ T{ ebnf-non-terminal f "abc" } } [ { T{ ebnf-non-terminal f "abc" } } [
@ -180,6 +180,55 @@ IN: peg.ebnf.tests
{ 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call parse-result-ast { 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call parse-result-ast
] unit-test ] unit-test
{ f } [
"ab" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call
] unit-test
{ V{ "a" " " "b" } } [
"a b" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "\t" "b" } } [
"a\tb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "\n" "b" } } [
"a\nb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" f "b" } } [
"ab" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" " " "b" } } [
"a b" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "\t" "b" } } [
"a\tb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "\n" "b" } } [
"a\nb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "b" } } [
"ab" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "b" } } [
"a\tb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ V{ "a" "b" } } [
"a\nb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
] unit-test
{ f } [
"axb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call
] unit-test
{ V{ V{ 49 } "+" V{ 49 } } } [ { V{ V{ 49 } "+" V{ 49 } } } [
#! Test direct left recursion. #! Test direct left recursion.
#! Using packrat, so first part of expr fails, causing 2nd choice to be used #! Using packrat, so first part of expr fails, causing 2nd choice to be used
@ -198,9 +247,13 @@ IN: peg.ebnf.tests
"1+1+1" [EBNF num=([0-9])+ x=expr expr=x "+" num | num EBNF] call parse-result-ast "1+1+1" [EBNF num=([0-9])+ x=expr expr=x "+" num | num EBNF] call parse-result-ast
] unit-test ] unit-test
{ t } [
"abcd='9' | ('8'):x => [[ drop x ]]" 'ebnf' parse parse-result-remaining empty?
] unit-test
EBNF: primary EBNF: primary
Primary = PrimaryNoNewArray Primary = PrimaryNoNewArray
PrimaryNoNewArray = ClassInstanceCreationExpression PrimaryNoNewArray = ClassInstanceCreationExpression
| MethodInvocation | MethodInvocation
| FieldAccess | FieldAccess
| ArrayAccess | ArrayAccess
@ -211,7 +264,7 @@ MethodInvocation = Primary "." MethodName "(" ")"
| MethodName "(" ")" | MethodName "(" ")"
FieldAccess = Primary "." Identifier FieldAccess = Primary "." Identifier
| "super" "." Identifier | "super" "." Identifier
ArrayAccess = Primary "[" Expression "]" ArrayAccess = Primary "[" Expression "]"
| ExpressionName "[" Expression "]" | ExpressionName "[" Expression "]"
ClassOrInterfaceType = ClassName | InterfaceTypeName ClassOrInterfaceType = ClassName | InterfaceTypeName
ClassName = "C" | "D" ClassName = "C" | "D"

View File

@ -3,7 +3,7 @@
USING: kernel compiler.units parser words arrays strings math.parser sequences USING: kernel compiler.units parser words arrays strings math.parser sequences
quotations vectors namespaces math assocs continuations peg quotations vectors namespaces math assocs continuations peg
peg.parsers unicode.categories multiline combinators.lib peg.parsers unicode.categories multiline combinators.lib
splitting accessors effects sequences.deep ; splitting accessors effects sequences.deep peg.search ;
IN: peg.ebnf IN: peg.ebnf
TUPLE: ebnf-non-terminal symbol ; TUPLE: ebnf-non-terminal symbol ;
@ -213,6 +213,7 @@ DEFER: 'choice'
: 'actioned-sequence' ( -- parser ) : 'actioned-sequence' ( -- parser )
[ [
[ 'sequence' , "=>" syntax , 'action' , ] seq* [ first2 <ebnf-action> ] action , [ 'sequence' , "=>" syntax , 'action' , ] seq* [ first2 <ebnf-action> ] action ,
[ 'sequence' , ":" syntax , "a-zA-Z" range-pattern repeat1 [ >string ] action , "=>" syntax , 'action' , ] seq* [ first3 >r <ebnf-var> r> <ebnf-action> ] action ,
[ 'sequence' , ":" syntax , "a-zA-Z" range-pattern repeat1 [ >string ] action , ] seq* [ first2 <ebnf-var> ] action , [ 'sequence' , ":" syntax , "a-zA-Z" range-pattern repeat1 [ >string ] action , ] seq* [ first2 <ebnf-var> ] action ,
'sequence' , 'sequence' ,
] choice* ; ] choice* ;
@ -237,22 +238,21 @@ GENERIC: (transform) ( ast -- parser )
SYMBOL: parser SYMBOL: parser
SYMBOL: main SYMBOL: main
SYMBOL: vars
: transform ( ast -- object ) : transform ( ast -- object )
H{ } clone dup dup [ parser set V{ } vars set swap (transform) main set ] bind ; H{ } clone dup dup [ parser set swap (transform) main set ] bind ;
M: ebnf (transform) ( ast -- parser ) M: ebnf (transform) ( ast -- parser )
rules>> [ (transform) ] map peek ; rules>> [ (transform) ] map peek ;
M: ebnf-rule (transform) ( ast -- parser ) M: ebnf-rule (transform) ( ast -- parser )
dup elements>> dup elements>>
vars get clone vars [ (transform) ] with-variable [ (transform) [
swap symbol>> set swap symbol>> set
] keep ; ] keep ;
M: ebnf-sequence (transform) ( ast -- parser ) M: ebnf-sequence (transform) ( ast -- parser )
elements>> [ (transform) ] map seq ; elements>> [ (transform) ] map seq [ dup length 1 = [ first ] when ] action ;
M: ebnf-choice (transform) ( ast -- parser ) M: ebnf-choice (transform) ( ast -- parser )
options>> [ (transform) ] map choice ; options>> [ (transform) ] map choice ;
@ -282,37 +282,62 @@ M: ebnf-repeat1 (transform) ( ast -- parser )
M: ebnf-optional (transform) ( ast -- parser ) M: ebnf-optional (transform) ( ast -- parser )
transform-group optional ; transform-group optional ;
: build-locals ( string vars -- string ) GENERIC: build-locals ( code ast -- code )
dup empty? [
drop M: ebnf-sequence build-locals ( code ast -- code )
] [ elements>> dup [ ebnf-var? ] subset empty? [
drop
] [
[ [
"USING: locals namespaces ; [let* | " % "USING: locals sequences ; [let* | " %
[ dup % " [ \"" % % "\" get ] " % ] each dup length swap [
" | " % dup ebnf-var? [
% name>> %
" ] with-locals" % " [ " % # " over nth ] " %
] [
2drop
] if
] 2each
" | " %
%
" ] with-locals" %
] "" make ] "" make
] if ; ] if ;
M: ebnf-var build-locals ( code ast -- )
[
"USING: locals kernel ; [let* | " %
name>> % " [ dup ] " %
" | " %
%
" ] with-locals" %
] "" make ;
M: object build-locals ( code ast -- )
drop ;
M: ebnf-action (transform) ( ast -- parser ) M: ebnf-action (transform) ( ast -- parser )
[ parser>> (transform) ] keep [ parser>> (transform) ] [ code>> ] [ parser>> ] tri build-locals
code>> vars get build-locals string-lines [ parse-lines ] with-compilation-unit action ; string-lines [ parse-lines ] with-compilation-unit action ;
M: ebnf-semantic (transform) ( ast -- parser ) M: ebnf-semantic (transform) ( ast -- parser )
[ parser>> (transform) ] keep [ parser>> (transform) ] [ code>> ] [ parser>> ] tri build-locals
code>> vars get build-locals string-lines [ parse-lines ] with-compilation-unit semantic ; string-lines [ parse-lines ] with-compilation-unit semantic ;
M: ebnf-var (transform) ( ast -- parser ) M: ebnf-var (transform) ( ast -- parser )
[ parser>> (transform) ] [ name>> ] bi parser>> (transform) ;
dup vars get push [ dupd set ] curry action ;
M: ebnf-terminal (transform) ( ast -- parser ) M: ebnf-terminal (transform) ( ast -- parser )
symbol>> token sp ; symbol>> token ;
: parser-not-found ( name -- * )
[
"Parser " % % " not found." %
] "" make throw ;
M: ebnf-non-terminal (transform) ( ast -- parser ) M: ebnf-non-terminal (transform) ( ast -- parser )
symbol>> [ symbol>> [
, parser get , \ at , \ sp , , \ dup , parser get , \ at , [ parser-not-found ] , \ unless* , \ nip ,
] [ ] make box ; ] [ ] make box ;
: transform-ebnf ( string -- object ) : transform-ebnf ( string -- object )
@ -320,7 +345,7 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
: check-parse-result ( result -- result ) : check-parse-result ( result -- result )
dup [ dup [
dup parse-result-remaining empty? [ dup parse-result-remaining [ blank? ] trim empty? [
[ [
"Unable to fully parse EBNF. Left to parse was: " % "Unable to fully parse EBNF. Left to parse was: " %
parse-result-remaining % parse-result-remaining %
@ -335,10 +360,16 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
parse-result-ast transform dup dup parser [ main swap at compile ] with-variable parse-result-ast transform dup dup parser [ main swap at compile ] with-variable
[ compiled-parse ] curry [ with-scope ] curry ; [ compiled-parse ] curry [ with-scope ] curry ;
: [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing : replace-escapes ( string -- string )
"\\t" token [ drop "\t" ] action "\\n" token [ drop "\n" ] action 2choice replace ;
: [EBNF "EBNF]" parse-multiline-string replace-escapes ebnf>quot nip parsed ; parsing
: EBNF: : EBNF:
CREATE-WORD dup CREATE-WORD dup
";EBNF" parse-multiline-string ";EBNF" parse-multiline-string replace-escapes
ebnf>quot swapd 1 1 <effect> define-declared "ebnf-parser" set-word-prop ; parsing ebnf>quot swapd 1 1 <effect> define-declared "ebnf-parser" set-word-prop ; parsing
: rule ( name word -- parser )
#! Given an EBNF word produced from EBNF: return the EBNF rule
"ebnf-parser" word-prop at ;

View File

@ -30,6 +30,14 @@ SYMBOL: fail
SYMBOL: lrstack SYMBOL: lrstack
SYMBOL: heads SYMBOL: heads
: delegates ( -- cache )
\ delegates get-global [ H{ } clone dup \ delegates set-global ] unless* ;
: reset-pegs ( -- )
H{ } clone \ delegates set-global ;
reset-pegs
TUPLE: memo-entry ans pos ; TUPLE: memo-entry ans pos ;
C: <memo-entry> memo-entry C: <memo-entry> memo-entry
@ -253,14 +261,6 @@ SYMBOL: id
1 id set-global 0 1 id set-global 0
] if* ; ] if* ;
: delegates ( -- cache )
\ delegates get-global [ H{ } clone dup \ delegates set-global ] unless* ;
: reset-delegates ( -- )
H{ } clone \ delegates set-global ;
reset-delegates
: init-parser ( parser -- parser ) : init-parser ( parser -- parser )
#! Set the delegate for the parser. Equivalent parsers #! Set the delegate for the parser. Equivalent parsers
#! get a delegate with the same id. #! get a delegate with the same id.
@ -590,7 +590,13 @@ PRIVATE>
#! not a cached one. This is because the same box, #! not a cached one. This is because the same box,
#! compiled twice can have a different compiled word #! compiled twice can have a different compiled word
#! due to running at compile time. #! due to running at compile time.
box-parser construct-boa next-id f <parser> over set-delegate ; #! Why the [ ] action at the end? Box parsers don't get
#! memoized during parsing due to all box parsers being
#! unique. This breaks left recursion detection during the
#! parse. The action adds an indirection with a parser type
#! that gets memoized and fixes this. Need to rethink how
#! to fix boxes so this isn't needed...
box-parser construct-boa next-id f <parser> over set-delegate [ ] action ;
: PEG: : PEG:
(:) [ (:) [

View File

@ -1,9 +1,45 @@
! Copyright (C) 2007 Chris Double. ! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
! !
USING: kernel tools.test peg peg.pl0 multiline sequences ; USING: kernel tools.test peg peg.ebnf peg.pl0 multiline sequences ;
IN: peg.pl0.tests IN: peg.pl0.tests
{ t } [
"CONST foo = 1;" "block" \ pl0 rule parse parse-result-remaining empty?
] unit-test
{ t } [
"VAR foo;" "block" \ pl0 rule parse parse-result-remaining empty?
] unit-test
{ t } [
"VAR foo,bar , baz;" "block" \ pl0 rule parse parse-result-remaining empty?
] unit-test
{ t } [
"foo := 5" "statement" \ pl0 rule parse parse-result-remaining empty?
] unit-test
{ t } [
"BEGIN foo := 5 END" "statement" \ pl0 rule parse parse-result-remaining empty?
] unit-test
{ t } [
"IF 1=1 THEN foo := 5" "statement" \ pl0 rule parse parse-result-remaining empty?
] unit-test
{ t } [
"WHILE 1=1 DO foo := 5" "statement" \ pl0 rule parse parse-result-remaining empty?
] unit-test
{ t } [
"WHILE ODD 1 DO foo := 5" "statement" \ pl0 rule parse parse-result-remaining empty?
] unit-test
{ t } [
"PROCEDURE square; BEGIN squ:=x*x END" "block" \ pl0 rule parse parse-result-remaining empty?
] unit-test
{ t } [ { t } [
<" <"
VAR x, squ; VAR x, squ;

View File

@ -7,20 +7,52 @@ IN: peg.pl0
#! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0 #! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0
EBNF: pl0 EBNF: pl0
block = ( "CONST" ident "=" number ( "," ident "=" number )* ";" )? _ = (" " | "\t" | "\n")* => [[ drop ignore ]]
( "VAR" ident ( "," ident )* ";" )?
( "PROCEDURE" ident ";" ( block ";" )? )* statement BEGIN = "BEGIN" _
statement = ( ident ":=" expression | "CALL" ident | CALL = "CALL" _
"BEGIN" statement (";" statement )* "END" | CONST = "CONST" _
"IF" condition "THEN" statement | DO = "DO" _
"WHILE" condition "DO" statement )? END = "END" _
condition = "ODD" expression | IF = "IF" _
expression ("=" | "#" | "<=" | "<" | ">=" | ">") expression THEN = "THEN" _
expression = ("+" | "-")? term (("+" | "-") term )* ODD = "ODD" _
term = factor (("*" | "/") factor )* PROCEDURE = "PROCEDURE" _
factor = ident | number | "(" expression ")" VAR = "VAR" _
ident = (([a-zA-Z])+) [[ >string ]] WHILE = "WHILE" _
digit = ([0-9]) [[ digit> ]] EQ = "=" _
number = ((digit)+) [[ unclip [ swap 10 * + ] reduce ]] LTEQ = "<=" _
program = block "." LT = "<" _
GT = ">" _
GTEQ = ">=" _
NEQ = "#" _
COMMA = "," _
SEMICOLON = ";" _
ASSIGN = ":=" _
ADD = "+" _
SUBTRACT = "-" _
MULTIPLY = "*" _
DIVIDE = "/" _
LPAREN = "(" _
RPAREN = ")" _
block = ( CONST ident EQ number ( COMMA ident EQ number )* SEMICOLON )?
( VAR ident ( COMMA ident )* SEMICOLON )?
( PROCEDURE ident SEMICOLON ( block SEMICOLON )? )* statement
statement = ( ident ASSIGN expression
| CALL ident
| BEGIN statement ( SEMICOLON statement )* END
| IF condition THEN statement
| WHILE condition DO statement )?
condition = ODD expression
| expression (EQ | NEQ | LTEQ | LT | GTEQ | GT) expression
expression = (ADD | SUBTRACT)? term ( (ADD | SUBTRACT) term )* _
term = factor ( (MULTIPLY | DIVIDE) factor )*
factor = ident | number | LPAREN expression RPAREN
ident = (([a-zA-Z])+) _ => [[ >string ]]
digit = ([0-9]) => [[ digit> ]]
number = ((digit)+) _ => [[ 10 digits>integer ]]
program = _ block "."
;EBNF ;EBNF