Merge branch 'master' of git://factorcode.org/git/factor
commit
bc4892e740
|
@ -1,7 +1,7 @@
|
|||
! Copyright (C) 2007 Chris Double.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
!
|
||||
USING: kernel tools.test peg peg.ebnf words math math.parser ;
|
||||
USING: kernel tools.test peg peg.ebnf words math math.parser sequences ;
|
||||
IN: peg.ebnf.tests
|
||||
|
||||
{ T{ ebnf-non-terminal f "abc" } } [
|
||||
|
@ -180,6 +180,55 @@ IN: peg.ebnf.tests
|
|||
{ 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ f } [
|
||||
"ab" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call
|
||||
] unit-test
|
||||
|
||||
{ V{ "a" " " "b" } } [
|
||||
"a b" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ V{ "a" "\t" "b" } } [
|
||||
"a\tb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ V{ "a" "\n" "b" } } [
|
||||
"a\nb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ V{ "a" f "b" } } [
|
||||
"ab" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ V{ "a" " " "b" } } [
|
||||
"a b" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
|
||||
{ V{ "a" "\t" "b" } } [
|
||||
"a\tb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ V{ "a" "\n" "b" } } [
|
||||
"a\nb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ V{ "a" "b" } } [
|
||||
"ab" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ V{ "a" "b" } } [
|
||||
"a\tb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ V{ "a" "b" } } [
|
||||
"a\nb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ f } [
|
||||
"axb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call
|
||||
] unit-test
|
||||
|
||||
{ V{ V{ 49 } "+" V{ 49 } } } [
|
||||
#! Test direct left recursion.
|
||||
#! Using packrat, so first part of expr fails, causing 2nd choice to be used
|
||||
|
@ -198,9 +247,13 @@ IN: peg.ebnf.tests
|
|||
"1+1+1" [EBNF num=([0-9])+ x=expr expr=x "+" num | num EBNF] call parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ t } [
|
||||
"abcd='9' | ('8'):x => [[ drop x ]]" 'ebnf' parse parse-result-remaining empty?
|
||||
] unit-test
|
||||
|
||||
EBNF: primary
|
||||
Primary = PrimaryNoNewArray
|
||||
PrimaryNoNewArray = ClassInstanceCreationExpression
|
||||
PrimaryNoNewArray = ClassInstanceCreationExpression
|
||||
| MethodInvocation
|
||||
| FieldAccess
|
||||
| ArrayAccess
|
||||
|
@ -211,7 +264,7 @@ MethodInvocation = Primary "." MethodName "(" ")"
|
|||
| MethodName "(" ")"
|
||||
FieldAccess = Primary "." Identifier
|
||||
| "super" "." Identifier
|
||||
ArrayAccess = Primary "[" Expression "]"
|
||||
ArrayAccess = Primary "[" Expression "]"
|
||||
| ExpressionName "[" Expression "]"
|
||||
ClassOrInterfaceType = ClassName | InterfaceTypeName
|
||||
ClassName = "C" | "D"
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
USING: kernel compiler.units parser words arrays strings math.parser sequences
|
||||
quotations vectors namespaces math assocs continuations peg
|
||||
peg.parsers unicode.categories multiline combinators.lib
|
||||
splitting accessors effects sequences.deep ;
|
||||
splitting accessors effects sequences.deep peg.search ;
|
||||
IN: peg.ebnf
|
||||
|
||||
TUPLE: ebnf-non-terminal symbol ;
|
||||
|
@ -213,6 +213,7 @@ DEFER: 'choice'
|
|||
: 'actioned-sequence' ( -- parser )
|
||||
[
|
||||
[ 'sequence' , "=>" syntax , 'action' , ] seq* [ first2 <ebnf-action> ] action ,
|
||||
[ 'sequence' , ":" syntax , "a-zA-Z" range-pattern repeat1 [ >string ] action , "=>" syntax , 'action' , ] seq* [ first3 >r <ebnf-var> r> <ebnf-action> ] action ,
|
||||
[ 'sequence' , ":" syntax , "a-zA-Z" range-pattern repeat1 [ >string ] action , ] seq* [ first2 <ebnf-var> ] action ,
|
||||
'sequence' ,
|
||||
] choice* ;
|
||||
|
@ -237,22 +238,21 @@ GENERIC: (transform) ( ast -- parser )
|
|||
|
||||
SYMBOL: parser
|
||||
SYMBOL: main
|
||||
SYMBOL: vars
|
||||
|
||||
: transform ( ast -- object )
|
||||
H{ } clone dup dup [ parser set V{ } vars set swap (transform) main set ] bind ;
|
||||
H{ } clone dup dup [ parser set swap (transform) main set ] bind ;
|
||||
|
||||
M: ebnf (transform) ( ast -- parser )
|
||||
rules>> [ (transform) ] map peek ;
|
||||
|
||||
M: ebnf-rule (transform) ( ast -- parser )
|
||||
dup elements>>
|
||||
vars get clone vars [ (transform) ] with-variable [
|
||||
(transform) [
|
||||
swap symbol>> set
|
||||
] keep ;
|
||||
|
||||
M: ebnf-sequence (transform) ( ast -- parser )
|
||||
elements>> [ (transform) ] map seq ;
|
||||
elements>> [ (transform) ] map seq [ dup length 1 = [ first ] when ] action ;
|
||||
|
||||
M: ebnf-choice (transform) ( ast -- parser )
|
||||
options>> [ (transform) ] map choice ;
|
||||
|
@ -282,37 +282,62 @@ M: ebnf-repeat1 (transform) ( ast -- parser )
|
|||
M: ebnf-optional (transform) ( ast -- parser )
|
||||
transform-group optional ;
|
||||
|
||||
: build-locals ( string vars -- string )
|
||||
dup empty? [
|
||||
drop
|
||||
] [
|
||||
GENERIC: build-locals ( code ast -- code )
|
||||
|
||||
M: ebnf-sequence build-locals ( code ast -- code )
|
||||
elements>> dup [ ebnf-var? ] subset empty? [
|
||||
drop
|
||||
] [
|
||||
[
|
||||
"USING: locals namespaces ; [let* | " %
|
||||
[ dup % " [ \"" % % "\" get ] " % ] each
|
||||
" | " %
|
||||
%
|
||||
" ] with-locals" %
|
||||
"USING: locals sequences ; [let* | " %
|
||||
dup length swap [
|
||||
dup ebnf-var? [
|
||||
name>> %
|
||||
" [ " % # " over nth ] " %
|
||||
] [
|
||||
2drop
|
||||
] if
|
||||
] 2each
|
||||
" | " %
|
||||
%
|
||||
" ] with-locals" %
|
||||
] "" make
|
||||
] if ;
|
||||
|
||||
M: ebnf-var build-locals ( code ast -- )
|
||||
[
|
||||
"USING: locals kernel ; [let* | " %
|
||||
name>> % " [ dup ] " %
|
||||
" | " %
|
||||
%
|
||||
" ] with-locals" %
|
||||
] "" make ;
|
||||
|
||||
M: object build-locals ( code ast -- )
|
||||
drop ;
|
||||
|
||||
M: ebnf-action (transform) ( ast -- parser )
|
||||
[ parser>> (transform) ] keep
|
||||
code>> vars get build-locals string-lines [ parse-lines ] with-compilation-unit action ;
|
||||
[ parser>> (transform) ] [ code>> ] [ parser>> ] tri build-locals
|
||||
string-lines [ parse-lines ] with-compilation-unit action ;
|
||||
|
||||
M: ebnf-semantic (transform) ( ast -- parser )
|
||||
[ parser>> (transform) ] keep
|
||||
code>> vars get build-locals string-lines [ parse-lines ] with-compilation-unit semantic ;
|
||||
[ parser>> (transform) ] [ code>> ] [ parser>> ] tri build-locals
|
||||
string-lines [ parse-lines ] with-compilation-unit semantic ;
|
||||
|
||||
M: ebnf-var (transform) ( ast -- parser )
|
||||
[ parser>> (transform) ] [ name>> ] bi
|
||||
dup vars get push [ dupd set ] curry action ;
|
||||
parser>> (transform) ;
|
||||
|
||||
M: ebnf-terminal (transform) ( ast -- parser )
|
||||
symbol>> token sp ;
|
||||
symbol>> token ;
|
||||
|
||||
: parser-not-found ( name -- * )
|
||||
[
|
||||
"Parser " % % " not found." %
|
||||
] "" make throw ;
|
||||
|
||||
M: ebnf-non-terminal (transform) ( ast -- parser )
|
||||
symbol>> [
|
||||
, parser get , \ at , \ sp ,
|
||||
, \ dup , parser get , \ at , [ parser-not-found ] , \ unless* , \ nip ,
|
||||
] [ ] make box ;
|
||||
|
||||
: transform-ebnf ( string -- object )
|
||||
|
@ -320,7 +345,7 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
|
|||
|
||||
: check-parse-result ( result -- result )
|
||||
dup [
|
||||
dup parse-result-remaining empty? [
|
||||
dup parse-result-remaining [ blank? ] trim empty? [
|
||||
[
|
||||
"Unable to fully parse EBNF. Left to parse was: " %
|
||||
parse-result-remaining %
|
||||
|
@ -335,10 +360,16 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
|
|||
parse-result-ast transform dup dup parser [ main swap at compile ] with-variable
|
||||
[ compiled-parse ] curry [ with-scope ] curry ;
|
||||
|
||||
: [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing
|
||||
: replace-escapes ( string -- string )
|
||||
"\\t" token [ drop "\t" ] action "\\n" token [ drop "\n" ] action 2choice replace ;
|
||||
|
||||
: [EBNF "EBNF]" parse-multiline-string replace-escapes ebnf>quot nip parsed ; parsing
|
||||
|
||||
: EBNF:
|
||||
CREATE-WORD dup
|
||||
";EBNF" parse-multiline-string
|
||||
";EBNF" parse-multiline-string replace-escapes
|
||||
ebnf>quot swapd 1 1 <effect> define-declared "ebnf-parser" set-word-prop ; parsing
|
||||
|
||||
: rule ( name word -- parser )
|
||||
#! Given an EBNF word produced from EBNF: return the EBNF rule
|
||||
"ebnf-parser" word-prop at ;
|
|
@ -30,6 +30,14 @@ SYMBOL: fail
|
|||
SYMBOL: lrstack
|
||||
SYMBOL: heads
|
||||
|
||||
: delegates ( -- cache )
|
||||
\ delegates get-global [ H{ } clone dup \ delegates set-global ] unless* ;
|
||||
|
||||
: reset-pegs ( -- )
|
||||
H{ } clone \ delegates set-global ;
|
||||
|
||||
reset-pegs
|
||||
|
||||
TUPLE: memo-entry ans pos ;
|
||||
C: <memo-entry> memo-entry
|
||||
|
||||
|
@ -253,14 +261,6 @@ SYMBOL: id
|
|||
1 id set-global 0
|
||||
] if* ;
|
||||
|
||||
: delegates ( -- cache )
|
||||
\ delegates get-global [ H{ } clone dup \ delegates set-global ] unless* ;
|
||||
|
||||
: reset-delegates ( -- )
|
||||
H{ } clone \ delegates set-global ;
|
||||
|
||||
reset-delegates
|
||||
|
||||
: init-parser ( parser -- parser )
|
||||
#! Set the delegate for the parser. Equivalent parsers
|
||||
#! get a delegate with the same id.
|
||||
|
@ -590,7 +590,13 @@ PRIVATE>
|
|||
#! not a cached one. This is because the same box,
|
||||
#! compiled twice can have a different compiled word
|
||||
#! due to running at compile time.
|
||||
box-parser construct-boa next-id f <parser> over set-delegate ;
|
||||
#! Why the [ ] action at the end? Box parsers don't get
|
||||
#! memoized during parsing due to all box parsers being
|
||||
#! unique. This breaks left recursion detection during the
|
||||
#! parse. The action adds an indirection with a parser type
|
||||
#! that gets memoized and fixes this. Need to rethink how
|
||||
#! to fix boxes so this isn't needed...
|
||||
box-parser construct-boa next-id f <parser> over set-delegate [ ] action ;
|
||||
|
||||
: PEG:
|
||||
(:) [
|
||||
|
|
|
@ -1,9 +1,45 @@
|
|||
! Copyright (C) 2007 Chris Double.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
!
|
||||
USING: kernel tools.test peg peg.pl0 multiline sequences ;
|
||||
USING: kernel tools.test peg peg.ebnf peg.pl0 multiline sequences ;
|
||||
IN: peg.pl0.tests
|
||||
|
||||
{ t } [
|
||||
"CONST foo = 1;" "block" \ pl0 rule parse parse-result-remaining empty?
|
||||
] unit-test
|
||||
|
||||
{ t } [
|
||||
"VAR foo;" "block" \ pl0 rule parse parse-result-remaining empty?
|
||||
] unit-test
|
||||
|
||||
{ t } [
|
||||
"VAR foo,bar , baz;" "block" \ pl0 rule parse parse-result-remaining empty?
|
||||
] unit-test
|
||||
|
||||
{ t } [
|
||||
"foo := 5" "statement" \ pl0 rule parse parse-result-remaining empty?
|
||||
] unit-test
|
||||
|
||||
{ t } [
|
||||
"BEGIN foo := 5 END" "statement" \ pl0 rule parse parse-result-remaining empty?
|
||||
] unit-test
|
||||
|
||||
{ t } [
|
||||
"IF 1=1 THEN foo := 5" "statement" \ pl0 rule parse parse-result-remaining empty?
|
||||
] unit-test
|
||||
|
||||
{ t } [
|
||||
"WHILE 1=1 DO foo := 5" "statement" \ pl0 rule parse parse-result-remaining empty?
|
||||
] unit-test
|
||||
|
||||
{ t } [
|
||||
"WHILE ODD 1 DO foo := 5" "statement" \ pl0 rule parse parse-result-remaining empty?
|
||||
] unit-test
|
||||
|
||||
{ t } [
|
||||
"PROCEDURE square; BEGIN squ:=x*x END" "block" \ pl0 rule parse parse-result-remaining empty?
|
||||
] unit-test
|
||||
|
||||
{ t } [
|
||||
<"
|
||||
VAR x, squ;
|
||||
|
|
|
@ -7,20 +7,52 @@ IN: peg.pl0
|
|||
#! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0
|
||||
|
||||
EBNF: pl0
|
||||
block = ( "CONST" ident "=" number ( "," ident "=" number )* ";" )?
|
||||
( "VAR" ident ( "," ident )* ";" )?
|
||||
( "PROCEDURE" ident ";" ( block ";" )? )* statement
|
||||
statement = ( ident ":=" expression | "CALL" ident |
|
||||
"BEGIN" statement (";" statement )* "END" |
|
||||
"IF" condition "THEN" statement |
|
||||
"WHILE" condition "DO" statement )?
|
||||
condition = "ODD" expression |
|
||||
expression ("=" | "#" | "<=" | "<" | ">=" | ">") expression
|
||||
expression = ("+" | "-")? term (("+" | "-") term )*
|
||||
term = factor (("*" | "/") factor )*
|
||||
factor = ident | number | "(" expression ")"
|
||||
ident = (([a-zA-Z])+) [[ >string ]]
|
||||
digit = ([0-9]) [[ digit> ]]
|
||||
number = ((digit)+) [[ unclip [ swap 10 * + ] reduce ]]
|
||||
program = block "."
|
||||
_ = (" " | "\t" | "\n")* => [[ drop ignore ]]
|
||||
|
||||
BEGIN = "BEGIN" _
|
||||
CALL = "CALL" _
|
||||
CONST = "CONST" _
|
||||
DO = "DO" _
|
||||
END = "END" _
|
||||
IF = "IF" _
|
||||
THEN = "THEN" _
|
||||
ODD = "ODD" _
|
||||
PROCEDURE = "PROCEDURE" _
|
||||
VAR = "VAR" _
|
||||
WHILE = "WHILE" _
|
||||
EQ = "=" _
|
||||
LTEQ = "<=" _
|
||||
LT = "<" _
|
||||
GT = ">" _
|
||||
GTEQ = ">=" _
|
||||
NEQ = "#" _
|
||||
COMMA = "," _
|
||||
SEMICOLON = ";" _
|
||||
ASSIGN = ":=" _
|
||||
|
||||
ADD = "+" _
|
||||
SUBTRACT = "-" _
|
||||
MULTIPLY = "*" _
|
||||
DIVIDE = "/" _
|
||||
|
||||
LPAREN = "(" _
|
||||
RPAREN = ")" _
|
||||
|
||||
block = ( CONST ident EQ number ( COMMA ident EQ number )* SEMICOLON )?
|
||||
( VAR ident ( COMMA ident )* SEMICOLON )?
|
||||
( PROCEDURE ident SEMICOLON ( block SEMICOLON )? )* statement
|
||||
statement = ( ident ASSIGN expression
|
||||
| CALL ident
|
||||
| BEGIN statement ( SEMICOLON statement )* END
|
||||
| IF condition THEN statement
|
||||
| WHILE condition DO statement )?
|
||||
condition = ODD expression
|
||||
| expression (EQ | NEQ | LTEQ | LT | GTEQ | GT) expression
|
||||
expression = (ADD | SUBTRACT)? term ( (ADD | SUBTRACT) term )* _
|
||||
term = factor ( (MULTIPLY | DIVIDE) factor )*
|
||||
factor = ident | number | LPAREN expression RPAREN
|
||||
ident = (([a-zA-Z])+) _ => [[ >string ]]
|
||||
digit = ([0-9]) => [[ digit> ]]
|
||||
number = ((digit)+) _ => [[ 10 digits>integer ]]
|
||||
program = _ block "."
|
||||
;EBNF
|
||||
|
|
Loading…
Reference in New Issue