diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 4f802c5207..0879ecda49 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test peg peg.ebnf words math math.parser ; +USING: kernel tools.test peg peg.ebnf words math math.parser sequences ; IN: peg.ebnf.tests { T{ ebnf-non-terminal f "abc" } } [ @@ -180,6 +180,55 @@ IN: peg.ebnf.tests { 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call parse-result-ast ] unit-test +{ f } [ + "ab" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call +] unit-test + +{ V{ "a" " " "b" } } [ + "a b" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast +] unit-test + +{ V{ "a" "\t" "b" } } [ + "a\tb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast +] unit-test + +{ V{ "a" "\n" "b" } } [ + "a\nb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast +] unit-test + +{ V{ "a" f "b" } } [ + "ab" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast +] unit-test + +{ V{ "a" " " "b" } } [ + "a b" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast +] unit-test + + +{ V{ "a" "\t" "b" } } [ + "a\tb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast +] unit-test + +{ V{ "a" "\n" "b" } } [ + "a\nb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast +] unit-test + +{ V{ "a" "b" } } [ + "ab" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast +] unit-test + +{ V{ "a" "b" } } [ + "a\tb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast +] unit-test + +{ V{ "a" "b" } } [ + "a\nb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast +] unit-test + +{ f } [ + "axb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call +] unit-test + { V{ V{ 49 } "+" V{ 49 } } } [ #! Test direct left recursion. #! Using packrat, so first part of expr fails, causing 2nd choice to be used @@ -198,9 +247,13 @@ IN: peg.ebnf.tests "1+1+1" [EBNF num=([0-9])+ x=expr expr=x "+" num | num EBNF] call parse-result-ast ] unit-test +{ t } [ + "abcd='9' | ('8'):x => [[ drop x ]]" 'ebnf' parse parse-result-remaining empty? +] unit-test + EBNF: primary Primary = PrimaryNoNewArray -PrimaryNoNewArray = ClassInstanceCreationExpression +PrimaryNoNewArray = ClassInstanceCreationExpression | MethodInvocation | FieldAccess | ArrayAccess @@ -211,7 +264,7 @@ MethodInvocation = Primary "." MethodName "(" ")" | MethodName "(" ")" FieldAccess = Primary "." Identifier | "super" "." Identifier -ArrayAccess = Primary "[" Expression "]" +ArrayAccess = Primary "[" Expression "]" | ExpressionName "[" Expression "]" ClassOrInterfaceType = ClassName | InterfaceTypeName ClassName = "C" | "D" diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 4f00edbd3c..e5787e6cf8 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -3,7 +3,7 @@ USING: kernel compiler.units parser words arrays strings math.parser sequences quotations vectors namespaces math assocs continuations peg peg.parsers unicode.categories multiline combinators.lib - splitting accessors effects sequences.deep ; + splitting accessors effects sequences.deep peg.search ; IN: peg.ebnf TUPLE: ebnf-non-terminal symbol ; @@ -213,6 +213,7 @@ DEFER: 'choice' : 'actioned-sequence' ( -- parser ) [ [ 'sequence' , "=>" syntax , 'action' , ] seq* [ first2 ] action , + [ 'sequence' , ":" syntax , "a-zA-Z" range-pattern repeat1 [ >string ] action , "=>" syntax , 'action' , ] seq* [ first3 >r r> ] action , [ 'sequence' , ":" syntax , "a-zA-Z" range-pattern repeat1 [ >string ] action , ] seq* [ first2 ] action , 'sequence' , ] choice* ; @@ -237,22 +238,21 @@ GENERIC: (transform) ( ast -- parser ) SYMBOL: parser SYMBOL: main -SYMBOL: vars : transform ( ast -- object ) - H{ } clone dup dup [ parser set V{ } vars set swap (transform) main set ] bind ; + H{ } clone dup dup [ parser set swap (transform) main set ] bind ; M: ebnf (transform) ( ast -- parser ) rules>> [ (transform) ] map peek ; M: ebnf-rule (transform) ( ast -- parser ) dup elements>> - vars get clone vars [ (transform) ] with-variable [ + (transform) [ swap symbol>> set ] keep ; M: ebnf-sequence (transform) ( ast -- parser ) - elements>> [ (transform) ] map seq ; + elements>> [ (transform) ] map seq [ dup length 1 = [ first ] when ] action ; M: ebnf-choice (transform) ( ast -- parser ) options>> [ (transform) ] map choice ; @@ -282,37 +282,62 @@ M: ebnf-repeat1 (transform) ( ast -- parser ) M: ebnf-optional (transform) ( ast -- parser ) transform-group optional ; -: build-locals ( string vars -- string ) - dup empty? [ - drop - ] [ +GENERIC: build-locals ( code ast -- code ) + +M: ebnf-sequence build-locals ( code ast -- code ) + elements>> dup [ ebnf-var? ] subset empty? [ + drop + ] [ [ - "USING: locals namespaces ; [let* | " % - [ dup % " [ \"" % % "\" get ] " % ] each - " | " % - % - " ] with-locals" % + "USING: locals sequences ; [let* | " % + dup length swap [ + dup ebnf-var? [ + name>> % + " [ " % # " over nth ] " % + ] [ + 2drop + ] if + ] 2each + " | " % + % + " ] with-locals" % ] "" make ] if ; +M: ebnf-var build-locals ( code ast -- ) + [ + "USING: locals kernel ; [let* | " % + name>> % " [ dup ] " % + " | " % + % + " ] with-locals" % + ] "" make ; + +M: object build-locals ( code ast -- ) + drop ; + M: ebnf-action (transform) ( ast -- parser ) - [ parser>> (transform) ] keep - code>> vars get build-locals string-lines [ parse-lines ] with-compilation-unit action ; + [ parser>> (transform) ] [ code>> ] [ parser>> ] tri build-locals + string-lines [ parse-lines ] with-compilation-unit action ; M: ebnf-semantic (transform) ( ast -- parser ) - [ parser>> (transform) ] keep - code>> vars get build-locals string-lines [ parse-lines ] with-compilation-unit semantic ; + [ parser>> (transform) ] [ code>> ] [ parser>> ] tri build-locals + string-lines [ parse-lines ] with-compilation-unit semantic ; M: ebnf-var (transform) ( ast -- parser ) - [ parser>> (transform) ] [ name>> ] bi - dup vars get push [ dupd set ] curry action ; + parser>> (transform) ; M: ebnf-terminal (transform) ( ast -- parser ) - symbol>> token sp ; + symbol>> token ; + +: parser-not-found ( name -- * ) + [ + "Parser " % % " not found." % + ] "" make throw ; M: ebnf-non-terminal (transform) ( ast -- parser ) symbol>> [ - , parser get , \ at , \ sp , + , \ dup , parser get , \ at , [ parser-not-found ] , \ unless* , \ nip , ] [ ] make box ; : transform-ebnf ( string -- object ) @@ -320,7 +345,7 @@ M: ebnf-non-terminal (transform) ( ast -- parser ) : check-parse-result ( result -- result ) dup [ - dup parse-result-remaining empty? [ + dup parse-result-remaining [ blank? ] trim empty? [ [ "Unable to fully parse EBNF. Left to parse was: " % parse-result-remaining % @@ -335,10 +360,16 @@ M: ebnf-non-terminal (transform) ( ast -- parser ) parse-result-ast transform dup dup parser [ main swap at compile ] with-variable [ compiled-parse ] curry [ with-scope ] curry ; -: [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing +: replace-escapes ( string -- string ) + "\\t" token [ drop "\t" ] action "\\n" token [ drop "\n" ] action 2choice replace ; + +: [EBNF "EBNF]" parse-multiline-string replace-escapes ebnf>quot nip parsed ; parsing : EBNF: CREATE-WORD dup - ";EBNF" parse-multiline-string + ";EBNF" parse-multiline-string replace-escapes ebnf>quot swapd 1 1 define-declared "ebnf-parser" set-word-prop ; parsing +: rule ( name word -- parser ) + #! Given an EBNF word produced from EBNF: return the EBNF rule + "ebnf-parser" word-prop at ; \ No newline at end of file diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 3e0ce815f0..217805ce47 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -30,6 +30,14 @@ SYMBOL: fail SYMBOL: lrstack SYMBOL: heads +: delegates ( -- cache ) + \ delegates get-global [ H{ } clone dup \ delegates set-global ] unless* ; + +: reset-pegs ( -- ) + H{ } clone \ delegates set-global ; + +reset-pegs + TUPLE: memo-entry ans pos ; C: memo-entry @@ -253,14 +261,6 @@ SYMBOL: id 1 id set-global 0 ] if* ; -: delegates ( -- cache ) - \ delegates get-global [ H{ } clone dup \ delegates set-global ] unless* ; - -: reset-delegates ( -- ) - H{ } clone \ delegates set-global ; - -reset-delegates - : init-parser ( parser -- parser ) #! Set the delegate for the parser. Equivalent parsers #! get a delegate with the same id. @@ -590,7 +590,13 @@ PRIVATE> #! not a cached one. This is because the same box, #! compiled twice can have a different compiled word #! due to running at compile time. - box-parser construct-boa next-id f over set-delegate ; + #! Why the [ ] action at the end? Box parsers don't get + #! memoized during parsing due to all box parsers being + #! unique. This breaks left recursion detection during the + #! parse. The action adds an indirection with a parser type + #! that gets memoized and fixes this. Need to rethink how + #! to fix boxes so this isn't needed... + box-parser construct-boa next-id f over set-delegate [ ] action ; : PEG: (:) [ diff --git a/extra/peg/pl0/pl0-tests.factor b/extra/peg/pl0/pl0-tests.factor index b3d2135da7..88993c354b 100644 --- a/extra/peg/pl0/pl0-tests.factor +++ b/extra/peg/pl0/pl0-tests.factor @@ -1,9 +1,45 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test peg peg.pl0 multiline sequences ; +USING: kernel tools.test peg peg.ebnf peg.pl0 multiline sequences ; IN: peg.pl0.tests +{ t } [ + "CONST foo = 1;" "block" \ pl0 rule parse parse-result-remaining empty? +] unit-test + +{ t } [ + "VAR foo;" "block" \ pl0 rule parse parse-result-remaining empty? +] unit-test + +{ t } [ + "VAR foo,bar , baz;" "block" \ pl0 rule parse parse-result-remaining empty? +] unit-test + +{ t } [ + "foo := 5" "statement" \ pl0 rule parse parse-result-remaining empty? +] unit-test + +{ t } [ + "BEGIN foo := 5 END" "statement" \ pl0 rule parse parse-result-remaining empty? +] unit-test + +{ t } [ + "IF 1=1 THEN foo := 5" "statement" \ pl0 rule parse parse-result-remaining empty? +] unit-test + +{ t } [ + "WHILE 1=1 DO foo := 5" "statement" \ pl0 rule parse parse-result-remaining empty? +] unit-test + +{ t } [ + "WHILE ODD 1 DO foo := 5" "statement" \ pl0 rule parse parse-result-remaining empty? +] unit-test + +{ t } [ + "PROCEDURE square; BEGIN squ:=x*x END" "block" \ pl0 rule parse parse-result-remaining empty? +] unit-test + { t } [ <" VAR x, squ; diff --git a/extra/peg/pl0/pl0.factor b/extra/peg/pl0/pl0.factor index f7eb3cad23..1b97814ca7 100644 --- a/extra/peg/pl0/pl0.factor +++ b/extra/peg/pl0/pl0.factor @@ -7,20 +7,52 @@ IN: peg.pl0 #! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0 EBNF: pl0 -block = ( "CONST" ident "=" number ( "," ident "=" number )* ";" )? - ( "VAR" ident ( "," ident )* ";" )? - ( "PROCEDURE" ident ";" ( block ";" )? )* statement -statement = ( ident ":=" expression | "CALL" ident | - "BEGIN" statement (";" statement )* "END" | - "IF" condition "THEN" statement | - "WHILE" condition "DO" statement )? -condition = "ODD" expression | - expression ("=" | "#" | "<=" | "<" | ">=" | ">") expression -expression = ("+" | "-")? term (("+" | "-") term )* -term = factor (("*" | "/") factor )* -factor = ident | number | "(" expression ")" -ident = (([a-zA-Z])+) [[ >string ]] -digit = ([0-9]) [[ digit> ]] -number = ((digit)+) [[ unclip [ swap 10 * + ] reduce ]] -program = block "." +_ = (" " | "\t" | "\n")* => [[ drop ignore ]] + +BEGIN = "BEGIN" _ +CALL = "CALL" _ +CONST = "CONST" _ +DO = "DO" _ +END = "END" _ +IF = "IF" _ +THEN = "THEN" _ +ODD = "ODD" _ +PROCEDURE = "PROCEDURE" _ +VAR = "VAR" _ +WHILE = "WHILE" _ +EQ = "=" _ +LTEQ = "<=" _ +LT = "<" _ +GT = ">" _ +GTEQ = ">=" _ +NEQ = "#" _ +COMMA = "," _ +SEMICOLON = ";" _ +ASSIGN = ":=" _ + +ADD = "+" _ +SUBTRACT = "-" _ +MULTIPLY = "*" _ +DIVIDE = "/" _ + +LPAREN = "(" _ +RPAREN = ")" _ + +block = ( CONST ident EQ number ( COMMA ident EQ number )* SEMICOLON )? + ( VAR ident ( COMMA ident )* SEMICOLON )? + ( PROCEDURE ident SEMICOLON ( block SEMICOLON )? )* statement +statement = ( ident ASSIGN expression + | CALL ident + | BEGIN statement ( SEMICOLON statement )* END + | IF condition THEN statement + | WHILE condition DO statement )? +condition = ODD expression + | expression (EQ | NEQ | LTEQ | LT | GTEQ | GT) expression +expression = (ADD | SUBTRACT)? term ( (ADD | SUBTRACT) term )* _ +term = factor ( (MULTIPLY | DIVIDE) factor )* +factor = ident | number | LPAREN expression RPAREN +ident = (([a-zA-Z])+) _ => [[ >string ]] +digit = ([0-9]) => [[ digit> ]] +number = ((digit)+) _ => [[ 10 digits>integer ]] +program = _ block "." ;EBNF