Merge branch 'master' of git://factorcode.org/git/factor

2008-04-03 18:10:29 -05:00 · 2008-04-03 18:10:29 -05:00 · bc4892e740
parent d27252e232 a829e0c1fa
commit bc4892e740
5 changed files with 212 additions and 54 deletions
--- a/extra/peg/ebnf/ebnf-tests.factor
+++ b/extra/peg/ebnf/ebnf-tests.factor
@ -1,7 +1,7 @@
 ! Copyright (C) 2007 Chris Double.
 ! See http://factorcode.org/license.txt for BSD license.
 !
-USING: kernel tools.test peg peg.ebnf words math math.parser ;
+USING: kernel tools.test peg peg.ebnf words math math.parser sequences ;
 IN: peg.ebnf.tests
 { T{ ebnf-non-terminal f "abc" } } [
@ -180,6 +180,55 @@ IN: peg.ebnf.tests
  { 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call parse-result-ast
 ] unit-test
 { f } [
  "ab" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call 
 ] unit-test
 { V{ "a" " " "b" } } [
  "a b" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
 ] unit-test
 { V{ "a" "\t" "b" } } [
  "a\tb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast 
 ] unit-test
 { V{ "a" "\n" "b" } } [
  "a\nb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call parse-result-ast
 ] unit-test
 { V{ "a" f "b" } } [
  "ab" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
 ] unit-test
 { V{ "a" " " "b" } } [
  "a b" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
 ] unit-test
 { V{ "a" "\t" "b" } } [
  "a\tb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
 ] unit-test
 { V{ "a" "\n" "b" } } [
  "a\nb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call parse-result-ast
 ] unit-test
 { V{ "a" "b" } } [
  "ab" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
 ] unit-test
 { V{ "a" "b" } } [
  "a\tb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
 ] unit-test
 { V{ "a" "b" } } [
  "a\nb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call parse-result-ast
 ] unit-test
 { f } [
  "axb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call 
 ] unit-test
 { V{ V{ 49 } "+" V{ 49 } } } [ 
  #! Test direct left recursion. 
  #! Using packrat, so first part of expr fails, causing 2nd choice to be used  
@ -198,9 +247,13 @@ IN: peg.ebnf.tests
  "1+1+1" [EBNF num=([0-9])+ x=expr expr=x "+" num | num EBNF] call parse-result-ast
 ] unit-test
 { t } [
  "abcd='9' | ('8'):x => [[ drop x ]]" 'ebnf' parse parse-result-remaining empty?
 ] unit-test
 EBNF: primary 
 Primary = PrimaryNoNewArray
-PrimaryNoNewArray =  ClassInstanceCreationExpression 
+PrimaryNoNewArray =  ClassInstanceCreationExpression
                   | MethodInvocation
                   | FieldAccess
                   | ArrayAccess
@ -211,7 +264,7 @@ MethodInvocation =  Primary "." MethodName "(" ")"
                  | MethodName "(" ")"
 FieldAccess =  Primary "." Identifier
             | "super" "." Identifier  
-ArrayAccess =  Primary "[" Expression "]"
+ArrayAccess =  Primary "[" Expression "]" 
             | ExpressionName "[" Expression "]"
 ClassOrInterfaceType = ClassName | InterfaceTypeName
 ClassName = "C" | "D"
--- a/extra/peg/ebnf/ebnf.factor
+++ b/extra/peg/ebnf/ebnf.factor
@ -3,7 +3,7 @@
 USING: kernel compiler.units parser words arrays strings math.parser sequences 
       quotations vectors namespaces math assocs continuations peg
       peg.parsers unicode.categories multiline combinators.lib 
-       splitting accessors effects sequences.deep ;
+       splitting accessors effects sequences.deep peg.search ;
 IN: peg.ebnf
 TUPLE: ebnf-non-terminal symbol ;
@ -213,6 +213,7 @@ DEFER: 'choice'
 : 'actioned-sequence' ( -- parser )
  [
    [ 'sequence' , "=>" syntax , 'action' , ] seq* [ first2 <ebnf-action> ] action ,
    [ 'sequence' , ":" syntax , "a-zA-Z" range-pattern repeat1 [ >string ] action , "=>" syntax , 'action' , ] seq* [ first3 >r <ebnf-var> r> <ebnf-action> ] action ,
    [ 'sequence' , ":" syntax , "a-zA-Z" range-pattern repeat1 [ >string ] action , ] seq* [ first2 <ebnf-var> ] action ,
    'sequence' ,
  ] choice* ;
@ -237,22 +238,21 @@ GENERIC: (transform) ( ast -- parser )
 SYMBOL: parser
 SYMBOL: main
 SYMBOL: vars
 : transform ( ast -- object )
-  H{ } clone dup dup [ parser set V{ } vars set swap (transform) main set ] bind ;
+  H{ } clone dup dup [ parser set swap (transform) main set ] bind ;
 M: ebnf (transform) ( ast -- parser )
  rules>> [ (transform) ] map peek ;
 M: ebnf-rule (transform) ( ast -- parser )
  dup elements>> 
-  vars get clone vars [ (transform) ] with-variable [
+  (transform) [
    swap symbol>> set
  ] keep ;
 M: ebnf-sequence (transform) ( ast -- parser )
-  elements>> [ (transform) ] map seq ;
+  elements>> [ (transform) ] map seq [ dup length 1 = [ first ] when ] action ;
 M: ebnf-choice (transform) ( ast -- parser )
  options>> [ (transform) ] map choice ;
@ -282,37 +282,62 @@ M: ebnf-repeat1 (transform) ( ast -- parser )
 M: ebnf-optional (transform) ( ast -- parser )
  transform-group optional ;
-: build-locals ( string vars -- string )
+GENERIC: build-locals ( code ast -- code )
-  dup empty? [
+
-    drop
+M: ebnf-sequence build-locals ( code ast -- code )
-  ] [
+  elements>> dup [ ebnf-var? ] subset empty? [
    drop 
  ] [ 
    [
-      "USING: locals namespaces ;  [let* | " %
+      "USING: locals sequences ;  [let* | " %
-      [ dup % " [ \"" % % "\" get ] " % ] each
+        dup length swap [
-      " | " %
+          dup ebnf-var? [
-      %  
+            name>> % 
-      " ] with-locals" %     
+            " [ " % # " over nth ] " %
          ] [
            2drop
          ] if
        ] 2each
        " | " %
        %  
        " ] with-locals" %     
    ] "" make 
  ] if ;
 M: ebnf-var build-locals ( code ast -- )
  [
    "USING: locals kernel ;  [let* | " %
    name>> % " [ dup ] " %
    " | " %
    %  
    " ] with-locals" %     
  ] "" make ;
 M: object build-locals ( code ast -- )
  drop ;
 M: ebnf-action (transform) ( ast -- parser )
-  [ parser>> (transform) ] keep
+  [ parser>> (transform) ] [ code>> ] [ parser>> ] tri build-locals 
-  code>> vars get build-locals string-lines [ parse-lines ] with-compilation-unit action ;
+  string-lines [ parse-lines ] with-compilation-unit action ;
 M: ebnf-semantic (transform) ( ast -- parser )
-  [ parser>> (transform) ] keep
+  [ parser>> (transform) ] [ code>> ] [ parser>> ] tri build-locals 
-  code>> vars get build-locals string-lines [ parse-lines ] with-compilation-unit semantic ;
+  string-lines [ parse-lines ] with-compilation-unit semantic ;
 M: ebnf-var (transform) ( ast -- parser )
-  [ parser>> (transform) ] [ name>> ] bi 
+  parser>> (transform) ;
  dup vars get push [ dupd set ] curry action ;
 M: ebnf-terminal (transform) ( ast -- parser )
-  symbol>> token sp ;
+  symbol>> token ;
 : parser-not-found ( name -- * )
  [
    "Parser " % % " not found." %
  ] "" make throw ;
 M: ebnf-non-terminal (transform) ( ast -- parser )
  symbol>>  [
-    , parser get , \ at , \ sp ,   
+    , \ dup , parser get , \ at , [ parser-not-found ] , \ unless* , \ nip ,    
  ] [ ] make box ;
 : transform-ebnf ( string -- object )
@ -320,7 +345,7 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
 : check-parse-result ( result -- result )
  dup [
-    dup parse-result-remaining empty? [
+    dup parse-result-remaining [ blank? ] trim empty? [
      [ 
        "Unable to fully parse EBNF. Left to parse was: " %
        parse-result-remaining % 
@ -335,10 +360,16 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
  parse-result-ast transform dup dup parser [ main swap at compile ] with-variable
  [ compiled-parse ] curry [ with-scope ] curry ;
-: [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing
+: replace-escapes ( string -- string )
  "\\t" token [ drop "\t" ] action  "\\n" token [ drop "\n" ] action 2choice replace ;
 : [EBNF "EBNF]" parse-multiline-string replace-escapes ebnf>quot nip parsed ; parsing
 : EBNF: 
  CREATE-WORD dup 
-  ";EBNF" parse-multiline-string
+  ";EBNF" parse-multiline-string replace-escapes
  ebnf>quot swapd 1 1 <effect> define-declared "ebnf-parser" set-word-prop ; parsing
 : rule ( name word -- parser )
  #! Given an EBNF word produced from EBNF: return the EBNF rule
  "ebnf-parser" word-prop at ;
--- a/extra/peg/peg.factor
+++ b/extra/peg/peg.factor
@ -30,6 +30,14 @@ SYMBOL: fail
 SYMBOL: lrstack
 SYMBOL: heads
 : delegates ( -- cache )
  \ delegates get-global [ H{ } clone dup \ delegates set-global ] unless* ;
 : reset-pegs ( -- )
  H{ } clone \ delegates set-global ;
 reset-pegs 
 TUPLE: memo-entry ans pos ;
 C: <memo-entry> memo-entry
@ -253,14 +261,6 @@ SYMBOL: id
    1 id set-global 0
  ] if* ;
 : delegates ( -- cache )
  \ delegates get-global [ H{ } clone dup \ delegates set-global ] unless* ;
 : reset-delegates ( -- )
  H{ } clone \ delegates set-global ;
 reset-delegates 
 : init-parser ( parser -- parser )
  #! Set the delegate for the parser. Equivalent parsers
  #! get a delegate with the same id.
@ -590,7 +590,13 @@ PRIVATE>
  #! not a cached one. This is because the same box,
  #! compiled twice can have a different compiled word
  #! due to running at compile time.
-  box-parser construct-boa next-id f <parser> over set-delegate ;
+  #! Why the [ ] action at the end? Box parsers don't get
  #! memoized during parsing due to all box parsers being
  #! unique. This breaks left recursion detection during the
  #! parse. The action adds an indirection with a parser type
  #! that gets memoized and fixes this. Need to rethink how
  #! to fix boxes so this isn't needed...
  box-parser construct-boa next-id f <parser> over set-delegate [ ] action ;
 : PEG:
  (:) [
--- a/extra/peg/pl0/pl0-tests.factor
+++ b/extra/peg/pl0/pl0-tests.factor
@ -1,9 +1,45 @@
 ! Copyright (C) 2007 Chris Double.
 ! See http://factorcode.org/license.txt for BSD license.
 !
-USING: kernel tools.test peg peg.pl0 multiline sequences ;
+USING: kernel tools.test peg peg.ebnf peg.pl0 multiline sequences ;
 IN: peg.pl0.tests
 { t } [
  "CONST foo = 1;" "block" \ pl0 rule parse parse-result-remaining empty? 
 ] unit-test
 { t } [
  "VAR foo;" "block" \ pl0 rule parse parse-result-remaining empty?
 ] unit-test
 { t } [
  "VAR foo,bar , baz;" "block" \ pl0 rule parse parse-result-remaining empty? 
 ] unit-test
 { t } [
  "foo := 5" "statement" \ pl0 rule parse parse-result-remaining empty? 
 ] unit-test
 { t } [
  "BEGIN foo := 5 END" "statement" \ pl0 rule parse parse-result-remaining empty? 
 ] unit-test
 { t } [
  "IF 1=1 THEN foo := 5" "statement" \ pl0 rule parse parse-result-remaining empty? 
 ] unit-test
 { t } [
  "WHILE 1=1 DO foo := 5" "statement" \ pl0 rule parse parse-result-remaining empty? 
 ] unit-test
 { t } [
  "WHILE ODD 1 DO foo := 5" "statement" \ pl0 rule parse parse-result-remaining empty? 
 ] unit-test
 { t } [
  "PROCEDURE square; BEGIN squ:=x*x END" "block" \ pl0 rule parse parse-result-remaining empty? 
 ] unit-test
 { t } [
  <"
 VAR x, squ;
--- a/extra/peg/pl0/pl0.factor
+++ b/extra/peg/pl0/pl0.factor
@ -7,20 +7,52 @@ IN: peg.pl0
 #! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0
 EBNF: pl0 
-block = ( "CONST" ident "=" number ( "," ident "=" number )* ";" )?
+_ = (" " | "\t" | "\n")* => [[ drop ignore ]]
-        ( "VAR" ident ( "," ident )* ";" )?
+
-        ( "PROCEDURE" ident ";" ( block ";" )? )* statement 
+BEGIN       = "BEGIN" _
-statement = ( ident ":=" expression | "CALL" ident |
+CALL        = "CALL" _
-              "BEGIN" statement (";" statement )* "END" |
+CONST       = "CONST" _
-              "IF" condition "THEN" statement |
+DO          = "DO" _
-              "WHILE" condition "DO" statement )?
+END         = "END" _
-condition = "ODD" expression |
+IF          = "IF" _
-            expression ("=" | "#" | "<=" | "<" | ">=" | ">") expression 
+THEN        = "THEN" _
-expression = ("+" | "-")? term (("+" | "-") term )* 
+ODD         = "ODD" _
-term = factor (("*" | "/") factor )* 
+PROCEDURE   = "PROCEDURE" _
-factor = ident | number | "(" expression ")"
+VAR         = "VAR" _
-ident = (([a-zA-Z])+) [[ >string ]]
+WHILE       = "WHILE" _
-digit = ([0-9]) [[ digit> ]]
+EQ          = "=" _
-number = ((digit)+) [[ unclip [ swap 10 * + ] reduce ]]
+LTEQ        = "<=" _
-program = block "."
+LT          = "<" _
 GT          = ">" _
 GTEQ        = ">=" _
 NEQ         = "#" _
 COMMA       = "," _
 SEMICOLON   = ";" _
 ASSIGN      = ":=" _
 ADD         = "+" _
 SUBTRACT    = "-" _
 MULTIPLY    = "*" _
 DIVIDE      = "/" _
 LPAREN      = "(" _
 RPAREN      = ")" _
 block       =  ( CONST ident EQ number ( COMMA ident EQ number )* SEMICOLON )? 
               ( VAR ident ( COMMA ident )* SEMICOLON )? 
               ( PROCEDURE ident SEMICOLON ( block SEMICOLON )? )* statement 
 statement   =  (  ident ASSIGN expression 
                | CALL ident 
                | BEGIN statement ( SEMICOLON statement )* END 
                | IF condition THEN statement 
                | WHILE condition DO statement )?  
 condition   =  ODD expression 
             | expression (EQ | NEQ | LTEQ | LT | GTEQ | GT) expression
 expression  = (ADD | SUBTRACT)? term ( (ADD | SUBTRACT) term )* _
 term        = factor ( (MULTIPLY | DIVIDE) factor )* 
 factor      = ident | number | LPAREN expression RPAREN
 ident       = (([a-zA-Z])+) _ => [[ >string ]]
 digit       = ([0-9])         => [[ digit> ]]
 number      = ((digit)+) _    => [[ 10 digits>integer ]]
 program     = _ block "."
 ;EBNF