Merge branch 'master' of git://double.co.nz/git/factor
						commit
						21ad2dace2
					
				| 
						 | 
				
			
			@ -1,7 +1,7 @@
 | 
			
		|||
! Copyright (C) 2007 Chris Double.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
!
 | 
			
		||||
USING: kernel tools.test peg peg.ebnf ;
 | 
			
		||||
USING: kernel parser words tools.test peg peg.ebnf compiler.units ;
 | 
			
		||||
IN: peg.ebnf.tests
 | 
			
		||||
 | 
			
		||||
{ T{ ebnf-non-terminal f "abc" } } [
 | 
			
		||||
| 
						 | 
				
			
			@ -15,12 +15,9 @@ IN: peg.ebnf.tests
 | 
			
		|||
{
 | 
			
		||||
  T{ ebnf-rule f 
 | 
			
		||||
     "digit"
 | 
			
		||||
     V{ 
 | 
			
		||||
     T{ ebnf-choice f
 | 
			
		||||
        V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } }
 | 
			
		||||
     }
 | 
			
		||||
       f
 | 
			
		||||
     }
 | 
			
		||||
  } 
 | 
			
		||||
} [
 | 
			
		||||
  "digit = '1' | '2'" 'rule' parse parse-result-ast
 | 
			
		||||
| 
						 | 
				
			
			@ -29,12 +26,9 @@ IN: peg.ebnf.tests
 | 
			
		|||
{
 | 
			
		||||
  T{ ebnf-rule f 
 | 
			
		||||
     "digit" 
 | 
			
		||||
     V{
 | 
			
		||||
     T{ ebnf-sequence f
 | 
			
		||||
        V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } }
 | 
			
		||||
     }
 | 
			
		||||
       f
 | 
			
		||||
     }
 | 
			
		||||
  }   
 | 
			
		||||
} [
 | 
			
		||||
  "digit = '1' '2'" 'rule' parse parse-result-ast
 | 
			
		||||
| 
						 | 
				
			
			@ -83,7 +77,7 @@ IN: peg.ebnf.tests
 | 
			
		|||
     }
 | 
			
		||||
  } 
 | 
			
		||||
} [
 | 
			
		||||
  "one {(two | three) four}" 'choice' parse parse-result-ast
 | 
			
		||||
  "one ((two | three) four)*" 'choice' parse parse-result-ast
 | 
			
		||||
] unit-test
 | 
			
		||||
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -95,5 +89,57 @@ IN: peg.ebnf.tests
 | 
			
		|||
     }
 | 
			
		||||
  } 
 | 
			
		||||
} [
 | 
			
		||||
  "one [ two ] three" 'choice' parse parse-result-ast
 | 
			
		||||
  "one ( two )? three" 'choice' parse parse-result-ast
 | 
			
		||||
] unit-test
 | 
			
		||||
 | 
			
		||||
{ "foo" } [
 | 
			
		||||
  "\"foo\"" 'identifier' parse parse-result-ast
 | 
			
		||||
] unit-test
 | 
			
		||||
 | 
			
		||||
{ "foo" } [
 | 
			
		||||
  "'foo'" 'identifier' parse parse-result-ast
 | 
			
		||||
] unit-test
 | 
			
		||||
 | 
			
		||||
{ "foo" } [
 | 
			
		||||
  "foo" 'non-terminal' parse parse-result-ast ebnf-non-terminal-symbol
 | 
			
		||||
] unit-test
 | 
			
		||||
 | 
			
		||||
{ "foo" } [
 | 
			
		||||
  "foo]" 'non-terminal' parse parse-result-ast ebnf-non-terminal-symbol
 | 
			
		||||
] unit-test
 | 
			
		||||
 | 
			
		||||
{ V{ "a" "b" } } [
 | 
			
		||||
  "foo='a' 'b'" ebnf>quot with-compilation-unit "ab" "foo" search execute parse parse-result-ast 
 | 
			
		||||
] unit-test
 | 
			
		||||
 | 
			
		||||
{ V{ 1 "b" } } [
 | 
			
		||||
  "foo=('a')[[ drop 1 ]] 'b'" ebnf>quot with-compilation-unit "ab" "foo" search execute parse parse-result-ast 
 | 
			
		||||
] unit-test
 | 
			
		||||
 | 
			
		||||
{ V{ 1 2 } } [
 | 
			
		||||
  "foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]]" ebnf>quot with-compilation-unit "ab" "foo" search execute parse parse-result-ast 
 | 
			
		||||
] unit-test
 | 
			
		||||
 | 
			
		||||
{ CHAR: A } [
 | 
			
		||||
  "foo=[A-Z]" ebnf>quot with-compilation-unit "A" "foo" search execute parse parse-result-ast 
 | 
			
		||||
] unit-test
 | 
			
		||||
 | 
			
		||||
{ CHAR: Z } [
 | 
			
		||||
  "foo=[A-Z]" ebnf>quot with-compilation-unit "Z" "foo" search execute parse parse-result-ast 
 | 
			
		||||
] unit-test
 | 
			
		||||
 | 
			
		||||
{ f } [
 | 
			
		||||
  "foo=[A-Z]" ebnf>quot with-compilation-unit "0" "foo" search execute parse  
 | 
			
		||||
] unit-test
 | 
			
		||||
 | 
			
		||||
{ CHAR: 0 } [
 | 
			
		||||
  "foo=[^A-Z]" ebnf>quot with-compilation-unit "0" "foo" search execute parse parse-result-ast 
 | 
			
		||||
] unit-test
 | 
			
		||||
 | 
			
		||||
{ f } [
 | 
			
		||||
  "foo=[^A-Z]" ebnf>quot with-compilation-unit "A" "foo" search execute parse  
 | 
			
		||||
] unit-test
 | 
			
		||||
 | 
			
		||||
{ f } [
 | 
			
		||||
  "foo=[^A-Z]" ebnf>quot with-compilation-unit "Z" "foo" search execute parse  
 | 
			
		||||
] unit-test
 | 
			
		||||
| 
						 | 
				
			
			@ -2,37 +2,93 @@
 | 
			
		|||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: kernel parser words arrays strings math.parser sequences 
 | 
			
		||||
       quotations vectors namespaces math assocs continuations peg
 | 
			
		||||
       peg.parsers unicode.categories ;
 | 
			
		||||
       peg.parsers unicode.categories multiline combinators.lib 
 | 
			
		||||
       splitting ;
 | 
			
		||||
IN: peg.ebnf
 | 
			
		||||
 | 
			
		||||
TUPLE: ebnf-non-terminal symbol ;
 | 
			
		||||
TUPLE: ebnf-terminal symbol ;
 | 
			
		||||
TUPLE: ebnf-any-character ;
 | 
			
		||||
TUPLE: ebnf-range pattern ;
 | 
			
		||||
TUPLE: ebnf-ensure-not group ;
 | 
			
		||||
TUPLE: ebnf-choice options ;
 | 
			
		||||
TUPLE: ebnf-sequence elements ;
 | 
			
		||||
TUPLE: ebnf-repeat0 group ;
 | 
			
		||||
TUPLE: ebnf-repeat1 group ;
 | 
			
		||||
TUPLE: ebnf-optional elements ;
 | 
			
		||||
TUPLE: ebnf-rule symbol elements ;
 | 
			
		||||
TUPLE: ebnf-action word ;
 | 
			
		||||
TUPLE: ebnf-action parser code ;
 | 
			
		||||
TUPLE: ebnf rules ;
 | 
			
		||||
 | 
			
		||||
C: <ebnf-non-terminal> ebnf-non-terminal
 | 
			
		||||
C: <ebnf-terminal> ebnf-terminal
 | 
			
		||||
C: <ebnf-any-character> ebnf-any-character
 | 
			
		||||
C: <ebnf-range> ebnf-range
 | 
			
		||||
C: <ebnf-ensure-not> ebnf-ensure-not
 | 
			
		||||
C: <ebnf-choice> ebnf-choice
 | 
			
		||||
C: <ebnf-sequence> ebnf-sequence
 | 
			
		||||
C: <ebnf-repeat0> ebnf-repeat0
 | 
			
		||||
C: <ebnf-repeat1> ebnf-repeat1
 | 
			
		||||
C: <ebnf-optional> ebnf-optional
 | 
			
		||||
C: <ebnf-rule> ebnf-rule
 | 
			
		||||
C: <ebnf-action> ebnf-action
 | 
			
		||||
C: <ebnf> ebnf
 | 
			
		||||
 | 
			
		||||
GENERIC: (transform) ( ast -- parser )
 | 
			
		||||
 | 
			
		||||
: transform ( ast -- object )
 | 
			
		||||
  H{ } clone dup dup [ "parser" set swap (transform) "main" set ] bind ;
 | 
			
		||||
 | 
			
		||||
M: ebnf (transform) ( ast -- parser )
 | 
			
		||||
  ebnf-rules [ (transform) ] map peek ;
 | 
			
		||||
  
 | 
			
		||||
M: ebnf-rule (transform) ( ast -- parser )
 | 
			
		||||
  dup ebnf-rule-elements (transform) [
 | 
			
		||||
    swap ebnf-rule-symbol set
 | 
			
		||||
  ] keep ;
 | 
			
		||||
 | 
			
		||||
M: ebnf-sequence (transform) ( ast -- parser )
 | 
			
		||||
  ebnf-sequence-elements [ (transform) ] map seq ;
 | 
			
		||||
 | 
			
		||||
M: ebnf-choice (transform) ( ast -- parser )
 | 
			
		||||
  ebnf-choice-options [ (transform) ] map choice ;
 | 
			
		||||
 | 
			
		||||
M: ebnf-any-character (transform) ( ast -- parser )
 | 
			
		||||
  drop any-char ;
 | 
			
		||||
 | 
			
		||||
M: ebnf-range (transform) ( ast -- parser )
 | 
			
		||||
  ebnf-range-pattern range-pattern ;
 | 
			
		||||
 | 
			
		||||
M: ebnf-ensure-not (transform) ( ast -- parser )
 | 
			
		||||
  ebnf-ensure-not-group (transform) ensure-not ;
 | 
			
		||||
 | 
			
		||||
M: ebnf-repeat0 (transform) ( ast -- parser )
 | 
			
		||||
  ebnf-repeat0-group (transform) repeat0 ;
 | 
			
		||||
 | 
			
		||||
M: ebnf-repeat1 (transform) ( ast -- parser )
 | 
			
		||||
  ebnf-repeat1-group (transform) repeat1 ;
 | 
			
		||||
 | 
			
		||||
M: ebnf-optional (transform) ( ast -- parser )
 | 
			
		||||
  ebnf-optional-elements (transform) optional ;
 | 
			
		||||
 | 
			
		||||
M: ebnf-action (transform) ( ast -- parser )
 | 
			
		||||
  [ ebnf-action-parser (transform) ] keep
 | 
			
		||||
  ebnf-action-code string-lines parse-lines action ;
 | 
			
		||||
 | 
			
		||||
M: ebnf-terminal (transform) ( ast -- parser )
 | 
			
		||||
  ebnf-terminal-symbol token sp ;
 | 
			
		||||
 | 
			
		||||
M: ebnf-non-terminal (transform) ( ast -- parser )
 | 
			
		||||
  ebnf-non-terminal-symbol  [
 | 
			
		||||
    , "parser" get , \ at ,  
 | 
			
		||||
  ] [ ] make delay ;
 | 
			
		||||
 | 
			
		||||
SYMBOL: parsers
 | 
			
		||||
SYMBOL: non-terminals
 | 
			
		||||
SYMBOL: last-parser
 | 
			
		||||
 | 
			
		||||
: reset-parser-generation ( -- ) 
 | 
			
		||||
  V{ } clone parsers set 
 | 
			
		||||
  H{ } clone non-terminals set 
 | 
			
		||||
  f last-parser set ;
 | 
			
		||||
  H{ } clone non-terminals set ;
 | 
			
		||||
 | 
			
		||||
: store-parser ( parser -- number )
 | 
			
		||||
  parsers get [ push ] keep length 1- ;
 | 
			
		||||
| 
						 | 
				
			
			@ -50,7 +106,7 @@ SYMBOL: last-parser
 | 
			
		|||
GENERIC: (generate-parser) ( ast -- id )
 | 
			
		||||
 | 
			
		||||
: generate-parser ( ast -- id )
 | 
			
		||||
  (generate-parser) dup last-parser set ;
 | 
			
		||||
  (generate-parser) ;
 | 
			
		||||
 | 
			
		||||
M: ebnf-terminal (generate-parser) ( ast -- id )
 | 
			
		||||
  ebnf-terminal-symbol token sp store-parser ;
 | 
			
		||||
| 
						 | 
				
			
			@ -61,6 +117,12 @@ M: ebnf-non-terminal (generate-parser) ( ast -- id )
 | 
			
		|||
    parsers get , \ nth , [ search ] [ 2drop f ] recover , \ or ,
 | 
			
		||||
  ] [ ] make delay sp store-parser ;
 | 
			
		||||
 | 
			
		||||
M: ebnf-any-character (generate-parser) ( ast -- id )
 | 
			
		||||
  drop [ drop t ] satisfy store-parser ;
 | 
			
		||||
 | 
			
		||||
M: ebnf-range (generate-parser) ( ast -- id )
 | 
			
		||||
  ebnf-range-pattern range-pattern store-parser ;
 | 
			
		||||
 | 
			
		||||
M: ebnf-choice (generate-parser) ( ast -- id )
 | 
			
		||||
  ebnf-choice-options [
 | 
			
		||||
    generate-parser get-parser 
 | 
			
		||||
| 
						 | 
				
			
			@ -71,9 +133,15 @@ M: ebnf-sequence (generate-parser) ( ast -- id )
 | 
			
		|||
    generate-parser get-parser
 | 
			
		||||
  ] map seq store-parser ;
 | 
			
		||||
 | 
			
		||||
M: ebnf-ensure-not (generate-parser) ( ast -- id )
 | 
			
		||||
  ebnf-ensure-not-group generate-parser get-parser ensure-not store-parser ;
 | 
			
		||||
 | 
			
		||||
M: ebnf-repeat0 (generate-parser) ( ast -- id )
 | 
			
		||||
  ebnf-repeat0-group generate-parser get-parser repeat0 store-parser ;
 | 
			
		||||
 | 
			
		||||
M: ebnf-repeat1 (generate-parser) ( ast -- id )
 | 
			
		||||
  ebnf-repeat1-group generate-parser get-parser repeat1 store-parser ;
 | 
			
		||||
 | 
			
		||||
M: ebnf-optional (generate-parser) ( ast -- id )
 | 
			
		||||
  ebnf-optional-elements generate-parser get-parser optional store-parser ;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -83,15 +151,12 @@ M: ebnf-rule (generate-parser) ( ast -- id )
 | 
			
		|||
  swap [ parsers get set-nth ] keep ;
 | 
			
		||||
 | 
			
		||||
M: ebnf-action (generate-parser) ( ast -- id )
 | 
			
		||||
  ebnf-action-word search 1quotation 
 | 
			
		||||
  last-parser get get-parser swap action store-parser ;
 | 
			
		||||
  [ ebnf-action-parser generate-parser get-parser ] keep
 | 
			
		||||
  ebnf-action-code string-lines parse-lines action store-parser ;
 | 
			
		||||
 | 
			
		||||
M: vector (generate-parser) ( ast -- id )
 | 
			
		||||
  [ generate-parser ] map peek ;
 | 
			
		||||
 | 
			
		||||
M: f (generate-parser) ( ast -- id )
 | 
			
		||||
  drop last-parser get ;
 | 
			
		||||
 | 
			
		||||
M: ebnf (generate-parser) ( ast -- id )
 | 
			
		||||
  ebnf-rules [
 | 
			
		||||
    generate-parser 
 | 
			
		||||
| 
						 | 
				
			
			@ -99,43 +164,147 @@ M: ebnf (generate-parser) ( ast -- id )
 | 
			
		|||
 | 
			
		||||
DEFER: 'rhs'
 | 
			
		||||
 | 
			
		||||
: syntax ( string -- parser )
 | 
			
		||||
  #! Parses the string, ignoring white space, and
 | 
			
		||||
  #! does not put the result in the AST.
 | 
			
		||||
  token sp hide ;
 | 
			
		||||
 | 
			
		||||
: syntax-pack ( begin parser end -- parser )
 | 
			
		||||
  #! Parse 'parser' surrounded by syntax elements
 | 
			
		||||
  #! begin and end.
 | 
			
		||||
  [ syntax ] dipd syntax pack ;
 | 
			
		||||
 | 
			
		||||
: 'identifier' ( -- parser )
 | 
			
		||||
  #! Return a parser that parses an identifer delimited by
 | 
			
		||||
  #! a quotation character. The quotation can be single
 | 
			
		||||
  #! or double quotes. The AST produced is the identifier
 | 
			
		||||
  #! between the quotes.
 | 
			
		||||
  [
 | 
			
		||||
    [ CHAR: " = not ] satisfy repeat1 "\"" "\"" surrounded-by ,
 | 
			
		||||
    [ CHAR: ' = not ] satisfy repeat1 "'" "'" surrounded-by ,
 | 
			
		||||
  ] choice* [ >string ] action ;
 | 
			
		||||
  
 | 
			
		||||
: 'non-terminal' ( -- parser )
 | 
			
		||||
  CHAR: a CHAR: z range "-" token [ first ] action  2array choice repeat1 [ >string <ebnf-non-terminal> ] action ;
 | 
			
		||||
  #! A non-terminal is the name of another rule. It can
 | 
			
		||||
  #! be any non-blank character except for characters used
 | 
			
		||||
  #! in the EBNF syntax itself.
 | 
			
		||||
  [
 | 
			
		||||
    {
 | 
			
		||||
      [ dup blank?    ]
 | 
			
		||||
      [ dup CHAR: " = ]
 | 
			
		||||
      [ dup CHAR: ' = ]
 | 
			
		||||
      [ dup CHAR: | = ]
 | 
			
		||||
      [ dup CHAR: { = ]
 | 
			
		||||
      [ dup CHAR: } = ]
 | 
			
		||||
      [ dup CHAR: = = ]
 | 
			
		||||
      [ dup CHAR: ) = ]
 | 
			
		||||
      [ dup CHAR: ( = ]
 | 
			
		||||
      [ dup CHAR: ] = ]
 | 
			
		||||
      [ dup CHAR: [ = ]
 | 
			
		||||
      [ dup CHAR: . = ]
 | 
			
		||||
      [ dup CHAR: ! = ]
 | 
			
		||||
      [ dup CHAR: * = ]
 | 
			
		||||
      [ dup CHAR: + = ]
 | 
			
		||||
      [ dup CHAR: ? = ]
 | 
			
		||||
    } || not nip    
 | 
			
		||||
  ] satisfy repeat1 [ >string <ebnf-non-terminal> ] action ;
 | 
			
		||||
 | 
			
		||||
: 'terminal' ( -- parser )
 | 
			
		||||
  "'" token hide [ CHAR: ' = not ] satisfy repeat1 "'" token hide 3array seq [ first >string <ebnf-terminal> ] action ;
 | 
			
		||||
  #! A terminal is an identifier enclosed in quotations
 | 
			
		||||
  #! and it represents the literal value of the identifier.
 | 
			
		||||
  'identifier' [ <ebnf-terminal> ] action ;
 | 
			
		||||
 | 
			
		||||
: 'any-character' ( -- parser )
 | 
			
		||||
  #! A parser to match the symbol for any character match.
 | 
			
		||||
  [ CHAR: . = ] satisfy [ drop <ebnf-any-character> ] action ;
 | 
			
		||||
 | 
			
		||||
: 'range-parser' ( -- parser )
 | 
			
		||||
  #! Match the syntax for declaring character ranges
 | 
			
		||||
  [
 | 
			
		||||
    [ "[" syntax , "[" token ensure-not , ] seq* hide ,
 | 
			
		||||
    [ CHAR: ] = not ] satisfy repeat1 , 
 | 
			
		||||
    "]" syntax ,
 | 
			
		||||
  ] seq* [ first >string <ebnf-range> ] action ;
 | 
			
		||||
 
 | 
			
		||||
: 'element' ( -- parser )
 | 
			
		||||
  'non-terminal' 'terminal' 2array choice ;
 | 
			
		||||
  #! An element of a rule. It can be a terminal or a 
 | 
			
		||||
  #! non-terminal but must not be followed by a "=". 
 | 
			
		||||
  #! The latter indicates that it is the beginning of a
 | 
			
		||||
  #! new rule.
 | 
			
		||||
  [
 | 
			
		||||
    [ 
 | 
			
		||||
      'non-terminal' ,
 | 
			
		||||
      'terminal' ,
 | 
			
		||||
      'range-parser' ,
 | 
			
		||||
      'any-character' ,
 | 
			
		||||
    ] choice* ,
 | 
			
		||||
    "=" syntax ensure-not ,
 | 
			
		||||
  ] seq* [ first ] action ;
 | 
			
		||||
 | 
			
		||||
DEFER: 'choice'
 | 
			
		||||
 | 
			
		||||
: grouped ( quot suffix  -- parser )
 | 
			
		||||
  #! Parse a group of choices, with a suffix indicating
 | 
			
		||||
  #! the type of group (repeat0, repeat1, etc) and
 | 
			
		||||
  #! an quot that is the action that produces the AST.
 | 
			
		||||
  "(" [ 'choice' sp ] delay ")" syntax-pack 
 | 
			
		||||
  swap 2seq  
 | 
			
		||||
  [ first ] rot compose action ;
 | 
			
		||||
  
 | 
			
		||||
: 'group' ( -- parser )
 | 
			
		||||
  "(" token sp hide
 | 
			
		||||
  [ 'choice' sp ] delay
 | 
			
		||||
  ")" token sp hide 
 | 
			
		||||
  3array seq [ first ] action ;
 | 
			
		||||
  #! A grouping with no suffix. Used for precedence.
 | 
			
		||||
  [ ] [
 | 
			
		||||
    "*" token sp ensure-not ,
 | 
			
		||||
    "+" token sp ensure-not ,
 | 
			
		||||
    "?" token sp ensure-not ,
 | 
			
		||||
  ] seq* hide grouped ; 
 | 
			
		||||
 | 
			
		||||
: 'repeat0' ( -- parser )
 | 
			
		||||
  "{" token sp hide
 | 
			
		||||
  [ 'choice' sp ] delay
 | 
			
		||||
  "}" token sp hide 
 | 
			
		||||
  3array seq [ first <ebnf-repeat0> ] action ;
 | 
			
		||||
  [ <ebnf-repeat0> ] "*" syntax grouped ;
 | 
			
		||||
 | 
			
		||||
: 'repeat1' ( -- parser )
 | 
			
		||||
  [ <ebnf-repeat1> ] "+" syntax grouped ;
 | 
			
		||||
 | 
			
		||||
: 'optional' ( -- parser )
 | 
			
		||||
  "[" token sp hide
 | 
			
		||||
  [ 'choice' sp ] delay
 | 
			
		||||
  "]" token sp hide 
 | 
			
		||||
  3array seq [ first <ebnf-optional> ] action ;
 | 
			
		||||
  [ <ebnf-optional> ] "?" syntax grouped ;
 | 
			
		||||
 | 
			
		||||
: 'sequence' ( -- parser )
 | 
			
		||||
: 'factor-code' ( -- parser )
 | 
			
		||||
  [
 | 
			
		||||
    "]]" token ensure-not ,
 | 
			
		||||
    [ drop t ] satisfy ,
 | 
			
		||||
  ] seq* [ first ] action repeat0 [ >string ] action ;
 | 
			
		||||
 | 
			
		||||
: 'ensure-not' ( -- parser )
 | 
			
		||||
  #! Parses the '!' syntax to ensure that 
 | 
			
		||||
  #! something that matches the following elements do
 | 
			
		||||
  #! not exist in the parse stream.
 | 
			
		||||
  [
 | 
			
		||||
    "!" syntax ,
 | 
			
		||||
    'group' sp ,
 | 
			
		||||
  ] seq* [ first <ebnf-ensure-not> ] action ;
 | 
			
		||||
 | 
			
		||||
: ('sequence') ( -- parser )
 | 
			
		||||
  #! A sequence of terminals and non-terminals, including
 | 
			
		||||
  #! groupings of those. 
 | 
			
		||||
  [ 
 | 
			
		||||
    'ensure-not' sp ,
 | 
			
		||||
    'element' sp ,
 | 
			
		||||
    'group' sp , 
 | 
			
		||||
    'repeat0' sp ,
 | 
			
		||||
    'repeat1' sp ,
 | 
			
		||||
    'optional' sp , 
 | 
			
		||||
   ] { } make  choice  
 | 
			
		||||
   repeat1 [ 
 | 
			
		||||
  ] choice* ;  
 | 
			
		||||
 | 
			
		||||
: 'sequence' ( -- parser )
 | 
			
		||||
  #! A sequence of terminals and non-terminals, including
 | 
			
		||||
  #! groupings of those. 
 | 
			
		||||
  [
 | 
			
		||||
    [ 
 | 
			
		||||
      ('sequence') ,
 | 
			
		||||
      "[[" 'factor-code' "]]" syntax-pack ,
 | 
			
		||||
    ] seq* [ first2 <ebnf-action> ] action ,
 | 
			
		||||
    ('sequence') ,
 | 
			
		||||
  ] choice* repeat1 [ 
 | 
			
		||||
     dup length 1 = [ first ] [ <ebnf-sequence> ] if
 | 
			
		||||
  ] action ;
 | 
			
		||||
  
 | 
			
		||||
| 
						 | 
				
			
			@ -144,22 +313,15 @@ DEFER: 'choice'
 | 
			
		|||
    dup length 1 = [ first ] [ <ebnf-choice> ] if
 | 
			
		||||
  ] action ;
 | 
			
		||||
 
 | 
			
		||||
: 'action' ( -- parser )
 | 
			
		||||
  "=>" token hide
 | 
			
		||||
  [ blank? ] satisfy ensure-not [ drop t ] satisfy 2array seq [ first ] action repeat1 [ >string ] action sp
 | 
			
		||||
  2array seq [ first <ebnf-action> ] action ;
 | 
			
		||||
  
 | 
			
		||||
: 'rhs' ( -- parser )
 | 
			
		||||
  'choice' 'action' sp optional 2array seq ;
 | 
			
		||||
 
 | 
			
		||||
: 'rule' ( -- parser )
 | 
			
		||||
  'non-terminal' [ ebnf-non-terminal-symbol ] action 
 | 
			
		||||
  "=" token sp hide 
 | 
			
		||||
  'rhs' 
 | 
			
		||||
  3array seq [ first2 <ebnf-rule> ] action ;
 | 
			
		||||
  [
 | 
			
		||||
    'non-terminal' [ ebnf-non-terminal-symbol ] action  ,
 | 
			
		||||
    "=" syntax  ,
 | 
			
		||||
    'choice'  ,
 | 
			
		||||
  ] seq* [ first2 <ebnf-rule> ] action ;
 | 
			
		||||
 | 
			
		||||
: 'ebnf' ( -- parser )
 | 
			
		||||
  'rule' sp "." token sp hide list-of [ <ebnf> ] action ;
 | 
			
		||||
  'rule' sp repeat1 [ <ebnf> ] action ;
 | 
			
		||||
 | 
			
		||||
: ebnf>quot ( string -- quot )
 | 
			
		||||
  'ebnf' parse [
 | 
			
		||||
| 
						 | 
				
			
			@ -182,4 +344,7 @@ DEFER: 'choice'
 | 
			
		|||
    f
 | 
			
		||||
   ] if* ;
 | 
			
		||||
 | 
			
		||||
: <EBNF "EBNF>" parse-tokens " " join ebnf>quot call ; parsing
 | 
			
		||||
: transform-ebnf ( string -- object )
 | 
			
		||||
  'ebnf' parse parse-result-ast transform ;
 | 
			
		||||
 | 
			
		||||
: <EBNF "EBNF>" parse-multiline-string ebnf>quot call ; parsing
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
Chris Double
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,25 @@
 | 
			
		|||
! Copyright (C) 2007 Chris Double.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
!
 | 
			
		||||
USING: kernel tools.test peg peg.expr multiline sequences ;
 | 
			
		||||
IN: temporary
 | 
			
		||||
 | 
			
		||||
{ 5 } [
 | 
			
		||||
  "2+3" eval-expr 
 | 
			
		||||
] unit-test
 | 
			
		||||
 | 
			
		||||
{ 6 } [
 | 
			
		||||
  "2*3" eval-expr 
 | 
			
		||||
] unit-test
 | 
			
		||||
 | 
			
		||||
{ 14 } [
 | 
			
		||||
  "2+3*4" eval-expr 
 | 
			
		||||
] unit-test
 | 
			
		||||
 | 
			
		||||
{ 17 } [
 | 
			
		||||
  "2+3*4+3" eval-expr 
 | 
			
		||||
] unit-test
 | 
			
		||||
 | 
			
		||||
{ 23 } [
 | 
			
		||||
  "2+3*(4+3)" eval-expr 
 | 
			
		||||
] unit-test
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,30 @@
 | 
			
		|||
! Copyright (C) 2008 Chris Double.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: kernel arrays strings math.parser sequences
 | 
			
		||||
peg peg.ebnf peg.parsers memoize math ;
 | 
			
		||||
IN: peg.expr
 | 
			
		||||
 | 
			
		||||
: operator-fold ( lhs seq -- value )
 | 
			
		||||
 #! Perform a fold of a lhs, followed by a sequence of pairs being
 | 
			
		||||
 #! { operator rhs } in to a tree structure of the correct precedence.
 | 
			
		||||
 swap [ first2 swap call ] reduce ;
 | 
			
		||||
 | 
			
		||||
<EBNF
 | 
			
		||||
 | 
			
		||||
times    = ("*") [[ drop [ * ] ]]
 | 
			
		||||
divide   = ("/") [[ drop [ / ] ]]
 | 
			
		||||
add      = ("+") [[ drop [ + ] ]]
 | 
			
		||||
subtract = ("-") [[ drop [ - ] ]]
 | 
			
		||||
 | 
			
		||||
digit    = ([0-9]) [[ digit> ]]
 | 
			
		||||
number   = ((digit)+) [[ unclip [ swap 10 * + ] reduce ]]
 | 
			
		||||
 | 
			
		||||
value    = number | ("(" expr ")") [[ second ]] 
 | 
			
		||||
product = (value ((times | divide) value)*) [[ first2 operator-fold ]]
 | 
			
		||||
sum = (product ((add | subtract) product)*) [[ first2 operator-fold ]]
 | 
			
		||||
expr = sum
 | 
			
		||||
EBNF>
 | 
			
		||||
 | 
			
		||||
: eval-expr ( string -- number )
 | 
			
		||||
  expr parse parse-result-ast ;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
Simple expression evaluator using EBNF
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
parsing
 | 
			
		||||
| 
						 | 
				
			
			@ -159,3 +159,21 @@ HELP: 'string'
 | 
			
		|||
} { $description
 | 
			
		||||
    "Returns a parser that matches an string composed of a \", anything that is not \", and another \"."
 | 
			
		||||
} { $see-also 'integer' } ;
 | 
			
		||||
 | 
			
		||||
HELP: range-pattern
 | 
			
		||||
{ $values
 | 
			
		||||
    { "pattern" "a string" }
 | 
			
		||||
    { "parser" "a parser" }
 | 
			
		||||
} { $description
 | 
			
		||||
"Returns a parser that matches a single character based on the set "
 | 
			
		||||
"of characters in the pattern string."
 | 
			
		||||
"Any single character in the pattern matches that character. "
 | 
			
		||||
"If the pattern begins with a ^ then the set is negated "
 | 
			
		||||
"(the element matches any character not in the set). Any pair "
 | 
			
		||||
"of characters separated with a dash (-) represents the "
 | 
			
		||||
"range of characters from the first to the second, inclusive."
 | 
			
		||||
{ $examples
 | 
			
		||||
    { $example "USING: peg peg.parsers prettyprint ;" "\"a\" \"_a-zA-Z\" range-pattern parse parse-result-ast 1string ." "\"a\"" } 
 | 
			
		||||
    { $example "USING: peg peg.parsers prettyprint ;" "\"0\" \"^0-9\" range-pattern parse ." "f" } 
 | 
			
		||||
}
 | 
			
		||||
}  ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2,7 +2,8 @@
 | 
			
		|||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: kernel sequences strings namespaces math assocs shuffle 
 | 
			
		||||
     vectors arrays combinators.lib memoize math.parser match
 | 
			
		||||
     unicode.categories sequences.deep peg peg.private ;
 | 
			
		||||
     unicode.categories sequences.deep peg peg.private 
 | 
			
		||||
     peg.search math.ranges ;
 | 
			
		||||
IN: peg.parsers
 | 
			
		||||
 | 
			
		||||
TUPLE: just-parser p1 ;
 | 
			
		||||
| 
						 | 
				
			
			@ -83,3 +84,30 @@ MEMO: 'string' ( -- parser )
 | 
			
		|||
    [ CHAR: " = not ] satisfy repeat0 ,
 | 
			
		||||
    [ CHAR: " = ] satisfy hide ,
 | 
			
		||||
  ] { } make seq [ first >string ] action ;
 | 
			
		||||
 | 
			
		||||
: (range-pattern) ( pattern -- string )
 | 
			
		||||
  #! Given a range pattern, produce a string containing
 | 
			
		||||
  #! all characters within that range.
 | 
			
		||||
  [ 
 | 
			
		||||
    any-char , 
 | 
			
		||||
    [ CHAR: - = ] satisfy hide , 
 | 
			
		||||
    any-char , 
 | 
			
		||||
  ] seq* [
 | 
			
		||||
    first2 [a,b] >string    
 | 
			
		||||
  ] action
 | 
			
		||||
  replace ;
 | 
			
		||||
 | 
			
		||||
MEMO: range-pattern ( pattern -- parser )
 | 
			
		||||
  #! 'pattern' is a set of characters describing the
 | 
			
		||||
  #! parser to be produced. Any single character in
 | 
			
		||||
  #! the pattern matches that character. If the pattern
 | 
			
		||||
  #! begins with a ^ then the set is negated (the element
 | 
			
		||||
  #! matches any character not in the set). Any pair of
 | 
			
		||||
  #! characters separated with a dash (-) represents the
 | 
			
		||||
  #! range of characters from the first to the second,
 | 
			
		||||
  #! inclusive.
 | 
			
		||||
  dup first CHAR: ^ = [
 | 
			
		||||
    1 tail (range-pattern) [ member? not ] curry satisfy 
 | 
			
		||||
  ] [
 | 
			
		||||
    (range-pattern) [ member? ] curry satisfy
 | 
			
		||||
  ] if ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,7 +1,7 @@
 | 
			
		|||
! Copyright (C) 2007 Chris Double.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
!
 | 
			
		||||
USING: kernel tools.test peg peg.pl0 ;
 | 
			
		||||
USING: kernel tools.test peg peg.pl0 multiline sequences ;
 | 
			
		||||
IN: peg.pl0.tests
 | 
			
		||||
 | 
			
		||||
{ "abc" } [
 | 
			
		||||
| 
						 | 
				
			
			@ -11,3 +11,89 @@ IN: peg.pl0.tests
 | 
			
		|||
{ 55 } [
 | 
			
		||||
  "55abc" number parse parse-result-ast 
 | 
			
		||||
] unit-test
 | 
			
		||||
 | 
			
		||||
{ t } [
 | 
			
		||||
  <"
 | 
			
		||||
VAR x, squ;
 | 
			
		||||
 | 
			
		||||
PROCEDURE square;
 | 
			
		||||
BEGIN
 | 
			
		||||
   squ := x * x
 | 
			
		||||
END;
 | 
			
		||||
 | 
			
		||||
BEGIN
 | 
			
		||||
   x := 1;
 | 
			
		||||
   WHILE x <= 10 DO
 | 
			
		||||
   BEGIN
 | 
			
		||||
      CALL square;
 | 
			
		||||
      x := x + 1;
 | 
			
		||||
   END
 | 
			
		||||
END.
 | 
			
		||||
"> program parse parse-result-remaining empty?
 | 
			
		||||
] unit-test
 | 
			
		||||
 | 
			
		||||
{ f } [
 | 
			
		||||
  <"
 | 
			
		||||
CONST
 | 
			
		||||
  m =  7,
 | 
			
		||||
  n = 85;
 | 
			
		||||
 | 
			
		||||
VAR
 | 
			
		||||
  x, y, z, q, r;
 | 
			
		||||
 | 
			
		||||
PROCEDURE multiply;
 | 
			
		||||
VAR a, b;
 | 
			
		||||
 | 
			
		||||
BEGIN
 | 
			
		||||
  a := x;
 | 
			
		||||
  b := y;
 | 
			
		||||
  z := 0;
 | 
			
		||||
  WHILE b > 0 DO BEGIN
 | 
			
		||||
    IF ODD b THEN z := z + a;
 | 
			
		||||
    a := 2 * a;
 | 
			
		||||
    b := b / 2;
 | 
			
		||||
  END
 | 
			
		||||
END;
 | 
			
		||||
 | 
			
		||||
PROCEDURE divide;
 | 
			
		||||
VAR w;
 | 
			
		||||
BEGIN
 | 
			
		||||
  r := x;
 | 
			
		||||
  q := 0;
 | 
			
		||||
  w := y;
 | 
			
		||||
  WHILE w <= r DO w := 2 * w;
 | 
			
		||||
  WHILE w > y DO BEGIN
 | 
			
		||||
    q := 2 * q;
 | 
			
		||||
    w := w / 2;
 | 
			
		||||
    IF w <= r THEN BEGIN
 | 
			
		||||
      r := r - w;
 | 
			
		||||
      q := q + 1
 | 
			
		||||
    END
 | 
			
		||||
  END
 | 
			
		||||
END;
 | 
			
		||||
 | 
			
		||||
PROCEDURE gcd;
 | 
			
		||||
VAR f, g;
 | 
			
		||||
BEGIN
 | 
			
		||||
  f := x;
 | 
			
		||||
  g := y;
 | 
			
		||||
  WHILE f # g DO BEGIN
 | 
			
		||||
    IF f < g THEN g := g - f;
 | 
			
		||||
    IF g < f THEN f := f - g;
 | 
			
		||||
  END;
 | 
			
		||||
  z := f
 | 
			
		||||
END;
 | 
			
		||||
 | 
			
		||||
BEGIN
 | 
			
		||||
  x := m;
 | 
			
		||||
  y := n;
 | 
			
		||||
  CALL multiply;
 | 
			
		||||
  x := 25;
 | 
			
		||||
  y :=  3;
 | 
			
		||||
  CALL divide;
 | 
			
		||||
  x := 84;
 | 
			
		||||
  y := 36;
 | 
			
		||||
  CALL gcd;
 | 
			
		||||
END.
 | 
			
		||||
  "> program parse parse-result-remaining empty?
 | 
			
		||||
] unit-test
 | 
			
		||||
| 
						 | 
				
			
			@ -1,30 +1,26 @@
 | 
			
		|||
! Copyright (C) 2007 Chris Double.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: kernel arrays strings math.parser sequences
 | 
			
		||||
peg peg.ebnf peg.parsers memoize ;
 | 
			
		||||
peg peg.ebnf peg.parsers memoize namespaces math ;
 | 
			
		||||
IN: peg.pl0
 | 
			
		||||
 | 
			
		||||
#! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0
 | 
			
		||||
MEMO: ident ( -- parser )
 | 
			
		||||
  CHAR: a CHAR: z range 
 | 
			
		||||
  CHAR: A CHAR: Z range 2array choice repeat1 
 | 
			
		||||
  [ >string ] action ;
 | 
			
		||||
 | 
			
		||||
MEMO: number ( -- parser )
 | 
			
		||||
  CHAR: 0 CHAR: 9 range repeat1 [ string>number ] action ;
 | 
			
		||||
 | 
			
		||||
<EBNF
 | 
			
		||||
program = block '.' .
 | 
			
		||||
block = [ 'const' ident '=' number { ',' ident '=' number } ';' ]
 | 
			
		||||
        [ 'var' ident { ',' ident } ';' ]
 | 
			
		||||
        { 'procedure' ident ';' [ block ';' ] } statement .
 | 
			
		||||
statement = [ ident ':=' expression | 'call' ident |
 | 
			
		||||
              'begin' statement {';' statement } 'end' |
 | 
			
		||||
              'if' condition 'then' statement |
 | 
			
		||||
              'while' condition 'do' statement ] .
 | 
			
		||||
condition = 'odd' expression |
 | 
			
		||||
            expression ('=' | '#' | '<=' | '<' | '>=' | '>') expression .
 | 
			
		||||
expression = ['+' | '-'] term {('+' | '-') term } .
 | 
			
		||||
term = factor {('*' | '/') factor } .
 | 
			
		||||
factor = ident | number | '(' expression ')'
 | 
			
		||||
program = block "." 
 | 
			
		||||
block = ( "CONST" ident "=" number ( "," ident "=" number )* ";" )?
 | 
			
		||||
        ( "VAR" ident ( "," ident )* ";" )?
 | 
			
		||||
        ( "PROCEDURE" ident ";" ( block ";" )? )* statement 
 | 
			
		||||
statement = ( ident ":=" expression | "CALL" ident |
 | 
			
		||||
              "BEGIN" statement (";" statement )* "END" |
 | 
			
		||||
              "IF" condition "THEN" statement |
 | 
			
		||||
              "WHILE" condition "DO" statement )?
 | 
			
		||||
condition = "ODD" expression |
 | 
			
		||||
            expression ("=" | "#" | "<=" | "<" | ">=" | ">") expression 
 | 
			
		||||
expression = ("+" | "-")? term (("+" | "-") term )* 
 | 
			
		||||
term = factor (("*" | "/") factor )* 
 | 
			
		||||
factor = ident | number | "(" expression ")"
 | 
			
		||||
ident = (([a-zA-Z])+) [[ >string ]]
 | 
			
		||||
digit = ([0-9]) [[ digit> ]]
 | 
			
		||||
number = ((digit)+) [[ unclip [ swap 10 * + ] reduce ]]
 | 
			
		||||
EBNF>
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue