Merge branch 'master' of git://double.co.nz/git/factor

db4
Daniel Ehrenberg 2008-03-19 23:08:23 -04:00
commit 21ad2dace2
11 changed files with 479 additions and 82 deletions

View File

@ -1,7 +1,7 @@
! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license.
!
USING: kernel tools.test peg peg.ebnf ;
USING: kernel parser words tools.test peg peg.ebnf compiler.units ;
IN: peg.ebnf.tests
{ T{ ebnf-non-terminal f "abc" } } [
@ -15,11 +15,8 @@ IN: peg.ebnf.tests
{
T{ ebnf-rule f
"digit"
V{
T{ ebnf-choice f
V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } }
}
f
T{ ebnf-choice f
V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } }
}
}
} [
@ -29,11 +26,8 @@ IN: peg.ebnf.tests
{
T{ ebnf-rule f
"digit"
V{
T{ ebnf-sequence f
V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } }
}
f
T{ ebnf-sequence f
V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } }
}
}
} [
@ -83,7 +77,7 @@ IN: peg.ebnf.tests
}
}
} [
"one {(two | three) four}" 'choice' parse parse-result-ast
"one ((two | three) four)*" 'choice' parse parse-result-ast
] unit-test
{
@ -95,5 +89,57 @@ IN: peg.ebnf.tests
}
}
} [
"one [ two ] three" 'choice' parse parse-result-ast
"one ( two )? three" 'choice' parse parse-result-ast
] unit-test
{ "foo" } [
"\"foo\"" 'identifier' parse parse-result-ast
] unit-test
{ "foo" } [
"'foo'" 'identifier' parse parse-result-ast
] unit-test
{ "foo" } [
"foo" 'non-terminal' parse parse-result-ast ebnf-non-terminal-symbol
] unit-test
{ "foo" } [
"foo]" 'non-terminal' parse parse-result-ast ebnf-non-terminal-symbol
] unit-test
{ V{ "a" "b" } } [
"foo='a' 'b'" ebnf>quot with-compilation-unit "ab" "foo" search execute parse parse-result-ast
] unit-test
{ V{ 1 "b" } } [
"foo=('a')[[ drop 1 ]] 'b'" ebnf>quot with-compilation-unit "ab" "foo" search execute parse parse-result-ast
] unit-test
{ V{ 1 2 } } [
"foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]]" ebnf>quot with-compilation-unit "ab" "foo" search execute parse parse-result-ast
] unit-test
{ CHAR: A } [
"foo=[A-Z]" ebnf>quot with-compilation-unit "A" "foo" search execute parse parse-result-ast
] unit-test
{ CHAR: Z } [
"foo=[A-Z]" ebnf>quot with-compilation-unit "Z" "foo" search execute parse parse-result-ast
] unit-test
{ f } [
"foo=[A-Z]" ebnf>quot with-compilation-unit "0" "foo" search execute parse
] unit-test
{ CHAR: 0 } [
"foo=[^A-Z]" ebnf>quot with-compilation-unit "0" "foo" search execute parse parse-result-ast
] unit-test
{ f } [
"foo=[^A-Z]" ebnf>quot with-compilation-unit "A" "foo" search execute parse
] unit-test
{ f } [
"foo=[^A-Z]" ebnf>quot with-compilation-unit "Z" "foo" search execute parse
] unit-test

View File

@ -2,37 +2,93 @@
! See http://factorcode.org/license.txt for BSD license.
USING: kernel parser words arrays strings math.parser sequences
quotations vectors namespaces math assocs continuations peg
peg.parsers unicode.categories ;
peg.parsers unicode.categories multiline combinators.lib
splitting ;
IN: peg.ebnf
TUPLE: ebnf-non-terminal symbol ;
TUPLE: ebnf-terminal symbol ;
TUPLE: ebnf-any-character ;
TUPLE: ebnf-range pattern ;
TUPLE: ebnf-ensure-not group ;
TUPLE: ebnf-choice options ;
TUPLE: ebnf-sequence elements ;
TUPLE: ebnf-repeat0 group ;
TUPLE: ebnf-repeat1 group ;
TUPLE: ebnf-optional elements ;
TUPLE: ebnf-rule symbol elements ;
TUPLE: ebnf-action word ;
TUPLE: ebnf-action parser code ;
TUPLE: ebnf rules ;
C: <ebnf-non-terminal> ebnf-non-terminal
C: <ebnf-terminal> ebnf-terminal
C: <ebnf-any-character> ebnf-any-character
C: <ebnf-range> ebnf-range
C: <ebnf-ensure-not> ebnf-ensure-not
C: <ebnf-choice> ebnf-choice
C: <ebnf-sequence> ebnf-sequence
C: <ebnf-repeat0> ebnf-repeat0
C: <ebnf-repeat1> ebnf-repeat1
C: <ebnf-optional> ebnf-optional
C: <ebnf-rule> ebnf-rule
C: <ebnf-action> ebnf-action
C: <ebnf> ebnf
GENERIC: (transform) ( ast -- parser )
: transform ( ast -- object )
H{ } clone dup dup [ "parser" set swap (transform) "main" set ] bind ;
M: ebnf (transform) ( ast -- parser )
ebnf-rules [ (transform) ] map peek ;
M: ebnf-rule (transform) ( ast -- parser )
dup ebnf-rule-elements (transform) [
swap ebnf-rule-symbol set
] keep ;
M: ebnf-sequence (transform) ( ast -- parser )
ebnf-sequence-elements [ (transform) ] map seq ;
M: ebnf-choice (transform) ( ast -- parser )
ebnf-choice-options [ (transform) ] map choice ;
M: ebnf-any-character (transform) ( ast -- parser )
drop any-char ;
M: ebnf-range (transform) ( ast -- parser )
ebnf-range-pattern range-pattern ;
M: ebnf-ensure-not (transform) ( ast -- parser )
ebnf-ensure-not-group (transform) ensure-not ;
M: ebnf-repeat0 (transform) ( ast -- parser )
ebnf-repeat0-group (transform) repeat0 ;
M: ebnf-repeat1 (transform) ( ast -- parser )
ebnf-repeat1-group (transform) repeat1 ;
M: ebnf-optional (transform) ( ast -- parser )
ebnf-optional-elements (transform) optional ;
M: ebnf-action (transform) ( ast -- parser )
[ ebnf-action-parser (transform) ] keep
ebnf-action-code string-lines parse-lines action ;
M: ebnf-terminal (transform) ( ast -- parser )
ebnf-terminal-symbol token sp ;
M: ebnf-non-terminal (transform) ( ast -- parser )
ebnf-non-terminal-symbol [
, "parser" get , \ at ,
] [ ] make delay ;
SYMBOL: parsers
SYMBOL: non-terminals
SYMBOL: last-parser
: reset-parser-generation ( -- )
V{ } clone parsers set
H{ } clone non-terminals set
f last-parser set ;
H{ } clone non-terminals set ;
: store-parser ( parser -- number )
parsers get [ push ] keep length 1- ;
@ -50,7 +106,7 @@ SYMBOL: last-parser
GENERIC: (generate-parser) ( ast -- id )
: generate-parser ( ast -- id )
(generate-parser) dup last-parser set ;
(generate-parser) ;
M: ebnf-terminal (generate-parser) ( ast -- id )
ebnf-terminal-symbol token sp store-parser ;
@ -61,6 +117,12 @@ M: ebnf-non-terminal (generate-parser) ( ast -- id )
parsers get , \ nth , [ search ] [ 2drop f ] recover , \ or ,
] [ ] make delay sp store-parser ;
M: ebnf-any-character (generate-parser) ( ast -- id )
drop [ drop t ] satisfy store-parser ;
M: ebnf-range (generate-parser) ( ast -- id )
ebnf-range-pattern range-pattern store-parser ;
M: ebnf-choice (generate-parser) ( ast -- id )
ebnf-choice-options [
generate-parser get-parser
@ -71,9 +133,15 @@ M: ebnf-sequence (generate-parser) ( ast -- id )
generate-parser get-parser
] map seq store-parser ;
M: ebnf-ensure-not (generate-parser) ( ast -- id )
ebnf-ensure-not-group generate-parser get-parser ensure-not store-parser ;
M: ebnf-repeat0 (generate-parser) ( ast -- id )
ebnf-repeat0-group generate-parser get-parser repeat0 store-parser ;
M: ebnf-repeat1 (generate-parser) ( ast -- id )
ebnf-repeat1-group generate-parser get-parser repeat1 store-parser ;
M: ebnf-optional (generate-parser) ( ast -- id )
ebnf-optional-elements generate-parser get-parser optional store-parser ;
@ -83,15 +151,12 @@ M: ebnf-rule (generate-parser) ( ast -- id )
swap [ parsers get set-nth ] keep ;
M: ebnf-action (generate-parser) ( ast -- id )
ebnf-action-word search 1quotation
last-parser get get-parser swap action store-parser ;
[ ebnf-action-parser generate-parser get-parser ] keep
ebnf-action-code string-lines parse-lines action store-parser ;
M: vector (generate-parser) ( ast -- id )
[ generate-parser ] map peek ;
M: f (generate-parser) ( ast -- id )
drop last-parser get ;
M: ebnf (generate-parser) ( ast -- id )
ebnf-rules [
generate-parser
@ -99,67 +164,164 @@ M: ebnf (generate-parser) ( ast -- id )
DEFER: 'rhs'
: syntax ( string -- parser )
#! Parses the string, ignoring white space, and
#! does not put the result in the AST.
token sp hide ;
: syntax-pack ( begin parser end -- parser )
#! Parse 'parser' surrounded by syntax elements
#! begin and end.
[ syntax ] dipd syntax pack ;
: 'identifier' ( -- parser )
#! Return a parser that parses an identifer delimited by
#! a quotation character. The quotation can be single
#! or double quotes. The AST produced is the identifier
#! between the quotes.
[
[ CHAR: " = not ] satisfy repeat1 "\"" "\"" surrounded-by ,
[ CHAR: ' = not ] satisfy repeat1 "'" "'" surrounded-by ,
] choice* [ >string ] action ;
: 'non-terminal' ( -- parser )
CHAR: a CHAR: z range "-" token [ first ] action 2array choice repeat1 [ >string <ebnf-non-terminal> ] action ;
#! A non-terminal is the name of another rule. It can
#! be any non-blank character except for characters used
#! in the EBNF syntax itself.
[
{
[ dup blank? ]
[ dup CHAR: " = ]
[ dup CHAR: ' = ]
[ dup CHAR: | = ]
[ dup CHAR: { = ]
[ dup CHAR: } = ]
[ dup CHAR: = = ]
[ dup CHAR: ) = ]
[ dup CHAR: ( = ]
[ dup CHAR: ] = ]
[ dup CHAR: [ = ]
[ dup CHAR: . = ]
[ dup CHAR: ! = ]
[ dup CHAR: * = ]
[ dup CHAR: + = ]
[ dup CHAR: ? = ]
} || not nip
] satisfy repeat1 [ >string <ebnf-non-terminal> ] action ;
: 'terminal' ( -- parser )
"'" token hide [ CHAR: ' = not ] satisfy repeat1 "'" token hide 3array seq [ first >string <ebnf-terminal> ] action ;
#! A terminal is an identifier enclosed in quotations
#! and it represents the literal value of the identifier.
'identifier' [ <ebnf-terminal> ] action ;
: 'any-character' ( -- parser )
#! A parser to match the symbol for any character match.
[ CHAR: . = ] satisfy [ drop <ebnf-any-character> ] action ;
: 'range-parser' ( -- parser )
#! Match the syntax for declaring character ranges
[
[ "[" syntax , "[" token ensure-not , ] seq* hide ,
[ CHAR: ] = not ] satisfy repeat1 ,
"]" syntax ,
] seq* [ first >string <ebnf-range> ] action ;
: 'element' ( -- parser )
'non-terminal' 'terminal' 2array choice ;
#! An element of a rule. It can be a terminal or a
#! non-terminal but must not be followed by a "=".
#! The latter indicates that it is the beginning of a
#! new rule.
[
[
'non-terminal' ,
'terminal' ,
'range-parser' ,
'any-character' ,
] choice* ,
"=" syntax ensure-not ,
] seq* [ first ] action ;
DEFER: 'choice'
: grouped ( quot suffix -- parser )
#! Parse a group of choices, with a suffix indicating
#! the type of group (repeat0, repeat1, etc) and
#! an quot that is the action that produces the AST.
"(" [ 'choice' sp ] delay ")" syntax-pack
swap 2seq
[ first ] rot compose action ;
: 'group' ( -- parser )
"(" token sp hide
[ 'choice' sp ] delay
")" token sp hide
3array seq [ first ] action ;
#! A grouping with no suffix. Used for precedence.
[ ] [
"*" token sp ensure-not ,
"+" token sp ensure-not ,
"?" token sp ensure-not ,
] seq* hide grouped ;
: 'repeat0' ( -- parser )
"{" token sp hide
[ 'choice' sp ] delay
"}" token sp hide
3array seq [ first <ebnf-repeat0> ] action ;
[ <ebnf-repeat0> ] "*" syntax grouped ;
: 'repeat1' ( -- parser )
[ <ebnf-repeat1> ] "+" syntax grouped ;
: 'optional' ( -- parser )
"[" token sp hide
[ 'choice' sp ] delay
"]" token sp hide
3array seq [ first <ebnf-optional> ] action ;
[ <ebnf-optional> ] "?" syntax grouped ;
: 'sequence' ( -- parser )
: 'factor-code' ( -- parser )
[
"]]" token ensure-not ,
[ drop t ] satisfy ,
] seq* [ first ] action repeat0 [ >string ] action ;
: 'ensure-not' ( -- parser )
#! Parses the '!' syntax to ensure that
#! something that matches the following elements do
#! not exist in the parse stream.
[
"!" syntax ,
'group' sp ,
] seq* [ first <ebnf-ensure-not> ] action ;
: ('sequence') ( -- parser )
#! A sequence of terminals and non-terminals, including
#! groupings of those.
[
'ensure-not' sp ,
'element' sp ,
'group' sp ,
'repeat0' sp ,
'repeat1' sp ,
'optional' sp ,
] { } make choice
repeat1 [
dup length 1 = [ first ] [ <ebnf-sequence> ] if
] action ;
] choice* ;
: 'sequence' ( -- parser )
#! A sequence of terminals and non-terminals, including
#! groupings of those.
[
[
('sequence') ,
"[[" 'factor-code' "]]" syntax-pack ,
] seq* [ first2 <ebnf-action> ] action ,
('sequence') ,
] choice* repeat1 [
dup length 1 = [ first ] [ <ebnf-sequence> ] if
] action ;
: 'choice' ( -- parser )
'sequence' sp "|" token sp list-of [
dup length 1 = [ first ] [ <ebnf-choice> ] if
] action ;
: 'action' ( -- parser )
"=>" token hide
[ blank? ] satisfy ensure-not [ drop t ] satisfy 2array seq [ first ] action repeat1 [ >string ] action sp
2array seq [ first <ebnf-action> ] action ;
: 'rhs' ( -- parser )
'choice' 'action' sp optional 2array seq ;
] action ;
: 'rule' ( -- parser )
'non-terminal' [ ebnf-non-terminal-symbol ] action
"=" token sp hide
'rhs'
3array seq [ first2 <ebnf-rule> ] action ;
[
'non-terminal' [ ebnf-non-terminal-symbol ] action ,
"=" syntax ,
'choice' ,
] seq* [ first2 <ebnf-rule> ] action ;
: 'ebnf' ( -- parser )
'rule' sp "." token sp hide list-of [ <ebnf> ] action ;
'rule' sp repeat1 [ <ebnf> ] action ;
: ebnf>quot ( string -- quot )
'ebnf' parse [
@ -182,4 +344,7 @@ DEFER: 'choice'
f
] if* ;
: <EBNF "EBNF>" parse-tokens " " join ebnf>quot call ; parsing
: transform-ebnf ( string -- object )
'ebnf' parse parse-result-ast transform ;
: <EBNF "EBNF>" parse-multiline-string ebnf>quot call ; parsing

View File

@ -0,0 +1 @@
Chris Double

View File

@ -0,0 +1,25 @@
! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license.
!
USING: kernel tools.test peg peg.expr multiline sequences ;
IN: temporary
{ 5 } [
"2+3" eval-expr
] unit-test
{ 6 } [
"2*3" eval-expr
] unit-test
{ 14 } [
"2+3*4" eval-expr
] unit-test
{ 17 } [
"2+3*4+3" eval-expr
] unit-test
{ 23 } [
"2+3*(4+3)" eval-expr
] unit-test

View File

@ -0,0 +1,30 @@
! Copyright (C) 2008 Chris Double.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel arrays strings math.parser sequences
peg peg.ebnf peg.parsers memoize math ;
IN: peg.expr
: operator-fold ( lhs seq -- value )
#! Perform a fold of a lhs, followed by a sequence of pairs being
#! { operator rhs } in to a tree structure of the correct precedence.
swap [ first2 swap call ] reduce ;
<EBNF
times = ("*") [[ drop [ * ] ]]
divide = ("/") [[ drop [ / ] ]]
add = ("+") [[ drop [ + ] ]]
subtract = ("-") [[ drop [ - ] ]]
digit = ([0-9]) [[ digit> ]]
number = ((digit)+) [[ unclip [ swap 10 * + ] reduce ]]
value = number | ("(" expr ")") [[ second ]]
product = (value ((times | divide) value)*) [[ first2 operator-fold ]]
sum = (product ((add | subtract) product)*) [[ first2 operator-fold ]]
expr = sum
EBNF>
: eval-expr ( string -- number )
expr parse parse-result-ast ;

View File

@ -0,0 +1 @@
Simple expression evaluator using EBNF

1
extra/peg/expr/tags.txt Normal file
View File

@ -0,0 +1 @@
parsing

View File

@ -159,3 +159,21 @@ HELP: 'string'
} { $description
"Returns a parser that matches an string composed of a \", anything that is not \", and another \"."
} { $see-also 'integer' } ;
HELP: range-pattern
{ $values
{ "pattern" "a string" }
{ "parser" "a parser" }
} { $description
"Returns a parser that matches a single character based on the set "
"of characters in the pattern string."
"Any single character in the pattern matches that character. "
"If the pattern begins with a ^ then the set is negated "
"(the element matches any character not in the set). Any pair "
"of characters separated with a dash (-) represents the "
"range of characters from the first to the second, inclusive."
{ $examples
{ $example "USING: peg peg.parsers prettyprint ;" "\"a\" \"_a-zA-Z\" range-pattern parse parse-result-ast 1string ." "\"a\"" }
{ $example "USING: peg peg.parsers prettyprint ;" "\"0\" \"^0-9\" range-pattern parse ." "f" }
}
} ;

View File

@ -2,7 +2,8 @@
! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences strings namespaces math assocs shuffle
vectors arrays combinators.lib memoize math.parser match
unicode.categories sequences.deep peg peg.private ;
unicode.categories sequences.deep peg peg.private
peg.search math.ranges ;
IN: peg.parsers
TUPLE: just-parser p1 ;
@ -83,3 +84,30 @@ MEMO: 'string' ( -- parser )
[ CHAR: " = not ] satisfy repeat0 ,
[ CHAR: " = ] satisfy hide ,
] { } make seq [ first >string ] action ;
: (range-pattern) ( pattern -- string )
#! Given a range pattern, produce a string containing
#! all characters within that range.
[
any-char ,
[ CHAR: - = ] satisfy hide ,
any-char ,
] seq* [
first2 [a,b] >string
] action
replace ;
MEMO: range-pattern ( pattern -- parser )
#! 'pattern' is a set of characters describing the
#! parser to be produced. Any single character in
#! the pattern matches that character. If the pattern
#! begins with a ^ then the set is negated (the element
#! matches any character not in the set). Any pair of
#! characters separated with a dash (-) represents the
#! range of characters from the first to the second,
#! inclusive.
dup first CHAR: ^ = [
1 tail (range-pattern) [ member? not ] curry satisfy
] [
(range-pattern) [ member? ] curry satisfy
] if ;

View File

@ -1,7 +1,7 @@
! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license.
!
USING: kernel tools.test peg peg.pl0 ;
USING: kernel tools.test peg peg.pl0 multiline sequences ;
IN: peg.pl0.tests
{ "abc" } [
@ -11,3 +11,89 @@ IN: peg.pl0.tests
{ 55 } [
"55abc" number parse parse-result-ast
] unit-test
{ t } [
<"
VAR x, squ;
PROCEDURE square;
BEGIN
squ := x * x
END;
BEGIN
x := 1;
WHILE x <= 10 DO
BEGIN
CALL square;
x := x + 1;
END
END.
"> program parse parse-result-remaining empty?
] unit-test
{ f } [
<"
CONST
m = 7,
n = 85;
VAR
x, y, z, q, r;
PROCEDURE multiply;
VAR a, b;
BEGIN
a := x;
b := y;
z := 0;
WHILE b > 0 DO BEGIN
IF ODD b THEN z := z + a;
a := 2 * a;
b := b / 2;
END
END;
PROCEDURE divide;
VAR w;
BEGIN
r := x;
q := 0;
w := y;
WHILE w <= r DO w := 2 * w;
WHILE w > y DO BEGIN
q := 2 * q;
w := w / 2;
IF w <= r THEN BEGIN
r := r - w;
q := q + 1
END
END
END;
PROCEDURE gcd;
VAR f, g;
BEGIN
f := x;
g := y;
WHILE f # g DO BEGIN
IF f < g THEN g := g - f;
IF g < f THEN f := f - g;
END;
z := f
END;
BEGIN
x := m;
y := n;
CALL multiply;
x := 25;
y := 3;
CALL divide;
x := 84;
y := 36;
CALL gcd;
END.
"> program parse parse-result-remaining empty?
] unit-test

View File

@ -1,30 +1,26 @@
! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel arrays strings math.parser sequences
peg peg.ebnf peg.parsers memoize ;
peg peg.ebnf peg.parsers memoize namespaces math ;
IN: peg.pl0
#! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0
MEMO: ident ( -- parser )
CHAR: a CHAR: z range
CHAR: A CHAR: Z range 2array choice repeat1
[ >string ] action ;
MEMO: number ( -- parser )
CHAR: 0 CHAR: 9 range repeat1 [ string>number ] action ;
<EBNF
program = block '.' .
block = [ 'const' ident '=' number { ',' ident '=' number } ';' ]
[ 'var' ident { ',' ident } ';' ]
{ 'procedure' ident ';' [ block ';' ] } statement .
statement = [ ident ':=' expression | 'call' ident |
'begin' statement {';' statement } 'end' |
'if' condition 'then' statement |
'while' condition 'do' statement ] .
condition = 'odd' expression |
expression ('=' | '#' | '<=' | '<' | '>=' | '>') expression .
expression = ['+' | '-'] term {('+' | '-') term } .
term = factor {('*' | '/') factor } .
factor = ident | number | '(' expression ')'
program = block "."
block = ( "CONST" ident "=" number ( "," ident "=" number )* ";" )?
( "VAR" ident ( "," ident )* ";" )?
( "PROCEDURE" ident ";" ( block ";" )? )* statement
statement = ( ident ":=" expression | "CALL" ident |
"BEGIN" statement (";" statement )* "END" |
"IF" condition "THEN" statement |
"WHILE" condition "DO" statement )?
condition = "ODD" expression |
expression ("=" | "#" | "<=" | "<" | ">=" | ">") expression
expression = ("+" | "-")? term (("+" | "-") term )*
term = factor (("*" | "/") factor )*
factor = ident | number | "(" expression ")"
ident = (([a-zA-Z])+) [[ >string ]]
digit = ([0-9]) [[ digit> ]]
number = ((digit)+) [[ unclip [ swap 10 * + ] reduce ]]
EBNF>