Merge branch 'master' of git://double.co.nz/git/factor

db4
Slava Pestov 2008-03-30 23:57:58 -05:00
commit 75614bf28c
5 changed files with 128 additions and 22 deletions

View File

@ -1,7 +1,7 @@
! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license.
!
USING: kernel tools.test peg peg.ebnf words ;
USING: kernel tools.test peg peg.ebnf words math math.parser ;
IN: peg.ebnf.tests
{ T{ ebnf-non-terminal f "abc" } } [
@ -160,6 +160,25 @@ IN: peg.ebnf.tests
"1-1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF] call parse-result-ast
] unit-test
{ 6 } [
"4+2" [EBNF num=[0-9] => [[ digit> ]] foo=num:x '+' num:y => [[ drop x y + ]] EBNF] call parse-result-ast
] unit-test
{ 6 } [
"4+2" [EBNF foo=[0-9]:x '+' [0-9]:y => [[ drop x digit> y digit> + ]] EBNF] call parse-result-ast
] unit-test
{ 10 } [
{ 1 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call parse-result-ast
] unit-test
{ f } [
{ "a" 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call
] unit-test
{ 3 } [
{ 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call parse-result-ast
] unit-test
{ V{ V{ 49 } "+" V{ 49 } } } [
#! Test direct left recursion.

View File

@ -3,7 +3,7 @@
USING: kernel compiler.units parser words arrays strings math.parser sequences
quotations vectors namespaces math assocs continuations peg
peg.parsers unicode.categories multiline combinators.lib
splitting accessors ;
splitting accessors effects sequences.deep ;
IN: peg.ebnf
TUPLE: ebnf-non-terminal symbol ;
@ -19,6 +19,8 @@ TUPLE: ebnf-repeat1 group ;
TUPLE: ebnf-optional group ;
TUPLE: ebnf-rule symbol elements ;
TUPLE: ebnf-action parser code ;
TUPLE: ebnf-var parser name ;
TUPLE: ebnf-semantic parser code ;
TUPLE: ebnf rules ;
C: <ebnf-non-terminal> ebnf-non-terminal
@ -34,6 +36,8 @@ C: <ebnf-repeat1> ebnf-repeat1
C: <ebnf-optional> ebnf-optional
C: <ebnf-rule> ebnf-rule
C: <ebnf-action> ebnf-action
C: <ebnf-var> ebnf-var
C: <ebnf-semantic> ebnf-semantic
C: <ebnf> ebnf
: syntax ( string -- parser )
@ -79,6 +83,7 @@ C: <ebnf> ebnf
[ dup CHAR: * = ]
[ dup CHAR: + = ]
[ dup CHAR: ? = ]
[ dup CHAR: : = ]
} || not nip
] satisfy repeat1 [ >string <ebnf-non-terminal> ] action ;
@ -99,7 +104,7 @@ C: <ebnf> ebnf
"]" syntax ,
] seq* [ first >string <ebnf-range> ] action ;
: 'element' ( -- parser )
: ('element') ( -- parser )
#! An element of a rule. It can be a terminal or a
#! non-terminal but must not be followed by a "=".
#! The latter indicates that it is the beginning of a
@ -117,6 +122,12 @@ C: <ebnf> ebnf
] choice* ,
] seq* [ first ] action ;
: 'element' ( -- parser )
[
[ ('element') , ":" syntax , "a-zA-Z" range-pattern repeat1 [ >string ] action , ] seq* [ first2 <ebnf-var> ] action ,
('element') ,
] choice* ;
DEFER: 'choice'
: grouped ( quot suffix -- parser )
@ -147,6 +158,7 @@ DEFER: 'choice'
: 'factor-code' ( -- parser )
[
"]]" token ensure-not ,
"]?" token ensure-not ,
[ drop t ] satisfy ,
] seq* [ first ] action repeat0 [ >string ] action ;
@ -184,14 +196,15 @@ DEFER: 'choice'
: 'action' ( -- parser )
"[[" 'factor-code' "]]" syntax-pack ;
: 'semantic' ( -- parser )
"?[" 'factor-code' "]?" syntax-pack ;
: 'sequence' ( -- parser )
#! A sequence of terminals and non-terminals, including
#! groupings of those.
[
[
('sequence') ,
'action' ,
] seq* [ first2 <ebnf-action> ] action ,
[ ('sequence') , 'action' , ] seq* [ first2 <ebnf-action> ] action ,
[ ('sequence') , 'semantic' , ] seq* [ first2 <ebnf-semantic> ] action ,
('sequence') ,
] choice* repeat1 [
dup length 1 = [ first ] [ <ebnf-sequence> ] if
@ -200,6 +213,7 @@ DEFER: 'choice'
: 'actioned-sequence' ( -- parser )
[
[ 'sequence' , "=>" syntax , 'action' , ] seq* [ first2 <ebnf-action> ] action ,
[ 'sequence' , ":" syntax , "a-zA-Z" range-pattern repeat1 [ >string ] action , ] seq* [ first2 <ebnf-var> ] action ,
'sequence' ,
] choice* ;
@ -223,15 +237,17 @@ GENERIC: (transform) ( ast -- parser )
SYMBOL: parser
SYMBOL: main
SYMBOL: vars
: transform ( ast -- object )
H{ } clone dup dup [ parser set swap (transform) main set ] bind ;
H{ } clone dup dup [ parser set V{ } vars set swap (transform) main set ] bind ;
M: ebnf (transform) ( ast -- parser )
rules>> [ (transform) ] map peek ;
M: ebnf-rule (transform) ( ast -- parser )
dup elements>> (transform) [
dup elements>>
vars get clone vars [ (transform) ] with-variable [
swap symbol>> set
] keep ;
@ -266,9 +282,30 @@ M: ebnf-repeat1 (transform) ( ast -- parser )
M: ebnf-optional (transform) ( ast -- parser )
transform-group optional ;
: build-locals ( string vars -- string )
dup empty? [
drop
] [
[
"USING: locals namespaces ; [let* | " %
[ dup % " [ \"" % % "\" get ] " % ] each
" | " %
%
" ] with-locals" %
] "" make
] if ;
M: ebnf-action (transform) ( ast -- parser )
[ parser>> (transform) ] keep
code>> string-lines [ parse-lines ] with-compilation-unit action ;
code>> vars get build-locals string-lines [ parse-lines ] with-compilation-unit action ;
M: ebnf-semantic (transform) ( ast -- parser )
[ parser>> (transform) ] keep
code>> vars get build-locals string-lines [ parse-lines ] with-compilation-unit semantic ;
M: ebnf-var (transform) ( ast -- parser )
[ parser>> (transform) ] [ name>> ] bi
dup vars get push [ dupd set ] curry action ;
M: ebnf-terminal (transform) ( ast -- parser )
symbol>> token sp ;
@ -296,12 +333,12 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
: ebnf>quot ( string -- hashtable quot )
'ebnf' parse check-parse-result
parse-result-ast transform dup dup parser [ main swap at compile ] with-variable
[ compiled-parse ] curry ;
[ compiled-parse ] curry [ with-scope ] curry ;
: [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing
: EBNF:
CREATE-WORD dup
";EBNF" parse-multiline-string
ebnf>quot swapd define "ebnf-parser" set-word-prop ; parsing
ebnf>quot swapd 1 1 <effect> define-declared "ebnf-parser" set-word-prop ; parsing

View File

@ -95,6 +95,19 @@ HELP: optional
"Returns a parser that parses 0 or 1 instances of the 'p1' parser. The AST produced is "
"'f' if 0 instances are parsed the AST produced is 'f', otherwise it is the AST produced by 'p1'." } ;
HELP: semantic
{ $values
{ "parser" "a parser" }
{ "quot" "a quotation with stack effect ( object -- bool )" }
}
{ $description
"Returns a parser that succeeds if the 'p1' parser succeeds and the quotation called with "
"the AST produced by 'p1' on the stack returns true." }
{ $examples
{ $example "\"A\" [ drop t ] satisfy [ 66 > ] semantic parse" "f" }
{ $example "\"C\" [ drop t ] satisfy [ 66 > ] semantic parse parse-result-ast " "67" }
} ;
HELP: ensure
{ $values
{ "parser" "a parser" }
@ -124,7 +137,7 @@ HELP: action
"Returns a parser that calls the 'p1' parser and applies the quotation to the AST resulting "
"from that parse. The result of the quotation is then used as the final AST. This can be used "
"for manipulating the parse tree to produce a AST better suited for the task at hand rather than "
"the default AST." }
"the default AST. If the quotation returns " { $link fail } " then the parser fails." }
{ $code "CHAR: 0 CHAR: 9 range [ to-digit ] action" } ;
HELP: sp

View File

@ -1,7 +1,7 @@
! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license.
!
USING: kernel tools.test strings namespaces arrays sequences peg peg.private accessors words ;
USING: kernel tools.test strings namespaces arrays sequences peg peg.private accessors words math ;
IN: peg.tests
{ f } [
@ -182,4 +182,13 @@ IN: peg.tests
[ f , "a" token , ] seq*
dup parsers>>
dupd 0 swap set-nth compile word?
] unit-test
] unit-test
{ f } [
"A" [ drop t ] satisfy [ 66 >= ] semantic parse
] unit-test
{ CHAR: B } [
"B" [ drop t ] satisfy [ 66 >= ] semantic parse parse-result-ast
] unit-test

View File

@ -3,7 +3,7 @@
USING: kernel sequences strings namespaces math assocs shuffle
vectors arrays combinators.lib math.parser match
unicode.categories sequences.lib compiler.units parser
words quotations effects memoize accessors locals ;
words quotations effects memoize accessors locals effects ;
IN: peg
USE: prettyprint
@ -208,7 +208,7 @@ GENERIC: (compile) ( parser -- quot )
:: parser-body ( parser -- quot )
#! Return the body of the word that is the compiled version
#! of the parser.
[let* | rule [ parser (compile) define-temp dup parser "peg" set-word-prop ]
[let* | rule [ gensym dup parser (compile) 0 1 <effect> define-declared dup parser "peg" set-word-prop ]
|
[
rule pos get apply-rule dup fail = [
@ -218,7 +218,7 @@ GENERIC: (compile) ( parser -- quot )
] if
]
] ;
: compiled-parser ( parser -- word )
#! Look to see if the given parser has been compiled.
#! If not, compile it to a temporary word, cache it,
@ -229,7 +229,7 @@ GENERIC: (compile) ( parser -- quot )
dup compiled>> [
nip
] [
gensym tuck >>compiled 2dup parser-body define dupd "peg" set-word-prop
gensym tuck >>compiled 2dup parser-body 0 1 <effect> define-declared dupd "peg" set-word-prop
] if* ;
: compile ( parser -- word )
@ -414,6 +414,23 @@ TUPLE: optional-parser p1 ;
M: optional-parser (compile) ( parser -- quot )
p1>> compiled-parser \ ?quot optional-pattern match-replace ;
TUPLE: semantic-parser p1 quot ;
MATCH-VARS: ?parser ;
: semantic-pattern ( -- quot )
[
?parser [
dup parse-result-ast ?quot call [ drop f ] unless
] [
f
] if*
] ;
M: semantic-parser (compile) ( parser -- quot )
[ p1>> compiled-parser ] [ quot>> ] bi
2array { ?parser ?quot } semantic-pattern match-replace ;
TUPLE: ensure-parser p1 ;
: ensure-pattern ( -- quot )
@ -490,8 +507,11 @@ M: box-parser (compile) ( parser -- quot )
#! Calls the quotation at compile time
#! to produce the parser to be compiled.
#! This differs from 'delay' which calls
#! it at run time.
quot>> call compiled-parser 1quotation ;
#! it at run time. Due to using the runtime
#! environment at compile time, this parser
#! must not be cached, so we clear out the
#! delgates cache.
f >>compiled quot>> call compiled-parser 1quotation ;
PRIVATE>
@ -543,6 +563,9 @@ PRIVATE>
: optional ( parser -- parser )
optional-parser construct-boa init-parser ;
: semantic ( parser quot -- parser )
semantic-parser construct-boa init-parser ;
: ensure ( parser -- parser )
ensure-parser construct-boa init-parser ;
@ -562,7 +585,12 @@ PRIVATE>
delay-parser construct-boa init-parser ;
: box ( quot -- parser )
box-parser construct-boa init-parser ;
#! because a box has its quotation run at compile time
#! it must always have a new parser delgate created,
#! not a cached one. This is because the same box,
#! compiled twice can have a different compiled word
#! due to running at compile time.
box-parser construct-boa next-id f <parser> over set-delegate ;
: PEG:
(:) [