Merge branch 'master' of git://double.co.nz/git/factor

db4
Slava Pestov 2008-03-30 23:57:58 -05:00
commit 75614bf28c
5 changed files with 128 additions and 22 deletions

View File

@ -1,7 +1,7 @@
! Copyright (C) 2007 Chris Double. ! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
! !
USING: kernel tools.test peg peg.ebnf words ; USING: kernel tools.test peg peg.ebnf words math math.parser ;
IN: peg.ebnf.tests IN: peg.ebnf.tests
{ T{ ebnf-non-terminal f "abc" } } [ { T{ ebnf-non-terminal f "abc" } } [
@ -160,6 +160,25 @@ IN: peg.ebnf.tests
"1-1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF] call parse-result-ast "1-1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF] call parse-result-ast
] unit-test ] unit-test
{ 6 } [
"4+2" [EBNF num=[0-9] => [[ digit> ]] foo=num:x '+' num:y => [[ drop x y + ]] EBNF] call parse-result-ast
] unit-test
{ 6 } [
"4+2" [EBNF foo=[0-9]:x '+' [0-9]:y => [[ drop x digit> y digit> + ]] EBNF] call parse-result-ast
] unit-test
{ 10 } [
{ 1 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call parse-result-ast
] unit-test
{ f } [
{ "a" 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call
] unit-test
{ 3 } [
{ 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ drop x y + ]] | num EBNF] call parse-result-ast
] unit-test
{ V{ V{ 49 } "+" V{ 49 } } } [ { V{ V{ 49 } "+" V{ 49 } } } [
#! Test direct left recursion. #! Test direct left recursion.

View File

@ -3,7 +3,7 @@
USING: kernel compiler.units parser words arrays strings math.parser sequences USING: kernel compiler.units parser words arrays strings math.parser sequences
quotations vectors namespaces math assocs continuations peg quotations vectors namespaces math assocs continuations peg
peg.parsers unicode.categories multiline combinators.lib peg.parsers unicode.categories multiline combinators.lib
splitting accessors ; splitting accessors effects sequences.deep ;
IN: peg.ebnf IN: peg.ebnf
TUPLE: ebnf-non-terminal symbol ; TUPLE: ebnf-non-terminal symbol ;
@ -19,6 +19,8 @@ TUPLE: ebnf-repeat1 group ;
TUPLE: ebnf-optional group ; TUPLE: ebnf-optional group ;
TUPLE: ebnf-rule symbol elements ; TUPLE: ebnf-rule symbol elements ;
TUPLE: ebnf-action parser code ; TUPLE: ebnf-action parser code ;
TUPLE: ebnf-var parser name ;
TUPLE: ebnf-semantic parser code ;
TUPLE: ebnf rules ; TUPLE: ebnf rules ;
C: <ebnf-non-terminal> ebnf-non-terminal C: <ebnf-non-terminal> ebnf-non-terminal
@ -34,6 +36,8 @@ C: <ebnf-repeat1> ebnf-repeat1
C: <ebnf-optional> ebnf-optional C: <ebnf-optional> ebnf-optional
C: <ebnf-rule> ebnf-rule C: <ebnf-rule> ebnf-rule
C: <ebnf-action> ebnf-action C: <ebnf-action> ebnf-action
C: <ebnf-var> ebnf-var
C: <ebnf-semantic> ebnf-semantic
C: <ebnf> ebnf C: <ebnf> ebnf
: syntax ( string -- parser ) : syntax ( string -- parser )
@ -79,6 +83,7 @@ C: <ebnf> ebnf
[ dup CHAR: * = ] [ dup CHAR: * = ]
[ dup CHAR: + = ] [ dup CHAR: + = ]
[ dup CHAR: ? = ] [ dup CHAR: ? = ]
[ dup CHAR: : = ]
} || not nip } || not nip
] satisfy repeat1 [ >string <ebnf-non-terminal> ] action ; ] satisfy repeat1 [ >string <ebnf-non-terminal> ] action ;
@ -99,7 +104,7 @@ C: <ebnf> ebnf
"]" syntax , "]" syntax ,
] seq* [ first >string <ebnf-range> ] action ; ] seq* [ first >string <ebnf-range> ] action ;
: 'element' ( -- parser ) : ('element') ( -- parser )
#! An element of a rule. It can be a terminal or a #! An element of a rule. It can be a terminal or a
#! non-terminal but must not be followed by a "=". #! non-terminal but must not be followed by a "=".
#! The latter indicates that it is the beginning of a #! The latter indicates that it is the beginning of a
@ -117,6 +122,12 @@ C: <ebnf> ebnf
] choice* , ] choice* ,
] seq* [ first ] action ; ] seq* [ first ] action ;
: 'element' ( -- parser )
[
[ ('element') , ":" syntax , "a-zA-Z" range-pattern repeat1 [ >string ] action , ] seq* [ first2 <ebnf-var> ] action ,
('element') ,
] choice* ;
DEFER: 'choice' DEFER: 'choice'
: grouped ( quot suffix -- parser ) : grouped ( quot suffix -- parser )
@ -147,6 +158,7 @@ DEFER: 'choice'
: 'factor-code' ( -- parser ) : 'factor-code' ( -- parser )
[ [
"]]" token ensure-not , "]]" token ensure-not ,
"]?" token ensure-not ,
[ drop t ] satisfy , [ drop t ] satisfy ,
] seq* [ first ] action repeat0 [ >string ] action ; ] seq* [ first ] action repeat0 [ >string ] action ;
@ -184,14 +196,15 @@ DEFER: 'choice'
: 'action' ( -- parser ) : 'action' ( -- parser )
"[[" 'factor-code' "]]" syntax-pack ; "[[" 'factor-code' "]]" syntax-pack ;
: 'semantic' ( -- parser )
"?[" 'factor-code' "]?" syntax-pack ;
: 'sequence' ( -- parser ) : 'sequence' ( -- parser )
#! A sequence of terminals and non-terminals, including #! A sequence of terminals and non-terminals, including
#! groupings of those. #! groupings of those.
[ [
[ [ ('sequence') , 'action' , ] seq* [ first2 <ebnf-action> ] action ,
('sequence') , [ ('sequence') , 'semantic' , ] seq* [ first2 <ebnf-semantic> ] action ,
'action' ,
] seq* [ first2 <ebnf-action> ] action ,
('sequence') , ('sequence') ,
] choice* repeat1 [ ] choice* repeat1 [
dup length 1 = [ first ] [ <ebnf-sequence> ] if dup length 1 = [ first ] [ <ebnf-sequence> ] if
@ -200,6 +213,7 @@ DEFER: 'choice'
: 'actioned-sequence' ( -- parser ) : 'actioned-sequence' ( -- parser )
[ [
[ 'sequence' , "=>" syntax , 'action' , ] seq* [ first2 <ebnf-action> ] action , [ 'sequence' , "=>" syntax , 'action' , ] seq* [ first2 <ebnf-action> ] action ,
[ 'sequence' , ":" syntax , "a-zA-Z" range-pattern repeat1 [ >string ] action , ] seq* [ first2 <ebnf-var> ] action ,
'sequence' , 'sequence' ,
] choice* ; ] choice* ;
@ -223,15 +237,17 @@ GENERIC: (transform) ( ast -- parser )
SYMBOL: parser SYMBOL: parser
SYMBOL: main SYMBOL: main
SYMBOL: vars
: transform ( ast -- object ) : transform ( ast -- object )
H{ } clone dup dup [ parser set swap (transform) main set ] bind ; H{ } clone dup dup [ parser set V{ } vars set swap (transform) main set ] bind ;
M: ebnf (transform) ( ast -- parser ) M: ebnf (transform) ( ast -- parser )
rules>> [ (transform) ] map peek ; rules>> [ (transform) ] map peek ;
M: ebnf-rule (transform) ( ast -- parser ) M: ebnf-rule (transform) ( ast -- parser )
dup elements>> (transform) [ dup elements>>
vars get clone vars [ (transform) ] with-variable [
swap symbol>> set swap symbol>> set
] keep ; ] keep ;
@ -266,9 +282,30 @@ M: ebnf-repeat1 (transform) ( ast -- parser )
M: ebnf-optional (transform) ( ast -- parser ) M: ebnf-optional (transform) ( ast -- parser )
transform-group optional ; transform-group optional ;
: build-locals ( string vars -- string )
dup empty? [
drop
] [
[
"USING: locals namespaces ; [let* | " %
[ dup % " [ \"" % % "\" get ] " % ] each
" | " %
%
" ] with-locals" %
] "" make
] if ;
M: ebnf-action (transform) ( ast -- parser ) M: ebnf-action (transform) ( ast -- parser )
[ parser>> (transform) ] keep [ parser>> (transform) ] keep
code>> string-lines [ parse-lines ] with-compilation-unit action ; code>> vars get build-locals string-lines [ parse-lines ] with-compilation-unit action ;
M: ebnf-semantic (transform) ( ast -- parser )
[ parser>> (transform) ] keep
code>> vars get build-locals string-lines [ parse-lines ] with-compilation-unit semantic ;
M: ebnf-var (transform) ( ast -- parser )
[ parser>> (transform) ] [ name>> ] bi
dup vars get push [ dupd set ] curry action ;
M: ebnf-terminal (transform) ( ast -- parser ) M: ebnf-terminal (transform) ( ast -- parser )
symbol>> token sp ; symbol>> token sp ;
@ -296,12 +333,12 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
: ebnf>quot ( string -- hashtable quot ) : ebnf>quot ( string -- hashtable quot )
'ebnf' parse check-parse-result 'ebnf' parse check-parse-result
parse-result-ast transform dup dup parser [ main swap at compile ] with-variable parse-result-ast transform dup dup parser [ main swap at compile ] with-variable
[ compiled-parse ] curry ; [ compiled-parse ] curry [ with-scope ] curry ;
: [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing : [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing
: EBNF: : EBNF:
CREATE-WORD dup CREATE-WORD dup
";EBNF" parse-multiline-string ";EBNF" parse-multiline-string
ebnf>quot swapd define "ebnf-parser" set-word-prop ; parsing ebnf>quot swapd 1 1 <effect> define-declared "ebnf-parser" set-word-prop ; parsing

View File

@ -95,6 +95,19 @@ HELP: optional
"Returns a parser that parses 0 or 1 instances of the 'p1' parser. The AST produced is " "Returns a parser that parses 0 or 1 instances of the 'p1' parser. The AST produced is "
"'f' if 0 instances are parsed the AST produced is 'f', otherwise it is the AST produced by 'p1'." } ; "'f' if 0 instances are parsed the AST produced is 'f', otherwise it is the AST produced by 'p1'." } ;
HELP: semantic
{ $values
{ "parser" "a parser" }
{ "quot" "a quotation with stack effect ( object -- bool )" }
}
{ $description
"Returns a parser that succeeds if the 'p1' parser succeeds and the quotation called with "
"the AST produced by 'p1' on the stack returns true." }
{ $examples
{ $example "\"A\" [ drop t ] satisfy [ 66 > ] semantic parse" "f" }
{ $example "\"C\" [ drop t ] satisfy [ 66 > ] semantic parse parse-result-ast " "67" }
} ;
HELP: ensure HELP: ensure
{ $values { $values
{ "parser" "a parser" } { "parser" "a parser" }
@ -124,7 +137,7 @@ HELP: action
"Returns a parser that calls the 'p1' parser and applies the quotation to the AST resulting " "Returns a parser that calls the 'p1' parser and applies the quotation to the AST resulting "
"from that parse. The result of the quotation is then used as the final AST. This can be used " "from that parse. The result of the quotation is then used as the final AST. This can be used "
"for manipulating the parse tree to produce a AST better suited for the task at hand rather than " "for manipulating the parse tree to produce a AST better suited for the task at hand rather than "
"the default AST." } "the default AST. If the quotation returns " { $link fail } " then the parser fails." }
{ $code "CHAR: 0 CHAR: 9 range [ to-digit ] action" } ; { $code "CHAR: 0 CHAR: 9 range [ to-digit ] action" } ;
HELP: sp HELP: sp

View File

@ -1,7 +1,7 @@
! Copyright (C) 2007 Chris Double. ! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
! !
USING: kernel tools.test strings namespaces arrays sequences peg peg.private accessors words ; USING: kernel tools.test strings namespaces arrays sequences peg peg.private accessors words math ;
IN: peg.tests IN: peg.tests
{ f } [ { f } [
@ -182,4 +182,13 @@ IN: peg.tests
[ f , "a" token , ] seq* [ f , "a" token , ] seq*
dup parsers>> dup parsers>>
dupd 0 swap set-nth compile word? dupd 0 swap set-nth compile word?
] unit-test ] unit-test
{ f } [
"A" [ drop t ] satisfy [ 66 >= ] semantic parse
] unit-test
{ CHAR: B } [
"B" [ drop t ] satisfy [ 66 >= ] semantic parse parse-result-ast
] unit-test

View File

@ -3,7 +3,7 @@
USING: kernel sequences strings namespaces math assocs shuffle USING: kernel sequences strings namespaces math assocs shuffle
vectors arrays combinators.lib math.parser match vectors arrays combinators.lib math.parser match
unicode.categories sequences.lib compiler.units parser unicode.categories sequences.lib compiler.units parser
words quotations effects memoize accessors locals ; words quotations effects memoize accessors locals effects ;
IN: peg IN: peg
USE: prettyprint USE: prettyprint
@ -208,7 +208,7 @@ GENERIC: (compile) ( parser -- quot )
:: parser-body ( parser -- quot ) :: parser-body ( parser -- quot )
#! Return the body of the word that is the compiled version #! Return the body of the word that is the compiled version
#! of the parser. #! of the parser.
[let* | rule [ parser (compile) define-temp dup parser "peg" set-word-prop ] [let* | rule [ gensym dup parser (compile) 0 1 <effect> define-declared dup parser "peg" set-word-prop ]
| |
[ [
rule pos get apply-rule dup fail = [ rule pos get apply-rule dup fail = [
@ -218,7 +218,7 @@ GENERIC: (compile) ( parser -- quot )
] if ] if
] ]
] ; ] ;
: compiled-parser ( parser -- word ) : compiled-parser ( parser -- word )
#! Look to see if the given parser has been compiled. #! Look to see if the given parser has been compiled.
#! If not, compile it to a temporary word, cache it, #! If not, compile it to a temporary word, cache it,
@ -229,7 +229,7 @@ GENERIC: (compile) ( parser -- quot )
dup compiled>> [ dup compiled>> [
nip nip
] [ ] [
gensym tuck >>compiled 2dup parser-body define dupd "peg" set-word-prop gensym tuck >>compiled 2dup parser-body 0 1 <effect> define-declared dupd "peg" set-word-prop
] if* ; ] if* ;
: compile ( parser -- word ) : compile ( parser -- word )
@ -414,6 +414,23 @@ TUPLE: optional-parser p1 ;
M: optional-parser (compile) ( parser -- quot ) M: optional-parser (compile) ( parser -- quot )
p1>> compiled-parser \ ?quot optional-pattern match-replace ; p1>> compiled-parser \ ?quot optional-pattern match-replace ;
TUPLE: semantic-parser p1 quot ;
MATCH-VARS: ?parser ;
: semantic-pattern ( -- quot )
[
?parser [
dup parse-result-ast ?quot call [ drop f ] unless
] [
f
] if*
] ;
M: semantic-parser (compile) ( parser -- quot )
[ p1>> compiled-parser ] [ quot>> ] bi
2array { ?parser ?quot } semantic-pattern match-replace ;
TUPLE: ensure-parser p1 ; TUPLE: ensure-parser p1 ;
: ensure-pattern ( -- quot ) : ensure-pattern ( -- quot )
@ -490,8 +507,11 @@ M: box-parser (compile) ( parser -- quot )
#! Calls the quotation at compile time #! Calls the quotation at compile time
#! to produce the parser to be compiled. #! to produce the parser to be compiled.
#! This differs from 'delay' which calls #! This differs from 'delay' which calls
#! it at run time. #! it at run time. Due to using the runtime
quot>> call compiled-parser 1quotation ; #! environment at compile time, this parser
#! must not be cached, so we clear out the
#! delgates cache.
f >>compiled quot>> call compiled-parser 1quotation ;
PRIVATE> PRIVATE>
@ -543,6 +563,9 @@ PRIVATE>
: optional ( parser -- parser ) : optional ( parser -- parser )
optional-parser construct-boa init-parser ; optional-parser construct-boa init-parser ;
: semantic ( parser quot -- parser )
semantic-parser construct-boa init-parser ;
: ensure ( parser -- parser ) : ensure ( parser -- parser )
ensure-parser construct-boa init-parser ; ensure-parser construct-boa init-parser ;
@ -562,7 +585,12 @@ PRIVATE>
delay-parser construct-boa init-parser ; delay-parser construct-boa init-parser ;
: box ( quot -- parser ) : box ( quot -- parser )
box-parser construct-boa init-parser ; #! because a box has its quotation run at compile time
#! it must always have a new parser delgate created,
#! not a cached one. This is because the same box,
#! compiled twice can have a different compiled word
#! due to running at compile time.
box-parser construct-boa next-id f <parser> over set-delegate ;
: PEG: : PEG:
(:) [ (:) [