Merge branch 'master' of git://double.co.nz/git/factor

db4
Slava Pestov 2008-03-20 15:34:08 -05:00
commit 18a9bc3355
10 changed files with 289 additions and 261 deletions

View File

@ -1,7 +1,7 @@
! Copyright (C) 2007 Chris Double. ! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
! !
USING: kernel tools.test peg peg.ebnf compiler.units ; USING: kernel tools.test peg peg.ebnf ;
IN: peg.ebnf.tests IN: peg.ebnf.tests
{ T{ ebnf-non-terminal f "abc" } } [ { T{ ebnf-non-terminal f "abc" } } [
@ -109,13 +109,37 @@ IN: peg.ebnf.tests
] unit-test ] unit-test
{ V{ "a" "b" } } [ { V{ "a" "b" } } [
"foo='a' 'b'" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast "ab" [EBNF foo='a' 'b' EBNF] call parse-result-ast
] unit-test ] unit-test
{ V{ 1 "b" } } [ { V{ 1 "b" } } [
"foo=('a')[[ drop 1 ]] 'b'" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast "ab" [EBNF foo=('a')[[ drop 1 ]] 'b' EBNF] call parse-result-ast
] unit-test ] unit-test
{ V{ 1 2 } } [ { V{ 1 2 } } [
"foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]]" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast "ab" [EBNF foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]] EBNF] call parse-result-ast
] unit-test
{ CHAR: A } [
"A" [EBNF foo=[A-Z] EBNF] call parse-result-ast
] unit-test
{ CHAR: Z } [
"Z" [EBNF foo=[A-Z] EBNF] call parse-result-ast
] unit-test
{ f } [
"0" [EBNF foo=[A-Z] EBNF] call
] unit-test
{ CHAR: 0 } [
"0" [EBNF foo=[^A-Z] EBNF] call parse-result-ast
] unit-test
{ f } [
"A" [EBNF foo=[^A-Z] EBNF] call
] unit-test
{ f } [
"Z" [EBNF foo=[^A-Z] EBNF] call
] unit-test ] unit-test

View File

@ -1,6 +1,6 @@
! Copyright (C) 2007 Chris Double. ! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel parser words arrays strings math.parser sequences USING: kernel compiler.units parser words arrays strings math.parser sequences
quotations vectors namespaces math assocs continuations peg quotations vectors namespaces math assocs continuations peg
peg.parsers unicode.categories multiline combinators.lib peg.parsers unicode.categories multiline combinators.lib
splitting ; splitting ;
@ -9,6 +9,8 @@ IN: peg.ebnf
TUPLE: ebnf-non-terminal symbol ; TUPLE: ebnf-non-terminal symbol ;
TUPLE: ebnf-terminal symbol ; TUPLE: ebnf-terminal symbol ;
TUPLE: ebnf-any-character ; TUPLE: ebnf-any-character ;
TUPLE: ebnf-range pattern ;
TUPLE: ebnf-ensure group ;
TUPLE: ebnf-ensure-not group ; TUPLE: ebnf-ensure-not group ;
TUPLE: ebnf-choice options ; TUPLE: ebnf-choice options ;
TUPLE: ebnf-sequence elements ; TUPLE: ebnf-sequence elements ;
@ -22,6 +24,8 @@ TUPLE: ebnf rules ;
C: <ebnf-non-terminal> ebnf-non-terminal C: <ebnf-non-terminal> ebnf-non-terminal
C: <ebnf-terminal> ebnf-terminal C: <ebnf-terminal> ebnf-terminal
C: <ebnf-any-character> ebnf-any-character C: <ebnf-any-character> ebnf-any-character
C: <ebnf-range> ebnf-range
C: <ebnf-ensure> ebnf-ensure
C: <ebnf-ensure-not> ebnf-ensure-not C: <ebnf-ensure-not> ebnf-ensure-not
C: <ebnf-choice> ebnf-choice C: <ebnf-choice> ebnf-choice
C: <ebnf-sequence> ebnf-sequence C: <ebnf-sequence> ebnf-sequence
@ -32,84 +36,6 @@ C: <ebnf-rule> ebnf-rule
C: <ebnf-action> ebnf-action C: <ebnf-action> ebnf-action
C: <ebnf> ebnf C: <ebnf> ebnf
SYMBOL: parsers
SYMBOL: non-terminals
: reset-parser-generation ( -- )
V{ } clone parsers set
H{ } clone non-terminals set ;
: store-parser ( parser -- number )
parsers get [ push ] keep length 1- ;
: get-parser ( index -- parser )
parsers get nth ;
: non-terminal-index ( name -- number )
dup non-terminals get at [
nip
] [
f store-parser [ swap non-terminals get set-at ] keep
] if* ;
GENERIC: (generate-parser) ( ast -- id )
: generate-parser ( ast -- id )
(generate-parser) ;
M: ebnf-terminal (generate-parser) ( ast -- id )
ebnf-terminal-symbol token sp store-parser ;
M: ebnf-non-terminal (generate-parser) ( ast -- id )
[
ebnf-non-terminal-symbol dup non-terminal-index ,
parsers get , \ nth , [ search ] [ 2drop f ] recover , \ or ,
] [ ] make delay sp store-parser ;
M: ebnf-any-character (generate-parser) ( ast -- id )
drop [ drop t ] satisfy store-parser ;
M: ebnf-choice (generate-parser) ( ast -- id )
ebnf-choice-options [
generate-parser get-parser
] map choice store-parser ;
M: ebnf-sequence (generate-parser) ( ast -- id )
ebnf-sequence-elements [
generate-parser get-parser
] map seq store-parser ;
M: ebnf-ensure-not (generate-parser) ( ast -- id )
ebnf-ensure-not-group generate-parser get-parser ensure-not store-parser ;
M: ebnf-repeat0 (generate-parser) ( ast -- id )
ebnf-repeat0-group generate-parser get-parser repeat0 store-parser ;
M: ebnf-repeat1 (generate-parser) ( ast -- id )
ebnf-repeat1-group generate-parser get-parser repeat1 store-parser ;
M: ebnf-optional (generate-parser) ( ast -- id )
ebnf-optional-elements generate-parser get-parser optional store-parser ;
M: ebnf-rule (generate-parser) ( ast -- id )
dup ebnf-rule-symbol non-terminal-index swap
ebnf-rule-elements generate-parser get-parser ! nt-id body
swap [ parsers get set-nth ] keep ;
M: ebnf-action (generate-parser) ( ast -- id )
[ ebnf-action-parser generate-parser get-parser ] keep
ebnf-action-code string-lines parse-lines action store-parser ;
M: vector (generate-parser) ( ast -- id )
[ generate-parser ] map peek ;
M: ebnf (generate-parser) ( ast -- id )
ebnf-rules [
generate-parser
] map peek ;
DEFER: 'rhs'
: syntax ( string -- parser ) : syntax ( string -- parser )
#! Parses the string, ignoring white space, and #! Parses the string, ignoring white space, and
#! does not put the result in the AST. #! does not put the result in the AST.
@ -149,6 +75,7 @@ DEFER: 'rhs'
[ dup CHAR: [ = ] [ dup CHAR: [ = ]
[ dup CHAR: . = ] [ dup CHAR: . = ]
[ dup CHAR: ! = ] [ dup CHAR: ! = ]
[ dup CHAR: & = ]
[ dup CHAR: * = ] [ dup CHAR: * = ]
[ dup CHAR: + = ] [ dup CHAR: + = ]
[ dup CHAR: ? = ] [ dup CHAR: ? = ]
@ -164,6 +91,14 @@ DEFER: 'rhs'
#! A parser to match the symbol for any character match. #! A parser to match the symbol for any character match.
[ CHAR: . = ] satisfy [ drop <ebnf-any-character> ] action ; [ CHAR: . = ] satisfy [ drop <ebnf-any-character> ] action ;
: 'range-parser' ( -- parser )
#! Match the syntax for declaring character ranges
[
[ "[" syntax , "[" token ensure-not , ] seq* hide ,
[ CHAR: ] = not ] satisfy repeat1 ,
"]" syntax ,
] seq* [ first >string <ebnf-range> ] action ;
: 'element' ( -- parser ) : 'element' ( -- parser )
#! An element of a rule. It can be a terminal or a #! An element of a rule. It can be a terminal or a
#! non-terminal but must not be followed by a "=". #! non-terminal but must not be followed by a "=".
@ -173,6 +108,7 @@ DEFER: 'rhs'
[ [
'non-terminal' , 'non-terminal' ,
'terminal' , 'terminal' ,
'range-parser' ,
'any-character' , 'any-character' ,
] choice* , ] choice* ,
"=" syntax ensure-not , "=" syntax ensure-not ,
@ -194,7 +130,6 @@ DEFER: 'choice'
"*" token sp ensure-not , "*" token sp ensure-not ,
"+" token sp ensure-not , "+" token sp ensure-not ,
"?" token sp ensure-not , "?" token sp ensure-not ,
"[[" token sp ensure-not ,
] seq* hide grouped ; ] seq* hide grouped ;
: 'repeat0' ( -- parser ) : 'repeat0' ( -- parser )
@ -212,13 +147,6 @@ DEFER: 'choice'
[ drop t ] satisfy , [ drop t ] satisfy ,
] seq* [ first ] action repeat0 [ >string ] action ; ] seq* [ first ] action repeat0 [ >string ] action ;
: 'action' ( -- parser )
[
"(" [ 'choice' sp ] delay ")" syntax-pack ,
"[[" 'factor-code' "]]" syntax-pack ,
] seq* [ first2 <ebnf-action> ] action ;
: 'ensure-not' ( -- parser ) : 'ensure-not' ( -- parser )
#! Parses the '!' syntax to ensure that #! Parses the '!' syntax to ensure that
#! something that matches the following elements do #! something that matches the following elements do
@ -228,17 +156,37 @@ DEFER: 'choice'
'group' sp , 'group' sp ,
] seq* [ first <ebnf-ensure-not> ] action ; ] seq* [ first <ebnf-ensure-not> ] action ;
: 'sequence' ( -- parser ) : 'ensure' ( -- parser )
#! Parses the '&' syntax to ensure that
#! something that matches the following elements does
#! exist in the parse stream.
[
"&" syntax ,
'group' sp ,
] seq* [ first <ebnf-ensure> ] action ;
: ('sequence') ( -- parser )
#! A sequence of terminals and non-terminals, including #! A sequence of terminals and non-terminals, including
#! groupings of those. #! groupings of those.
[ [
'ensure-not' sp , 'ensure-not' sp ,
'ensure' sp ,
'element' sp , 'element' sp ,
'group' sp , 'group' sp ,
'repeat0' sp , 'repeat0' sp ,
'repeat1' sp , 'repeat1' sp ,
'optional' sp , 'optional' sp ,
'action' sp , ] choice* ;
: 'sequence' ( -- parser )
#! A sequence of terminals and non-terminals, including
#! groupings of those.
[
[
('sequence') ,
"[[" 'factor-code' "]]" syntax-pack ,
] seq* [ first2 <ebnf-action> ] action ,
('sequence') ,
] choice* repeat1 [ ] choice* repeat1 [
dup length 1 = [ first ] [ <ebnf-sequence> ] if dup length 1 = [ first ] [ <ebnf-sequence> ] if
] action ; ] action ;
@ -258,25 +206,84 @@ DEFER: 'choice'
: 'ebnf' ( -- parser ) : 'ebnf' ( -- parser )
'rule' sp repeat1 [ <ebnf> ] action ; 'rule' sp repeat1 [ <ebnf> ] action ;
: ebnf>quot ( string -- quot ) GENERIC: (transform) ( ast -- parser )
'ebnf' parse [
parse-result-ast [ SYMBOL: parser
reset-parser-generation SYMBOL: main
generate-parser drop
[ : transform ( ast -- object )
non-terminals get H{ } clone dup dup [ parser set swap (transform) main set ] bind ;
[
get-parser [ M: ebnf (transform) ( ast -- parser )
swap , \ in , \ get , \ create , ebnf-rules [ (transform) ] map peek ;
1quotation , \ define ,
] [ M: ebnf-rule (transform) ( ast -- parser )
drop dup ebnf-rule-elements (transform) [
] if* swap ebnf-rule-symbol set
] assoc-each ] keep ;
] [ ] make
] with-scope M: ebnf-sequence (transform) ( ast -- parser )
] [ ebnf-sequence-elements [ (transform) ] map seq ;
f
] if* ; M: ebnf-choice (transform) ( ast -- parser )
ebnf-choice-options [ (transform) ] map choice ;
M: ebnf-any-character (transform) ( ast -- parser )
drop any-char ;
M: ebnf-range (transform) ( ast -- parser )
ebnf-range-pattern range-pattern ;
M: ebnf-ensure (transform) ( ast -- parser )
ebnf-ensure-group (transform) ensure ;
M: ebnf-ensure-not (transform) ( ast -- parser )
ebnf-ensure-not-group (transform) ensure-not ;
M: ebnf-repeat0 (transform) ( ast -- parser )
ebnf-repeat0-group (transform) repeat0 ;
M: ebnf-repeat1 (transform) ( ast -- parser )
ebnf-repeat1-group (transform) repeat1 ;
M: ebnf-optional (transform) ( ast -- parser )
ebnf-optional-elements (transform) optional ;
M: ebnf-action (transform) ( ast -- parser )
[ ebnf-action-parser (transform) ] keep
ebnf-action-code string-lines [ parse-lines ] with-compilation-unit action ;
M: ebnf-terminal (transform) ( ast -- parser )
ebnf-terminal-symbol token sp ;
M: ebnf-non-terminal (transform) ( ast -- parser )
ebnf-non-terminal-symbol [
, parser get , \ at ,
] [ ] make delay sp ;
: transform-ebnf ( string -- object )
'ebnf' parse parse-result-ast transform ;
: check-parse-result ( result -- result )
dup [
dup parse-result-remaining empty? [
[
"Unable to fully parse EBNF. Left to parse was: " %
parse-result-remaining %
] "" make throw
] unless
] [
"Could not parse EBNF" throw
] if ;
: ebnf>quot ( string -- hashtable quot )
'ebnf' parse check-parse-result
parse-result-ast transform dup main swap at compile ;
: [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing
: EBNF:
CREATE-WORD dup
";EBNF" parse-multiline-string
ebnf>quot swapd define "ebnf-parser" set-word-prop ; parsing
: <EBNF "EBNF>" parse-multiline-string ebnf>quot call ; parsing

View File

@ -0,0 +1,25 @@
! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license.
!
USING: kernel tools.test peg peg.expr multiline sequences ;
IN: peg.expr.tests
{ 5 } [
"2+3" eval-expr
] unit-test
{ 6 } [
"2*3" eval-expr
] unit-test
{ 14 } [
"2+3*4" eval-expr
] unit-test
{ 17 } [
"2+3*4+3" eval-expr
] unit-test
{ 23 } [
"2+3*(4+3)" eval-expr
] unit-test

View File

@ -9,22 +9,21 @@ IN: peg.expr
#! { operator rhs } in to a tree structure of the correct precedence. #! { operator rhs } in to a tree structure of the correct precedence.
swap [ first2 swap call ] reduce ; swap [ first2 swap call ] reduce ;
<EBNF EBNF: expr
times = "*" [[ drop [ * ] ]]
divide = "/" [[ drop [ / ] ]]
add = "+" [[ drop [ + ] ]]
subtract = "-" [[ drop [ - ] ]]
times = ("*") [[ drop [ * ] ]] digit = [0-9] [[ digit> ]]
divide = ("/") [[ drop [ / ] ]] number = (digit)+ [[ unclip [ swap 10 * + ] reduce ]]
add = ("+") [[ drop [ + ] ]]
subtract = ("-") [[ drop [ - ] ]]
digit = "0" | "1" | "2" | "3" | "4" |
"5" | "6" | "7" | "8" | "9"
number = ((digit)+) [[ concat string>number ]]
value = number | ("(" expr ")") [[ second ]] value = number | ("(" expr ")") [[ second ]]
product = (value ((times | divide) value)*) [[ first2 operator-fold ]] product = (value ((times | divide) value)*) [[ first2 operator-fold ]]
sum = (product ((add | subtract) product)*) [[ first2 operator-fold ]] sum = (product ((add | subtract) product)*) [[ first2 operator-fold ]]
expr = sum expr = sum
EBNF> ;EBNF
: eval-expr ( string -- number ) : eval-expr ( string -- number )
expr parse parse-result-ast ; expr parse-result-ast ;

View File

@ -159,3 +159,21 @@ HELP: 'string'
} { $description } { $description
"Returns a parser that matches an string composed of a \", anything that is not \", and another \"." "Returns a parser that matches an string composed of a \", anything that is not \", and another \"."
} { $see-also 'integer' } ; } { $see-also 'integer' } ;
HELP: range-pattern
{ $values
{ "pattern" "a string" }
{ "parser" "a parser" }
} { $description
"Returns a parser that matches a single character based on the set "
"of characters in the pattern string."
"Any single character in the pattern matches that character. "
"If the pattern begins with a ^ then the set is negated "
"(the element matches any character not in the set). Any pair "
"of characters separated with a dash (-) represents the "
"range of characters from the first to the second, inclusive."
{ $examples
{ $example "USING: peg peg.parsers prettyprint ;" "\"a\" \"_a-zA-Z\" range-pattern parse parse-result-ast 1string ." "\"a\"" }
{ $example "USING: peg peg.parsers prettyprint ;" "\"0\" \"^0-9\" range-pattern parse ." "f" }
}
} ;

View File

@ -1,8 +1,9 @@
! Copyright (C) 2007, 2008 Chris Double, Doug Coleman. ! Copyright (C) 2007, 2008 Chris Double, Doug Coleman.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences strings namespaces math assocs shuffle USING: kernel sequences strings namespaces math assocs shuffle
vectors arrays combinators.lib memoize math.parser match vectors arrays combinators.lib math.parser match
unicode.categories sequences.deep peg peg.private ; unicode.categories sequences.deep peg peg.private
peg.search math.ranges ;
IN: peg.parsers IN: peg.parsers
TUPLE: just-parser p1 ; TUPLE: just-parser p1 ;
@ -18,26 +19,26 @@ TUPLE: just-parser p1 ;
M: just-parser compile ( parser -- quot ) M: just-parser compile ( parser -- quot )
just-parser-p1 compile just-pattern append ; just-parser-p1 compile just-pattern append ;
MEMO: just ( parser -- parser ) : just ( parser -- parser )
just-parser construct-boa init-parser ; just-parser construct-boa ;
MEMO: 1token ( ch -- parser ) 1string token ; : 1token ( ch -- parser ) 1string token ;
<PRIVATE <PRIVATE
MEMO: (list-of) ( items separator repeat1? -- parser ) : (list-of) ( items separator repeat1? -- parser )
>r over 2seq r> [ repeat1 ] [ repeat0 ] if [ concat ] action 2seq >r over 2seq r> [ repeat1 ] [ repeat0 ] if [ concat ] action 2seq
[ unclip 1vector swap first append ] action ; [ unclip 1vector swap first append ] action ;
PRIVATE> PRIVATE>
MEMO: list-of ( items separator -- parser ) : list-of ( items separator -- parser )
hide f (list-of) ; hide f (list-of) ;
MEMO: list-of-many ( items separator -- parser ) : list-of-many ( items separator -- parser )
hide t (list-of) ; hide t (list-of) ;
MEMO: epsilon ( -- parser ) V{ } token ; : epsilon ( -- parser ) V{ } token ;
MEMO: any-char ( -- parser ) [ drop t ] satisfy ; : any-char ( -- parser ) [ drop t ] satisfy ;
<PRIVATE <PRIVATE
@ -46,10 +47,10 @@ MEMO: any-char ( -- parser ) [ drop t ] satisfy ;
PRIVATE> PRIVATE>
MEMO: exactly-n ( parser n -- parser' ) : exactly-n ( parser n -- parser' )
swap <repetition> seq ; swap <repetition> seq ;
MEMO: at-most-n ( parser n -- parser' ) : at-most-n ( parser n -- parser' )
dup zero? [ dup zero? [
2drop epsilon 2drop epsilon
] [ ] [
@ -57,29 +58,56 @@ MEMO: at-most-n ( parser n -- parser' )
-rot 1- at-most-n 2choice -rot 1- at-most-n 2choice
] if ; ] if ;
MEMO: at-least-n ( parser n -- parser' ) : at-least-n ( parser n -- parser' )
dupd exactly-n swap repeat0 2seq dupd exactly-n swap repeat0 2seq
[ flatten-vectors ] action ; [ flatten-vectors ] action ;
MEMO: from-m-to-n ( parser m n -- parser' ) : from-m-to-n ( parser m n -- parser' )
>r [ exactly-n ] 2keep r> swap - at-most-n 2seq >r [ exactly-n ] 2keep r> swap - at-most-n 2seq
[ flatten-vectors ] action ; [ flatten-vectors ] action ;
MEMO: pack ( begin body end -- parser ) : pack ( begin body end -- parser )
>r >r hide r> r> hide 3seq [ first ] action ; >r >r hide r> r> hide 3seq [ first ] action ;
MEMO: surrounded-by ( parser begin end -- parser' ) : surrounded-by ( parser begin end -- parser' )
[ token ] 2apply swapd pack ; [ token ] 2apply swapd pack ;
MEMO: 'digit' ( -- parser ) : 'digit' ( -- parser )
[ digit? ] satisfy [ digit> ] action ; [ digit? ] satisfy [ digit> ] action ;
MEMO: 'integer' ( -- parser ) : 'integer' ( -- parser )
'digit' repeat1 [ 10 digits>integer ] action ; 'digit' repeat1 [ 10 digits>integer ] action ;
MEMO: 'string' ( -- parser ) : 'string' ( -- parser )
[ [
[ CHAR: " = ] satisfy hide , [ CHAR: " = ] satisfy hide ,
[ CHAR: " = not ] satisfy repeat0 , [ CHAR: " = not ] satisfy repeat0 ,
[ CHAR: " = ] satisfy hide , [ CHAR: " = ] satisfy hide ,
] { } make seq [ first >string ] action ; ] { } make seq [ first >string ] action ;
: (range-pattern) ( pattern -- string )
#! Given a range pattern, produce a string containing
#! all characters within that range.
[
any-char ,
[ CHAR: - = ] satisfy hide ,
any-char ,
] seq* [
first2 [a,b] >string
] action
replace ;
: range-pattern ( pattern -- parser )
#! 'pattern' is a set of characters describing the
#! parser to be produced. Any single character in
#! the pattern matches that character. If the pattern
#! begins with a ^ then the set is negated (the element
#! matches any character not in the set). Any pair of
#! characters separated with a dash (-) represents the
#! range of characters from the first to the second,
#! inclusive.
dup first CHAR: ^ = [
1 tail (range-pattern) [ member? not ] curry satisfy
] [
(range-pattern) [ member? ] curry satisfy
] if ;

View File

@ -4,10 +4,6 @@
USING: kernel tools.test strings namespaces arrays sequences peg peg.private ; USING: kernel tools.test strings namespaces arrays sequences peg peg.private ;
IN: peg.tests IN: peg.tests
{ 0 1 2 } [
0 next-id set-global get-next-id get-next-id get-next-id
] unit-test
{ f } [ { f } [
"endbegin" "begin" token parse "endbegin" "begin" token parse
] unit-test ] unit-test

View File

@ -1,7 +1,7 @@
! Copyright (C) 2007 Chris Double. ! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences strings namespaces math assocs shuffle USING: kernel sequences strings namespaces math assocs shuffle
vectors arrays combinators.lib memoize math.parser match vectors arrays combinators.lib math.parser match
unicode.categories sequences.lib compiler.units parser unicode.categories sequences.lib compiler.units parser
words ; words ;
IN: peg IN: peg
@ -10,70 +10,14 @@ TUPLE: parse-result remaining ast ;
GENERIC: compile ( parser -- quot ) GENERIC: compile ( parser -- quot )
: (parse) ( state parser -- result ) : parse ( state parser -- result )
compile call ; compile call ;
<PRIVATE
SYMBOL: packrat-cache
SYMBOL: ignore SYMBOL: ignore
SYMBOL: not-in-cache
: not-in-cache? ( result -- ? )
not-in-cache = ;
: <parse-result> ( remaining ast -- parse-result ) : <parse-result> ( remaining ast -- parse-result )
parse-result construct-boa ; parse-result construct-boa ;
SYMBOL: next-id
: get-next-id ( -- number )
next-id get-global 0 or dup 1+ next-id set-global ;
TUPLE: parser id ;
: init-parser ( parser -- parser )
get-next-id parser construct-boa over set-delegate ;
: from ( slice-or-string -- index )
dup slice? [ slice-from ] [ drop 0 ] if ;
: get-cached ( input parser -- result )
[ from ] dip parser-id packrat-cache get at at* [
drop not-in-cache
] unless ;
: put-cached ( result input parser -- )
parser-id dup packrat-cache get at [
nip
] [
H{ } clone dup >r swap packrat-cache get set-at r>
] if*
[ from ] dip set-at ;
PRIVATE>
: parse ( input parser -- result )
packrat-cache get [
2dup get-cached dup not-in-cache? [
! "cache missed: " write over parser-id number>string write " - " write nl ! pick .
drop
#! Protect against left recursion blowing the callstack
#! by storing a failed parse in the cache.
[ f ] dipd [ put-cached ] 2keep
[ (parse) dup ] 2keep put-cached
] [
! "cache hit: " write over parser-id number>string write " - " write nl ! pick .
2nip
] if
] [
(parse)
] if ;
: packrat-parse ( input parser -- result )
H{ } clone packrat-cache [ parse ] with-variable ;
<PRIVATE <PRIVATE
TUPLE: token-parser symbol ; TUPLE: token-parser symbol ;
@ -295,17 +239,17 @@ M: delay-parser compile ( parser -- quot )
PRIVATE> PRIVATE>
MEMO: token ( string -- parser ) : token ( string -- parser )
token-parser construct-boa init-parser ; token-parser construct-boa ;
: satisfy ( quot -- parser ) : satisfy ( quot -- parser )
satisfy-parser construct-boa init-parser ; satisfy-parser construct-boa ;
MEMO: range ( min max -- parser ) : range ( min max -- parser )
range-parser construct-boa init-parser ; range-parser construct-boa ;
: seq ( seq -- parser ) : seq ( seq -- parser )
seq-parser construct-boa init-parser ; seq-parser construct-boa ;
: 2seq ( parser1 parser2 -- parser ) : 2seq ( parser1 parser2 -- parser )
2array seq ; 2array seq ;
@ -320,7 +264,7 @@ MEMO: range ( min max -- parser )
{ } make seq ; inline { } make seq ; inline
: choice ( seq -- parser ) : choice ( seq -- parser )
choice-parser construct-boa init-parser ; choice-parser construct-boa ;
: 2choice ( parser1 parser2 -- parser ) : 2choice ( parser1 parser2 -- parser )
2array choice ; 2array choice ;
@ -334,32 +278,32 @@ MEMO: range ( min max -- parser )
: choice* ( quot -- paser ) : choice* ( quot -- paser )
{ } make choice ; inline { } make choice ; inline
MEMO: repeat0 ( parser -- parser ) : repeat0 ( parser -- parser )
repeat0-parser construct-boa init-parser ; repeat0-parser construct-boa ;
MEMO: repeat1 ( parser -- parser ) : repeat1 ( parser -- parser )
repeat1-parser construct-boa init-parser ; repeat1-parser construct-boa ;
MEMO: optional ( parser -- parser ) : optional ( parser -- parser )
optional-parser construct-boa init-parser ; optional-parser construct-boa ;
MEMO: ensure ( parser -- parser ) : ensure ( parser -- parser )
ensure-parser construct-boa init-parser ; ensure-parser construct-boa ;
MEMO: ensure-not ( parser -- parser ) : ensure-not ( parser -- parser )
ensure-not-parser construct-boa init-parser ; ensure-not-parser construct-boa ;
: action ( parser quot -- parser ) : action ( parser quot -- parser )
action-parser construct-boa init-parser ; action-parser construct-boa ;
MEMO: sp ( parser -- parser ) : sp ( parser -- parser )
sp-parser construct-boa init-parser ; sp-parser construct-boa ;
MEMO: hide ( parser -- parser ) : hide ( parser -- parser )
[ drop ignore ] action ; [ drop ignore ] action ;
MEMO: delay ( quot -- parser ) : delay ( quot -- parser )
delay-parser construct-boa init-parser ; delay-parser construct-boa ;
: PEG: : PEG:
(:) [ (:) [

View File

@ -4,14 +4,6 @@
USING: kernel tools.test peg peg.pl0 multiline sequences ; USING: kernel tools.test peg peg.pl0 multiline sequences ;
IN: peg.pl0.tests IN: peg.pl0.tests
{ "abc" } [
"abc" ident parse parse-result-ast
] unit-test
{ 55 } [
"55abc" number parse parse-result-ast
] unit-test
{ t } [ { t } [
<" <"
VAR x, squ; VAR x, squ;
@ -29,7 +21,7 @@ BEGIN
x := x + 1; x := x + 1;
END END
END. END.
"> program parse parse-result-remaining empty? "> pl0 parse-result-remaining empty?
] unit-test ] unit-test
{ f } [ { f } [
@ -95,5 +87,5 @@ BEGIN
y := 36; y := 36;
CALL gcd; CALL gcd;
END. END.
"> program parse parse-result-remaining empty? "> pl0 parse-result-remaining empty?
] unit-test ] unit-test

View File

@ -1,31 +1,26 @@
! Copyright (C) 2007 Chris Double. ! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel arrays strings math.parser sequences USING: kernel arrays strings math.parser sequences
peg peg.ebnf peg.parsers memoize namespaces ; peg peg.ebnf peg.parsers memoize namespaces math ;
IN: peg.pl0 IN: peg.pl0
#! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0 #! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0
MEMO: ident ( -- parser )
[
CHAR: a CHAR: z range ,
CHAR: A CHAR: Z range ,
] choice* repeat1 [ >string ] action ;
MEMO: number ( -- parser ) EBNF: pl0
CHAR: 0 CHAR: 9 range repeat1 [ string>number ] action ; block = ( "CONST" ident "=" number ( "," ident "=" number )* ";" )?
( "VAR" ident ( "," ident )* ";" )?
<EBNF ( "PROCEDURE" ident ";" ( block ";" )? )* statement
program = block "." statement = ( ident ":=" expression | "CALL" ident |
block = [ "CONST" ident "=" number { "," ident "=" number } ";" ] "BEGIN" statement (";" statement )* "END" |
[ "VAR" ident { "," ident } ";" ]
{ "PROCEDURE" ident ";" [ block ";" ] } statement
statement = [ ident ":=" expression | "CALL" ident |
"BEGIN" statement {";" statement } "END" |
"IF" condition "THEN" statement | "IF" condition "THEN" statement |
"WHILE" condition "DO" statement ] "WHILE" condition "DO" statement )?
condition = "ODD" expression | condition = "ODD" expression |
expression ("=" | "#" | "<=" | "<" | ">=" | ">") expression expression ("=" | "#" | "<=" | "<" | ">=" | ">") expression
expression = ["+" | "-"] term {("+" | "-") term } expression = ("+" | "-")? term (("+" | "-") term )*
term = factor {("*" | "/") factor } term = factor (("*" | "/") factor )*
factor = ident | number | "(" expression ")" factor = ident | number | "(" expression ")"
EBNF> ident = (([a-zA-Z])+) [[ >string ]]
digit = ([0-9]) [[ digit> ]]
number = ((digit)+) [[ unclip [ swap 10 * + ] reduce ]]
program = block "."
;EBNF