Rewrite peg internals

db4
Chris Double 2008-03-28 23:20:43 +13:00
parent 1d87e513f5
commit 4b353c7529
3 changed files with 104 additions and 283 deletions

View File

@ -12,41 +12,7 @@ HELP: parse
{ $description
"Given the input string, parse it using the given parser. The result is a <parse-result> object if "
"the parse was successful, otherwise it is f." }
{ $see-also compile with-packrat packrat-parse } ;
HELP: with-packrat
{ $values
{ "quot" "a quotation with stack effect ( input -- result )" }
{ "result" "the result of the quotation" }
}
{ $description
"Calls the quotation with a packrat cache in scope. Usually the quotation will "
"call " { $link parse } " or call a word produced by " { $link compile } "."
"The cache is used to avoid the possible exponential time performace that pegs "
"can have, instead giving linear time at the cost of increased memory usage. "
"Use of this packrat option also allows direct and indirect recursion to "
"be handled in the parser without entering an infinite loop." }
{ $see-also compile parse packrat-parse packrat-call } ;
HELP: packrat-parse
{ $values
{ "input" "a string" }
{ "parser" "a parser" }
{ "result" "a parse-result or f" }
}
{ $description
"Compiles and calls the parser with a packrat cache in scope." }
{ $see-also compile parse packrat-call with-packrat } ;
HELP: packrat-call
{ $values
{ "input" "a string" }
{ "quot" "a quotation with stack effect ( input -- result )" }
{ "result" "a parse-result or f" }
}
{ $description
"Calls the compiled parser with a packrat cache in scope." }
{ $see-also compile packrat-call packrat-parse with-packrat } ;
{ $see-also compile } ;
HELP: compile
{ $values
@ -54,11 +20,12 @@ HELP: compile
{ "word" "a word" }
}
{ $description
"Compile the parser to a word. The word will have stack effect ( input -- result )."
"Compile the parser to a word. The word will have stack effect ( -- result )."
"The mapping from parser to compiled word is kept in a cache. If you later change "
"the definition of a parser you'll need to clear this cache with "
{ $link reset-compiled-parsers } " before using " { $link compile } " on that parser again." }
{ $see-also compile with-packrat reset-compiled-parsers packrat-call packrat-parse } ;
{ $link reset-compiled-parsers } " before using " { $link compile } " on that parser again."
}
{ $see-also parse } ;
HELP: reset-compiled-parsers
{ $description

View File

@ -168,32 +168,13 @@ IN: peg.tests
"1+1" swap parse parse-result-ast
] unit-test
{ V{ "1" "-" "1" } V{ "1" "+" "1" } } [
[
[
[ "1" token , "-" token , "1" token , ] seq* ,
[ "1" token , "+" token , "1" token , ] seq* ,
] choice*
"1-1" over parse parse-result-ast swap
] with-packrat
[
"1+1" swap parse parse-result-ast
] with-packrat
] unit-test
: expr ( -- parser )
#! Test direct left recursion. Currently left recursion should cause a
#! failure of that parser.
[ expr ] delay "+" token "1" token 3seq "1" token 2choice ;
[
#! Not using packrat, so recursion causes data stack overflow
"1+1" expr parse parse-result-ast
] must-fail
{ "1" } [
#! Using packrat, so expr fails, causing the 2nd choice to be used.
"1+1" expr [ parse ] with-packrat parse-result-ast
"1+1" expr parse parse-result-ast
] unit-test
{ t } [

View File

@ -7,6 +7,8 @@ USING: kernel sequences strings namespaces math assocs shuffle
combinators.cleave locals ;
IN: peg
USE: prettyprint
TUPLE: parse-result remaining ast ;
SYMBOL: ignore
@ -15,18 +17,83 @@ SYMBOL: ignore
parse-result construct-boa ;
SYMBOL: packrat
SYMBOL: lrstack
SYMBOL: pos
SYMBOL: input
SYMBOL: fail
TUPLE: phead rule involved-set eval-set ;
C: <head> phead
TUPLE: memo-entry ans pos ;
C: <memo-entry> memo-entry
: rule-parser ( rule -- parser )
#! A rule is the parser compiled down to a word. It has
#! a "peg" property containing the original parser.
"peg" word-prop ;
: input-slice ( -- slice )
#! Return a slice of the input from the current parse position
input get pos get tail-slice ;
: input-from ( input -- n )
#! Return the index from the original string that the
#! input slice is based on.
dup slice? [ slice-from ] [ drop 0 ] if ;
: heads ( input -- h )
input-from \ heads get at ;
: input-cache ( parser -- cache )
#! From the packrat cache, obtain the cache for the parser
#! that maps the position to the parser result.
id>> packrat get [ drop H{ } clone ] cache ;
: eval-rule ( rule -- ast )
#! Evaluate a rule, return an ast resulting from it.
#! Return fail if the rule failed. The rule has
#! stack effect ( input -- parse-result )
pos get swap
execute [
nip
[ ast>> ] [ remaining>> ] bi
input-from pos set
] [
pos set
fail
] if* ;
: memo ( pos rule -- memo-entry )
#! Return the result from the memo cache.
rule-parser input-cache at ;
: set-memo ( memo-entry pos rule -- )
#! Store an entry in the cache
rule-parser input-cache set-at ;
:: apply-non-memo-rule ( r p -- ast )
[let* |
ans [ r eval-rule ]
m [ ans pos get <memo-entry> ]
|
m p r set-memo ans
] ;
: apply-memo-rule ( m -- ast )
[ ans>> ] [ pos>> ] bi pos set ;
:: apply-rule ( r p -- ast )
[let* |
m [ p r memo ]
|
m [
m apply-memo-rule
] [
r p apply-non-memo-rule
] if
] ;
: with-packrat ( input quot -- result )
#! Run the quotation with a packrat cache active.
swap [
input set
0 pos set
H{ } clone packrat set
] H{ } make-assoc swap bind ;
: compiled-parsers ( -- cache )
@ -35,203 +102,21 @@ C: <head> phead
: reset-compiled-parsers ( -- )
H{ } clone \ compiled-parsers set-global ;
reset-compiled-parsers
GENERIC: (compile) ( parser -- quot )
: input-cache ( id -- cache )
#! From the packrat cache, obtain the cache for the parser quotation
#! that maps the input string position to the parser result.
packrat get [ drop H{ } clone ] cache ;
TUPLE: left-recursion seed rule head next ;
C: <left-recursion> left-recursion
USE: prettyprint
USE: io
:: handle-left-recursive-result ( result -- result )
#! If the result is from a left-recursive call,
#! note this and fail, otherwise return normal result
#! See figure 4 of packrat_TR-2007-002.pdf.
">>handle-left-recursive-result " write result .
result [
[let* | ast [ result ast>> ] |
ast left-recursion? [ t ast (>>detected?) f ] [ result ] if
]
] [
f
] if
"<<handle-left-recursive-result " write dup . ;
:: (grow-lr) ( input quot parser m h -- result )
#! 'Grow the Seed' algorithm to handle left recursion
">>(grow-lr) " write input . " for parser " write parser . " m is " write m .
[let* |
pos [ input ]
ans [ h involved-set>> clone h (>>eval-set) input quot call ]
|
[ ans not ] [ ans [ pos input-from m remaining>> input-from <= ] [ f ] if ] 2array || [
"recursion exiting with = " write ans . "m was " write m .
m
] [
"recursion with = " write ans .
pos quot parser pos ans ast>> <parse-result> h (grow-lr)
] if
]
"<<(grow-lr) " write input . " for parser " write parser . " m is " write m . " result is " write dup .
;
:: grow-lr ( input quot parser m h -- result )
h input input-from \ heads get set-at
input quot parser m h (grow-lr)
f input input-from \ heads get set-at ;
SYMBOL: not-found
: memo ( parser input -- result )
input-from swap id>> input-cache at* [ drop not-found ] unless ;
:: involved? ( parser h -- ? )
h rule>> parser = [
t
] [
parser h involved-set>> member?
] if ;
:: recall ( input quot parser -- result )
[let* |
m [ parser input memo ]
h [ input heads ]
|
#! If not growing a seed pass, just return what is stored
#! in the memo table.
h [
m not-found = parser h involved? not and [
f
] [
parser h eval-set>> member? [
parser h eval-set>> remove h (>>eval-set)
input quot call
] [
m
] if
] if
] [
m
] if
] ;
:: (setup-lr) ( parser l s -- )
s head>> l head>> = [
l head>> s (>>head)
l head>> [ s rule>> add ] change-involved-set drop
parser l s next>> (setup-lr)
] unless ;
:: setup-lr ( parser l -- )
[let* |
s [ lrstack get ]
|
l head>> [ parser V{ } clone V{ } clone <head> l (>>head) ] unless
parser l s (setup-lr)
] ;
:: lr-answer ( quot parser input m -- result )
[let* |
h [ m ast>> head>> ]
|
h rule>> parser = [
"changing memo ast to seed " write
m [ seed>> ast>> dup . ] change-ast drop
m input input-from parser id>> input-cache set-at
m ast>> not [
f
] [
input quot parser m h grow-lr
] if
] [
m ast>> seed>>
] if
] ;
:: (apply-rule) ( quot parser input -- result )
[let* |
lr [ f parser f lrstack get <left-recursion> ]
m [ lr lrstack set input lr <parse-result> ]
ans [ m input input-from parser id>> input-cache set-at input quot call ]
|
lrstack get next>> lrstack set
lr head>> [
"setting seed to ans " write ans .
ans lr (>>seed)
quot parser input m lr-answer
] [
ans
] if
] ;
:: apply-rule ( quot parser input -- result )
[let* |
m [ input quot parser recall ]
|
m not-found = [
quot parser input (apply-rule)
dup input input-from parser id>> input-cache set-at
] [
m [
m ast>> left-recursion? [
"Found left recursion..." print
parser m ast>> setup-lr m remaining>> m ast>> seed>> <parse-result>
dup input input-from parser id>> input-cache set-at
] [
m
dup input input-from parser id>> input-cache set-at
] if
] [
f f input input-from parser id>> input-cache set-at
] if
] if
] ;
:: cached-result ( input-cache input quot parser -- result )
#! Get the cached result for input position
#! from the input cache. If the item is not in the cache,
#! call 'quot' with 'input' on the stack to get the result
#! and store that in the cache and return it.
#! See figure 4 of packrat_TR-2007-002.pdf.
">>cached-result " write input . " for parser " write parser .
input input-from input-cache [
drop
[let* | lr [ f <left-recursion> ]
m [ input lr <parse-result> ]
ans [ m input input-from input-cache set-at input quot call ]
|
"--lr is " write lr . " ans is " write ans . " for parser " write parser .
ans input input-from input-cache set-at
lr detected?>> ans and [
input quot parser ans grow-lr
] [
ans
] if
]
] cache
dup [ handle-left-recursive-result ] when
"<<cached-result " write dup . " for parser " write parser . ;
:: run-packrat-parser ( input quot parser -- result )
quot parser input apply-rule ;
:: parser-body ( parser -- quot )
#! Return the body of the word that is the compiled version
#! of the parser.
[let* | parser-quot [ parser (compile) ]
[let* | rule [ parser (compile) define-temp dup parser "peg" set-word-prop ]
|
[
packrat get [
parser-quot parser run-packrat-parser
rule pos get apply-rule dup fail = [
drop f
] [
parser-quot call
input-slice swap <parse-result>
] if
]
] ;
@ -253,17 +138,8 @@ SYMBOL: not-found
[ compiled-parser ] with-compilation-unit ;
: parse ( state parser -- result )
compile execute ; inline
: with-packrat ( quot -- result )
#! Run the quotation with a packrat cache active.
H{ } clone \ heads [ [ H{ } clone packrat ] dip with-variable ] with-variable ; inline
: packrat-parse ( state parser -- result )
[ parse ] with-packrat ;
: packrat-call ( state quot -- result )
with-packrat ; inline
dup word? [ compile ] unless
[ execute ] curry with-packrat ;
<PRIVATE
@ -287,6 +163,8 @@ C: <parser> parser
: reset-delegates ( -- )
H{ } clone \ delegates set-global ;
reset-delegates
: init-parser ( parser -- parser )
#! Set the delegate for the parser. Equivalent parsers
#! get a delegate with the same id.
@ -307,15 +185,15 @@ MATCH-VARS: ?token ;
] if ;
M: token-parser (compile) ( parser -- quot )
symbol>> [ parse-token ] curry ;
[ \ input-slice , symbol>> , \ parse-token , ] [ ] make ;
TUPLE: satisfy-parser quot ;
MATCH-VARS: ?quot ;
: satisfy-pattern ( -- quot )
[
dup empty? [
input-slice dup empty? [
drop f
] [
unclip-slice dup ?quot call [
@ -335,7 +213,7 @@ MATCH-VARS: ?min ?max ;
: range-pattern ( -- quot )
[
dup empty? [
input-slice dup empty? [
drop f
] [
0 over nth dup
@ -355,7 +233,7 @@ TUPLE: seq-parser parsers ;
: seq-pattern ( -- quot )
[
dup [
dup remaining>> ?quot [
?quot [
[ remaining>> swap (>>remaining) ] 2keep
ast>> dup ignore = [
drop
@ -372,7 +250,7 @@ TUPLE: seq-parser parsers ;
M: seq-parser (compile) ( parser -- quot )
[
[ V{ } clone <parse-result> ] %
[ input-slice V{ } clone <parse-result> ] %
parsers>> [ compiled-parser \ ?quot seq-pattern match-replace % ] each
] [ ] make ;
@ -380,24 +258,19 @@ TUPLE: choice-parser parsers ;
: choice-pattern ( -- quot )
[
dup [
] [
drop dup ?quot
] if
[ ?quot ] unless*
] ;
M: choice-parser (compile) ( parser -- quot )
[
[
f ,
parsers>> [ compiled-parser \ ?quot choice-pattern match-replace % ] each
\ nip ,
] [ ] make ;
TUPLE: repeat0-parser p1 ;
: (repeat0) ( quot result -- result )
2dup remaining>> swap call [
over call [
[ remaining>> swap (>>remaining) ] 2keep
ast>> swap [ ast>> push ] keep
(repeat0)
@ -412,7 +285,7 @@ TUPLE: repeat0-parser p1 ;
M: repeat0-parser (compile) ( parser -- quot )
[
[ V{ } clone <parse-result> ] %
[ input-slice V{ } clone <parse-result> ] %
p1>> compiled-parser \ ?quot repeat0-pattern match-replace %
] [ ] make ;
@ -431,7 +304,7 @@ TUPLE: repeat1-parser p1 ;
M: repeat1-parser (compile) ( parser -- quot )
[
[ V{ } clone <parse-result> ] %
[ input-slice V{ } clone <parse-result> ] %
p1>> compiled-parser \ ?quot repeat1-pattern match-replace %
] [ ] make ;
@ -439,7 +312,7 @@ TUPLE: optional-parser p1 ;
: optional-pattern ( -- quot )
[
dup ?quot swap f <parse-result> or
?quot [ input-slice f <parse-result> ] unless*
] ;
M: optional-parser (compile) ( parser -- quot )
@ -449,7 +322,7 @@ TUPLE: ensure-parser p1 ;
: ensure-pattern ( -- quot )
[
dup ?quot [
input-slice ?quot [
ignore <parse-result>
] [
drop f
@ -463,7 +336,7 @@ TUPLE: ensure-not-parser p1 ;
: ensure-not-pattern ( -- quot )
[
dup ?quot [
input-slice ?quot [
drop f
] [
ignore <parse-result>
@ -486,7 +359,7 @@ MATCH-VARS: ?action ;
] ;
M: action-parser (compile) ( parser -- quot )
{ [ p1>> ] [ quot>> ] } cleave [ compiled-parser ] dip
[ p1>> compiled-parser ] [ quot>> ] bi
2array { ?quot ?action } action-pattern match-replace ;
: left-trim-slice ( string -- string )
@ -500,7 +373,7 @@ TUPLE: sp-parser p1 ;
M: sp-parser (compile) ( parser -- quot )
[
\ left-trim-slice , p1>> compiled-parser ,
\ input-slice , \ left-trim-slice , \ input-from , \ pos , \ set , p1>> compiled-parser ,
] [ ] make ;
TUPLE: delay-parser quot ;