Rewrite peg internals
parent
1d87e513f5
commit
4b353c7529
|
@ -12,41 +12,7 @@ HELP: parse
|
|||
{ $description
|
||||
"Given the input string, parse it using the given parser. The result is a <parse-result> object if "
|
||||
"the parse was successful, otherwise it is f." }
|
||||
{ $see-also compile with-packrat packrat-parse } ;
|
||||
|
||||
HELP: with-packrat
|
||||
{ $values
|
||||
{ "quot" "a quotation with stack effect ( input -- result )" }
|
||||
{ "result" "the result of the quotation" }
|
||||
}
|
||||
{ $description
|
||||
"Calls the quotation with a packrat cache in scope. Usually the quotation will "
|
||||
"call " { $link parse } " or call a word produced by " { $link compile } "."
|
||||
"The cache is used to avoid the possible exponential time performace that pegs "
|
||||
"can have, instead giving linear time at the cost of increased memory usage. "
|
||||
"Use of this packrat option also allows direct and indirect recursion to "
|
||||
"be handled in the parser without entering an infinite loop." }
|
||||
{ $see-also compile parse packrat-parse packrat-call } ;
|
||||
|
||||
HELP: packrat-parse
|
||||
{ $values
|
||||
{ "input" "a string" }
|
||||
{ "parser" "a parser" }
|
||||
{ "result" "a parse-result or f" }
|
||||
}
|
||||
{ $description
|
||||
"Compiles and calls the parser with a packrat cache in scope." }
|
||||
{ $see-also compile parse packrat-call with-packrat } ;
|
||||
|
||||
HELP: packrat-call
|
||||
{ $values
|
||||
{ "input" "a string" }
|
||||
{ "quot" "a quotation with stack effect ( input -- result )" }
|
||||
{ "result" "a parse-result or f" }
|
||||
}
|
||||
{ $description
|
||||
"Calls the compiled parser with a packrat cache in scope." }
|
||||
{ $see-also compile packrat-call packrat-parse with-packrat } ;
|
||||
{ $see-also compile } ;
|
||||
|
||||
HELP: compile
|
||||
{ $values
|
||||
|
@ -54,11 +20,12 @@ HELP: compile
|
|||
{ "word" "a word" }
|
||||
}
|
||||
{ $description
|
||||
"Compile the parser to a word. The word will have stack effect ( input -- result )."
|
||||
"Compile the parser to a word. The word will have stack effect ( -- result )."
|
||||
"The mapping from parser to compiled word is kept in a cache. If you later change "
|
||||
"the definition of a parser you'll need to clear this cache with "
|
||||
{ $link reset-compiled-parsers } " before using " { $link compile } " on that parser again." }
|
||||
{ $see-also compile with-packrat reset-compiled-parsers packrat-call packrat-parse } ;
|
||||
{ $link reset-compiled-parsers } " before using " { $link compile } " on that parser again."
|
||||
}
|
||||
{ $see-also parse } ;
|
||||
|
||||
HELP: reset-compiled-parsers
|
||||
{ $description
|
||||
|
|
|
@ -168,32 +168,13 @@ IN: peg.tests
|
|||
"1+1" swap parse parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ V{ "1" "-" "1" } V{ "1" "+" "1" } } [
|
||||
[
|
||||
[
|
||||
[ "1" token , "-" token , "1" token , ] seq* ,
|
||||
[ "1" token , "+" token , "1" token , ] seq* ,
|
||||
] choice*
|
||||
"1-1" over parse parse-result-ast swap
|
||||
] with-packrat
|
||||
[
|
||||
"1+1" swap parse parse-result-ast
|
||||
] with-packrat
|
||||
] unit-test
|
||||
|
||||
: expr ( -- parser )
|
||||
#! Test direct left recursion. Currently left recursion should cause a
|
||||
#! failure of that parser.
|
||||
[ expr ] delay "+" token "1" token 3seq "1" token 2choice ;
|
||||
|
||||
[
|
||||
#! Not using packrat, so recursion causes data stack overflow
|
||||
"1+1" expr parse parse-result-ast
|
||||
] must-fail
|
||||
|
||||
{ "1" } [
|
||||
#! Using packrat, so expr fails, causing the 2nd choice to be used.
|
||||
"1+1" expr [ parse ] with-packrat parse-result-ast
|
||||
"1+1" expr parse parse-result-ast
|
||||
] unit-test
|
||||
|
||||
{ t } [
|
||||
|
|
|
@ -7,6 +7,8 @@ USING: kernel sequences strings namespaces math assocs shuffle
|
|||
combinators.cleave locals ;
|
||||
IN: peg
|
||||
|
||||
USE: prettyprint
|
||||
|
||||
TUPLE: parse-result remaining ast ;
|
||||
|
||||
SYMBOL: ignore
|
||||
|
@ -15,18 +17,83 @@ SYMBOL: ignore
|
|||
parse-result construct-boa ;
|
||||
|
||||
SYMBOL: packrat
|
||||
SYMBOL: lrstack
|
||||
SYMBOL: pos
|
||||
SYMBOL: input
|
||||
SYMBOL: fail
|
||||
|
||||
TUPLE: phead rule involved-set eval-set ;
|
||||
C: <head> phead
|
||||
TUPLE: memo-entry ans pos ;
|
||||
C: <memo-entry> memo-entry
|
||||
|
||||
: rule-parser ( rule -- parser )
|
||||
#! A rule is the parser compiled down to a word. It has
|
||||
#! a "peg" property containing the original parser.
|
||||
"peg" word-prop ;
|
||||
|
||||
: input-slice ( -- slice )
|
||||
#! Return a slice of the input from the current parse position
|
||||
input get pos get tail-slice ;
|
||||
|
||||
: input-from ( input -- n )
|
||||
#! Return the index from the original string that the
|
||||
#! input slice is based on.
|
||||
dup slice? [ slice-from ] [ drop 0 ] if ;
|
||||
|
||||
: heads ( input -- h )
|
||||
input-from \ heads get at ;
|
||||
: input-cache ( parser -- cache )
|
||||
#! From the packrat cache, obtain the cache for the parser
|
||||
#! that maps the position to the parser result.
|
||||
id>> packrat get [ drop H{ } clone ] cache ;
|
||||
|
||||
: eval-rule ( rule -- ast )
|
||||
#! Evaluate a rule, return an ast resulting from it.
|
||||
#! Return fail if the rule failed. The rule has
|
||||
#! stack effect ( input -- parse-result )
|
||||
pos get swap
|
||||
execute [
|
||||
nip
|
||||
[ ast>> ] [ remaining>> ] bi
|
||||
input-from pos set
|
||||
] [
|
||||
pos set
|
||||
fail
|
||||
] if* ;
|
||||
|
||||
: memo ( pos rule -- memo-entry )
|
||||
#! Return the result from the memo cache.
|
||||
rule-parser input-cache at ;
|
||||
|
||||
: set-memo ( memo-entry pos rule -- )
|
||||
#! Store an entry in the cache
|
||||
rule-parser input-cache set-at ;
|
||||
|
||||
:: apply-non-memo-rule ( r p -- ast )
|
||||
[let* |
|
||||
ans [ r eval-rule ]
|
||||
m [ ans pos get <memo-entry> ]
|
||||
|
|
||||
m p r set-memo ans
|
||||
] ;
|
||||
|
||||
: apply-memo-rule ( m -- ast )
|
||||
[ ans>> ] [ pos>> ] bi pos set ;
|
||||
|
||||
:: apply-rule ( r p -- ast )
|
||||
[let* |
|
||||
m [ p r memo ]
|
||||
|
|
||||
m [
|
||||
m apply-memo-rule
|
||||
] [
|
||||
r p apply-non-memo-rule
|
||||
] if
|
||||
] ;
|
||||
|
||||
: with-packrat ( input quot -- result )
|
||||
#! Run the quotation with a packrat cache active.
|
||||
swap [
|
||||
input set
|
||||
0 pos set
|
||||
H{ } clone packrat set
|
||||
] H{ } make-assoc swap bind ;
|
||||
|
||||
|
||||
: compiled-parsers ( -- cache )
|
||||
|
@ -35,203 +102,21 @@ C: <head> phead
|
|||
: reset-compiled-parsers ( -- )
|
||||
H{ } clone \ compiled-parsers set-global ;
|
||||
|
||||
reset-compiled-parsers
|
||||
|
||||
GENERIC: (compile) ( parser -- quot )
|
||||
|
||||
: input-cache ( id -- cache )
|
||||
#! From the packrat cache, obtain the cache for the parser quotation
|
||||
#! that maps the input string position to the parser result.
|
||||
packrat get [ drop H{ } clone ] cache ;
|
||||
|
||||
TUPLE: left-recursion seed rule head next ;
|
||||
C: <left-recursion> left-recursion
|
||||
|
||||
USE: prettyprint
|
||||
USE: io
|
||||
|
||||
|
||||
:: handle-left-recursive-result ( result -- result )
|
||||
#! If the result is from a left-recursive call,
|
||||
#! note this and fail, otherwise return normal result
|
||||
#! See figure 4 of packrat_TR-2007-002.pdf.
|
||||
">>handle-left-recursive-result " write result .
|
||||
result [
|
||||
[let* | ast [ result ast>> ] |
|
||||
ast left-recursion? [ t ast (>>detected?) f ] [ result ] if
|
||||
]
|
||||
] [
|
||||
f
|
||||
] if
|
||||
"<<handle-left-recursive-result " write dup . ;
|
||||
|
||||
:: (grow-lr) ( input quot parser m h -- result )
|
||||
#! 'Grow the Seed' algorithm to handle left recursion
|
||||
">>(grow-lr) " write input . " for parser " write parser . " m is " write m .
|
||||
[let* |
|
||||
pos [ input ]
|
||||
ans [ h involved-set>> clone h (>>eval-set) input quot call ]
|
||||
|
|
||||
[ ans not ] [ ans [ pos input-from m remaining>> input-from <= ] [ f ] if ] 2array || [
|
||||
"recursion exiting with = " write ans . "m was " write m .
|
||||
m
|
||||
] [
|
||||
"recursion with = " write ans .
|
||||
pos quot parser pos ans ast>> <parse-result> h (grow-lr)
|
||||
] if
|
||||
]
|
||||
"<<(grow-lr) " write input . " for parser " write parser . " m is " write m . " result is " write dup .
|
||||
;
|
||||
|
||||
:: grow-lr ( input quot parser m h -- result )
|
||||
h input input-from \ heads get set-at
|
||||
input quot parser m h (grow-lr)
|
||||
f input input-from \ heads get set-at ;
|
||||
|
||||
SYMBOL: not-found
|
||||
|
||||
: memo ( parser input -- result )
|
||||
input-from swap id>> input-cache at* [ drop not-found ] unless ;
|
||||
|
||||
|
||||
:: involved? ( parser h -- ? )
|
||||
h rule>> parser = [
|
||||
t
|
||||
] [
|
||||
parser h involved-set>> member?
|
||||
] if ;
|
||||
|
||||
:: recall ( input quot parser -- result )
|
||||
[let* |
|
||||
m [ parser input memo ]
|
||||
h [ input heads ]
|
||||
|
|
||||
#! If not growing a seed pass, just return what is stored
|
||||
#! in the memo table.
|
||||
h [
|
||||
m not-found = parser h involved? not and [
|
||||
f
|
||||
] [
|
||||
parser h eval-set>> member? [
|
||||
parser h eval-set>> remove h (>>eval-set)
|
||||
input quot call
|
||||
] [
|
||||
m
|
||||
] if
|
||||
] if
|
||||
] [
|
||||
m
|
||||
] if
|
||||
] ;
|
||||
|
||||
:: (setup-lr) ( parser l s -- )
|
||||
s head>> l head>> = [
|
||||
l head>> s (>>head)
|
||||
l head>> [ s rule>> add ] change-involved-set drop
|
||||
parser l s next>> (setup-lr)
|
||||
] unless ;
|
||||
|
||||
:: setup-lr ( parser l -- )
|
||||
[let* |
|
||||
s [ lrstack get ]
|
||||
|
|
||||
l head>> [ parser V{ } clone V{ } clone <head> l (>>head) ] unless
|
||||
parser l s (setup-lr)
|
||||
] ;
|
||||
|
||||
:: lr-answer ( quot parser input m -- result )
|
||||
[let* |
|
||||
h [ m ast>> head>> ]
|
||||
|
|
||||
h rule>> parser = [
|
||||
"changing memo ast to seed " write
|
||||
m [ seed>> ast>> dup . ] change-ast drop
|
||||
m input input-from parser id>> input-cache set-at
|
||||
m ast>> not [
|
||||
f
|
||||
] [
|
||||
input quot parser m h grow-lr
|
||||
] if
|
||||
] [
|
||||
m ast>> seed>>
|
||||
] if
|
||||
] ;
|
||||
|
||||
:: (apply-rule) ( quot parser input -- result )
|
||||
[let* |
|
||||
lr [ f parser f lrstack get <left-recursion> ]
|
||||
m [ lr lrstack set input lr <parse-result> ]
|
||||
ans [ m input input-from parser id>> input-cache set-at input quot call ]
|
||||
|
|
||||
lrstack get next>> lrstack set
|
||||
lr head>> [
|
||||
"setting seed to ans " write ans .
|
||||
ans lr (>>seed)
|
||||
quot parser input m lr-answer
|
||||
] [
|
||||
ans
|
||||
] if
|
||||
] ;
|
||||
|
||||
:: apply-rule ( quot parser input -- result )
|
||||
[let* |
|
||||
m [ input quot parser recall ]
|
||||
|
|
||||
m not-found = [
|
||||
quot parser input (apply-rule)
|
||||
dup input input-from parser id>> input-cache set-at
|
||||
] [
|
||||
m [
|
||||
m ast>> left-recursion? [
|
||||
"Found left recursion..." print
|
||||
parser m ast>> setup-lr m remaining>> m ast>> seed>> <parse-result>
|
||||
dup input input-from parser id>> input-cache set-at
|
||||
] [
|
||||
m
|
||||
dup input input-from parser id>> input-cache set-at
|
||||
] if
|
||||
] [
|
||||
f f input input-from parser id>> input-cache set-at
|
||||
] if
|
||||
] if
|
||||
] ;
|
||||
|
||||
:: cached-result ( input-cache input quot parser -- result )
|
||||
#! Get the cached result for input position
|
||||
#! from the input cache. If the item is not in the cache,
|
||||
#! call 'quot' with 'input' on the stack to get the result
|
||||
#! and store that in the cache and return it.
|
||||
#! See figure 4 of packrat_TR-2007-002.pdf.
|
||||
">>cached-result " write input . " for parser " write parser .
|
||||
input input-from input-cache [
|
||||
drop
|
||||
[let* | lr [ f <left-recursion> ]
|
||||
m [ input lr <parse-result> ]
|
||||
ans [ m input input-from input-cache set-at input quot call ]
|
||||
|
|
||||
"--lr is " write lr . " ans is " write ans . " for parser " write parser .
|
||||
ans input input-from input-cache set-at
|
||||
lr detected?>> ans and [
|
||||
input quot parser ans grow-lr
|
||||
] [
|
||||
ans
|
||||
] if
|
||||
]
|
||||
] cache
|
||||
dup [ handle-left-recursive-result ] when
|
||||
"<<cached-result " write dup . " for parser " write parser . ;
|
||||
|
||||
:: run-packrat-parser ( input quot parser -- result )
|
||||
quot parser input apply-rule ;
|
||||
|
||||
:: parser-body ( parser -- quot )
|
||||
#! Return the body of the word that is the compiled version
|
||||
#! of the parser.
|
||||
[let* | parser-quot [ parser (compile) ]
|
||||
[let* | rule [ parser (compile) define-temp dup parser "peg" set-word-prop ]
|
||||
|
|
||||
[
|
||||
packrat get [
|
||||
parser-quot parser run-packrat-parser
|
||||
rule pos get apply-rule dup fail = [
|
||||
drop f
|
||||
] [
|
||||
parser-quot call
|
||||
input-slice swap <parse-result>
|
||||
] if
|
||||
]
|
||||
] ;
|
||||
|
@ -253,17 +138,8 @@ SYMBOL: not-found
|
|||
[ compiled-parser ] with-compilation-unit ;
|
||||
|
||||
: parse ( state parser -- result )
|
||||
compile execute ; inline
|
||||
|
||||
: with-packrat ( quot -- result )
|
||||
#! Run the quotation with a packrat cache active.
|
||||
H{ } clone \ heads [ [ H{ } clone packrat ] dip with-variable ] with-variable ; inline
|
||||
|
||||
: packrat-parse ( state parser -- result )
|
||||
[ parse ] with-packrat ;
|
||||
|
||||
: packrat-call ( state quot -- result )
|
||||
with-packrat ; inline
|
||||
dup word? [ compile ] unless
|
||||
[ execute ] curry with-packrat ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
|
@ -287,6 +163,8 @@ C: <parser> parser
|
|||
: reset-delegates ( -- )
|
||||
H{ } clone \ delegates set-global ;
|
||||
|
||||
reset-delegates
|
||||
|
||||
: init-parser ( parser -- parser )
|
||||
#! Set the delegate for the parser. Equivalent parsers
|
||||
#! get a delegate with the same id.
|
||||
|
@ -307,7 +185,7 @@ MATCH-VARS: ?token ;
|
|||
] if ;
|
||||
|
||||
M: token-parser (compile) ( parser -- quot )
|
||||
symbol>> [ parse-token ] curry ;
|
||||
[ \ input-slice , symbol>> , \ parse-token , ] [ ] make ;
|
||||
|
||||
TUPLE: satisfy-parser quot ;
|
||||
|
||||
|
@ -315,7 +193,7 @@ MATCH-VARS: ?quot ;
|
|||
|
||||
: satisfy-pattern ( -- quot )
|
||||
[
|
||||
dup empty? [
|
||||
input-slice dup empty? [
|
||||
drop f
|
||||
] [
|
||||
unclip-slice dup ?quot call [
|
||||
|
@ -335,7 +213,7 @@ MATCH-VARS: ?min ?max ;
|
|||
|
||||
: range-pattern ( -- quot )
|
||||
[
|
||||
dup empty? [
|
||||
input-slice dup empty? [
|
||||
drop f
|
||||
] [
|
||||
0 over nth dup
|
||||
|
@ -355,7 +233,7 @@ TUPLE: seq-parser parsers ;
|
|||
: seq-pattern ( -- quot )
|
||||
[
|
||||
dup [
|
||||
dup remaining>> ?quot [
|
||||
?quot [
|
||||
[ remaining>> swap (>>remaining) ] 2keep
|
||||
ast>> dup ignore = [
|
||||
drop
|
||||
|
@ -372,7 +250,7 @@ TUPLE: seq-parser parsers ;
|
|||
|
||||
M: seq-parser (compile) ( parser -- quot )
|
||||
[
|
||||
[ V{ } clone <parse-result> ] %
|
||||
[ input-slice V{ } clone <parse-result> ] %
|
||||
parsers>> [ compiled-parser \ ?quot seq-pattern match-replace % ] each
|
||||
] [ ] make ;
|
||||
|
||||
|
@ -380,24 +258,19 @@ TUPLE: choice-parser parsers ;
|
|||
|
||||
: choice-pattern ( -- quot )
|
||||
[
|
||||
dup [
|
||||
|
||||
] [
|
||||
drop dup ?quot
|
||||
] if
|
||||
[ ?quot ] unless*
|
||||
] ;
|
||||
|
||||
M: choice-parser (compile) ( parser -- quot )
|
||||
[
|
||||
f ,
|
||||
parsers>> [ compiled-parser \ ?quot choice-pattern match-replace % ] each
|
||||
\ nip ,
|
||||
] [ ] make ;
|
||||
|
||||
TUPLE: repeat0-parser p1 ;
|
||||
|
||||
: (repeat0) ( quot result -- result )
|
||||
2dup remaining>> swap call [
|
||||
over call [
|
||||
[ remaining>> swap (>>remaining) ] 2keep
|
||||
ast>> swap [ ast>> push ] keep
|
||||
(repeat0)
|
||||
|
@ -412,7 +285,7 @@ TUPLE: repeat0-parser p1 ;
|
|||
|
||||
M: repeat0-parser (compile) ( parser -- quot )
|
||||
[
|
||||
[ V{ } clone <parse-result> ] %
|
||||
[ input-slice V{ } clone <parse-result> ] %
|
||||
p1>> compiled-parser \ ?quot repeat0-pattern match-replace %
|
||||
] [ ] make ;
|
||||
|
||||
|
@ -431,7 +304,7 @@ TUPLE: repeat1-parser p1 ;
|
|||
|
||||
M: repeat1-parser (compile) ( parser -- quot )
|
||||
[
|
||||
[ V{ } clone <parse-result> ] %
|
||||
[ input-slice V{ } clone <parse-result> ] %
|
||||
p1>> compiled-parser \ ?quot repeat1-pattern match-replace %
|
||||
] [ ] make ;
|
||||
|
||||
|
@ -439,7 +312,7 @@ TUPLE: optional-parser p1 ;
|
|||
|
||||
: optional-pattern ( -- quot )
|
||||
[
|
||||
dup ?quot swap f <parse-result> or
|
||||
?quot [ input-slice f <parse-result> ] unless*
|
||||
] ;
|
||||
|
||||
M: optional-parser (compile) ( parser -- quot )
|
||||
|
@ -449,7 +322,7 @@ TUPLE: ensure-parser p1 ;
|
|||
|
||||
: ensure-pattern ( -- quot )
|
||||
[
|
||||
dup ?quot [
|
||||
input-slice ?quot [
|
||||
ignore <parse-result>
|
||||
] [
|
||||
drop f
|
||||
|
@ -463,7 +336,7 @@ TUPLE: ensure-not-parser p1 ;
|
|||
|
||||
: ensure-not-pattern ( -- quot )
|
||||
[
|
||||
dup ?quot [
|
||||
input-slice ?quot [
|
||||
drop f
|
||||
] [
|
||||
ignore <parse-result>
|
||||
|
@ -486,7 +359,7 @@ MATCH-VARS: ?action ;
|
|||
] ;
|
||||
|
||||
M: action-parser (compile) ( parser -- quot )
|
||||
{ [ p1>> ] [ quot>> ] } cleave [ compiled-parser ] dip
|
||||
[ p1>> compiled-parser ] [ quot>> ] bi
|
||||
2array { ?quot ?action } action-pattern match-replace ;
|
||||
|
||||
: left-trim-slice ( string -- string )
|
||||
|
@ -500,7 +373,7 @@ TUPLE: sp-parser p1 ;
|
|||
|
||||
M: sp-parser (compile) ( parser -- quot )
|
||||
[
|
||||
\ left-trim-slice , p1>> compiled-parser ,
|
||||
\ input-slice , \ left-trim-slice , \ input-from , \ pos , \ set , p1>> compiled-parser ,
|
||||
] [ ] make ;
|
||||
|
||||
TUPLE: delay-parser quot ;
|
||||
|
|
Loading…
Reference in New Issue