diff --git a/extra/peg/peg-docs.factor b/extra/peg/peg-docs.factor index c93d1af830..d2ca353ba1 100644 --- a/extra/peg/peg-docs.factor +++ b/extra/peg/peg-docs.factor @@ -12,41 +12,7 @@ HELP: parse { $description "Given the input string, parse it using the given parser. The result is a object if " "the parse was successful, otherwise it is f." } -{ $see-also compile with-packrat packrat-parse } ; - -HELP: with-packrat -{ $values - { "quot" "a quotation with stack effect ( input -- result )" } - { "result" "the result of the quotation" } -} -{ $description - "Calls the quotation with a packrat cache in scope. Usually the quotation will " - "call " { $link parse } " or call a word produced by " { $link compile } "." - "The cache is used to avoid the possible exponential time performace that pegs " - "can have, instead giving linear time at the cost of increased memory usage. " - "Use of this packrat option also allows direct and indirect recursion to " - "be handled in the parser without entering an infinite loop." } -{ $see-also compile parse packrat-parse packrat-call } ; - -HELP: packrat-parse -{ $values - { "input" "a string" } - { "parser" "a parser" } - { "result" "a parse-result or f" } -} -{ $description - "Compiles and calls the parser with a packrat cache in scope." } -{ $see-also compile parse packrat-call with-packrat } ; - -HELP: packrat-call -{ $values - { "input" "a string" } - { "quot" "a quotation with stack effect ( input -- result )" } - { "result" "a parse-result or f" } -} -{ $description - "Calls the compiled parser with a packrat cache in scope." } -{ $see-also compile packrat-call packrat-parse with-packrat } ; +{ $see-also compile } ; HELP: compile { $values @@ -54,11 +20,12 @@ HELP: compile { "word" "a word" } } { $description - "Compile the parser to a word. The word will have stack effect ( input -- result )." + "Compile the parser to a word. The word will have stack effect ( -- result )." "The mapping from parser to compiled word is kept in a cache. If you later change " "the definition of a parser you'll need to clear this cache with " - { $link reset-compiled-parsers } " before using " { $link compile } " on that parser again." } -{ $see-also compile with-packrat reset-compiled-parsers packrat-call packrat-parse } ; + { $link reset-compiled-parsers } " before using " { $link compile } " on that parser again." +} +{ $see-also parse } ; HELP: reset-compiled-parsers { $description diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor index 7e2701bc48..7467a4111a 100644 --- a/extra/peg/peg-tests.factor +++ b/extra/peg/peg-tests.factor @@ -168,32 +168,13 @@ IN: peg.tests "1+1" swap parse parse-result-ast ] unit-test -{ V{ "1" "-" "1" } V{ "1" "+" "1" } } [ - [ - [ - [ "1" token , "-" token , "1" token , ] seq* , - [ "1" token , "+" token , "1" token , ] seq* , - ] choice* - "1-1" over parse parse-result-ast swap - ] with-packrat - [ - "1+1" swap parse parse-result-ast - ] with-packrat -] unit-test - : expr ( -- parser ) #! Test direct left recursion. Currently left recursion should cause a #! failure of that parser. [ expr ] delay "+" token "1" token 3seq "1" token 2choice ; -[ - #! Not using packrat, so recursion causes data stack overflow - "1+1" expr parse parse-result-ast -] must-fail - { "1" } [ - #! Using packrat, so expr fails, causing the 2nd choice to be used. - "1+1" expr [ parse ] with-packrat parse-result-ast + "1+1" expr parse parse-result-ast ] unit-test { t } [ diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 96fe36f85f..81a9ed8ace 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -7,6 +7,8 @@ USING: kernel sequences strings namespaces math assocs shuffle combinators.cleave locals ; IN: peg +USE: prettyprint + TUPLE: parse-result remaining ast ; SYMBOL: ignore @@ -15,18 +17,83 @@ SYMBOL: ignore parse-result construct-boa ; SYMBOL: packrat -SYMBOL: lrstack +SYMBOL: pos +SYMBOL: input +SYMBOL: fail -TUPLE: phead rule involved-set eval-set ; -C: phead +TUPLE: memo-entry ans pos ; +C: memo-entry + +: rule-parser ( rule -- parser ) + #! A rule is the parser compiled down to a word. It has + #! a "peg" property containing the original parser. + "peg" word-prop ; + +: input-slice ( -- slice ) + #! Return a slice of the input from the current parse position + input get pos get tail-slice ; : input-from ( input -- n ) #! Return the index from the original string that the #! input slice is based on. dup slice? [ slice-from ] [ drop 0 ] if ; -: heads ( input -- h ) - input-from \ heads get at ; +: input-cache ( parser -- cache ) + #! From the packrat cache, obtain the cache for the parser + #! that maps the position to the parser result. + id>> packrat get [ drop H{ } clone ] cache ; + +: eval-rule ( rule -- ast ) + #! Evaluate a rule, return an ast resulting from it. + #! Return fail if the rule failed. The rule has + #! stack effect ( input -- parse-result ) + pos get swap + execute [ + nip + [ ast>> ] [ remaining>> ] bi + input-from pos set + ] [ + pos set + fail + ] if* ; + +: memo ( pos rule -- memo-entry ) + #! Return the result from the memo cache. + rule-parser input-cache at ; + +: set-memo ( memo-entry pos rule -- ) + #! Store an entry in the cache + rule-parser input-cache set-at ; + +:: apply-non-memo-rule ( r p -- ast ) + [let* | + ans [ r eval-rule ] + m [ ans pos get ] + | + m p r set-memo ans + ] ; + +: apply-memo-rule ( m -- ast ) + [ ans>> ] [ pos>> ] bi pos set ; + +:: apply-rule ( r p -- ast ) + [let* | + m [ p r memo ] + | + m [ + m apply-memo-rule + ] [ + r p apply-non-memo-rule + ] if + ] ; + +: with-packrat ( input quot -- result ) + #! Run the quotation with a packrat cache active. + swap [ + input set + 0 pos set + H{ } clone packrat set + ] H{ } make-assoc swap bind ; : compiled-parsers ( -- cache ) @@ -35,203 +102,21 @@ C: phead : reset-compiled-parsers ( -- ) H{ } clone \ compiled-parsers set-global ; +reset-compiled-parsers + GENERIC: (compile) ( parser -- quot ) -: input-cache ( id -- cache ) - #! From the packrat cache, obtain the cache for the parser quotation - #! that maps the input string position to the parser result. - packrat get [ drop H{ } clone ] cache ; - -TUPLE: left-recursion seed rule head next ; -C: left-recursion - -USE: prettyprint -USE: io - - -:: handle-left-recursive-result ( result -- result ) - #! If the result is from a left-recursive call, - #! note this and fail, otherwise return normal result - #! See figure 4 of packrat_TR-2007-002.pdf. - ">>handle-left-recursive-result " write result . - result [ - [let* | ast [ result ast>> ] | - ast left-recursion? [ t ast (>>detected?) f ] [ result ] if - ] - ] [ - f - ] if - "<>(grow-lr) " write input . " for parser " write parser . " m is " write m . - [let* | - pos [ input ] - ans [ h involved-set>> clone h (>>eval-set) input quot call ] - | - [ ans not ] [ ans [ pos input-from m remaining>> input-from <= ] [ f ] if ] 2array || [ - "recursion exiting with = " write ans . "m was " write m . - m - ] [ - "recursion with = " write ans . - pos quot parser pos ans ast>> h (grow-lr) - ] if - ] - "<<(grow-lr) " write input . " for parser " write parser . " m is " write m . " result is " write dup . - ; - -:: grow-lr ( input quot parser m h -- result ) - h input input-from \ heads get set-at - input quot parser m h (grow-lr) - f input input-from \ heads get set-at ; - -SYMBOL: not-found - -: memo ( parser input -- result ) - input-from swap id>> input-cache at* [ drop not-found ] unless ; - - -:: involved? ( parser h -- ? ) - h rule>> parser = [ - t - ] [ - parser h involved-set>> member? - ] if ; - -:: recall ( input quot parser -- result ) - [let* | - m [ parser input memo ] - h [ input heads ] - | - #! If not growing a seed pass, just return what is stored - #! in the memo table. - h [ - m not-found = parser h involved? not and [ - f - ] [ - parser h eval-set>> member? [ - parser h eval-set>> remove h (>>eval-set) - input quot call - ] [ - m - ] if - ] if - ] [ - m - ] if - ] ; - -:: (setup-lr) ( parser l s -- ) - s head>> l head>> = [ - l head>> s (>>head) - l head>> [ s rule>> add ] change-involved-set drop - parser l s next>> (setup-lr) - ] unless ; - -:: setup-lr ( parser l -- ) - [let* | - s [ lrstack get ] - | - l head>> [ parser V{ } clone V{ } clone l (>>head) ] unless - parser l s (setup-lr) - ] ; - -:: lr-answer ( quot parser input m -- result ) - [let* | - h [ m ast>> head>> ] - | - h rule>> parser = [ - "changing memo ast to seed " write - m [ seed>> ast>> dup . ] change-ast drop - m input input-from parser id>> input-cache set-at - m ast>> not [ - f - ] [ - input quot parser m h grow-lr - ] if - ] [ - m ast>> seed>> - ] if - ] ; - -:: (apply-rule) ( quot parser input -- result ) - [let* | - lr [ f parser f lrstack get ] - m [ lr lrstack set input lr ] - ans [ m input input-from parser id>> input-cache set-at input quot call ] - | - lrstack get next>> lrstack set - lr head>> [ -"setting seed to ans " write ans . - ans lr (>>seed) - quot parser input m lr-answer - ] [ - ans - ] if - ] ; - -:: apply-rule ( quot parser input -- result ) - [let* | - m [ input quot parser recall ] - | - m not-found = [ - quot parser input (apply-rule) - dup input input-from parser id>> input-cache set-at - ] [ - m [ - m ast>> left-recursion? [ - "Found left recursion..." print - parser m ast>> setup-lr m remaining>> m ast>> seed>> - dup input input-from parser id>> input-cache set-at - ] [ - m - dup input input-from parser id>> input-cache set-at - ] if - ] [ - f f input input-from parser id>> input-cache set-at - ] if - ] if - ] ; - -:: cached-result ( input-cache input quot parser -- result ) - #! Get the cached result for input position - #! from the input cache. If the item is not in the cache, - #! call 'quot' with 'input' on the stack to get the result - #! and store that in the cache and return it. - #! See figure 4 of packrat_TR-2007-002.pdf. - ">>cached-result " write input . " for parser " write parser . - input input-from input-cache [ - drop - [let* | lr [ f ] - m [ input lr ] - ans [ m input input-from input-cache set-at input quot call ] - | - "--lr is " write lr . " ans is " write ans . " for parser " write parser . - ans input input-from input-cache set-at - lr detected?>> ans and [ - input quot parser ans grow-lr - ] [ - ans - ] if - ] - ] cache - dup [ handle-left-recursive-result ] when - "< ] if ] ] ; @@ -253,17 +138,8 @@ SYMBOL: not-found [ compiled-parser ] with-compilation-unit ; : parse ( state parser -- result ) - compile execute ; inline - -: with-packrat ( quot -- result ) - #! Run the quotation with a packrat cache active. - H{ } clone \ heads [ [ H{ } clone packrat ] dip with-variable ] with-variable ; inline - -: packrat-parse ( state parser -- result ) - [ parse ] with-packrat ; - -: packrat-call ( state quot -- result ) - with-packrat ; inline + dup word? [ compile ] unless + [ execute ] curry with-packrat ; parser : reset-delegates ( -- ) H{ } clone \ delegates set-global ; +reset-delegates + : init-parser ( parser -- parser ) #! Set the delegate for the parser. Equivalent parsers #! get a delegate with the same id. @@ -307,15 +185,15 @@ MATCH-VARS: ?token ; ] if ; M: token-parser (compile) ( parser -- quot ) - symbol>> [ parse-token ] curry ; - + [ \ input-slice , symbol>> , \ parse-token , ] [ ] make ; + TUPLE: satisfy-parser quot ; MATCH-VARS: ?quot ; : satisfy-pattern ( -- quot ) [ - dup empty? [ + input-slice dup empty? [ drop f ] [ unclip-slice dup ?quot call [ @@ -335,7 +213,7 @@ MATCH-VARS: ?min ?max ; : range-pattern ( -- quot ) [ - dup empty? [ + input-slice dup empty? [ drop f ] [ 0 over nth dup @@ -355,7 +233,7 @@ TUPLE: seq-parser parsers ; : seq-pattern ( -- quot ) [ dup [ - dup remaining>> ?quot [ + ?quot [ [ remaining>> swap (>>remaining) ] 2keep ast>> dup ignore = [ drop @@ -372,7 +250,7 @@ TUPLE: seq-parser parsers ; M: seq-parser (compile) ( parser -- quot ) [ - [ V{ } clone ] % + [ input-slice V{ } clone ] % parsers>> [ compiled-parser \ ?quot seq-pattern match-replace % ] each ] [ ] make ; @@ -380,24 +258,19 @@ TUPLE: choice-parser parsers ; : choice-pattern ( -- quot ) [ - dup [ - - ] [ - drop dup ?quot - ] if + [ ?quot ] unless* ] ; M: choice-parser (compile) ( parser -- quot ) - [ + [ f , parsers>> [ compiled-parser \ ?quot choice-pattern match-replace % ] each - \ nip , ] [ ] make ; TUPLE: repeat0-parser p1 ; : (repeat0) ( quot result -- result ) - 2dup remaining>> swap call [ + over call [ [ remaining>> swap (>>remaining) ] 2keep ast>> swap [ ast>> push ] keep (repeat0) @@ -412,7 +285,7 @@ TUPLE: repeat0-parser p1 ; M: repeat0-parser (compile) ( parser -- quot ) [ - [ V{ } clone ] % + [ input-slice V{ } clone ] % p1>> compiled-parser \ ?quot repeat0-pattern match-replace % ] [ ] make ; @@ -431,7 +304,7 @@ TUPLE: repeat1-parser p1 ; M: repeat1-parser (compile) ( parser -- quot ) [ - [ V{ } clone ] % + [ input-slice V{ } clone ] % p1>> compiled-parser \ ?quot repeat1-pattern match-replace % ] [ ] make ; @@ -439,7 +312,7 @@ TUPLE: optional-parser p1 ; : optional-pattern ( -- quot ) [ - dup ?quot swap f or + ?quot [ input-slice f ] unless* ] ; M: optional-parser (compile) ( parser -- quot ) @@ -449,7 +322,7 @@ TUPLE: ensure-parser p1 ; : ensure-pattern ( -- quot ) [ - dup ?quot [ + input-slice ?quot [ ignore ] [ drop f @@ -463,7 +336,7 @@ TUPLE: ensure-not-parser p1 ; : ensure-not-pattern ( -- quot ) [ - dup ?quot [ + input-slice ?quot [ drop f ] [ ignore @@ -486,7 +359,7 @@ MATCH-VARS: ?action ; ] ; M: action-parser (compile) ( parser -- quot ) - { [ p1>> ] [ quot>> ] } cleave [ compiled-parser ] dip + [ p1>> compiled-parser ] [ quot>> ] bi 2array { ?quot ?action } action-pattern match-replace ; : left-trim-slice ( string -- string ) @@ -500,7 +373,7 @@ TUPLE: sp-parser p1 ; M: sp-parser (compile) ( parser -- quot ) [ - \ left-trim-slice , p1>> compiled-parser , + \ input-slice , \ left-trim-slice , \ input-from , \ pos , \ set , p1>> compiled-parser , ] [ ] make ; TUPLE: delay-parser quot ;