From 708726d20838833899ce7ddfb9aae19efa10bc1a Mon Sep 17 00:00:00 2001 From: Chris Double Date: Thu, 27 Mar 2008 15:50:27 +1300 Subject: [PATCH] Add with-packrat word and more memoization --- extra/peg/parsers/parsers.factor | 17 ++++----- extra/peg/peg-docs.factor | 31 ++++++++++++++++- extra/peg/peg.factor | 60 ++++++++++++++++++++------------ 3 files changed, 77 insertions(+), 31 deletions(-) diff --git a/extra/peg/parsers/parsers.factor b/extra/peg/parsers/parsers.factor index 13509e81f7..fa6801dc1c 100755 --- a/extra/peg/parsers/parsers.factor +++ b/extra/peg/parsers/parsers.factor @@ -3,10 +3,11 @@ USING: kernel sequences strings namespaces math assocs shuffle vectors arrays combinators.lib math.parser match unicode.categories sequences.deep peg peg.private - peg.search math.ranges words ; + peg.search math.ranges words memoize ; IN: peg.parsers TUPLE: just-parser p1 ; +M: just-parser equal? 2drop f ; : just-pattern [ @@ -19,7 +20,7 @@ TUPLE: just-parser p1 ; M: just-parser (compile) ( parser -- quot ) just-parser-p1 compiled-parser just-pattern curry ; -: just ( parser -- parser ) +MEMO: just ( parser -- parser ) just-parser construct-boa ; : 1token ( ch -- parser ) 1string token ; @@ -47,10 +48,10 @@ PRIVATE> PRIVATE> -: exactly-n ( parser n -- parser' ) +MEMO: exactly-n ( parser n -- parser' ) swap seq ; -: at-most-n ( parser n -- parser' ) +MEMO: at-most-n ( parser n -- parser' ) dup zero? [ 2drop epsilon ] [ @@ -58,15 +59,15 @@ PRIVATE> -rot 1- at-most-n 2choice ] if ; -: at-least-n ( parser n -- parser' ) +MEMO: at-least-n ( parser n -- parser' ) dupd exactly-n swap repeat0 2seq [ flatten-vectors ] action ; -: from-m-to-n ( parser m n -- parser' ) +MEMO: from-m-to-n ( parser m n -- parser' ) >r [ exactly-n ] 2keep r> swap - at-most-n 2seq [ flatten-vectors ] action ; -: pack ( begin body end -- parser ) +MEMO: pack ( begin body end -- parser ) >r >r hide r> r> hide 3seq [ first ] action ; : surrounded-by ( parser begin end -- parser' ) @@ -83,7 +84,7 @@ PRIVATE> [ CHAR: " = ] satisfy hide , [ CHAR: " = not ] satisfy repeat0 , [ CHAR: " = ] satisfy hide , - ] { } make seq [ first >string ] action ; + ] seq* [ first >string ] action ; : (range-pattern) ( pattern -- string ) #! Given a range pattern, produce a string containing diff --git a/extra/peg/peg-docs.factor b/extra/peg/peg-docs.factor index 9ad375ea04..30e7f0e72f 100644 --- a/extra/peg/peg-docs.factor +++ b/extra/peg/peg-docs.factor @@ -11,7 +11,36 @@ HELP: parse } { $description "Given the input string, parse it using the given parser. The result is a object if " - "the parse was successful, otherwise it is f." } ; + "the parse was successful, otherwise it is f." } +{ $see-also compile with-packrat } ; + +HELP: with-packrat +{ $values + { "quot" "a quotation with stack effect ( input -- result )" } + { "result" "the result of the quotation" } +} +{ $description + "Calls the quotation with a packrat cache in scope. Usually the quotation will " + "call " { $link parse } " or call a word produced by " { $link compile } "." + "The cache is used to avoid the possible exponential time performace that pegs " + "can have, instead giving linear time at the cost of increased memory usage." } +{ $see-also compile parse } ; + +HELP: compile +{ $values + { "parser" "a parser" } + { "word" "a word" } +} +{ $description + "Compile the parser to a word. The word will have stack effect ( input -- result )." + "The mapping from parser to compiled word is kept in a cache. If you later change " + "the definition of a parser you'll need to clear this cache with " + { $link reset-compiled-parsers } " before using " { $link compile } " on that parser again." } +{ $see-also compile with-packrat reset-compiled-parsers } ; + +HELP: reset-compiled-parsers +{ $description + "Reset the cache mapping parsers to compiled words." } ; HELP: token { $values diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index c994c5aa29..10c9ce907d 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -50,10 +50,14 @@ GENERIC: (compile) ( parser -- quot ) : parse ( state parser -- result ) compile execute ; +: with-packrat ( quot -- result ) + #! Run the quotation with a packrat cache active. + [ H{ } clone packrat ] dip with-variable ; + > [ parse-token ] curry ; TUPLE: satisfy-parser quot ; +M: satisfy-parser equal? 2drop f ; MATCH-VARS: ?quot ; @@ -89,6 +94,7 @@ M: satisfy-parser (compile) ( parser -- quot ) quot>> \ ?quot satisfy-pattern match-replace ; TUPLE: range-parser min max ; +M: range-parser equal? 2drop f ; MATCH-VARS: ?min ?max ; @@ -110,6 +116,7 @@ M: range-parser (compile) ( parser -- quot ) T{ range-parser _ ?min ?max } range-pattern match-replace ; TUPLE: seq-parser parsers ; +M: seq-parser equal? 2drop f ; : seq-pattern ( -- quot ) [ @@ -136,6 +143,7 @@ M: seq-parser (compile) ( parser -- quot ) ] [ ] make ; TUPLE: choice-parser parsers ; +M: choice-parser equal? 2drop f ; : choice-pattern ( -- quot ) [ @@ -154,6 +162,7 @@ M: choice-parser (compile) ( parser -- quot ) ] [ ] make ; TUPLE: repeat0-parser p1 ; +M: repeat0-parser equal? 2drop f ; : (repeat0) ( quot result -- result ) 2dup remaining>> swap call [ @@ -176,6 +185,7 @@ M: repeat0-parser (compile) ( parser -- quot ) ] [ ] make ; TUPLE: repeat1-parser p1 ; +M: repeat1-parser equal? 2drop f ; : repeat1-pattern ( -- quot ) [ @@ -195,6 +205,7 @@ M: repeat1-parser (compile) ( parser -- quot ) ] [ ] make ; TUPLE: optional-parser p1 ; +M: optional-parser equal? 2drop f ; : optional-pattern ( -- quot ) [ @@ -205,6 +216,7 @@ M: optional-parser (compile) ( parser -- quot ) p1>> compiled-parser \ ?quot optional-pattern match-replace ; TUPLE: ensure-parser p1 ; +M: ensure-parser equal? 2drop f ; : ensure-pattern ( -- quot ) [ @@ -219,6 +231,7 @@ M: ensure-parser (compile) ( parser -- quot ) p1>> compiled-parser \ ?quot ensure-pattern match-replace ; TUPLE: ensure-not-parser p1 ; +M: ensure-not-parser equal? 2drop f ; : ensure-not-pattern ( -- quot ) [ @@ -233,6 +246,7 @@ M: ensure-not-parser (compile) ( parser -- quot ) p1>> compiled-parser \ ?quot ensure-not-pattern match-replace ; TUPLE: action-parser p1 quot ; +M: action-parser equal? 2drop f ; MATCH-VARS: ?action ; @@ -256,6 +270,7 @@ M: action-parser (compile) ( parser -- quot ) ] unless ; TUPLE: sp-parser p1 ; +M: sp-parser equal? 2drop f ; M: sp-parser (compile) ( parser -- quot ) [ @@ -263,6 +278,7 @@ M: sp-parser (compile) ( parser -- quot ) ] [ ] make ; TUPLE: delay-parser quot ; +M: delay-parser equal? 2drop f ; M: delay-parser (compile) ( parser -- quot ) #! For efficiency we memoize the quotation. @@ -276,70 +292,70 @@ M: delay-parser (compile) ( parser -- quot ) PRIVATE> -: token ( string -- parser ) +MEMO: token ( string -- parser ) token-parser construct-boa ; -: satisfy ( quot -- parser ) +MEMO: satisfy ( quot -- parser ) satisfy-parser construct-boa ; -: range ( min max -- parser ) +MEMO: range ( min max -- parser ) range-parser construct-boa ; -: seq ( seq -- parser ) +MEMO: seq ( seq -- parser ) seq-parser construct-boa ; -: 2seq ( parser1 parser2 -- parser ) +MEMO: 2seq ( parser1 parser2 -- parser ) 2array seq ; -: 3seq ( parser1 parser2 parser3 -- parser ) +MEMO: 3seq ( parser1 parser2 parser3 -- parser ) 3array seq ; -: 4seq ( parser1 parser2 parser3 parser4 -- parser ) +MEMO: 4seq ( parser1 parser2 parser3 parser4 -- parser ) 4array seq ; -: seq* ( quot -- paser ) +MEMO: seq* ( quot -- paser ) { } make seq ; inline -: choice ( seq -- parser ) +MEMO: choice ( seq -- parser ) choice-parser construct-boa ; -: 2choice ( parser1 parser2 -- parser ) +MEMO: 2choice ( parser1 parser2 -- parser ) 2array choice ; -: 3choice ( parser1 parser2 parser3 -- parser ) +MEMO: 3choice ( parser1 parser2 parser3 -- parser ) 3array choice ; -: 4choice ( parser1 parser2 parser3 parser4 -- parser ) +MEMO: 4choice ( parser1 parser2 parser3 parser4 -- parser ) 4array choice ; -: choice* ( quot -- paser ) +MEMO: choice* ( quot -- paser ) { } make choice ; inline -: repeat0 ( parser -- parser ) +MEMO: repeat0 ( parser -- parser ) repeat0-parser construct-boa ; -: repeat1 ( parser -- parser ) +MEMO: repeat1 ( parser -- parser ) repeat1-parser construct-boa ; -: optional ( parser -- parser ) +MEMO: optional ( parser -- parser ) optional-parser construct-boa ; -: ensure ( parser -- parser ) +MEMO: ensure ( parser -- parser ) ensure-parser construct-boa ; -: ensure-not ( parser -- parser ) +MEMO: ensure-not ( parser -- parser ) ensure-not-parser construct-boa ; -: action ( parser quot -- parser ) +MEMO: action ( parser quot -- parser ) action-parser construct-boa ; -: sp ( parser -- parser ) +MEMO: sp ( parser -- parser ) sp-parser construct-boa ; : hide ( parser -- parser ) [ drop ignore ] action ; -: delay ( quot -- parser ) +MEMO: delay ( quot -- parser ) delay-parser construct-boa ; : PEG: