From a14854520da6b9c41ee0f0aeb9235fa9d894129a Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Fri, 21 Mar 2008 03:05:21 +1300 Subject: [PATCH 1/3] Compile pegs down to words --- extra/peg/parsers/parsers.factor | 6 +- extra/peg/peg.factor | 124 +++++++++++++++++++------------ 2 files changed, 78 insertions(+), 52 deletions(-) diff --git a/extra/peg/parsers/parsers.factor b/extra/peg/parsers/parsers.factor index 3ccb1e7d10..407729004f 100755 --- a/extra/peg/parsers/parsers.factor +++ b/extra/peg/parsers/parsers.factor @@ -16,11 +16,11 @@ TUPLE: just-parser p1 ; ] ; -M: just-parser compile ( parser -- quot ) - just-parser-p1 compile just-pattern append ; +M: just-parser (compile) ( parser -- quot ) + just-parser-p1 compiled-parser just-pattern curry ; : just ( parser -- parser ) - just-parser construct-boa ; + just-parser construct-boa init-parser ; : 1token ( ch -- parser ) 1string token ; diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index b3200ec5eb..9d6b18398e 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -8,16 +8,42 @@ IN: peg TUPLE: parse-result remaining ast ; -GENERIC: compile ( parser -- quot ) - -: parse ( state parser -- result ) - compile call ; - SYMBOL: ignore : <parse-result> ( remaining ast -- parse-result ) parse-result construct-boa ; +TUPLE: parser ; +C: <parser> parser +M: parser equal? eq? ; + +: init-parser ( parser -- parser ) + #! Set the delegate for the parser + <parser> over set-delegate ; + +SYMBOL: compiled-parsers + +GENERIC: (compile) ( parser -- quot ) + +: compiled-parser ( parser -- word ) + #! Look to see if the given parser has been compied. + #! If not, compile it to a temporary word, cache it, + #! and return it. Otherwise return the existing one. + dup compiled-parsers get at [ + nip + ] [ + dup (compile) define-temp + [ swap compiled-parsers get set-at ] keep + ] if* ; + +: compile ( parser -- word ) + H{ } clone compiled-parsers [ + [ compiled-parser ] with-compilation-unit + ] with-variable ; + +: parse ( state parser -- result ) + compile call ; + <PRIVATE TUPLE: token-parser symbol ; @@ -33,7 +59,7 @@ MATCH-VARS: ?token ; ] if ] ; -M: token-parser compile ( parser -- quot ) +M: token-parser (compile) ( parser -- quot ) token-parser-symbol \ ?token token-pattern match-replace ; TUPLE: satisfy-parser quot ; @@ -53,7 +79,7 @@ MATCH-VARS: ?quot ; ] if ] ; -M: satisfy-parser compile ( parser -- quot ) +M: satisfy-parser (compile) ( parser -- quot ) satisfy-parser-quot \ ?quot satisfy-pattern match-replace ; TUPLE: range-parser min max ; @@ -74,7 +100,7 @@ MATCH-VARS: ?min ?max ; ] if ] ; -M: range-parser compile ( parser -- quot ) +M: range-parser (compile) ( parser -- quot ) T{ range-parser _ ?min ?max } range-pattern match-replace ; TUPLE: seq-parser parsers ; @@ -82,7 +108,7 @@ TUPLE: seq-parser parsers ; : seq-pattern ( -- quot ) [ dup [ - dup parse-result-remaining ?quot call [ + dup parse-result-remaining ?quot [ [ parse-result-remaining swap set-parse-result-remaining ] 2keep parse-result-ast dup ignore = [ drop @@ -97,10 +123,10 @@ TUPLE: seq-parser parsers ; ] if ] ; -M: seq-parser compile ( parser -- quot ) +M: seq-parser (compile) ( parser -- quot ) [ [ V{ } clone <parse-result> ] % - seq-parser-parsers [ compile \ ?quot seq-pattern match-replace % ] each + seq-parser-parsers [ compiled-parser \ ?quot seq-pattern match-replace % ] each ] [ ] make ; TUPLE: choice-parser parsers ; @@ -110,14 +136,14 @@ TUPLE: choice-parser parsers ; dup [ ] [ - drop dup ?quot call + drop dup ?quot ] if ] ; -M: choice-parser compile ( parser -- quot ) +M: choice-parser (compile) ( parser -- quot ) [ f , - choice-parser-parsers [ compile \ ?quot choice-pattern match-replace % ] each + choice-parser-parsers [ compiled-parser \ ?quot choice-pattern match-replace % ] each \ nip , ] [ ] make ; @@ -134,20 +160,20 @@ TUPLE: repeat0-parser p1 ; : repeat0-pattern ( -- quot ) [ - ?quot swap (repeat0) + [ ?quot ] swap (repeat0) ] ; -M: repeat0-parser compile ( parser -- quot ) +M: repeat0-parser (compile) ( parser -- quot ) [ [ V{ } clone <parse-result> ] % - repeat0-parser-p1 compile \ ?quot repeat0-pattern match-replace % + repeat0-parser-p1 compiled-parser \ ?quot repeat0-pattern match-replace % ] [ ] make ; TUPLE: repeat1-parser p1 ; : repeat1-pattern ( -- quot ) [ - ?quot swap (repeat0) [ + [ ?quot ] swap (repeat0) [ dup parse-result-ast empty? [ drop f ] when @@ -156,49 +182,49 @@ TUPLE: repeat1-parser p1 ; ] if* ] ; -M: repeat1-parser compile ( parser -- quot ) +M: repeat1-parser (compile) ( parser -- quot ) [ [ V{ } clone <parse-result> ] % - repeat1-parser-p1 compile \ ?quot repeat1-pattern match-replace % + repeat1-parser-p1 compiled-parser \ ?quot repeat1-pattern match-replace % ] [ ] make ; TUPLE: optional-parser p1 ; : optional-pattern ( -- quot ) [ - dup ?quot call swap f <parse-result> or + dup ?quot swap f <parse-result> or ] ; -M: optional-parser compile ( parser -- quot ) - optional-parser-p1 compile \ ?quot optional-pattern match-replace ; +M: optional-parser (compile) ( parser -- quot ) + optional-parser-p1 compiled-parser \ ?quot optional-pattern match-replace ; TUPLE: ensure-parser p1 ; : ensure-pattern ( -- quot ) [ - dup ?quot call [ + dup ?quot [ ignore <parse-result> ] [ drop f ] if ] ; -M: ensure-parser compile ( parser -- quot ) - ensure-parser-p1 compile \ ?quot ensure-pattern match-replace ; +M: ensure-parser (compile) ( parser -- quot ) + ensure-parser-p1 compiled-parser \ ?quot ensure-pattern match-replace ; TUPLE: ensure-not-parser p1 ; : ensure-not-pattern ( -- quot ) [ - dup ?quot call [ + dup ?quot [ drop f ] [ ignore <parse-result> ] if ] ; -M: ensure-not-parser compile ( parser -- quot ) - ensure-not-parser-p1 compile \ ?quot ensure-not-pattern match-replace ; +M: ensure-not-parser (compile) ( parser -- quot ) + ensure-not-parser-p1 compiled-parser \ ?quot ensure-not-pattern match-replace ; TUPLE: action-parser p1 quot ; @@ -206,14 +232,14 @@ MATCH-VARS: ?action ; : action-pattern ( -- quot ) [ - ?quot call dup [ + ?quot dup [ dup parse-result-ast ?action call swap [ set-parse-result-ast ] keep ] when ] ; -M: action-parser compile ( parser -- quot ) - { action-parser-p1 action-parser-quot } get-slots [ compile ] dip +M: action-parser (compile) ( parser -- quot ) + { action-parser-p1 action-parser-quot } get-slots [ compiled-parser ] dip 2array { ?quot ?action } action-pattern match-replace ; : left-trim-slice ( string -- string ) @@ -225,31 +251,31 @@ M: action-parser compile ( parser -- quot ) TUPLE: sp-parser p1 ; -M: sp-parser compile ( parser -- quot ) +M: sp-parser (compile) ( parser -- quot ) [ - \ left-trim-slice , sp-parser-p1 compile % + \ left-trim-slice , sp-parser-p1 compiled-parser , ] [ ] make ; TUPLE: delay-parser quot ; -M: delay-parser compile ( parser -- quot ) +M: delay-parser (compile) ( parser -- quot ) [ - delay-parser-quot % \ compile , \ call , + delay-parser-quot % \ (compile) , \ call , ] [ ] make ; PRIVATE> : token ( string -- parser ) - token-parser construct-boa ; + token-parser construct-boa init-parser ; : satisfy ( quot -- parser ) - satisfy-parser construct-boa ; + satisfy-parser construct-boa init-parser ; : range ( min max -- parser ) - range-parser construct-boa ; + range-parser construct-boa init-parser ; : seq ( seq -- parser ) - seq-parser construct-boa ; + seq-parser construct-boa init-parser ; : 2seq ( parser1 parser2 -- parser ) 2array seq ; @@ -264,7 +290,7 @@ PRIVATE> { } make seq ; inline : choice ( seq -- parser ) - choice-parser construct-boa ; + choice-parser construct-boa init-parser ; : 2choice ( parser1 parser2 -- parser ) 2array choice ; @@ -279,31 +305,31 @@ PRIVATE> { } make choice ; inline : repeat0 ( parser -- parser ) - repeat0-parser construct-boa ; + repeat0-parser construct-boa init-parser ; : repeat1 ( parser -- parser ) - repeat1-parser construct-boa ; + repeat1-parser construct-boa init-parser ; : optional ( parser -- parser ) - optional-parser construct-boa ; + optional-parser construct-boa init-parser ; : ensure ( parser -- parser ) - ensure-parser construct-boa ; + ensure-parser construct-boa init-parser ; : ensure-not ( parser -- parser ) - ensure-not-parser construct-boa ; + ensure-not-parser construct-boa init-parser ; : action ( parser quot -- parser ) - action-parser construct-boa ; + action-parser construct-boa init-parser ; : sp ( parser -- parser ) - sp-parser construct-boa ; + sp-parser construct-boa init-parser ; : hide ( parser -- parser ) [ drop ignore ] action ; : delay ( quot -- parser ) - delay-parser construct-boa ; + delay-parser construct-boa init-parser ; : PEG: (:) [ From d1e0aa6e806e730d1972274e262a2f5b8ddd3563 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Sat, 22 Mar 2008 00:58:53 +1300 Subject: [PATCH 2/3] Get peg subvocabs working again --- extra/peg/ebnf/ebnf-tests.factor | 2 +- extra/peg/ebnf/ebnf.factor | 2 +- extra/peg/peg.factor | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 54639431a4..c9b9f5d977 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test peg peg.ebnf ; +USING: kernel tools.test peg peg.ebnf words ; IN: peg.ebnf.tests { T{ ebnf-non-terminal f "abc" } } [ diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index ab7baa547e..db478e571f 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -278,7 +278,7 @@ M: ebnf-non-terminal (transform) ( ast -- parser ) : ebnf>quot ( string -- hashtable quot ) 'ebnf' parse check-parse-result - parse-result-ast transform dup main swap at compile ; + parse-result-ast transform dup main swap at compile 1quotation ; : [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 9d6b18398e..47dc0a3454 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -3,7 +3,7 @@ USING: kernel sequences strings namespaces math assocs shuffle vectors arrays combinators.lib math.parser match unicode.categories sequences.lib compiler.units parser - words ; + words quotations ; IN: peg TUPLE: parse-result remaining ast ; @@ -42,7 +42,7 @@ GENERIC: (compile) ( parser -- quot ) ] with-variable ; : parse ( state parser -- result ) - compile call ; + compile execute ; <PRIVATE @@ -260,7 +260,7 @@ TUPLE: delay-parser quot ; M: delay-parser (compile) ( parser -- quot ) [ - delay-parser-quot % \ (compile) , \ call , + delay-parser-quot % \ compile , \ execute , ] [ ] make ; PRIVATE> @@ -334,7 +334,7 @@ PRIVATE> : PEG: (:) [ [ - call compile + call compile 1quotation [ dup [ parse-result-ast ] [ "Parse failed" throw ] if ] append define ] with-compilation-unit From 943b02ab2f1893012ff68af1bef4214f03c4d349 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Sat, 22 Mar 2008 01:59:16 +1300 Subject: [PATCH 3/3] Fix performance regression in pegs delay parser is improved to use a memoized quotation so the construction and compilation of the parser at runtime only occurs once. Changed compile so it would use equality rather than identity for memoization purposes. --- extra/peg/parsers/parsers.factor | 2 +- extra/peg/peg.factor | 50 +++++++++++++++----------------- 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/extra/peg/parsers/parsers.factor b/extra/peg/parsers/parsers.factor index 407729004f..4bba60bb09 100755 --- a/extra/peg/parsers/parsers.factor +++ b/extra/peg/parsers/parsers.factor @@ -20,7 +20,7 @@ M: just-parser (compile) ( parser -- quot ) just-parser-p1 compiled-parser just-pattern curry ; : just ( parser -- parser ) - just-parser construct-boa init-parser ; + just-parser construct-boa ; : 1token ( ch -- parser ) 1string token ; diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 47dc0a3454..1707193e70 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -3,7 +3,7 @@ USING: kernel sequences strings namespaces math assocs shuffle vectors arrays combinators.lib math.parser match unicode.categories sequences.lib compiler.units parser - words quotations ; + words quotations effects memoize ; IN: peg TUPLE: parse-result remaining ast ; @@ -13,20 +13,12 @@ SYMBOL: ignore : <parse-result> ( remaining ast -- parse-result ) parse-result construct-boa ; -TUPLE: parser ; -C: <parser> parser -M: parser equal? eq? ; - -: init-parser ( parser -- parser ) - #! Set the delegate for the parser - <parser> over set-delegate ; - SYMBOL: compiled-parsers GENERIC: (compile) ( parser -- quot ) : compiled-parser ( parser -- word ) - #! Look to see if the given parser has been compied. + #! Look to see if the given parser has been compiled. #! If not, compile it to a temporary word, cache it, #! and return it. Otherwise return the existing one. dup compiled-parsers get at [ @@ -36,7 +28,7 @@ GENERIC: (compile) ( parser -- quot ) [ swap compiled-parsers get set-at ] keep ] if* ; -: compile ( parser -- word ) +MEMO: compile ( parser -- word ) H{ } clone compiled-parsers [ [ compiled-parser ] with-compilation-unit ] with-variable ; @@ -47,6 +39,7 @@ GENERIC: (compile) ( parser -- quot ) <PRIVATE TUPLE: token-parser symbol ; +! M: token-parser equal? eq? ; MATCH-VARS: ?token ; @@ -259,23 +252,28 @@ M: sp-parser (compile) ( parser -- quot ) TUPLE: delay-parser quot ; M: delay-parser (compile) ( parser -- quot ) + #! For efficiency we memoize the quotation. + #! This way it is run only once and the + #! parser constructed once at run time. [ - delay-parser-quot % \ compile , \ execute , - ] [ ] make ; + delay-parser-quot % \ compile , + ] [ ] make + { } { "word" } <effect> memoize-quot + [ % \ execute , ] [ ] make ; PRIVATE> : token ( string -- parser ) - token-parser construct-boa init-parser ; + token-parser construct-boa ; : satisfy ( quot -- parser ) - satisfy-parser construct-boa init-parser ; + satisfy-parser construct-boa ; : range ( min max -- parser ) - range-parser construct-boa init-parser ; + range-parser construct-boa ; : seq ( seq -- parser ) - seq-parser construct-boa init-parser ; + seq-parser construct-boa ; : 2seq ( parser1 parser2 -- parser ) 2array seq ; @@ -290,7 +288,7 @@ PRIVATE> { } make seq ; inline : choice ( seq -- parser ) - choice-parser construct-boa init-parser ; + choice-parser construct-boa ; : 2choice ( parser1 parser2 -- parser ) 2array choice ; @@ -305,31 +303,31 @@ PRIVATE> { } make choice ; inline : repeat0 ( parser -- parser ) - repeat0-parser construct-boa init-parser ; + repeat0-parser construct-boa ; : repeat1 ( parser -- parser ) - repeat1-parser construct-boa init-parser ; + repeat1-parser construct-boa ; : optional ( parser -- parser ) - optional-parser construct-boa init-parser ; + optional-parser construct-boa ; : ensure ( parser -- parser ) - ensure-parser construct-boa init-parser ; + ensure-parser construct-boa ; : ensure-not ( parser -- parser ) - ensure-not-parser construct-boa init-parser ; + ensure-not-parser construct-boa ; : action ( parser quot -- parser ) - action-parser construct-boa init-parser ; + action-parser construct-boa ; : sp ( parser -- parser ) - sp-parser construct-boa init-parser ; + sp-parser construct-boa ; : hide ( parser -- parser ) [ drop ignore ] action ; : delay ( quot -- parser ) - delay-parser construct-boa init-parser ; + delay-parser construct-boa ; : PEG: (:) [