From aacf88a72d67a47edc6b5cde8e139dcf4c124d64 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Tue, 20 Nov 2007 16:36:38 +1300 Subject: [PATCH 01/10] First cut at peg style packrat parser --- extra/peg/peg-docs.factor | 11 +++++ extra/peg/peg-tests.factor | 55 +++++++++++++++++++++++ extra/peg/peg.factor | 92 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 158 insertions(+) create mode 100644 extra/peg/peg-docs.factor create mode 100644 extra/peg/peg-tests.factor create mode 100644 extra/peg/peg.factor diff --git a/extra/peg/peg-docs.factor b/extra/peg/peg-docs.factor new file mode 100644 index 0000000000..ec4a5d304d --- /dev/null +++ b/extra/peg/peg-docs.factor @@ -0,0 +1,11 @@ +! Copyright (C) 2007 Chris Double. +! See http://factorcode.org/license.txt for BSD license. +USING: help.markup help.syntax peg ; + +HELP: token +{ $values + { "string" "a string" } } +{ $description + "A parser generator that returns a parser that matches the given string." } +{ $example "\"begin foo end\" \"begin\" token parse" "result-here" } ; + diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor new file mode 100644 index 0000000000..20e4206357 --- /dev/null +++ b/extra/peg/peg-tests.factor @@ -0,0 +1,55 @@ +! Copyright (C) 2007 Chris Double. +! See http://factorcode.org/license.txt for BSD license. +! +USING: kernel tools.test strings namespaces arrays peg ; +IN: temporary + +{ 0 1 2 } [ + 0 next-id set-global get-next-id get-next-id get-next-id +] unit-test + +{ "0123456789" } [ + "0123456789" 0 0 state-tail parse-state-input >string +] unit-test + +{ "56789" } [ + "0123456789" 5 0 state-tail parse-state-input >string +] unit-test + +{ "789" } [ + "0123456789" 5 2 state-tail parse-state-input >string +] unit-test + +{ f } [ + "endbegin" 0 "begin" token parse +] unit-test + +{ "begin" "begin" "end" } [ + "beginend" 0 "begin" token parse + { parse-result-matched parse-result-ast parse-result-remaining } get-slots + parse-state-input >string +] unit-test + +{ f } [ + "" 0 CHAR: a CHAR: z range parse +] unit-test + +{ f } [ + "1bcd" 0 CHAR: a CHAR: z range parse +] unit-test + +{ CHAR: a } [ + "abcd" 0 CHAR: a CHAR: z range parse parse-result-ast +] unit-test + +{ CHAR: z } [ + "zbcd" 0 CHAR: a CHAR: z range parse parse-result-ast +] unit-test + +{ f } [ + "bad" 0 "a" token "b" token 2array seq parse +] unit-test + +{ "go" } [ + "good" 0 "g" token "o" token 2array seq parse parse-result-matched +] unit-test diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor new file mode 100644 index 0000000000..f5c3f4ab3e --- /dev/null +++ b/extra/peg/peg.factor @@ -0,0 +1,92 @@ +! Copyright (C) 2007 Chris Double. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel sequences strings namespaces math assocs combinators.lib ; +IN: peg + +TUPLE: parse-state input cache ; + +: ( input index -- state ) + tail-slice { set-parse-state-input } parse-state construct ; + +: get-cached ( pid state -- result ) + tuck parse-state-cache at [ + swap parse-state-input slice-from swap nth + ] [ + drop f + ] if* ; + +: state-tail ( state n -- state ) + dupd [ parse-state-cache ] dipd + [ parse-state-input ] dip tail-slice + { set-parse-state-cache set-parse-state-input } parse-state construct ; + +TUPLE: parse-result remaining matched ast ; + +: ( remaining matched ast -- parse-result ) + parse-result construct-boa ; + +SYMBOL: next-id + +: get-next-id ( -- number ) + next-id get-global 0 or dup 1+ next-id set-global ; + +TUPLE: parser id ; + +: init-parser ( parser -- parser ) + get-next-id parser construct-boa over set-delegate ; + +GENERIC: parse ( state parser -- result ) + +TUPLE: token-parser symbol ; + +M: token-parser parse ( state parser -- result ) + token-parser-symbol 2dup >r parse-state-input r> head? [ + dup >r length state-tail r> dup + ] [ + 2drop f + ] if ; + +: token ( string -- parser ) + token-parser construct-boa init-parser ; + +TUPLE: range-parser min max ; + +M: range-parser parse ( state parser -- result ) + over parse-state-input empty? [ + 2drop f + ] [ + 0 pick parse-state-input nth dup rot + { range-parser-min range-parser-max } get-slots between? [ + [ 1 state-tail ] dip dup + ] [ + 2drop f + ] if + ] if ; + +: range ( min max -- parser ) + range-parser construct-boa init-parser ; + +TUPLE: seq-parser parsers ; + +: do-seq-parser ( result parser -- result ) + [ dup parse-result-remaining ] dip parse [ + [ parse-result-remaining swap set-parse-result-remaining ] 2keep + [ parse-result-ast swap parse-result-ast push ] 2keep + parse-result-matched swap [ parse-result-matched swap append ] keep [ set-parse-result-matched ] keep + + ] [ + drop f + ] if* ; + +: (seq-parser) ( result parsers -- result ) + dup empty? not pick and [ + unclip swap [ do-seq-parser ] dip (seq-parser) + ] [ + drop + ] if ; + +M: seq-parser parse ( state parser -- result ) + seq-parser-parsers [ "" V{ } clone ] dip (seq-parser) ; + +: seq ( seq -- parser ) + seq-parser construct-boa init-parser ; From 2d3fe08403e807febf7c62c6547d90bc2aec0926 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Tue, 20 Nov 2007 17:58:11 +1300 Subject: [PATCH 02/10] Add choice parser --- extra/peg/peg-tests.factor | 16 ++++++++++++++++ extra/peg/peg.factor | 22 ++++++++++++++++++++-- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor index 20e4206357..d95233b899 100644 --- a/extra/peg/peg-tests.factor +++ b/extra/peg/peg-tests.factor @@ -53,3 +53,19 @@ IN: temporary { "go" } [ "good" 0 "g" token "o" token 2array seq parse parse-result-matched ] unit-test + +{ "a" } [ + "abcd" 0 "a" token "b" token 2array choice parse parse-result-matched +] unit-test + +{ "b" } [ + "bbcd" 0 "a" token "b" token 2array choice parse parse-result-matched +] unit-test + +{ f } [ + "cbcd" 0 "a" token "b" token 2array choice parse +] unit-test + +{ f } [ + "" 0 "a" token "b" token 2array choice parse +] unit-test \ No newline at end of file diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index f5c3f4ab3e..7965424ddd 100644 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -72,8 +72,7 @@ TUPLE: seq-parser parsers ; [ dup parse-result-remaining ] dip parse [ [ parse-result-remaining swap set-parse-result-remaining ] 2keep [ parse-result-ast swap parse-result-ast push ] 2keep - parse-result-matched swap [ parse-result-matched swap append ] keep [ set-parse-result-matched ] keep - + parse-result-matched swap [ parse-result-matched swap append ] keep [ set-parse-result-matched ] keep ] [ drop f ] if* ; @@ -90,3 +89,22 @@ M: seq-parser parse ( state parser -- result ) : seq ( seq -- parser ) seq-parser construct-boa init-parser ; + +TUPLE: choice-parser parsers ; + +: (choice-parser) ( state parsers -- result ) + dup empty? [ + 2drop f + ] [ + unclip pick swap parse [ + 2nip + ] [ + (choice-parser) + ] if* + ] if ; + +M: choice-parser parse ( state parser -- result ) + choice-parser-parsers (choice-parser) ; + +: choice ( seq -- parser ) + choice-parser construct-boa init-parser ; From 691c62501f71ae4163ccda89751b27a4ba720f6d Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 21 Nov 2007 15:01:44 +1300 Subject: [PATCH 03/10] add repeat0 and repeat1 --- extra/peg/peg-tests.factor | 28 ++++++++++++++++++++++++++-- extra/peg/peg.factor | 36 +++++++++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 3 deletions(-) diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor index d95233b899..8c496a2b95 100644 --- a/extra/peg/peg-tests.factor +++ b/extra/peg/peg-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test strings namespaces arrays peg ; +USING: kernel tools.test strings namespaces arrays sequences peg ; IN: temporary { 0 1 2 } [ @@ -68,4 +68,28 @@ IN: temporary { f } [ "" 0 "a" token "b" token 2array choice parse -] unit-test \ No newline at end of file +] unit-test + +{ 0 } [ + "" 0 "a" token repeat0 parse parse-result-ast length +] unit-test + +{ 0 } [ + "b" 0 "a" token repeat0 parse parse-result-ast length +] unit-test + +{ "aaa" } [ + "aaab" 0 "a" token repeat0 parse parse-result-matched +] unit-test + +{ f } [ + "" 0 "a" token repeat1 parse +] unit-test + +{ f } [ + "b" 0 "a" token repeat1 parse +] unit-test + +{ "aaa" } [ + "aaab" 0 "a" token repeat1 parse parse-result-matched +] unit-test diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 7965424ddd..9bda5a358b 100644 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel sequences strings namespaces math assocs combinators.lib ; +USING: kernel sequences strings namespaces math assocs shuffle combinators.lib ; IN: peg TUPLE: parse-state input cache ; @@ -108,3 +108,37 @@ M: choice-parser parse ( state parser -- result ) : choice ( seq -- parser ) choice-parser construct-boa init-parser ; + +TUPLE: repeat0-parser p1 ; + +: (repeat-parser) ( parser result -- result ) + 2dup parse-result-remaining swap parse [ + [ parse-result-remaining swap set-parse-result-remaining ] 2keep + [ parse-result-ast swap parse-result-ast push ] 2keep + parse-result-matched swap [ parse-result-matched swap append ] keep [ set-parse-result-matched ] keep + (repeat-parser) + ] [ + nip + ] if* ; + +: clone-result ( result -- result ) + { parse-result-remaining parse-result-matched parse-result-ast } + get-slots V{ } clone-like ; + +M: repeat0-parser parse ( state parser -- result ) + repeat0-parser-p1 2dup parse [ + nipd clone-result (repeat-parser) + ] [ + drop "" V{ } clone + ] if* ; + +: repeat0 ( parser -- parser ) + repeat0-parser construct-boa init-parser ; + +TUPLE: repeat1-parser p1 ; + +M: repeat1-parser parse ( state parser -- result ) + repeat1-parser-p1 tuck parse dup [ clone-result (repeat-parser) ] [ nip ] if ; + +: repeat1 ( parser -- parser ) + repeat1-parser construct-boa init-parser ; From e9df13dad58a7a6cc25eaab61a15b80850849f81 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 21 Nov 2007 15:31:23 +1300 Subject: [PATCH 04/10] remove match from parse results --- extra/peg/peg-tests.factor | 20 ++++++++++---------- extra/peg/peg.factor | 24 +++++++++++------------- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor index 8c496a2b95..82d993af3c 100644 --- a/extra/peg/peg-tests.factor +++ b/extra/peg/peg-tests.factor @@ -24,9 +24,9 @@ IN: temporary "endbegin" 0 "begin" token parse ] unit-test -{ "begin" "begin" "end" } [ +{ "begin" "end" } [ "beginend" 0 "begin" token parse - { parse-result-matched parse-result-ast parse-result-remaining } get-slots + { parse-result-ast parse-result-remaining } get-slots parse-state-input >string ] unit-test @@ -50,16 +50,16 @@ IN: temporary "bad" 0 "a" token "b" token 2array seq parse ] unit-test -{ "go" } [ - "good" 0 "g" token "o" token 2array seq parse parse-result-matched +{ V{ "g" "o" } } [ + "good" 0 "g" token "o" token 2array seq parse parse-result-ast ] unit-test { "a" } [ - "abcd" 0 "a" token "b" token 2array choice parse parse-result-matched + "abcd" 0 "a" token "b" token 2array choice parse parse-result-ast ] unit-test { "b" } [ - "bbcd" 0 "a" token "b" token 2array choice parse parse-result-matched + "bbcd" 0 "a" token "b" token 2array choice parse parse-result-ast ] unit-test { f } [ @@ -78,8 +78,8 @@ IN: temporary "b" 0 "a" token repeat0 parse parse-result-ast length ] unit-test -{ "aaa" } [ - "aaab" 0 "a" token repeat0 parse parse-result-matched +{ V{ "a" "a" "a" } } [ + "aaab" 0 "a" token repeat0 parse parse-result-ast ] unit-test { f } [ @@ -90,6 +90,6 @@ IN: temporary "b" 0 "a" token repeat1 parse ] unit-test -{ "aaa" } [ - "aaab" 0 "a" token repeat1 parse parse-result-matched +{ V{ "a" "a" "a" } } [ + "aaab" 0 "a" token repeat1 parse parse-result-ast ] unit-test diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 9bda5a358b..4a43c1b965 100644 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel sequences strings namespaces math assocs shuffle combinators.lib ; +USING: kernel sequences strings namespaces math assocs shuffle vectors combinators.lib ; IN: peg TUPLE: parse-state input cache ; @@ -20,9 +20,9 @@ TUPLE: parse-state input cache ; [ parse-state-input ] dip tail-slice { set-parse-state-cache set-parse-state-input } parse-state construct ; -TUPLE: parse-result remaining matched ast ; +TUPLE: parse-result remaining ast ; -: ( remaining matched ast -- parse-result ) +: ( remaining ast -- parse-result ) parse-result construct-boa ; SYMBOL: next-id @@ -41,7 +41,7 @@ TUPLE: token-parser symbol ; M: token-parser parse ( state parser -- result ) token-parser-symbol 2dup >r parse-state-input r> head? [ - dup >r length state-tail r> dup + dup >r length state-tail r> ] [ 2drop f ] if ; @@ -57,7 +57,7 @@ M: range-parser parse ( state parser -- result ) ] [ 0 pick parse-state-input nth dup rot { range-parser-min range-parser-max } get-slots between? [ - [ 1 state-tail ] dip dup + [ 1 state-tail ] dip ] [ 2drop f ] if @@ -71,8 +71,7 @@ TUPLE: seq-parser parsers ; : do-seq-parser ( result parser -- result ) [ dup parse-result-remaining ] dip parse [ [ parse-result-remaining swap set-parse-result-remaining ] 2keep - [ parse-result-ast swap parse-result-ast push ] 2keep - parse-result-matched swap [ parse-result-matched swap append ] keep [ set-parse-result-matched ] keep + parse-result-ast swap [ parse-result-ast push ] keep ] [ drop f ] if* ; @@ -85,7 +84,7 @@ TUPLE: seq-parser parsers ; ] if ; M: seq-parser parse ( state parser -- result ) - seq-parser-parsers [ "" V{ } clone ] dip (seq-parser) ; + seq-parser-parsers [ V{ } clone ] dip (seq-parser) ; : seq ( seq -- parser ) seq-parser construct-boa init-parser ; @@ -114,22 +113,21 @@ TUPLE: repeat0-parser p1 ; : (repeat-parser) ( parser result -- result ) 2dup parse-result-remaining swap parse [ [ parse-result-remaining swap set-parse-result-remaining ] 2keep - [ parse-result-ast swap parse-result-ast push ] 2keep - parse-result-matched swap [ parse-result-matched swap append ] keep [ set-parse-result-matched ] keep + parse-result-ast swap [ parse-result-ast push ] keep (repeat-parser) ] [ nip ] if* ; : clone-result ( result -- result ) - { parse-result-remaining parse-result-matched parse-result-ast } - get-slots V{ } clone-like ; + { parse-result-remaining parse-result-ast } + get-slots 1vector ; M: repeat0-parser parse ( state parser -- result ) repeat0-parser-p1 2dup parse [ nipd clone-result (repeat-parser) ] [ - drop "" V{ } clone + drop V{ } clone ] if* ; : repeat0 ( parser -- parser ) From ffa71ef86f608e4a3f22dd3d146260a55727d13d Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 21 Nov 2007 15:50:47 +1300 Subject: [PATCH 05/10] add optional parser --- extra/peg/peg-tests.factor | 12 ++++++++++++ extra/peg/peg.factor | 10 +++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor index 82d993af3c..b10fcb8e55 100644 --- a/extra/peg/peg-tests.factor +++ b/extra/peg/peg-tests.factor @@ -93,3 +93,15 @@ IN: temporary { V{ "a" "a" "a" } } [ "aaab" 0 "a" token repeat1 parse parse-result-ast ] unit-test + +{ V{ "a" "b" } } [ + "ab" 0 "a" token optional "b" token 2array seq parse parse-result-ast +] unit-test + +{ V{ f "b" } } [ + "b" 0 "a" token optional "b" token 2array seq parse parse-result-ast +] unit-test + +{ f } [ + "cb" 0 "a" token optional "b" token 2array seq parse +] unit-test \ No newline at end of file diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 4a43c1b965..e2f0cfd1b2 100644 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -136,7 +136,15 @@ M: repeat0-parser parse ( state parser -- result ) TUPLE: repeat1-parser p1 ; M: repeat1-parser parse ( state parser -- result ) - repeat1-parser-p1 tuck parse dup [ clone-result (repeat-parser) ] [ nip ] if ; + repeat1-parser-p1 tuck parse dup [ clone-result (repeat-parser) ] [ nip ] if ; : repeat1 ( parser -- parser ) repeat1-parser construct-boa init-parser ; + +TUPLE: optional-parser p1 ; + +M: optional-parser parse ( state parser -- result ) + dupd optional-parser-p1 parse swap f or ; + +: optional ( parser -- parser ) + optional-parser construct-boa init-parser ; From 129f68d428f548bb20f28272718aa77074704510 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 21 Nov 2007 16:06:02 +1300 Subject: [PATCH 06/10] add ensure parser --- extra/peg/peg-tests.factor | 8 ++++++++ extra/peg/peg.factor | 16 +++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor index b10fcb8e55..b7977285c4 100644 --- a/extra/peg/peg-tests.factor +++ b/extra/peg/peg-tests.factor @@ -104,4 +104,12 @@ IN: temporary { f } [ "cb" 0 "a" token optional "b" token 2array seq parse +] unit-test + +{ V{ CHAR: a CHAR: b } } [ + "ab" 0 "a" token ensure CHAR: a CHAR: z range dup 3array seq parse parse-result-ast +] unit-test + +{ f } [ + "bb" 0 "a" token ensure CHAR: a CHAR: z range 2array seq parse ] unit-test \ No newline at end of file diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index e2f0cfd1b2..239af02d26 100644 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -3,6 +3,8 @@ USING: kernel sequences strings namespaces math assocs shuffle vectors combinators.lib ; IN: peg +SYMBOL: ignore + TUPLE: parse-state input cache ; : ( input index -- state ) @@ -71,7 +73,7 @@ TUPLE: seq-parser parsers ; : do-seq-parser ( result parser -- result ) [ dup parse-result-remaining ] dip parse [ [ parse-result-remaining swap set-parse-result-remaining ] 2keep - parse-result-ast swap [ parse-result-ast push ] keep + parse-result-ast dup ignore = [ drop ] [ swap [ parse-result-ast push ] keep ] if ] [ drop f ] if* ; @@ -148,3 +150,15 @@ M: optional-parser parse ( state parser -- result ) : optional ( parser -- parser ) optional-parser construct-boa init-parser ; + +TUPLE: ensure-parser p1 ; + +M: ensure-parser parse ( state parser -- result ) + dupd ensure-parser-p1 parse [ + ignore + ] [ + drop f + ] if ; + +: ensure ( parser -- parser ) + ensure-parser construct-boa init-parser ; From 2a464ea2c65bb14782066b00789b63567a77c308 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 21 Nov 2007 16:11:49 +1300 Subject: [PATCH 07/10] add ensure-not parser --- extra/peg/peg-tests.factor | 24 ++++++++++++++++++++++++ extra/peg/peg.factor | 12 ++++++++++++ 2 files changed, 36 insertions(+) diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor index b7977285c4..94f70d32ad 100644 --- a/extra/peg/peg-tests.factor +++ b/extra/peg/peg-tests.factor @@ -112,4 +112,28 @@ IN: temporary { f } [ "bb" 0 "a" token ensure CHAR: a CHAR: z range 2array seq parse +] unit-test + +{ t } [ + "a+b" 0 + "a" token "+" token dup ensure-not 2array seq "++" token 2array choice "b" token 3array seq + parse [ t ] [ f ] if +] unit-test + +{ t } [ + "a++b" 0 + "a" token "+" token dup ensure-not 2array seq "++" token 2array choice "b" token 3array seq + parse [ t ] [ f ] if +] unit-test + +{ t } [ + "a+b" 0 + "a" token "+" token "++" token 2array choice "b" token 3array seq + parse [ t ] [ f ] if +] unit-test + +{ f } [ + "a++b" 0 + "a" token "+" token "++" token 2array choice "b" token 3array seq + parse [ t ] [ f ] if ] unit-test \ No newline at end of file diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 239af02d26..82c4505ae7 100644 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -162,3 +162,15 @@ M: ensure-parser parse ( state parser -- result ) : ensure ( parser -- parser ) ensure-parser construct-boa init-parser ; + +TUPLE: ensure-not-parser p1 ; + +M: ensure-not-parser parse ( state parser -- result ) + dupd ensure-not-parser-p1 parse [ + drop f + ] [ + ignore + ] if ; + +: ensure-not ( parser -- parser ) + ensure-not-parser construct-boa init-parser ; From 167f2d716d323bff0a1c82ab8e5a1de6f9360306 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 21 Nov 2007 16:21:23 +1300 Subject: [PATCH 08/10] add action parser --- extra/peg/peg-tests.factor | 12 ++++++++++++ extra/peg/peg.factor | 13 +++++++++++++ 2 files changed, 25 insertions(+) diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor index 94f70d32ad..a60d1eaaf0 100644 --- a/extra/peg/peg-tests.factor +++ b/extra/peg/peg-tests.factor @@ -136,4 +136,16 @@ IN: temporary "a++b" 0 "a" token "+" token "++" token 2array choice "b" token 3array seq parse [ t ] [ f ] if +] unit-test + +{ 1 } [ + "a" 0 "a" token [ drop 1 ] action parse parse-result-ast +] unit-test + +{ V{ 1 1 } } [ + "aa" 0 "a" token [ drop 1 ] action dup 2array seq parse parse-result-ast +] unit-test + +{ f } [ + "b" 0 "a" token [ drop 1 ] action parse ] unit-test \ No newline at end of file diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 82c4505ae7..2c985b68dc 100644 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -174,3 +174,16 @@ M: ensure-not-parser parse ( state parser -- result ) : ensure-not ( parser -- parser ) ensure-not-parser construct-boa init-parser ; + +TUPLE: action-parser p1 quot ; + +M: action-parser parse ( state parser -- result ) + tuck action-parser-p1 parse dup [ + dup parse-result-ast rot action-parser-quot call + swap [ set-parse-result-ast ] keep + ] [ + nip + ] if ; + +: action ( parser quot -- parser ) + action-parser construct-boa init-parser ; From 6476eb765ea4a7fe9cbfdb2943be305dae0b2ef9 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Tue, 27 Nov 2007 11:57:08 +1300 Subject: [PATCH 09/10] remove parse-state from peg --- extra/peg/peg-tests.factor | 74 ++++++++++++++++---------------------- extra/peg/peg.factor | 27 +++----------- 2 files changed, 36 insertions(+), 65 deletions(-) diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor index a60d1eaaf0..9becc81b56 100644 --- a/extra/peg/peg-tests.factor +++ b/extra/peg/peg-tests.factor @@ -8,144 +8,132 @@ IN: temporary 0 next-id set-global get-next-id get-next-id get-next-id ] unit-test -{ "0123456789" } [ - "0123456789" 0 0 state-tail parse-state-input >string -] unit-test - -{ "56789" } [ - "0123456789" 5 0 state-tail parse-state-input >string -] unit-test - -{ "789" } [ - "0123456789" 5 2 state-tail parse-state-input >string -] unit-test - { f } [ - "endbegin" 0 "begin" token parse + "endbegin" "begin" token parse ] unit-test { "begin" "end" } [ - "beginend" 0 "begin" token parse + "beginend" "begin" token parse { parse-result-ast parse-result-remaining } get-slots - parse-state-input >string + >string ] unit-test { f } [ - "" 0 CHAR: a CHAR: z range parse + "" CHAR: a CHAR: z range parse ] unit-test { f } [ - "1bcd" 0 CHAR: a CHAR: z range parse + "1bcd" CHAR: a CHAR: z range parse ] unit-test { CHAR: a } [ - "abcd" 0 CHAR: a CHAR: z range parse parse-result-ast + "abcd" CHAR: a CHAR: z range parse parse-result-ast ] unit-test { CHAR: z } [ - "zbcd" 0 CHAR: a CHAR: z range parse parse-result-ast + "zbcd" CHAR: a CHAR: z range parse parse-result-ast ] unit-test { f } [ - "bad" 0 "a" token "b" token 2array seq parse + "bad" "a" token "b" token 2array seq parse ] unit-test { V{ "g" "o" } } [ - "good" 0 "g" token "o" token 2array seq parse parse-result-ast + "good" "g" token "o" token 2array seq parse parse-result-ast ] unit-test { "a" } [ - "abcd" 0 "a" token "b" token 2array choice parse parse-result-ast + "abcd" "a" token "b" token 2array choice parse parse-result-ast ] unit-test { "b" } [ - "bbcd" 0 "a" token "b" token 2array choice parse parse-result-ast + "bbcd" "a" token "b" token 2array choice parse parse-result-ast ] unit-test { f } [ - "cbcd" 0 "a" token "b" token 2array choice parse + "cbcd" "a" token "b" token 2array choice parse ] unit-test { f } [ - "" 0 "a" token "b" token 2array choice parse + "" "a" token "b" token 2array choice parse ] unit-test { 0 } [ - "" 0 "a" token repeat0 parse parse-result-ast length + "" "a" token repeat0 parse parse-result-ast length ] unit-test { 0 } [ - "b" 0 "a" token repeat0 parse parse-result-ast length + "b" "a" token repeat0 parse parse-result-ast length ] unit-test { V{ "a" "a" "a" } } [ - "aaab" 0 "a" token repeat0 parse parse-result-ast + "aaab" "a" token repeat0 parse parse-result-ast ] unit-test { f } [ - "" 0 "a" token repeat1 parse + "" "a" token repeat1 parse ] unit-test { f } [ - "b" 0 "a" token repeat1 parse + "b" "a" token repeat1 parse ] unit-test { V{ "a" "a" "a" } } [ - "aaab" 0 "a" token repeat1 parse parse-result-ast + "aaab" "a" token repeat1 parse parse-result-ast ] unit-test { V{ "a" "b" } } [ - "ab" 0 "a" token optional "b" token 2array seq parse parse-result-ast + "ab" "a" token optional "b" token 2array seq parse parse-result-ast ] unit-test { V{ f "b" } } [ - "b" 0 "a" token optional "b" token 2array seq parse parse-result-ast + "b" "a" token optional "b" token 2array seq parse parse-result-ast ] unit-test { f } [ - "cb" 0 "a" token optional "b" token 2array seq parse + "cb" "a" token optional "b" token 2array seq parse ] unit-test { V{ CHAR: a CHAR: b } } [ - "ab" 0 "a" token ensure CHAR: a CHAR: z range dup 3array seq parse parse-result-ast + "ab" "a" token ensure CHAR: a CHAR: z range dup 3array seq parse parse-result-ast ] unit-test { f } [ - "bb" 0 "a" token ensure CHAR: a CHAR: z range 2array seq parse + "bb" "a" token ensure CHAR: a CHAR: z range 2array seq parse ] unit-test { t } [ - "a+b" 0 + "a+b" "a" token "+" token dup ensure-not 2array seq "++" token 2array choice "b" token 3array seq parse [ t ] [ f ] if ] unit-test { t } [ - "a++b" 0 + "a++b" "a" token "+" token dup ensure-not 2array seq "++" token 2array choice "b" token 3array seq parse [ t ] [ f ] if ] unit-test { t } [ - "a+b" 0 + "a+b" "a" token "+" token "++" token 2array choice "b" token 3array seq parse [ t ] [ f ] if ] unit-test { f } [ - "a++b" 0 + "a++b" "a" token "+" token "++" token 2array choice "b" token 3array seq parse [ t ] [ f ] if ] unit-test { 1 } [ - "a" 0 "a" token [ drop 1 ] action parse parse-result-ast + "a" "a" token [ drop 1 ] action parse parse-result-ast ] unit-test { V{ 1 1 } } [ - "aa" 0 "a" token [ drop 1 ] action dup 2array seq parse parse-result-ast + "aa" "a" token [ drop 1 ] action dup 2array seq parse parse-result-ast ] unit-test { f } [ - "b" 0 "a" token [ drop 1 ] action parse + "b" "a" token [ drop 1 ] action parse ] unit-test \ No newline at end of file diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 2c985b68dc..34c17448fb 100644 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -5,23 +5,6 @@ IN: peg SYMBOL: ignore -TUPLE: parse-state input cache ; - -: ( input index -- state ) - tail-slice { set-parse-state-input } parse-state construct ; - -: get-cached ( pid state -- result ) - tuck parse-state-cache at [ - swap parse-state-input slice-from swap nth - ] [ - drop f - ] if* ; - -: state-tail ( state n -- state ) - dupd [ parse-state-cache ] dipd - [ parse-state-input ] dip tail-slice - { set-parse-state-cache set-parse-state-input } parse-state construct ; - TUPLE: parse-result remaining ast ; : ( remaining ast -- parse-result ) @@ -42,8 +25,8 @@ GENERIC: parse ( state parser -- result ) TUPLE: token-parser symbol ; M: token-parser parse ( state parser -- result ) - token-parser-symbol 2dup >r parse-state-input r> head? [ - dup >r length state-tail r> + token-parser-symbol 2dup head? [ + dup >r length tail-slice r> ] [ 2drop f ] if ; @@ -54,12 +37,12 @@ M: token-parser parse ( state parser -- result ) TUPLE: range-parser min max ; M: range-parser parse ( state parser -- result ) - over parse-state-input empty? [ + over empty? [ 2drop f ] [ - 0 pick parse-state-input nth dup rot + 0 pick nth dup rot { range-parser-min range-parser-max } get-slots between? [ - [ 1 state-tail ] dip + [ 1 tail-slice ] dip ] [ 2drop f ] if From 99b39e03511df0aa979cd1ad8daa0dd2f0e7561c Mon Sep 17 00:00:00 2001 From: Chris Double Date: Tue, 27 Nov 2007 12:22:33 +1300 Subject: [PATCH 10/10] Some help for pegs --- extra/peg/peg-docs.factor | 108 +++++++++++++++++++++++++++++++++++-- extra/peg/peg-tests.factor | 2 +- extra/peg/peg.factor | 66 ++++++++++++----------- 3 files changed, 141 insertions(+), 35 deletions(-) diff --git a/extra/peg/peg-docs.factor b/extra/peg/peg-docs.factor index ec4a5d304d..40743132f3 100644 --- a/extra/peg/peg-docs.factor +++ b/extra/peg/peg-docs.factor @@ -2,10 +2,112 @@ ! See http://factorcode.org/license.txt for BSD license. USING: help.markup help.syntax peg ; +HELP: parse +{ $values + { "string" "a string" } + { "parse" "a parser" } + { "result" "a or f" } +} +{ $description + "Given the input string, parse it using the given parser. The result is a object if " + "the parse was successful, otherwise it is f." } ; + HELP: token { $values - { "string" "a string" } } + { "string" "a string" } + { "parser" "a parser" } +} { $description - "A parser generator that returns a parser that matches the given string." } -{ $example "\"begin foo end\" \"begin\" token parse" "result-here" } ; + "Returns a parser that matches the given string." } ; + +HELP: range +{ $values + { "min" "a character" } + { "max" "a character" } + { "parser" "a parser" } +} +{ $description + "Returns a parser that matches a single character that lies within the range of characters given, inclusive." } +{ $example ": digit ( -- parser ) CHAR: 0 CHAR: 9 range ;" } ; + +HELP: seq +{ $values + { "seq" "a sequence of parsers" } + { "parser" "a parser" } +} +{ $description + "Returns a parser that calls all parsers in the given sequence, in order. The parser succeeds if " + "all the parsers succeed, otherwise it fails. The AST produced is a sequence of the AST produced by " + "the individual parsers." } ; + +HELP: choice +{ $values + { "seq" "a sequence of parsers" } + { "parser" "a parser" } +} +{ $description + "Returns a parser that will try all the parsers in the sequence, in order, until one succeeds. " + "The resulting AST is that produced by the successful parser." } ; + +HELP: repeat0 +{ $values + { "p1" "a parser" } + { "p2" "a parser" } +} +{ $description + "Returns a parser that parses 0 or more instances of the 'p1' parser. The AST produced is " + "an array of the AST produced by the 'p1' parser. An empty array indicates 0 instances were " + "parsed." } ; + +HELP: repeat1 +{ $values + { "p1" "a parser" } + { "p2" "a parser" } +} +{ $description + "Returns a parser that parses 1 or more instances of the 'p1' parser. The AST produced is " + "an array of the AST produced by the 'p1' parser." } ; + +HELP: optional +{ $values + { "p1" "a parser" } + { "p2" "a parser" } +} +{ $description + "Returns a parser that parses 0 or 1 instances of the 'p1' parser. The AST produced is " + "'f' if 0 instances are parsed the AST produced is 'f', otherwise it is the AST produced by 'p1'." } ; + +HELP: ensure +{ $values + { "p1" "a parser" } + { "p2" "a parser" } +} +{ $description + "Returns a parser that succeeds if the 'p1' parser succeeds but does not add anything to the " + "AST and does not move the location in the input string. This can be used for lookahead and " + "disambiguation, along with the " { $link ensure-not } " word." } +{ $example "\"0\" token ensure octal-parser" } ; + +HELP: ensure-not +{ $values + { "p1" "a parser" } + { "p2" "a parser" } +} +{ $description + "Returns a parser that succeeds if the 'p1' parser fails but does not add anything to the " + "AST and does not move the location in the input string. This can be used for lookahead and " + "disambiguation, along with the " { $link ensure } " word." } +{ $example "\"+\" token \"=\" token ensure-not \"+=\" token 3array seq" } ; + +HELP: action +{ $values + { "p1" "a parser" } + { "quot" "a quotation with stack effect ( ast -- ast )" } +} +{ $description + "Returns a parser that calls the 'p1' parser and applies the quotation to the AST resulting " + "from that parse. The result of the quotation is then used as the final AST. This can be used " + "for manipulating the parse tree to produce a AST better suited for the task at hand rather than " + "the default AST." } +{ $example "CHAR: 0 CHAR: 9 range [ to-digit ] action" } ; diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor index 9becc81b56..7648819a8c 100644 --- a/extra/peg/peg-tests.factor +++ b/extra/peg/peg-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test strings namespaces arrays sequences peg ; +USING: kernel tools.test strings namespaces arrays sequences peg peg.private ; IN: temporary { 0 1 2 } [ diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 34c17448fb..1fb8e7860d 100644 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -3,10 +3,14 @@ USING: kernel sequences strings namespaces math assocs shuffle vectors combinators.lib ; IN: peg -SYMBOL: ignore - TUPLE: parse-result remaining ast ; +GENERIC: parse ( state parser -- result ) + + ( remaining ast -- parse-result ) parse-result construct-boa ; @@ -20,8 +24,6 @@ TUPLE: parser id ; : init-parser ( parser -- parser ) get-next-id parser construct-boa over set-delegate ; -GENERIC: parse ( state parser -- result ) - TUPLE: token-parser symbol ; M: token-parser parse ( state parser -- result ) @@ -31,9 +33,6 @@ M: token-parser parse ( state parser -- result ) 2drop f ] if ; -: token ( string -- parser ) - token-parser construct-boa init-parser ; - TUPLE: range-parser min max ; M: range-parser parse ( state parser -- result ) @@ -48,9 +47,6 @@ M: range-parser parse ( state parser -- result ) ] if ] if ; -: range ( min max -- parser ) - range-parser construct-boa init-parser ; - TUPLE: seq-parser parsers ; : do-seq-parser ( result parser -- result ) @@ -71,9 +67,6 @@ TUPLE: seq-parser parsers ; M: seq-parser parse ( state parser -- result ) seq-parser-parsers [ V{ } clone ] dip (seq-parser) ; -: seq ( seq -- parser ) - seq-parser construct-boa init-parser ; - TUPLE: choice-parser parsers ; : (choice-parser) ( state parsers -- result ) @@ -90,9 +83,6 @@ TUPLE: choice-parser parsers ; M: choice-parser parse ( state parser -- result ) choice-parser-parsers (choice-parser) ; -: choice ( seq -- parser ) - choice-parser construct-boa init-parser ; - TUPLE: repeat0-parser p1 ; : (repeat-parser) ( parser result -- result ) @@ -115,25 +105,16 @@ M: repeat0-parser parse ( state parser -- result ) drop V{ } clone ] if* ; -: repeat0 ( parser -- parser ) - repeat0-parser construct-boa init-parser ; - TUPLE: repeat1-parser p1 ; M: repeat1-parser parse ( state parser -- result ) repeat1-parser-p1 tuck parse dup [ clone-result (repeat-parser) ] [ nip ] if ; -: repeat1 ( parser -- parser ) - repeat1-parser construct-boa init-parser ; - TUPLE: optional-parser p1 ; M: optional-parser parse ( state parser -- result ) dupd optional-parser-p1 parse swap f or ; -: optional ( parser -- parser ) - optional-parser construct-boa init-parser ; - TUPLE: ensure-parser p1 ; M: ensure-parser parse ( state parser -- result ) @@ -143,9 +124,6 @@ M: ensure-parser parse ( state parser -- result ) drop f ] if ; -: ensure ( parser -- parser ) - ensure-parser construct-boa init-parser ; - TUPLE: ensure-not-parser p1 ; M: ensure-not-parser parse ( state parser -- result ) @@ -155,9 +133,6 @@ M: ensure-not-parser parse ( state parser -- result ) ignore ] if ; -: ensure-not ( parser -- parser ) - ensure-not-parser construct-boa init-parser ; - TUPLE: action-parser p1 quot ; M: action-parser parse ( state parser -- result ) @@ -168,5 +143,34 @@ M: action-parser parse ( state parser -- result ) nip ] if ; +PRIVATE> + +: token ( string -- parser ) + token-parser construct-boa init-parser ; + +: range ( min max -- parser ) + range-parser construct-boa init-parser ; + +: seq ( seq -- parser ) + seq-parser construct-boa init-parser ; + +: choice ( seq -- parser ) + choice-parser construct-boa init-parser ; + +: repeat0 ( parser -- parser ) + repeat0-parser construct-boa init-parser ; + +: repeat1 ( parser -- parser ) + repeat1-parser construct-boa init-parser ; + +: optional ( parser -- parser ) + optional-parser construct-boa init-parser ; + +: ensure ( parser -- parser ) + ensure-parser construct-boa init-parser ; + +: ensure-not ( parser -- parser ) + ensure-not-parser construct-boa init-parser ; + : action ( parser quot -- parser ) action-parser construct-boa init-parser ;