diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 0eeab7c4dc..a308b9af52 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -9,27 +9,91 @@ IN: temporary ] unit-test { T{ ebnf-terminal f "55" } } [ - "\"55\"" 'terminal' parse parse-result-ast + "'55'" 'terminal' parse parse-result-ast +] unit-test + +{ + T{ ebnf-rule f + "digit" + V{ + T{ ebnf-choice f + V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } } + } + f + } + } +} [ + "digit = '1' | '2'" 'rule' parse parse-result-ast ] unit-test { T{ ebnf-rule f "digit" - T{ ebnf-choice f - V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } } + V{ + T{ ebnf-sequence f + V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } } + } + f } - } + } } [ - "digit = \"1\" | \"2\"" 'rule' parse parse-result-ast + "digit = '1' '2'" 'rule' parse parse-result-ast ] unit-test { - T{ ebnf-rule f - "digit" - T{ ebnf-sequence f - V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } } + T{ ebnf-choice f + V{ + T{ ebnf-sequence f + V{ T{ ebnf-non-terminal f "one" } T{ ebnf-non-terminal f "two" } } + } + T{ ebnf-non-terminal f "three" } } } } [ - "digit = \"1\" \"2\"" 'rule' parse parse-result-ast -] unit-test \ No newline at end of file + "one two | three" 'choice' parse parse-result-ast +] unit-test + +{ + T{ ebnf-sequence f + V{ + T{ ebnf-non-terminal f "one" } + T{ ebnf-choice f + V{ T{ ebnf-non-terminal f "two" } T{ ebnf-non-terminal f "three" } } + } + } + } +} [ + "one (two | three)" 'choice' parse parse-result-ast +] unit-test + +{ + T{ ebnf-sequence f + V{ + T{ ebnf-non-terminal f "one" } + T{ ebnf-repeat0 f + T{ ebnf-sequence f + V{ + T{ ebnf-choice f + V{ T{ ebnf-non-terminal f "two" } T{ ebnf-non-terminal f "three" } } + } + T{ ebnf-non-terminal f "four" } + } + } + } + } + } +} [ + "one {(two | three) four}" 'choice' parse parse-result-ast +] unit-test + +{ + T{ ebnf-sequence f + V{ + T{ ebnf-non-terminal f "one" } + T{ ebnf-optional f T{ ebnf-non-terminal f "two" } } + T{ ebnf-non-terminal f "three" } + } + } +} [ + "one [ two ] three" 'choice' parse parse-result-ast +] unit-test diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 8726581488..e55ee9d852 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -1,6 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel parser words arrays strings math.parser sequences namespaces peg ; +USING: kernel parser words arrays strings math.parser sequences + quotations vectors namespaces math assocs continuations peg ; IN: peg.ebnf TUPLE: ebnf-non-terminal symbol ; @@ -8,7 +9,9 @@ TUPLE: ebnf-terminal symbol ; TUPLE: ebnf-choice options ; TUPLE: ebnf-sequence elements ; TUPLE: ebnf-repeat0 group ; +TUPLE: ebnf-optional elements ; TUPLE: ebnf-rule symbol elements ; +TUPLE: ebnf-action word ; TUPLE: ebnf rules ; C: ebnf-non-terminal @@ -16,58 +19,82 @@ C: ebnf-terminal C: ebnf-choice C: ebnf-sequence C: ebnf-repeat0 +C: ebnf-optional C: ebnf-rule +C: ebnf-action C: ebnf -GENERIC: ebnf-compile ( ast -- quot ) +SYMBOL: parsers +SYMBOL: non-terminals +SYMBOL: last-parser -M: ebnf-terminal ebnf-compile ( ast -- quot ) - [ - ebnf-terminal-symbol , \ token , - ] [ ] make ; +: reset-parser-generation ( -- ) + V{ } clone parsers set + H{ } clone non-terminals set + f last-parser set ; -M: ebnf-non-terminal ebnf-compile ( ast -- quot ) - [ - ebnf-non-terminal-symbol , \ in , \ get , \ lookup , \ execute , - ] [ ] make ; +: store-parser ( parser -- number ) + parsers get [ push ] keep length 1- ; -M: ebnf-choice ebnf-compile ( ast -- quot ) - [ - [ - ebnf-choice-options [ - ebnf-compile , - ] each - ] { } make , - [ call ] , \ map , \ choice , - ] [ ] make ; +: get-parser ( index -- parser ) + parsers get nth ; + +: non-terminal-index ( name -- number ) + dup non-terminals get at [ + nip + ] [ + f store-parser [ swap non-terminals get set-at ] keep + ] if* ; -M: ebnf-sequence ebnf-compile ( ast -- quot ) - [ - [ - ebnf-sequence-elements [ - ebnf-compile , - ] each - ] { } make , - [ call ] , \ map , \ seq , - ] [ ] make ; +GENERIC: (generate-parser) ( ast -- id ) -M: ebnf-repeat0 ebnf-compile ( ast -- quot ) - [ - ebnf-repeat0-group ebnf-compile % \ repeat0 , - ] [ ] make ; +: generate-parser ( ast -- id ) + (generate-parser) dup last-parser set ; -M: ebnf-rule ebnf-compile ( ast -- quot ) - [ - dup ebnf-rule-symbol , \ in , \ get , \ create , - ebnf-rule-elements ebnf-compile , \ define-compound , - ] [ ] make ; +M: ebnf-terminal (generate-parser) ( ast -- id ) + ebnf-terminal-symbol token sp store-parser ; -M: ebnf ebnf-compile ( ast -- quot ) +M: ebnf-non-terminal (generate-parser) ( ast -- id ) [ - ebnf-rules [ - ebnf-compile % - ] each - ] [ ] make ; + ebnf-non-terminal-symbol dup non-terminal-index , + parsers get , \ nth , [ search ] [ 2drop f ] recover , \ or , + ] [ ] make delay sp store-parser ; + +M: ebnf-choice (generate-parser) ( ast -- id ) + ebnf-choice-options [ + generate-parser get-parser + ] map choice store-parser ; + +M: ebnf-sequence (generate-parser) ( ast -- id ) + ebnf-sequence-elements [ + generate-parser get-parser + ] map seq store-parser ; + +M: ebnf-repeat0 (generate-parser) ( ast -- id ) + ebnf-repeat0-group generate-parser get-parser repeat0 store-parser ; + +M: ebnf-optional (generate-parser) ( ast -- id ) + ebnf-optional-elements generate-parser get-parser optional store-parser ; + +M: ebnf-rule (generate-parser) ( ast -- id ) + dup ebnf-rule-symbol non-terminal-index swap + ebnf-rule-elements generate-parser get-parser ! nt-id body + swap [ parsers get set-nth ] keep ; + +M: ebnf-action (generate-parser) ( ast -- id ) + ebnf-action-word search 1quotation + last-parser get get-parser swap action store-parser ; + +M: vector (generate-parser) ( ast -- id ) + [ generate-parser ] map peek ; + +M: f (generate-parser) ( ast -- id ) + drop last-parser get ; + +M: ebnf (generate-parser) ( ast -- id ) + ebnf-rules [ + generate-parser + ] map peek ; DEFER: 'rhs' @@ -75,32 +102,55 @@ DEFER: 'rhs' CHAR: a CHAR: z range repeat1 [ >string ] action ; : 'terminal' ( -- parser ) - "\"" token hide [ CHAR: " = not ] satisfy repeat1 "\"" token hide 3array seq [ first >string ] action ; + "'" token hide [ CHAR: ' = not ] satisfy repeat1 "'" token hide 3array seq [ first >string ] action ; : 'element' ( -- parser ) 'non-terminal' 'terminal' 2array choice ; -: 'sequence' ( -- parser ) - 'element' sp - "|" token sp ensure-not 2array seq [ first ] action - repeat1 [ ] action ; - -: 'choice' ( -- parser ) - 'element' sp "|" token sp list-of [ ] action ; +DEFER: 'choice' + +: 'group' ( -- parser ) + "(" token sp hide + [ 'choice' sp ] delay + ")" token sp hide + 3array seq [ first ] action ; : 'repeat0' ( -- parser ) "{" token sp hide - [ 'rhs' sp ] delay + [ 'choice' sp ] delay "}" token sp hide 3array seq [ first ] action ; -: 'rhs' ( -- parser ) - 'repeat0' - 'sequence' - 'choice' - 'element' - 4array choice ; +: 'optional' ( -- parser ) + "[" token sp hide + [ 'choice' sp ] delay + "]" token sp hide + 3array seq [ first ] action ; + +: 'sequence' ( -- parser ) + [ + 'element' sp , + 'group' sp , + 'repeat0' sp , + 'optional' sp , + ] { } make choice + repeat1 [ + dup length 1 = [ first ] [ ] if + ] action ; + +: 'choice' ( -- parser ) + 'sequence' sp "|" token sp list-of [ + dup length 1 = [ first ] [ ] if + ] action ; + +: 'action' ( -- parser ) + "=>" token hide + [ blank? ] satisfy ensure-not [ drop t ] satisfy 2array seq [ first ] action repeat1 [ >string ] action sp + 2array seq [ first ] action ; +: 'rhs' ( -- parser ) + 'choice' 'action' sp optional 2array seq ; + : 'rule' ( -- parser ) 'non-terminal' [ ebnf-non-terminal-symbol ] action "=" token sp hide @@ -112,9 +162,23 @@ DEFER: 'rhs' : ebnf>quot ( string -- quot ) 'ebnf' parse [ - parse-result-ast ebnf-compile + parse-result-ast [ + reset-parser-generation + generate-parser drop + [ + non-terminals get + [ + get-parser [ + swap , \ in , \ get , \ create , + 1quotation , \ define-compound , + ] [ + drop + ] if* + ] assoc-each + ] [ ] make + ] with-scope ] [ f ] if* ; -: " parse-tokens "" join ebnf>quot call ; parsing \ No newline at end of file +: " parse-tokens " " join ebnf>quot call ; parsing \ No newline at end of file diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index a9e08f6024..8940fc87c6 100644 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -1,14 +1,16 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel sequences strings namespaces math assocs shuffle vectors combinators.lib ; +USING: kernel sequences strings namespaces math assocs shuffle + vectors arrays combinators.lib ; IN: peg TUPLE: parse-result remaining ast ; -GENERIC: parse ( state parser -- result ) +GENERIC: (parse) ( state parser -- result ) ( remaining ast -- parse-result ) @@ -24,18 +26,48 @@ TUPLE: parser id ; : init-parser ( parser -- parser ) get-next-id parser construct-boa over set-delegate ; +: from ( slice-or-string -- index ) + dup slice? [ slice-from ] [ drop 0 ] if ; + +: get-cached ( input parser -- result ) + [ from ] dip parser-id packrat-cache get at at ; + +: put-cached ( result input parser -- ) + parser-id dup packrat-cache get at [ + nip + ] [ + H{ } clone dup >r swap packrat-cache get set-at r> + ] if* + [ from ] dip set-at ; + +PRIVATE> + +: parse ( input parser -- result ) + packrat-cache get [ + 2dup get-cached [ + [ (parse) dup ] 2keep put-cached + ] unless* + ] [ + (parse) + ] if ; + +: packrat-parse ( input parser -- result ) + H{ } clone packrat-cache [ parse ] with-variable ; + +r length tail-slice r> ] [ 2drop f ] if ; - + TUPLE: satisfy-parser quot ; -M: satisfy-parser parse ( state parser -- result ) +M: satisfy-parser (parse) ( state parser -- result ) over empty? [ 2drop f ] [ @@ -48,7 +80,7 @@ M: satisfy-parser parse ( state parser -- result ) TUPLE: range-parser min max ; -M: range-parser parse ( state parser -- result ) +M: range-parser (parse) ( state parser -- result ) over empty? [ 2drop f ] [ @@ -77,7 +109,7 @@ TUPLE: seq-parser parsers ; drop ] if ; -M: seq-parser parse ( state parser -- result ) +M: seq-parser (parse) ( state parser -- result ) seq-parser-parsers [ V{ } clone ] dip (seq-parser) ; TUPLE: choice-parser parsers ; @@ -93,7 +125,7 @@ TUPLE: choice-parser parsers ; ] if* ] if ; -M: choice-parser parse ( state parser -- result ) +M: choice-parser (parse) ( state parser -- result ) choice-parser-parsers (choice-parser) ; TUPLE: repeat0-parser p1 ; @@ -111,7 +143,7 @@ TUPLE: repeat0-parser p1 ; { parse-result-remaining parse-result-ast } get-slots 1vector ; -M: repeat0-parser parse ( state parser -- result ) +M: repeat0-parser (parse) ( state parser -- result ) repeat0-parser-p1 2dup parse [ nipd clone-result (repeat-parser) ] [ @@ -120,17 +152,17 @@ M: repeat0-parser parse ( state parser -- result ) TUPLE: repeat1-parser p1 ; -M: repeat1-parser parse ( state parser -- result ) +M: repeat1-parser (parse) ( state parser -- result ) repeat1-parser-p1 tuck parse dup [ clone-result (repeat-parser) ] [ nip ] if ; TUPLE: optional-parser p1 ; -M: optional-parser parse ( state parser -- result ) +M: optional-parser (parse) ( state parser -- result ) dupd optional-parser-p1 parse swap f or ; TUPLE: ensure-parser p1 ; -M: ensure-parser parse ( state parser -- result ) +M: ensure-parser (parse) ( state parser -- result ) dupd ensure-parser-p1 parse [ ignore ] [ @@ -139,7 +171,7 @@ M: ensure-parser parse ( state parser -- result ) TUPLE: ensure-not-parser p1 ; -M: ensure-not-parser parse ( state parser -- result ) +M: ensure-not-parser (parse) ( state parser -- result ) dupd ensure-not-parser-p1 parse [ drop f ] [ @@ -148,7 +180,7 @@ M: ensure-not-parser parse ( state parser -- result ) TUPLE: action-parser p1 quot ; -M: action-parser parse ( state parser -- result ) +M: action-parser (parse) ( state parser -- result ) tuck action-parser-p1 parse dup [ dup parse-result-ast rot action-parser-quot call swap [ set-parse-result-ast ] keep @@ -165,12 +197,12 @@ M: action-parser parse ( state parser -- result ) TUPLE: sp-parser p1 ; -M: sp-parser parse ( state parser -- result ) +M: sp-parser (parse) ( state parser -- result ) [ left-trim-slice ] dip sp-parser-p1 parse ; TUPLE: delay-parser quot ; -M: delay-parser parse ( state parser -- result ) +M: delay-parser (parse) ( state parser -- result ) delay-parser-quot call parse ; PRIVATE> diff --git a/extra/peg/pl0/pl0-tests.factor b/extra/peg/pl0/pl0-tests.factor index e40c984660..cec7b24cd0 100644 --- a/extra/peg/pl0/pl0-tests.factor +++ b/extra/peg/pl0/pl0-tests.factor @@ -5,9 +5,9 @@ USING: kernel tools.test peg peg.pl0 ; IN: temporary { "abc" } [ - "abc" 'ident' parse parse-result-ast + "abc" ident parse parse-result-ast ] unit-test { 55 } [ - "55abc" 'number' parse parse-result-ast + "55abc" number parse parse-result-ast ] unit-test diff --git a/extra/peg/pl0/pl0.factor b/extra/peg/pl0/pl0.factor index 8a01057bfb..b37009238d 100644 --- a/extra/peg/pl0/pl0.factor +++ b/extra/peg/pl0/pl0.factor @@ -1,58 +1,29 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel arrays strings math.parser sequences peg ; +USING: kernel arrays strings math.parser sequences peg peg.ebnf ; IN: peg.pl0 #! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0 - -: 'ident' ( -- parser ) +: ident ( -- parser ) CHAR: a CHAR: z range CHAR: A CHAR: Z range 2array choice repeat1 [ >string ] action ; -: 'number' ( -- parser ) +: number ( -- parser ) CHAR: 0 CHAR: 9 range repeat1 [ string>number ] action ; -DEFER: 'factor' - -: 'term' ( -- parser ) - 'factor' "*" token "/" token 2array choice sp 'factor' sp 2array seq repeat0 2array seq ; - -: 'expression' ( -- parser ) - [ "+" token "-" token 2array choice sp optional 'term' sp 2dup 2array seq repeat0 3array seq ] delay ; - -: 'factor' ( -- parser ) - 'ident' 'number' "(" token hide 'expression' sp ")" token sp hide 3array seq 3array choice ; - -: 'condition' ( -- parser ) - "odd" token 'expression' sp 2array seq - 'expression' { "=" "#" "<=" "<" ">=" ">" } [ token ] map choice sp 'expression' sp 3array seq - 2array choice ; - -: 'statement' ( -- parser ) - [ - 'ident' ":=" token sp 'expression' sp 3array seq - "call" token 'ident' sp 2array seq - "begin" token 'statement' sp ";" token sp 'statement' sp 2array seq repeat0 "end" token sp 4array seq - "if" token 'condition' sp "then" token sp 'statement' sp 4array seq - 4array choice - "while" token 'condition' sp "do" token sp 'statement' sp 4array seq - 2array choice optional - ] delay ; - -: 'block' ( -- parser ) - [ - "const" token 'ident' sp "=" token sp 'number' sp 4array seq - "," token sp 'ident' sp "=" token sp 'number' sp 4array seq repeat0 - ";" token sp 3array seq optional - - "var" token 'ident' sp "," token sp 'ident' sp 2array seq repeat0 - ";" token sp 4array seq optional - - "procedure" token 'ident' sp ";" token sp 'block' sp 4array seq ";" token sp 2array seq repeat0 'statement' sp 2array seq - - 3array seq - ] delay ; - -: 'program' ( -- parser ) - 'block' "." token sp 2array seq ; +=' | '>') expression . +expression = ['+' | '-'] term {('+' | '-') term } . +term = factor {('*' | '/') factor } . +factor = ident | number | '(' expression ')' +EBNF>