diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 8846a9c94c..54639431a4 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test peg peg.ebnf compiler.units ; +USING: kernel tools.test peg peg.ebnf ; IN: peg.ebnf.tests { T{ ebnf-non-terminal f "abc" } } [ @@ -109,13 +109,37 @@ IN: peg.ebnf.tests ] unit-test { V{ "a" "b" } } [ - "foo='a' 'b'" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast + "ab" [EBNF foo='a' 'b' EBNF] call parse-result-ast ] unit-test { V{ 1 "b" } } [ - "foo=('a')[[ drop 1 ]] 'b'" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast + "ab" [EBNF foo=('a')[[ drop 1 ]] 'b' EBNF] call parse-result-ast ] unit-test { V{ 1 2 } } [ - "foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]]" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast + "ab" [EBNF foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]] EBNF] call parse-result-ast +] unit-test + +{ CHAR: A } [ + "A" [EBNF foo=[A-Z] EBNF] call parse-result-ast +] unit-test + +{ CHAR: Z } [ + "Z" [EBNF foo=[A-Z] EBNF] call parse-result-ast +] unit-test + +{ f } [ + "0" [EBNF foo=[A-Z] EBNF] call +] unit-test + +{ CHAR: 0 } [ + "0" [EBNF foo=[^A-Z] EBNF] call parse-result-ast +] unit-test + +{ f } [ + "A" [EBNF foo=[^A-Z] EBNF] call +] unit-test + +{ f } [ + "Z" [EBNF foo=[^A-Z] EBNF] call ] unit-test \ No newline at end of file diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index e2c2dd5006..ab7baa547e 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel parser words arrays strings math.parser sequences +USING: kernel compiler.units parser words arrays strings math.parser sequences quotations vectors namespaces math assocs continuations peg peg.parsers unicode.categories multiline combinators.lib splitting ; @@ -9,6 +9,8 @@ IN: peg.ebnf TUPLE: ebnf-non-terminal symbol ; TUPLE: ebnf-terminal symbol ; TUPLE: ebnf-any-character ; +TUPLE: ebnf-range pattern ; +TUPLE: ebnf-ensure group ; TUPLE: ebnf-ensure-not group ; TUPLE: ebnf-choice options ; TUPLE: ebnf-sequence elements ; @@ -22,6 +24,8 @@ TUPLE: ebnf rules ; C: <ebnf-non-terminal> ebnf-non-terminal C: <ebnf-terminal> ebnf-terminal C: <ebnf-any-character> ebnf-any-character +C: <ebnf-range> ebnf-range +C: <ebnf-ensure> ebnf-ensure C: <ebnf-ensure-not> ebnf-ensure-not C: <ebnf-choice> ebnf-choice C: <ebnf-sequence> ebnf-sequence @@ -32,84 +36,6 @@ C: <ebnf-rule> ebnf-rule C: <ebnf-action> ebnf-action C: <ebnf> ebnf -SYMBOL: parsers -SYMBOL: non-terminals - -: reset-parser-generation ( -- ) - V{ } clone parsers set - H{ } clone non-terminals set ; - -: store-parser ( parser -- number ) - parsers get [ push ] keep length 1- ; - -: get-parser ( index -- parser ) - parsers get nth ; - -: non-terminal-index ( name -- number ) - dup non-terminals get at [ - nip - ] [ - f store-parser [ swap non-terminals get set-at ] keep - ] if* ; - -GENERIC: (generate-parser) ( ast -- id ) - -: generate-parser ( ast -- id ) - (generate-parser) ; - -M: ebnf-terminal (generate-parser) ( ast -- id ) - ebnf-terminal-symbol token sp store-parser ; - -M: ebnf-non-terminal (generate-parser) ( ast -- id ) - [ - ebnf-non-terminal-symbol dup non-terminal-index , - parsers get , \ nth , [ search ] [ 2drop f ] recover , \ or , - ] [ ] make delay sp store-parser ; - -M: ebnf-any-character (generate-parser) ( ast -- id ) - drop [ drop t ] satisfy store-parser ; - -M: ebnf-choice (generate-parser) ( ast -- id ) - ebnf-choice-options [ - generate-parser get-parser - ] map choice store-parser ; - -M: ebnf-sequence (generate-parser) ( ast -- id ) - ebnf-sequence-elements [ - generate-parser get-parser - ] map seq store-parser ; - -M: ebnf-ensure-not (generate-parser) ( ast -- id ) - ebnf-ensure-not-group generate-parser get-parser ensure-not store-parser ; - -M: ebnf-repeat0 (generate-parser) ( ast -- id ) - ebnf-repeat0-group generate-parser get-parser repeat0 store-parser ; - -M: ebnf-repeat1 (generate-parser) ( ast -- id ) - ebnf-repeat1-group generate-parser get-parser repeat1 store-parser ; - -M: ebnf-optional (generate-parser) ( ast -- id ) - ebnf-optional-elements generate-parser get-parser optional store-parser ; - -M: ebnf-rule (generate-parser) ( ast -- id ) - dup ebnf-rule-symbol non-terminal-index swap - ebnf-rule-elements generate-parser get-parser ! nt-id body - swap [ parsers get set-nth ] keep ; - -M: ebnf-action (generate-parser) ( ast -- id ) - [ ebnf-action-parser generate-parser get-parser ] keep - ebnf-action-code string-lines parse-lines action store-parser ; - -M: vector (generate-parser) ( ast -- id ) - [ generate-parser ] map peek ; - -M: ebnf (generate-parser) ( ast -- id ) - ebnf-rules [ - generate-parser - ] map peek ; - -DEFER: 'rhs' - : syntax ( string -- parser ) #! Parses the string, ignoring white space, and #! does not put the result in the AST. @@ -149,6 +75,7 @@ DEFER: 'rhs' [ dup CHAR: [ = ] [ dup CHAR: . = ] [ dup CHAR: ! = ] + [ dup CHAR: & = ] [ dup CHAR: * = ] [ dup CHAR: + = ] [ dup CHAR: ? = ] @@ -163,6 +90,14 @@ DEFER: 'rhs' : 'any-character' ( -- parser ) #! A parser to match the symbol for any character match. [ CHAR: . = ] satisfy [ drop <ebnf-any-character> ] action ; + +: 'range-parser' ( -- parser ) + #! Match the syntax for declaring character ranges + [ + [ "[" syntax , "[" token ensure-not , ] seq* hide , + [ CHAR: ] = not ] satisfy repeat1 , + "]" syntax , + ] seq* [ first >string <ebnf-range> ] action ; : 'element' ( -- parser ) #! An element of a rule. It can be a terminal or a @@ -173,6 +108,7 @@ DEFER: 'rhs' [ 'non-terminal' , 'terminal' , + 'range-parser' , 'any-character' , ] choice* , "=" syntax ensure-not , @@ -194,7 +130,6 @@ DEFER: 'choice' "*" token sp ensure-not , "+" token sp ensure-not , "?" token sp ensure-not , - "[[" token sp ensure-not , ] seq* hide grouped ; : 'repeat0' ( -- parser ) @@ -212,13 +147,6 @@ DEFER: 'choice' [ drop t ] satisfy , ] seq* [ first ] action repeat0 [ >string ] action ; -: 'action' ( -- parser ) - [ - "(" [ 'choice' sp ] delay ")" syntax-pack , - "[[" 'factor-code' "]]" syntax-pack , - ] seq* [ first2 <ebnf-action> ] action ; - - : 'ensure-not' ( -- parser ) #! Parses the '!' syntax to ensure that #! something that matches the following elements do @@ -228,21 +156,41 @@ DEFER: 'choice' 'group' sp , ] seq* [ first <ebnf-ensure-not> ] action ; -: 'sequence' ( -- parser ) +: 'ensure' ( -- parser ) + #! Parses the '&' syntax to ensure that + #! something that matches the following elements does + #! exist in the parse stream. + [ + "&" syntax , + 'group' sp , + ] seq* [ first <ebnf-ensure> ] action ; + +: ('sequence') ( -- parser ) #! A sequence of terminals and non-terminals, including #! groupings of those. [ 'ensure-not' sp , + 'ensure' sp , 'element' sp , 'group' sp , 'repeat0' sp , 'repeat1' sp , 'optional' sp , - 'action' sp , + ] choice* ; + +: 'sequence' ( -- parser ) + #! A sequence of terminals and non-terminals, including + #! groupings of those. + [ + [ + ('sequence') , + "[[" 'factor-code' "]]" syntax-pack , + ] seq* [ first2 <ebnf-action> ] action , + ('sequence') , ] choice* repeat1 [ dup length 1 = [ first ] [ <ebnf-sequence> ] if - ] action ; - + ] action ; + : 'choice' ( -- parser ) 'sequence' sp "|" token sp list-of [ dup length 1 = [ first ] [ <ebnf-choice> ] if @@ -258,25 +206,84 @@ DEFER: 'choice' : 'ebnf' ( -- parser ) 'rule' sp repeat1 [ <ebnf> ] action ; -: ebnf>quot ( string -- quot ) - 'ebnf' parse [ - parse-result-ast [ - reset-parser-generation - generate-parser drop - [ - non-terminals get - [ - get-parser [ - swap , \ in , \ get , \ create , - 1quotation , \ define , - ] [ - drop - ] if* - ] assoc-each - ] [ ] make - ] with-scope - ] [ - f - ] if* ; +GENERIC: (transform) ( ast -- parser ) + +SYMBOL: parser +SYMBOL: main + +: transform ( ast -- object ) + H{ } clone dup dup [ parser set swap (transform) main set ] bind ; + +M: ebnf (transform) ( ast -- parser ) + ebnf-rules [ (transform) ] map peek ; + +M: ebnf-rule (transform) ( ast -- parser ) + dup ebnf-rule-elements (transform) [ + swap ebnf-rule-symbol set + ] keep ; + +M: ebnf-sequence (transform) ( ast -- parser ) + ebnf-sequence-elements [ (transform) ] map seq ; + +M: ebnf-choice (transform) ( ast -- parser ) + ebnf-choice-options [ (transform) ] map choice ; + +M: ebnf-any-character (transform) ( ast -- parser ) + drop any-char ; + +M: ebnf-range (transform) ( ast -- parser ) + ebnf-range-pattern range-pattern ; + +M: ebnf-ensure (transform) ( ast -- parser ) + ebnf-ensure-group (transform) ensure ; + +M: ebnf-ensure-not (transform) ( ast -- parser ) + ebnf-ensure-not-group (transform) ensure-not ; + +M: ebnf-repeat0 (transform) ( ast -- parser ) + ebnf-repeat0-group (transform) repeat0 ; + +M: ebnf-repeat1 (transform) ( ast -- parser ) + ebnf-repeat1-group (transform) repeat1 ; + +M: ebnf-optional (transform) ( ast -- parser ) + ebnf-optional-elements (transform) optional ; + +M: ebnf-action (transform) ( ast -- parser ) + [ ebnf-action-parser (transform) ] keep + ebnf-action-code string-lines [ parse-lines ] with-compilation-unit action ; + +M: ebnf-terminal (transform) ( ast -- parser ) + ebnf-terminal-symbol token sp ; + +M: ebnf-non-terminal (transform) ( ast -- parser ) + ebnf-non-terminal-symbol [ + , parser get , \ at , + ] [ ] make delay sp ; + +: transform-ebnf ( string -- object ) + 'ebnf' parse parse-result-ast transform ; + +: check-parse-result ( result -- result ) + dup [ + dup parse-result-remaining empty? [ + [ + "Unable to fully parse EBNF. Left to parse was: " % + parse-result-remaining % + ] "" make throw + ] unless + ] [ + "Could not parse EBNF" throw + ] if ; + +: ebnf>quot ( string -- hashtable quot ) + 'ebnf' parse check-parse-result + parse-result-ast transform dup main swap at compile ; + +: [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing + +: EBNF: + CREATE-WORD dup + ";EBNF" parse-multiline-string + ebnf>quot swapd define "ebnf-parser" set-word-prop ; parsing -: <EBNF "EBNF>" parse-multiline-string ebnf>quot call ; parsing diff --git a/extra/peg/expr/expr-tests.factor b/extra/peg/expr/expr-tests.factor new file mode 100644 index 0000000000..b6f3163bf4 --- /dev/null +++ b/extra/peg/expr/expr-tests.factor @@ -0,0 +1,25 @@ +! Copyright (C) 2007 Chris Double. +! See http://factorcode.org/license.txt for BSD license. +! +USING: kernel tools.test peg peg.expr multiline sequences ; +IN: peg.expr.tests + +{ 5 } [ + "2+3" eval-expr +] unit-test + +{ 6 } [ + "2*3" eval-expr +] unit-test + +{ 14 } [ + "2+3*4" eval-expr +] unit-test + +{ 17 } [ + "2+3*4+3" eval-expr +] unit-test + +{ 23 } [ + "2+3*(4+3)" eval-expr +] unit-test diff --git a/extra/peg/expr/expr.factor b/extra/peg/expr/expr.factor index ed13ac0e50..6b690cb5ee 100644 --- a/extra/peg/expr/expr.factor +++ b/extra/peg/expr/expr.factor @@ -9,22 +9,21 @@ IN: peg.expr #! { operator rhs } in to a tree structure of the correct precedence. swap [ first2 swap call ] reduce ; -<EBNF +EBNF: expr +times = "*" [[ drop [ * ] ]] +divide = "/" [[ drop [ / ] ]] +add = "+" [[ drop [ + ] ]] +subtract = "-" [[ drop [ - ] ]] -times = ("*") [[ drop [ * ] ]] -divide = ("/") [[ drop [ / ] ]] -add = ("+") [[ drop [ + ] ]] -subtract = ("-") [[ drop [ - ] ]] - -digit = "0" | "1" | "2" | "3" | "4" | - "5" | "6" | "7" | "8" | "9" -number = ((digit)+) [[ concat string>number ]] +digit = [0-9] [[ digit> ]] +number = (digit)+ [[ unclip [ swap 10 * + ] reduce ]] value = number | ("(" expr ")") [[ second ]] product = (value ((times | divide) value)*) [[ first2 operator-fold ]] sum = (product ((add | subtract) product)*) [[ first2 operator-fold ]] expr = sum -EBNF> +;EBNF : eval-expr ( string -- number ) - expr parse parse-result-ast ; \ No newline at end of file + expr parse-result-ast ; + diff --git a/extra/peg/parsers/parsers-docs.factor b/extra/peg/parsers/parsers-docs.factor index 1991cba0eb..d49f1158dd 100755 --- a/extra/peg/parsers/parsers-docs.factor +++ b/extra/peg/parsers/parsers-docs.factor @@ -159,3 +159,21 @@ HELP: 'string' } { $description "Returns a parser that matches an string composed of a \", anything that is not \", and another \"." } { $see-also 'integer' } ; + +HELP: range-pattern +{ $values + { "pattern" "a string" } + { "parser" "a parser" } +} { $description +"Returns a parser that matches a single character based on the set " +"of characters in the pattern string." +"Any single character in the pattern matches that character. " +"If the pattern begins with a ^ then the set is negated " +"(the element matches any character not in the set). Any pair " +"of characters separated with a dash (-) represents the " +"range of characters from the first to the second, inclusive." +{ $examples + { $example "USING: peg peg.parsers prettyprint ;" "\"a\" \"_a-zA-Z\" range-pattern parse parse-result-ast 1string ." "\"a\"" } + { $example "USING: peg peg.parsers prettyprint ;" "\"0\" \"^0-9\" range-pattern parse ." "f" } +} +} ; diff --git a/extra/peg/parsers/parsers.factor b/extra/peg/parsers/parsers.factor index 87306e1469..3ccb1e7d10 100755 --- a/extra/peg/parsers/parsers.factor +++ b/extra/peg/parsers/parsers.factor @@ -1,8 +1,9 @@ ! Copyright (C) 2007, 2008 Chris Double, Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. USING: kernel sequences strings namespaces math assocs shuffle - vectors arrays combinators.lib memoize math.parser match - unicode.categories sequences.deep peg peg.private ; + vectors arrays combinators.lib math.parser match + unicode.categories sequences.deep peg peg.private + peg.search math.ranges ; IN: peg.parsers TUPLE: just-parser p1 ; @@ -18,26 +19,26 @@ TUPLE: just-parser p1 ; M: just-parser compile ( parser -- quot ) just-parser-p1 compile just-pattern append ; -MEMO: just ( parser -- parser ) - just-parser construct-boa init-parser ; +: just ( parser -- parser ) + just-parser construct-boa ; -MEMO: 1token ( ch -- parser ) 1string token ; +: 1token ( ch -- parser ) 1string token ; <PRIVATE -MEMO: (list-of) ( items separator repeat1? -- parser ) +: (list-of) ( items separator repeat1? -- parser ) >r over 2seq r> [ repeat1 ] [ repeat0 ] if [ concat ] action 2seq [ unclip 1vector swap first append ] action ; PRIVATE> -MEMO: list-of ( items separator -- parser ) +: list-of ( items separator -- parser ) hide f (list-of) ; -MEMO: list-of-many ( items separator -- parser ) +: list-of-many ( items separator -- parser ) hide t (list-of) ; -MEMO: epsilon ( -- parser ) V{ } token ; +: epsilon ( -- parser ) V{ } token ; -MEMO: any-char ( -- parser ) [ drop t ] satisfy ; +: any-char ( -- parser ) [ drop t ] satisfy ; <PRIVATE @@ -46,10 +47,10 @@ MEMO: any-char ( -- parser ) [ drop t ] satisfy ; PRIVATE> -MEMO: exactly-n ( parser n -- parser' ) +: exactly-n ( parser n -- parser' ) swap <repetition> seq ; -MEMO: at-most-n ( parser n -- parser' ) +: at-most-n ( parser n -- parser' ) dup zero? [ 2drop epsilon ] [ @@ -57,29 +58,56 @@ MEMO: at-most-n ( parser n -- parser' ) -rot 1- at-most-n 2choice ] if ; -MEMO: at-least-n ( parser n -- parser' ) +: at-least-n ( parser n -- parser' ) dupd exactly-n swap repeat0 2seq [ flatten-vectors ] action ; -MEMO: from-m-to-n ( parser m n -- parser' ) +: from-m-to-n ( parser m n -- parser' ) >r [ exactly-n ] 2keep r> swap - at-most-n 2seq [ flatten-vectors ] action ; -MEMO: pack ( begin body end -- parser ) +: pack ( begin body end -- parser ) >r >r hide r> r> hide 3seq [ first ] action ; -MEMO: surrounded-by ( parser begin end -- parser' ) +: surrounded-by ( parser begin end -- parser' ) [ token ] 2apply swapd pack ; -MEMO: 'digit' ( -- parser ) +: 'digit' ( -- parser ) [ digit? ] satisfy [ digit> ] action ; -MEMO: 'integer' ( -- parser ) +: 'integer' ( -- parser ) 'digit' repeat1 [ 10 digits>integer ] action ; -MEMO: 'string' ( -- parser ) +: 'string' ( -- parser ) [ [ CHAR: " = ] satisfy hide , [ CHAR: " = not ] satisfy repeat0 , [ CHAR: " = ] satisfy hide , ] { } make seq [ first >string ] action ; + +: (range-pattern) ( pattern -- string ) + #! Given a range pattern, produce a string containing + #! all characters within that range. + [ + any-char , + [ CHAR: - = ] satisfy hide , + any-char , + ] seq* [ + first2 [a,b] >string + ] action + replace ; + +: range-pattern ( pattern -- parser ) + #! 'pattern' is a set of characters describing the + #! parser to be produced. Any single character in + #! the pattern matches that character. If the pattern + #! begins with a ^ then the set is negated (the element + #! matches any character not in the set). Any pair of + #! characters separated with a dash (-) represents the + #! range of characters from the first to the second, + #! inclusive. + dup first CHAR: ^ = [ + 1 tail (range-pattern) [ member? not ] curry satisfy + ] [ + (range-pattern) [ member? ] curry satisfy + ] if ; diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor index 7a1ce99883..89cc243863 100644 --- a/extra/peg/peg-tests.factor +++ b/extra/peg/peg-tests.factor @@ -4,10 +4,6 @@ USING: kernel tools.test strings namespaces arrays sequences peg peg.private ; IN: peg.tests -{ 0 1 2 } [ - 0 next-id set-global get-next-id get-next-id get-next-id -] unit-test - { f } [ "endbegin" "begin" token parse ] unit-test diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 16cf40f884..b3200ec5eb 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. USING: kernel sequences strings namespaces math assocs shuffle - vectors arrays combinators.lib memoize math.parser match + vectors arrays combinators.lib math.parser match unicode.categories sequences.lib compiler.units parser words ; IN: peg @@ -10,70 +10,14 @@ TUPLE: parse-result remaining ast ; GENERIC: compile ( parser -- quot ) -: (parse) ( state parser -- result ) +: parse ( state parser -- result ) compile call ; - -<PRIVATE - -SYMBOL: packrat-cache SYMBOL: ignore -SYMBOL: not-in-cache - -: not-in-cache? ( result -- ? ) - not-in-cache = ; : <parse-result> ( remaining ast -- parse-result ) parse-result construct-boa ; -SYMBOL: next-id - -: get-next-id ( -- number ) - next-id get-global 0 or dup 1+ next-id set-global ; - -TUPLE: parser id ; - -: init-parser ( parser -- parser ) - get-next-id parser construct-boa over set-delegate ; - -: from ( slice-or-string -- index ) - dup slice? [ slice-from ] [ drop 0 ] if ; - -: get-cached ( input parser -- result ) - [ from ] dip parser-id packrat-cache get at at* [ - drop not-in-cache - ] unless ; - -: put-cached ( result input parser -- ) - parser-id dup packrat-cache get at [ - nip - ] [ - H{ } clone dup >r swap packrat-cache get set-at r> - ] if* - [ from ] dip set-at ; - -PRIVATE> - -: parse ( input parser -- result ) - packrat-cache get [ - 2dup get-cached dup not-in-cache? [ -! "cache missed: " write over parser-id number>string write " - " write nl ! pick . - drop - #! Protect against left recursion blowing the callstack - #! by storing a failed parse in the cache. - [ f ] dipd [ put-cached ] 2keep - [ (parse) dup ] 2keep put-cached - ] [ -! "cache hit: " write over parser-id number>string write " - " write nl ! pick . - 2nip - ] if - ] [ - (parse) - ] if ; - -: packrat-parse ( input parser -- result ) - H{ } clone packrat-cache [ parse ] with-variable ; - <PRIVATE TUPLE: token-parser symbol ; @@ -295,17 +239,17 @@ M: delay-parser compile ( parser -- quot ) PRIVATE> -MEMO: token ( string -- parser ) - token-parser construct-boa init-parser ; +: token ( string -- parser ) + token-parser construct-boa ; : satisfy ( quot -- parser ) - satisfy-parser construct-boa init-parser ; + satisfy-parser construct-boa ; -MEMO: range ( min max -- parser ) - range-parser construct-boa init-parser ; +: range ( min max -- parser ) + range-parser construct-boa ; : seq ( seq -- parser ) - seq-parser construct-boa init-parser ; + seq-parser construct-boa ; : 2seq ( parser1 parser2 -- parser ) 2array seq ; @@ -320,7 +264,7 @@ MEMO: range ( min max -- parser ) { } make seq ; inline : choice ( seq -- parser ) - choice-parser construct-boa init-parser ; + choice-parser construct-boa ; : 2choice ( parser1 parser2 -- parser ) 2array choice ; @@ -334,32 +278,32 @@ MEMO: range ( min max -- parser ) : choice* ( quot -- paser ) { } make choice ; inline -MEMO: repeat0 ( parser -- parser ) - repeat0-parser construct-boa init-parser ; +: repeat0 ( parser -- parser ) + repeat0-parser construct-boa ; -MEMO: repeat1 ( parser -- parser ) - repeat1-parser construct-boa init-parser ; +: repeat1 ( parser -- parser ) + repeat1-parser construct-boa ; -MEMO: optional ( parser -- parser ) - optional-parser construct-boa init-parser ; +: optional ( parser -- parser ) + optional-parser construct-boa ; -MEMO: ensure ( parser -- parser ) - ensure-parser construct-boa init-parser ; +: ensure ( parser -- parser ) + ensure-parser construct-boa ; -MEMO: ensure-not ( parser -- parser ) - ensure-not-parser construct-boa init-parser ; +: ensure-not ( parser -- parser ) + ensure-not-parser construct-boa ; : action ( parser quot -- parser ) - action-parser construct-boa init-parser ; + action-parser construct-boa ; -MEMO: sp ( parser -- parser ) - sp-parser construct-boa init-parser ; +: sp ( parser -- parser ) + sp-parser construct-boa ; -MEMO: hide ( parser -- parser ) +: hide ( parser -- parser ) [ drop ignore ] action ; -MEMO: delay ( quot -- parser ) - delay-parser construct-boa init-parser ; +: delay ( quot -- parser ) + delay-parser construct-boa ; : PEG: (:) [ diff --git a/extra/peg/pl0/pl0-tests.factor b/extra/peg/pl0/pl0-tests.factor index bf321d54e9..b3d2135da7 100644 --- a/extra/peg/pl0/pl0-tests.factor +++ b/extra/peg/pl0/pl0-tests.factor @@ -4,14 +4,6 @@ USING: kernel tools.test peg peg.pl0 multiline sequences ; IN: peg.pl0.tests -{ "abc" } [ - "abc" ident parse parse-result-ast -] unit-test - -{ 55 } [ - "55abc" number parse parse-result-ast -] unit-test - { t } [ <" VAR x, squ; @@ -29,7 +21,7 @@ BEGIN x := x + 1; END END. -"> program parse parse-result-remaining empty? +"> pl0 parse-result-remaining empty? ] unit-test { f } [ @@ -95,5 +87,5 @@ BEGIN y := 36; CALL gcd; END. - "> program parse parse-result-remaining empty? + "> pl0 parse-result-remaining empty? ] unit-test \ No newline at end of file diff --git a/extra/peg/pl0/pl0.factor b/extra/peg/pl0/pl0.factor index 1ef7a23b41..f7eb3cad23 100644 --- a/extra/peg/pl0/pl0.factor +++ b/extra/peg/pl0/pl0.factor @@ -1,31 +1,26 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. USING: kernel arrays strings math.parser sequences -peg peg.ebnf peg.parsers memoize namespaces ; +peg peg.ebnf peg.parsers memoize namespaces math ; IN: peg.pl0 #! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0 -MEMO: ident ( -- parser ) - [ - CHAR: a CHAR: z range , - CHAR: A CHAR: Z range , - ] choice* repeat1 [ >string ] action ; -MEMO: number ( -- parser ) - CHAR: 0 CHAR: 9 range repeat1 [ string>number ] action ; - -<EBNF -program = block "." -block = [ "CONST" ident "=" number { "," ident "=" number } ";" ] - [ "VAR" ident { "," ident } ";" ] - { "PROCEDURE" ident ";" [ block ";" ] } statement -statement = [ ident ":=" expression | "CALL" ident | - "BEGIN" statement {";" statement } "END" | +EBNF: pl0 +block = ( "CONST" ident "=" number ( "," ident "=" number )* ";" )? + ( "VAR" ident ( "," ident )* ";" )? + ( "PROCEDURE" ident ";" ( block ";" )? )* statement +statement = ( ident ":=" expression | "CALL" ident | + "BEGIN" statement (";" statement )* "END" | "IF" condition "THEN" statement | - "WHILE" condition "DO" statement ] + "WHILE" condition "DO" statement )? condition = "ODD" expression | expression ("=" | "#" | "<=" | "<" | ">=" | ">") expression -expression = ["+" | "-"] term {("+" | "-") term } -term = factor {("*" | "/") factor } +expression = ("+" | "-")? term (("+" | "-") term )* +term = factor (("*" | "/") factor )* factor = ident | number | "(" expression ")" -EBNF> +ident = (([a-zA-Z])+) [[ >string ]] +digit = ([0-9]) [[ digit> ]] +number = ((digit)+) [[ unclip [ swap 10 * + ] reduce ]] +program = block "." +;EBNF