From 264284d0c4dac5d6b70232fc1ff35b1bba0573c8 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Thu, 20 Mar 2008 13:40:22 +1300 Subject: [PATCH 01/13] Add range-pattern parser --- extra/peg/parsers/parsers-docs.factor | 18 ++++++++++++++++ extra/peg/parsers/parsers.factor | 30 ++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/extra/peg/parsers/parsers-docs.factor b/extra/peg/parsers/parsers-docs.factor index 1991cba0eb..d49f1158dd 100755 --- a/extra/peg/parsers/parsers-docs.factor +++ b/extra/peg/parsers/parsers-docs.factor @@ -159,3 +159,21 @@ HELP: 'string' } { $description "Returns a parser that matches an string composed of a \", anything that is not \", and another \"." } { $see-also 'integer' } ; + +HELP: range-pattern +{ $values + { "pattern" "a string" } + { "parser" "a parser" } +} { $description +"Returns a parser that matches a single character based on the set " +"of characters in the pattern string." +"Any single character in the pattern matches that character. " +"If the pattern begins with a ^ then the set is negated " +"(the element matches any character not in the set). Any pair " +"of characters separated with a dash (-) represents the " +"range of characters from the first to the second, inclusive." +{ $examples + { $example "USING: peg peg.parsers prettyprint ;" "\"a\" \"_a-zA-Z\" range-pattern parse parse-result-ast 1string ." "\"a\"" } + { $example "USING: peg peg.parsers prettyprint ;" "\"0\" \"^0-9\" range-pattern parse ." "f" } +} +} ; diff --git a/extra/peg/parsers/parsers.factor b/extra/peg/parsers/parsers.factor index 87306e1469..63e9e9a336 100755 --- a/extra/peg/parsers/parsers.factor +++ b/extra/peg/parsers/parsers.factor @@ -2,7 +2,8 @@ ! See http://factorcode.org/license.txt for BSD license. USING: kernel sequences strings namespaces math assocs shuffle vectors arrays combinators.lib memoize math.parser match - unicode.categories sequences.deep peg peg.private ; + unicode.categories sequences.deep peg peg.private + peg.search math.ranges ; IN: peg.parsers TUPLE: just-parser p1 ; @@ -83,3 +84,30 @@ MEMO: 'string' ( -- parser ) [ CHAR: " = not ] satisfy repeat0 , [ CHAR: " = ] satisfy hide , ] { } make seq [ first >string ] action ; + +: (range-pattern) ( pattern -- string ) + #! Given a range pattern, produce a string containing + #! all characters within that range. + [ + any-char , + [ CHAR: - = ] satisfy hide , + any-char , + ] seq* [ + first2 [a,b] >string + ] action + replace ; + +MEMO: range-pattern ( pattern -- parser ) + #! 'pattern' is a set of characters describing the + #! parser to be produced. Any single character in + #! the pattern matches that character. If the pattern + #! begins with a ^ then the set is negated (the element + #! matches any character not in the set). Any pair of + #! characters separated with a dash (-) represents the + #! range of characters from the first to the second, + #! inclusive. + dup first CHAR: ^ = [ + 1 tail (range-pattern) [ member? not ] curry satisfy + ] [ + (range-pattern) [ member? ] curry satisfy + ] if ; From 795ef0ae3b0a5031b329c84d555a1c64bfeae758 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Thu, 20 Mar 2008 13:55:19 +1300 Subject: [PATCH 02/13] Add ranges to EBNF syntax This works: <EBNF letter = [a-zA-Z] EBNF> and <EBNF not-digit = [^0-9] EBNF> --- extra/peg/ebnf/ebnf-tests.factor | 24 ++++++++++++++++++++++++ extra/peg/ebnf/ebnf.factor | 14 ++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 8846a9c94c..458c68e0d4 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -118,4 +118,28 @@ IN: peg.ebnf.tests { V{ 1 2 } } [ "foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]]" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast +] unit-test + +{ CHAR: A } [ + "foo=[A-Z]" ebnf>quot with-compilation-unit "A" foo parse parse-result-ast +] unit-test + +{ CHAR: Z } [ + "foo=[A-Z]" ebnf>quot with-compilation-unit "Z" foo parse parse-result-ast +] unit-test + +{ f } [ + "foo=[A-Z]" ebnf>quot with-compilation-unit "0" foo parse +] unit-test + +{ CHAR: 0 } [ + "foo=[^A-Z]" ebnf>quot with-compilation-unit "0" foo parse parse-result-ast +] unit-test + +{ f } [ + "foo=[^A-Z]" ebnf>quot with-compilation-unit "A" foo parse +] unit-test + +{ f } [ + "foo=[^A-Z]" ebnf>quot with-compilation-unit "Z" foo parse ] unit-test \ No newline at end of file diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index e2c2dd5006..03f36c5f28 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -9,6 +9,7 @@ IN: peg.ebnf TUPLE: ebnf-non-terminal symbol ; TUPLE: ebnf-terminal symbol ; TUPLE: ebnf-any-character ; +TUPLE: ebnf-range pattern ; TUPLE: ebnf-ensure-not group ; TUPLE: ebnf-choice options ; TUPLE: ebnf-sequence elements ; @@ -22,6 +23,7 @@ TUPLE: ebnf rules ; C: <ebnf-non-terminal> ebnf-non-terminal C: <ebnf-terminal> ebnf-terminal C: <ebnf-any-character> ebnf-any-character +C: <ebnf-range> ebnf-range C: <ebnf-ensure-not> ebnf-ensure-not C: <ebnf-choice> ebnf-choice C: <ebnf-sequence> ebnf-sequence @@ -69,6 +71,9 @@ M: ebnf-non-terminal (generate-parser) ( ast -- id ) M: ebnf-any-character (generate-parser) ( ast -- id ) drop [ drop t ] satisfy store-parser ; +M: ebnf-range (generate-parser) ( ast -- id ) + ebnf-range-pattern range-pattern store-parser ; + M: ebnf-choice (generate-parser) ( ast -- id ) ebnf-choice-options [ generate-parser get-parser @@ -163,6 +168,14 @@ DEFER: 'rhs' : 'any-character' ( -- parser ) #! A parser to match the symbol for any character match. [ CHAR: . = ] satisfy [ drop <ebnf-any-character> ] action ; + +: 'range-parser' ( -- parser ) + #! Match the syntax for declaring character ranges + [ + "[" syntax , + [ CHAR: ] = not ] satisfy repeat1 , + "]" syntax , + ] seq* [ first >string <ebnf-range> ] action ; : 'element' ( -- parser ) #! An element of a rule. It can be a terminal or a @@ -173,6 +186,7 @@ DEFER: 'rhs' [ 'non-terminal' , 'terminal' , + 'range-parser' , 'any-character' , ] choice* , "=" syntax ensure-not , From ec4f964e4f770f912cc9e1674bd790abcebc7f53 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Thu, 20 Mar 2008 14:06:21 +1300 Subject: [PATCH 03/13] Fix pl0 for EBNF syntax changes --- extra/peg/pl0/pl0.factor | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/extra/peg/pl0/pl0.factor b/extra/peg/pl0/pl0.factor index 1ef7a23b41..b30f6bfe70 100644 --- a/extra/peg/pl0/pl0.factor +++ b/extra/peg/pl0/pl0.factor @@ -16,16 +16,16 @@ MEMO: number ( -- parser ) <EBNF program = block "." -block = [ "CONST" ident "=" number { "," ident "=" number } ";" ] - [ "VAR" ident { "," ident } ";" ] - { "PROCEDURE" ident ";" [ block ";" ] } statement -statement = [ ident ":=" expression | "CALL" ident | - "BEGIN" statement {";" statement } "END" | +block = ( "CONST" ident "=" number ( "," ident "=" number )* ";" )? + ( "VAR" ident ( "," ident )* ";" )? + ( "PROCEDURE" ident ";" ( block ";" )? )* statement +statement = ( ident ":=" expression | "CALL" ident | + "BEGIN" statement (";" statement )* "END" | "IF" condition "THEN" statement | - "WHILE" condition "DO" statement ] + "WHILE" condition "DO" statement )? condition = "ODD" expression | expression ("=" | "#" | "<=" | "<" | ">=" | ">") expression -expression = ["+" | "-"] term {("+" | "-") term } -term = factor {("*" | "/") factor } +expression = ("+" | "-")? term (("+" | "-") term )* +term = factor (("*" | "/") factor )* factor = ident | number | "(" expression ")" EBNF> From 68388fbed90e0765925491d2ccc6ff3354bf7c0b Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Thu, 20 Mar 2008 14:15:06 +1300 Subject: [PATCH 04/13] Updated peg.expr to use range-pattern for digits --- extra/peg/expr/expr-tests.factor | 25 +++++++++++++++++++++++++ extra/peg/expr/expr.factor | 5 ++--- 2 files changed, 27 insertions(+), 3 deletions(-) create mode 100644 extra/peg/expr/expr-tests.factor diff --git a/extra/peg/expr/expr-tests.factor b/extra/peg/expr/expr-tests.factor new file mode 100644 index 0000000000..0ed05765cd --- /dev/null +++ b/extra/peg/expr/expr-tests.factor @@ -0,0 +1,25 @@ +! Copyright (C) 2007 Chris Double. +! See http://factorcode.org/license.txt for BSD license. +! +USING: kernel tools.test peg.expr multiline sequences ; +IN: temporary + +{ 5 } [ + "2+3" eval-expr +] unit-test + +{ 6 } [ + "2*3" eval-expr +] unit-test + +{ 14 } [ + "2+3*4" eval-expr +] unit-test + +{ 17 } [ + "2+3*4+3" eval-expr +] unit-test + +{ 23 } [ + "2+3*(4+3)" eval-expr +] unit-test diff --git a/extra/peg/expr/expr.factor b/extra/peg/expr/expr.factor index ed13ac0e50..26ae76c0b0 100644 --- a/extra/peg/expr/expr.factor +++ b/extra/peg/expr/expr.factor @@ -16,9 +16,8 @@ divide = ("/") [[ drop [ / ] ]] add = ("+") [[ drop [ + ] ]] subtract = ("-") [[ drop [ - ] ]] -digit = "0" | "1" | "2" | "3" | "4" | - "5" | "6" | "7" | "8" | "9" -number = ((digit)+) [[ concat string>number ]] +digit = ([0-9]) [[ digit> ]] +number = ((digit)+) [[ unclip [ swap 10 * + ] reduce ]] value = number | ("(" expr ")") [[ second ]] product = (value ((times | divide) value)*) [[ first2 operator-fold ]] From 39c228db6d14ae9229d712abb716489248c3dca8 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Thu, 20 Mar 2008 14:22:14 +1300 Subject: [PATCH 05/13] Update peg.pl0 to use range pattern syntax This allows removing the words for ident and number, replacing them with EBNF expressions. --- extra/peg/pl0/pl0.factor | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/extra/peg/pl0/pl0.factor b/extra/peg/pl0/pl0.factor index b30f6bfe70..34973e6a52 100644 --- a/extra/peg/pl0/pl0.factor +++ b/extra/peg/pl0/pl0.factor @@ -1,18 +1,10 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. USING: kernel arrays strings math.parser sequences -peg peg.ebnf peg.parsers memoize namespaces ; +peg peg.ebnf peg.parsers memoize namespaces math ; IN: peg.pl0 #! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0 -MEMO: ident ( -- parser ) - [ - CHAR: a CHAR: z range , - CHAR: A CHAR: Z range , - ] choice* repeat1 [ >string ] action ; - -MEMO: number ( -- parser ) - CHAR: 0 CHAR: 9 range repeat1 [ string>number ] action ; <EBNF program = block "." @@ -28,4 +20,7 @@ condition = "ODD" expression | expression = ("+" | "-")? term (("+" | "-") term )* term = factor (("*" | "/") factor )* factor = ident | number | "(" expression ")" +ident = (([a-zA-Z])+) [[ >string ]] +digit = ([0-9]) [[ digit> ]] +number = ((digit)+) [[ unclip [ swap 10 * + ] reduce ]] EBNF> From c1f69f01beb2c6a183e42bd13b81a40374039baf Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Thu, 20 Mar 2008 14:57:12 +1300 Subject: [PATCH 06/13] Change ordering of [[ ... ]] --- extra/peg/ebnf/ebnf-tests.factor | 20 ++++++++++---------- extra/peg/ebnf/ebnf.factor | 28 +++++++++++++++------------- extra/peg/expr/expr-tests.factor | 2 +- extra/peg/expr/expr.factor | 3 ++- 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 458c68e0d4..0989e4beb5 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test peg peg.ebnf compiler.units ; +USING: kernel parser words tools.test peg peg.ebnf compiler.units ; IN: peg.ebnf.tests { T{ ebnf-non-terminal f "abc" } } [ @@ -109,37 +109,37 @@ IN: peg.ebnf.tests ] unit-test { V{ "a" "b" } } [ - "foo='a' 'b'" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast + "foo='a' 'b'" ebnf>quot with-compilation-unit "ab" "foo" search execute parse parse-result-ast ] unit-test { V{ 1 "b" } } [ - "foo=('a')[[ drop 1 ]] 'b'" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast + "foo=('a')[[ drop 1 ]] 'b'" ebnf>quot with-compilation-unit "ab" "foo" search execute parse parse-result-ast ] unit-test { V{ 1 2 } } [ - "foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]]" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast + "foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]]" ebnf>quot with-compilation-unit "ab" "foo" search execute parse parse-result-ast ] unit-test { CHAR: A } [ - "foo=[A-Z]" ebnf>quot with-compilation-unit "A" foo parse parse-result-ast + "foo=[A-Z]" ebnf>quot with-compilation-unit "A" "foo" search execute parse parse-result-ast ] unit-test { CHAR: Z } [ - "foo=[A-Z]" ebnf>quot with-compilation-unit "Z" foo parse parse-result-ast + "foo=[A-Z]" ebnf>quot with-compilation-unit "Z" "foo" search execute parse parse-result-ast ] unit-test { f } [ - "foo=[A-Z]" ebnf>quot with-compilation-unit "0" foo parse + "foo=[A-Z]" ebnf>quot with-compilation-unit "0" "foo" search execute parse ] unit-test { CHAR: 0 } [ - "foo=[^A-Z]" ebnf>quot with-compilation-unit "0" foo parse parse-result-ast + "foo=[^A-Z]" ebnf>quot with-compilation-unit "0" "foo" search execute parse parse-result-ast ] unit-test { f } [ - "foo=[^A-Z]" ebnf>quot with-compilation-unit "A" foo parse + "foo=[^A-Z]" ebnf>quot with-compilation-unit "A" "foo" search execute parse ] unit-test { f } [ - "foo=[^A-Z]" ebnf>quot with-compilation-unit "Z" foo parse + "foo=[^A-Z]" ebnf>quot with-compilation-unit "Z" "foo" search execute parse ] unit-test \ No newline at end of file diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 03f36c5f28..7d298a709d 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -172,7 +172,7 @@ DEFER: 'rhs' : 'range-parser' ( -- parser ) #! Match the syntax for declaring character ranges [ - "[" syntax , + [ "[" syntax , "[" token ensure-not , ] seq* hide , [ CHAR: ] = not ] satisfy repeat1 , "]" syntax , ] seq* [ first >string <ebnf-range> ] action ; @@ -208,7 +208,6 @@ DEFER: 'choice' "*" token sp ensure-not , "+" token sp ensure-not , "?" token sp ensure-not , - "[[" token sp ensure-not , ] seq* hide grouped ; : 'repeat0' ( -- parser ) @@ -226,13 +225,6 @@ DEFER: 'choice' [ drop t ] satisfy , ] seq* [ first ] action repeat0 [ >string ] action ; -: 'action' ( -- parser ) - [ - "(" [ 'choice' sp ] delay ")" syntax-pack , - "[[" 'factor-code' "]]" syntax-pack , - ] seq* [ first2 <ebnf-action> ] action ; - - : 'ensure-not' ( -- parser ) #! Parses the '!' syntax to ensure that #! something that matches the following elements do @@ -242,7 +234,7 @@ DEFER: 'choice' 'group' sp , ] seq* [ first <ebnf-ensure-not> ] action ; -: 'sequence' ( -- parser ) +: ('sequence') ( -- parser ) #! A sequence of terminals and non-terminals, including #! groupings of those. [ @@ -252,11 +244,21 @@ DEFER: 'choice' 'repeat0' sp , 'repeat1' sp , 'optional' sp , - 'action' sp , + ] choice* ; + +: 'sequence' ( -- parser ) + #! A sequence of terminals and non-terminals, including + #! groupings of those. + [ + [ + ('sequence') , + "[[" 'factor-code' "]]" syntax-pack , + ] seq* [ first2 <ebnf-action> ] action , + ('sequence') , ] choice* repeat1 [ dup length 1 = [ first ] [ <ebnf-sequence> ] if - ] action ; - + ] action ; + : 'choice' ( -- parser ) 'sequence' sp "|" token sp list-of [ dup length 1 = [ first ] [ <ebnf-choice> ] if diff --git a/extra/peg/expr/expr-tests.factor b/extra/peg/expr/expr-tests.factor index 0ed05765cd..20da5cd16a 100644 --- a/extra/peg/expr/expr-tests.factor +++ b/extra/peg/expr/expr-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test peg.expr multiline sequences ; +USING: kernel tools.test peg peg.expr multiline sequences ; IN: temporary { 5 } [ diff --git a/extra/peg/expr/expr.factor b/extra/peg/expr/expr.factor index 26ae76c0b0..62ef4ea88f 100644 --- a/extra/peg/expr/expr.factor +++ b/extra/peg/expr/expr.factor @@ -26,4 +26,5 @@ expr = sum EBNF> : eval-expr ( string -- number ) - expr parse parse-result-ast ; \ No newline at end of file + expr parse parse-result-ast ; + From 7dc772db2647ebeb78c74dfa10d98b3963b5a94d Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Thu, 20 Mar 2008 15:42:21 +1300 Subject: [PATCH 07/13] Refactor ebnf parser generation --- extra/peg/ebnf/ebnf.factor | 52 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 7d298a709d..c7a007bfc8 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -34,6 +34,55 @@ C: <ebnf-rule> ebnf-rule C: <ebnf-action> ebnf-action C: <ebnf> ebnf +GENERIC: (transform) ( ast -- parser ) + +: transform ( ast -- object ) + H{ } clone dup dup [ "parser" set swap (transform) "main" set ] bind ; + +M: ebnf (transform) ( ast -- parser ) + ebnf-rules [ (transform) ] map peek ; + +M: ebnf-rule (transform) ( ast -- parser ) + dup ebnf-rule-elements (transform) [ + swap ebnf-rule-symbol set + ] keep ; + +M: ebnf-sequence (transform) ( ast -- parser ) + ebnf-sequence-elements [ (transform) ] map seq ; + +M: ebnf-choice (transform) ( ast -- parser ) + ebnf-choice-options [ (transform) ] map choice ; + +M: ebnf-any-character (transform) ( ast -- parser ) + drop any-char ; + +M: ebnf-range (transform) ( ast -- parser ) + ebnf-range-pattern range-pattern ; + +M: ebnf-ensure-not (transform) ( ast -- parser ) + ebnf-ensure-not-group (transform) ensure-not ; + +M: ebnf-repeat0 (transform) ( ast -- parser ) + ebnf-repeat0-group (transform) repeat0 ; + +M: ebnf-repeat1 (transform) ( ast -- parser ) + ebnf-repeat1-group (transform) repeat1 ; + +M: ebnf-optional (transform) ( ast -- parser ) + ebnf-optional-elements (transform) optional ; + +M: ebnf-action (transform) ( ast -- parser ) + [ ebnf-action-parser (transform) ] keep + ebnf-action-code string-lines parse-lines action ; + +M: ebnf-terminal (transform) ( ast -- parser ) + ebnf-terminal-symbol token sp ; + +M: ebnf-non-terminal (transform) ( ast -- parser ) + ebnf-non-terminal-symbol [ + , "parser" get , \ at , + ] [ ] make delay ; + SYMBOL: parsers SYMBOL: non-terminals @@ -295,4 +344,7 @@ DEFER: 'choice' f ] if* ; +: transform-ebnf ( string -- object ) + 'ebnf' parse parse-result-ast transform ; + : <EBNF "EBNF>" parse-multiline-string ebnf>quot call ; parsing From e7980ebc616579df199cef126e11f33d42a243ec Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Thu, 20 Mar 2008 17:11:09 +1300 Subject: [PATCH 08/13] More refactoring of EBNF <EBNF .. EBNF> now produces a quotation that when called does the parsing EBNF: foo ... ;EBNF creates a 'foo' word with stack effect (string -- result) when called it parses the string and returns the result. --- extra/peg/ebnf/ebnf-tests.factor | 20 +-- extra/peg/ebnf/ebnf.factor | 227 ++++++++++--------------------- extra/peg/expr/expr.factor | 7 +- extra/peg/pl0/pl0-tests.factor | 12 +- extra/peg/pl0/pl0.factor | 6 +- 5 files changed, 93 insertions(+), 179 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 0989e4beb5..6606fa9ffc 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel parser words tools.test peg peg.ebnf compiler.units ; +USING: kernel tools.test peg peg.ebnf ; IN: peg.ebnf.tests { T{ ebnf-non-terminal f "abc" } } [ @@ -109,37 +109,37 @@ IN: peg.ebnf.tests ] unit-test { V{ "a" "b" } } [ - "foo='a' 'b'" ebnf>quot with-compilation-unit "ab" "foo" search execute parse parse-result-ast + "ab" <EBNF foo='a' 'b' EBNF> call parse-result-ast ] unit-test { V{ 1 "b" } } [ - "foo=('a')[[ drop 1 ]] 'b'" ebnf>quot with-compilation-unit "ab" "foo" search execute parse parse-result-ast + "ab" <EBNF foo=('a')[[ drop 1 ]] 'b' EBNF> call parse-result-ast ] unit-test { V{ 1 2 } } [ - "foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]]" ebnf>quot with-compilation-unit "ab" "foo" search execute parse parse-result-ast + "ab" <EBNF foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]] EBNF> call parse-result-ast ] unit-test { CHAR: A } [ - "foo=[A-Z]" ebnf>quot with-compilation-unit "A" "foo" search execute parse parse-result-ast + "A" <EBNF foo=[A-Z] EBNF> call parse-result-ast ] unit-test { CHAR: Z } [ - "foo=[A-Z]" ebnf>quot with-compilation-unit "Z" "foo" search execute parse parse-result-ast + "Z" <EBNF foo=[A-Z] EBNF> call parse-result-ast ] unit-test { f } [ - "foo=[A-Z]" ebnf>quot with-compilation-unit "0" "foo" search execute parse + "0" <EBNF foo=[A-Z] EBNF> call ] unit-test { CHAR: 0 } [ - "foo=[^A-Z]" ebnf>quot with-compilation-unit "0" "foo" search execute parse parse-result-ast + "0" <EBNF foo=[^A-Z] EBNF> call parse-result-ast ] unit-test { f } [ - "foo=[^A-Z]" ebnf>quot with-compilation-unit "A" "foo" search execute parse + "A" <EBNF foo=[^A-Z] EBNF> call ] unit-test { f } [ - "foo=[^A-Z]" ebnf>quot with-compilation-unit "Z" "foo" search execute parse + "Z" <EBNF foo=[^A-Z] EBNF> call ] unit-test \ No newline at end of file diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index c7a007bfc8..b9f88f5f24 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel parser words arrays strings math.parser sequences +USING: kernel compiler.units parser words arrays strings math.parser sequences quotations vectors namespaces math assocs continuations peg peg.parsers unicode.categories multiline combinators.lib splitting ; @@ -34,136 +34,6 @@ C: <ebnf-rule> ebnf-rule C: <ebnf-action> ebnf-action C: <ebnf> ebnf -GENERIC: (transform) ( ast -- parser ) - -: transform ( ast -- object ) - H{ } clone dup dup [ "parser" set swap (transform) "main" set ] bind ; - -M: ebnf (transform) ( ast -- parser ) - ebnf-rules [ (transform) ] map peek ; - -M: ebnf-rule (transform) ( ast -- parser ) - dup ebnf-rule-elements (transform) [ - swap ebnf-rule-symbol set - ] keep ; - -M: ebnf-sequence (transform) ( ast -- parser ) - ebnf-sequence-elements [ (transform) ] map seq ; - -M: ebnf-choice (transform) ( ast -- parser ) - ebnf-choice-options [ (transform) ] map choice ; - -M: ebnf-any-character (transform) ( ast -- parser ) - drop any-char ; - -M: ebnf-range (transform) ( ast -- parser ) - ebnf-range-pattern range-pattern ; - -M: ebnf-ensure-not (transform) ( ast -- parser ) - ebnf-ensure-not-group (transform) ensure-not ; - -M: ebnf-repeat0 (transform) ( ast -- parser ) - ebnf-repeat0-group (transform) repeat0 ; - -M: ebnf-repeat1 (transform) ( ast -- parser ) - ebnf-repeat1-group (transform) repeat1 ; - -M: ebnf-optional (transform) ( ast -- parser ) - ebnf-optional-elements (transform) optional ; - -M: ebnf-action (transform) ( ast -- parser ) - [ ebnf-action-parser (transform) ] keep - ebnf-action-code string-lines parse-lines action ; - -M: ebnf-terminal (transform) ( ast -- parser ) - ebnf-terminal-symbol token sp ; - -M: ebnf-non-terminal (transform) ( ast -- parser ) - ebnf-non-terminal-symbol [ - , "parser" get , \ at , - ] [ ] make delay ; - -SYMBOL: parsers -SYMBOL: non-terminals - -: reset-parser-generation ( -- ) - V{ } clone parsers set - H{ } clone non-terminals set ; - -: store-parser ( parser -- number ) - parsers get [ push ] keep length 1- ; - -: get-parser ( index -- parser ) - parsers get nth ; - -: non-terminal-index ( name -- number ) - dup non-terminals get at [ - nip - ] [ - f store-parser [ swap non-terminals get set-at ] keep - ] if* ; - -GENERIC: (generate-parser) ( ast -- id ) - -: generate-parser ( ast -- id ) - (generate-parser) ; - -M: ebnf-terminal (generate-parser) ( ast -- id ) - ebnf-terminal-symbol token sp store-parser ; - -M: ebnf-non-terminal (generate-parser) ( ast -- id ) - [ - ebnf-non-terminal-symbol dup non-terminal-index , - parsers get , \ nth , [ search ] [ 2drop f ] recover , \ or , - ] [ ] make delay sp store-parser ; - -M: ebnf-any-character (generate-parser) ( ast -- id ) - drop [ drop t ] satisfy store-parser ; - -M: ebnf-range (generate-parser) ( ast -- id ) - ebnf-range-pattern range-pattern store-parser ; - -M: ebnf-choice (generate-parser) ( ast -- id ) - ebnf-choice-options [ - generate-parser get-parser - ] map choice store-parser ; - -M: ebnf-sequence (generate-parser) ( ast -- id ) - ebnf-sequence-elements [ - generate-parser get-parser - ] map seq store-parser ; - -M: ebnf-ensure-not (generate-parser) ( ast -- id ) - ebnf-ensure-not-group generate-parser get-parser ensure-not store-parser ; - -M: ebnf-repeat0 (generate-parser) ( ast -- id ) - ebnf-repeat0-group generate-parser get-parser repeat0 store-parser ; - -M: ebnf-repeat1 (generate-parser) ( ast -- id ) - ebnf-repeat1-group generate-parser get-parser repeat1 store-parser ; - -M: ebnf-optional (generate-parser) ( ast -- id ) - ebnf-optional-elements generate-parser get-parser optional store-parser ; - -M: ebnf-rule (generate-parser) ( ast -- id ) - dup ebnf-rule-symbol non-terminal-index swap - ebnf-rule-elements generate-parser get-parser ! nt-id body - swap [ parsers get set-nth ] keep ; - -M: ebnf-action (generate-parser) ( ast -- id ) - [ ebnf-action-parser generate-parser get-parser ] keep - ebnf-action-code string-lines parse-lines action store-parser ; - -M: vector (generate-parser) ( ast -- id ) - [ generate-parser ] map peek ; - -M: ebnf (generate-parser) ( ast -- id ) - ebnf-rules [ - generate-parser - ] map peek ; - -DEFER: 'rhs' - : syntax ( string -- parser ) #! Parses the string, ignoring white space, and #! does not put the result in the AST. @@ -323,28 +193,81 @@ DEFER: 'choice' : 'ebnf' ( -- parser ) 'rule' sp repeat1 [ <ebnf> ] action ; -: ebnf>quot ( string -- quot ) - 'ebnf' parse [ - parse-result-ast [ - reset-parser-generation - generate-parser drop - [ - non-terminals get - [ - get-parser [ - swap , \ in , \ get , \ create , - 1quotation , \ define , - ] [ - drop - ] if* - ] assoc-each - ] [ ] make - ] with-scope - ] [ - f - ] if* ; +GENERIC: (transform) ( ast -- parser ) + +SYMBOL: parser +SYMBOL: main + +: transform ( ast -- object ) + H{ } clone dup dup [ parser set swap (transform) main set ] bind ; + +M: ebnf (transform) ( ast -- parser ) + ebnf-rules [ (transform) ] map peek ; + +M: ebnf-rule (transform) ( ast -- parser ) + dup ebnf-rule-elements (transform) [ + swap ebnf-rule-symbol set + ] keep ; + +M: ebnf-sequence (transform) ( ast -- parser ) + ebnf-sequence-elements [ (transform) ] map seq ; + +M: ebnf-choice (transform) ( ast -- parser ) + ebnf-choice-options [ (transform) ] map choice ; + +M: ebnf-any-character (transform) ( ast -- parser ) + drop any-char ; + +M: ebnf-range (transform) ( ast -- parser ) + ebnf-range-pattern range-pattern ; + +M: ebnf-ensure-not (transform) ( ast -- parser ) + ebnf-ensure-not-group (transform) ensure-not ; + +M: ebnf-repeat0 (transform) ( ast -- parser ) + ebnf-repeat0-group (transform) repeat0 ; + +M: ebnf-repeat1 (transform) ( ast -- parser ) + ebnf-repeat1-group (transform) repeat1 ; + +M: ebnf-optional (transform) ( ast -- parser ) + ebnf-optional-elements (transform) optional ; + +M: ebnf-action (transform) ( ast -- parser ) + [ ebnf-action-parser (transform) ] keep + ebnf-action-code string-lines [ parse-lines ] with-compilation-unit action ; + +M: ebnf-terminal (transform) ( ast -- parser ) + ebnf-terminal-symbol token sp ; + +M: ebnf-non-terminal (transform) ( ast -- parser ) + ebnf-non-terminal-symbol [ + , parser get , \ at , + ] [ ] make delay sp ; : transform-ebnf ( string -- object ) 'ebnf' parse parse-result-ast transform ; -: <EBNF "EBNF>" parse-multiline-string ebnf>quot call ; parsing +: check-parse-result ( result -- result ) + dup [ + dup parse-result-remaining empty? [ + [ + "Unable to fully parse EBNF. Left to parse was: " % + parse-result-remaining % + ] "" make throw + ] unless + ] [ + "Could not parse EBNF" throw + ] if ; + +: ebnf>quot ( string -- hashtable quot ) + 'ebnf' parse check-parse-result + parse-result-ast transform dup main swap at compile ; + +: <EBNF "EBNF>" parse-multiline-string ebnf>quot nip parsed ; parsing + +: EBNF: + CREATE-WORD dup + ";EBNF" parse-multiline-string + ebnf>quot swapd define "ebnf-parser" set-word-prop ; parsing + diff --git a/extra/peg/expr/expr.factor b/extra/peg/expr/expr.factor index 62ef4ea88f..14f0e7c14e 100644 --- a/extra/peg/expr/expr.factor +++ b/extra/peg/expr/expr.factor @@ -9,8 +9,7 @@ IN: peg.expr #! { operator rhs } in to a tree structure of the correct precedence. swap [ first2 swap call ] reduce ; -<EBNF - +EBNF: expr times = ("*") [[ drop [ * ] ]] divide = ("/") [[ drop [ / ] ]] add = ("+") [[ drop [ + ] ]] @@ -23,8 +22,8 @@ value = number | ("(" expr ")") [[ second ]] product = (value ((times | divide) value)*) [[ first2 operator-fold ]] sum = (product ((add | subtract) product)*) [[ first2 operator-fold ]] expr = sum -EBNF> +;EBNF : eval-expr ( string -- number ) - expr parse parse-result-ast ; + expr parse-result-ast ; diff --git a/extra/peg/pl0/pl0-tests.factor b/extra/peg/pl0/pl0-tests.factor index bf321d54e9..b3d2135da7 100644 --- a/extra/peg/pl0/pl0-tests.factor +++ b/extra/peg/pl0/pl0-tests.factor @@ -4,14 +4,6 @@ USING: kernel tools.test peg peg.pl0 multiline sequences ; IN: peg.pl0.tests -{ "abc" } [ - "abc" ident parse parse-result-ast -] unit-test - -{ 55 } [ - "55abc" number parse parse-result-ast -] unit-test - { t } [ <" VAR x, squ; @@ -29,7 +21,7 @@ BEGIN x := x + 1; END END. -"> program parse parse-result-remaining empty? +"> pl0 parse-result-remaining empty? ] unit-test { f } [ @@ -95,5 +87,5 @@ BEGIN y := 36; CALL gcd; END. - "> program parse parse-result-remaining empty? + "> pl0 parse-result-remaining empty? ] unit-test \ No newline at end of file diff --git a/extra/peg/pl0/pl0.factor b/extra/peg/pl0/pl0.factor index 34973e6a52..f7eb3cad23 100644 --- a/extra/peg/pl0/pl0.factor +++ b/extra/peg/pl0/pl0.factor @@ -6,8 +6,7 @@ IN: peg.pl0 #! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0 -<EBNF -program = block "." +EBNF: pl0 block = ( "CONST" ident "=" number ( "," ident "=" number )* ";" )? ( "VAR" ident ( "," ident )* ";" )? ( "PROCEDURE" ident ";" ( block ";" )? )* statement @@ -23,4 +22,5 @@ factor = ident | number | "(" expression ")" ident = (([a-zA-Z])+) [[ >string ]] digit = ([0-9]) [[ digit> ]] number = ((digit)+) [[ unclip [ swap 10 * + ] reduce ]] -EBNF> +program = block "." +;EBNF From 44954753bdc0cdc593b6c8e8abd8efd8e4759ed0 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Thu, 20 Mar 2008 17:13:27 +1300 Subject: [PATCH 09/13] Change <EBNF .. EBNF> to [EBNF .. EBNF] --- extra/peg/ebnf/ebnf-tests.factor | 18 +++++++++--------- extra/peg/ebnf/ebnf.factor | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 6606fa9ffc..54639431a4 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -109,37 +109,37 @@ IN: peg.ebnf.tests ] unit-test { V{ "a" "b" } } [ - "ab" <EBNF foo='a' 'b' EBNF> call parse-result-ast + "ab" [EBNF foo='a' 'b' EBNF] call parse-result-ast ] unit-test { V{ 1 "b" } } [ - "ab" <EBNF foo=('a')[[ drop 1 ]] 'b' EBNF> call parse-result-ast + "ab" [EBNF foo=('a')[[ drop 1 ]] 'b' EBNF] call parse-result-ast ] unit-test { V{ 1 2 } } [ - "ab" <EBNF foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]] EBNF> call parse-result-ast + "ab" [EBNF foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]] EBNF] call parse-result-ast ] unit-test { CHAR: A } [ - "A" <EBNF foo=[A-Z] EBNF> call parse-result-ast + "A" [EBNF foo=[A-Z] EBNF] call parse-result-ast ] unit-test { CHAR: Z } [ - "Z" <EBNF foo=[A-Z] EBNF> call parse-result-ast + "Z" [EBNF foo=[A-Z] EBNF] call parse-result-ast ] unit-test { f } [ - "0" <EBNF foo=[A-Z] EBNF> call + "0" [EBNF foo=[A-Z] EBNF] call ] unit-test { CHAR: 0 } [ - "0" <EBNF foo=[^A-Z] EBNF> call parse-result-ast + "0" [EBNF foo=[^A-Z] EBNF] call parse-result-ast ] unit-test { f } [ - "A" <EBNF foo=[^A-Z] EBNF> call + "A" [EBNF foo=[^A-Z] EBNF] call ] unit-test { f } [ - "Z" <EBNF foo=[^A-Z] EBNF> call + "Z" [EBNF foo=[^A-Z] EBNF] call ] unit-test \ No newline at end of file diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index b9f88f5f24..caa1800297 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -264,7 +264,7 @@ M: ebnf-non-terminal (transform) ( ast -- parser ) 'ebnf' parse check-parse-result parse-result-ast transform dup main swap at compile ; -: <EBNF "EBNF>" parse-multiline-string ebnf>quot nip parsed ; parsing +: [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing : EBNF: CREATE-WORD dup From 8ade4f9b5b90b10fba1546bdb75d876356152129 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Thu, 20 Mar 2008 17:16:50 +1300 Subject: [PATCH 10/13] Fix vocab name in expr tests --- extra/peg/expr/expr-tests.factor | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extra/peg/expr/expr-tests.factor b/extra/peg/expr/expr-tests.factor index 20da5cd16a..b6f3163bf4 100644 --- a/extra/peg/expr/expr-tests.factor +++ b/extra/peg/expr/expr-tests.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. ! USING: kernel tools.test peg peg.expr multiline sequences ; -IN: temporary +IN: peg.expr.tests { 5 } [ "2+3" eval-expr From dbd0583044940c4765caae207ef1e41f02e88994 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Thu, 20 Mar 2008 17:19:41 +1300 Subject: [PATCH 11/13] Tidy up expr groups --- extra/peg/expr/expr.factor | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/extra/peg/expr/expr.factor b/extra/peg/expr/expr.factor index 14f0e7c14e..6b690cb5ee 100644 --- a/extra/peg/expr/expr.factor +++ b/extra/peg/expr/expr.factor @@ -10,13 +10,13 @@ IN: peg.expr swap [ first2 swap call ] reduce ; EBNF: expr -times = ("*") [[ drop [ * ] ]] -divide = ("/") [[ drop [ / ] ]] -add = ("+") [[ drop [ + ] ]] -subtract = ("-") [[ drop [ - ] ]] +times = "*" [[ drop [ * ] ]] +divide = "/" [[ drop [ / ] ]] +add = "+" [[ drop [ + ] ]] +subtract = "-" [[ drop [ - ] ]] -digit = ([0-9]) [[ digit> ]] -number = ((digit)+) [[ unclip [ swap 10 * + ] reduce ]] +digit = [0-9] [[ digit> ]] +number = (digit)+ [[ unclip [ swap 10 * + ] reduce ]] value = number | ("(" expr ")") [[ second ]] product = (value ((times | divide) value)*) [[ first2 operator-fold ]] From d1e7ede35dc37c14bf3c28814fab0f0d47d18e7f Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Thu, 20 Mar 2008 17:25:27 +1300 Subject: [PATCH 12/13] Add support for & syntax in ebnf --- extra/peg/ebnf/ebnf.factor | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index caa1800297..ab7baa547e 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -10,6 +10,7 @@ TUPLE: ebnf-non-terminal symbol ; TUPLE: ebnf-terminal symbol ; TUPLE: ebnf-any-character ; TUPLE: ebnf-range pattern ; +TUPLE: ebnf-ensure group ; TUPLE: ebnf-ensure-not group ; TUPLE: ebnf-choice options ; TUPLE: ebnf-sequence elements ; @@ -24,6 +25,7 @@ C: <ebnf-non-terminal> ebnf-non-terminal C: <ebnf-terminal> ebnf-terminal C: <ebnf-any-character> ebnf-any-character C: <ebnf-range> ebnf-range +C: <ebnf-ensure> ebnf-ensure C: <ebnf-ensure-not> ebnf-ensure-not C: <ebnf-choice> ebnf-choice C: <ebnf-sequence> ebnf-sequence @@ -73,6 +75,7 @@ C: <ebnf> ebnf [ dup CHAR: [ = ] [ dup CHAR: . = ] [ dup CHAR: ! = ] + [ dup CHAR: & = ] [ dup CHAR: * = ] [ dup CHAR: + = ] [ dup CHAR: ? = ] @@ -153,11 +156,21 @@ DEFER: 'choice' 'group' sp , ] seq* [ first <ebnf-ensure-not> ] action ; +: 'ensure' ( -- parser ) + #! Parses the '&' syntax to ensure that + #! something that matches the following elements does + #! exist in the parse stream. + [ + "&" syntax , + 'group' sp , + ] seq* [ first <ebnf-ensure> ] action ; + : ('sequence') ( -- parser ) #! A sequence of terminals and non-terminals, including #! groupings of those. [ 'ensure-not' sp , + 'ensure' sp , 'element' sp , 'group' sp , 'repeat0' sp , @@ -221,6 +234,9 @@ M: ebnf-any-character (transform) ( ast -- parser ) M: ebnf-range (transform) ( ast -- parser ) ebnf-range-pattern range-pattern ; +M: ebnf-ensure (transform) ( ast -- parser ) + ebnf-ensure-group (transform) ensure ; + M: ebnf-ensure-not (transform) ( ast -- parser ) ebnf-ensure-not-group (transform) ensure-not ; From 1c6882b32cc54d57c36296168e4db339a86560c3 Mon Sep 17 00:00:00 2001 From: Chris Double <chris@bethia.(none)> Date: Fri, 21 Mar 2008 01:25:45 +1300 Subject: [PATCH 13/13] Rip out packrat stuff It was broken since the transition to generating compiled quotations. As far as I know, no one was using packrat-parse anyway. Rework in progress... --- extra/peg/parsers/parsers.factor | 38 +++++------ extra/peg/peg-tests.factor | 4 -- extra/peg/peg.factor | 106 ++++++++----------------------- 3 files changed, 44 insertions(+), 104 deletions(-) diff --git a/extra/peg/parsers/parsers.factor b/extra/peg/parsers/parsers.factor index 63e9e9a336..3ccb1e7d10 100755 --- a/extra/peg/parsers/parsers.factor +++ b/extra/peg/parsers/parsers.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007, 2008 Chris Double, Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. USING: kernel sequences strings namespaces math assocs shuffle - vectors arrays combinators.lib memoize math.parser match + vectors arrays combinators.lib math.parser match unicode.categories sequences.deep peg peg.private peg.search math.ranges ; IN: peg.parsers @@ -19,26 +19,26 @@ TUPLE: just-parser p1 ; M: just-parser compile ( parser -- quot ) just-parser-p1 compile just-pattern append ; -MEMO: just ( parser -- parser ) - just-parser construct-boa init-parser ; +: just ( parser -- parser ) + just-parser construct-boa ; -MEMO: 1token ( ch -- parser ) 1string token ; +: 1token ( ch -- parser ) 1string token ; <PRIVATE -MEMO: (list-of) ( items separator repeat1? -- parser ) +: (list-of) ( items separator repeat1? -- parser ) >r over 2seq r> [ repeat1 ] [ repeat0 ] if [ concat ] action 2seq [ unclip 1vector swap first append ] action ; PRIVATE> -MEMO: list-of ( items separator -- parser ) +: list-of ( items separator -- parser ) hide f (list-of) ; -MEMO: list-of-many ( items separator -- parser ) +: list-of-many ( items separator -- parser ) hide t (list-of) ; -MEMO: epsilon ( -- parser ) V{ } token ; +: epsilon ( -- parser ) V{ } token ; -MEMO: any-char ( -- parser ) [ drop t ] satisfy ; +: any-char ( -- parser ) [ drop t ] satisfy ; <PRIVATE @@ -47,10 +47,10 @@ MEMO: any-char ( -- parser ) [ drop t ] satisfy ; PRIVATE> -MEMO: exactly-n ( parser n -- parser' ) +: exactly-n ( parser n -- parser' ) swap <repetition> seq ; -MEMO: at-most-n ( parser n -- parser' ) +: at-most-n ( parser n -- parser' ) dup zero? [ 2drop epsilon ] [ @@ -58,27 +58,27 @@ MEMO: at-most-n ( parser n -- parser' ) -rot 1- at-most-n 2choice ] if ; -MEMO: at-least-n ( parser n -- parser' ) +: at-least-n ( parser n -- parser' ) dupd exactly-n swap repeat0 2seq [ flatten-vectors ] action ; -MEMO: from-m-to-n ( parser m n -- parser' ) +: from-m-to-n ( parser m n -- parser' ) >r [ exactly-n ] 2keep r> swap - at-most-n 2seq [ flatten-vectors ] action ; -MEMO: pack ( begin body end -- parser ) +: pack ( begin body end -- parser ) >r >r hide r> r> hide 3seq [ first ] action ; -MEMO: surrounded-by ( parser begin end -- parser' ) +: surrounded-by ( parser begin end -- parser' ) [ token ] 2apply swapd pack ; -MEMO: 'digit' ( -- parser ) +: 'digit' ( -- parser ) [ digit? ] satisfy [ digit> ] action ; -MEMO: 'integer' ( -- parser ) +: 'integer' ( -- parser ) 'digit' repeat1 [ 10 digits>integer ] action ; -MEMO: 'string' ( -- parser ) +: 'string' ( -- parser ) [ [ CHAR: " = ] satisfy hide , [ CHAR: " = not ] satisfy repeat0 , @@ -97,7 +97,7 @@ MEMO: 'string' ( -- parser ) ] action replace ; -MEMO: range-pattern ( pattern -- parser ) +: range-pattern ( pattern -- parser ) #! 'pattern' is a set of characters describing the #! parser to be produced. Any single character in #! the pattern matches that character. If the pattern diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor index 7a1ce99883..89cc243863 100644 --- a/extra/peg/peg-tests.factor +++ b/extra/peg/peg-tests.factor @@ -4,10 +4,6 @@ USING: kernel tools.test strings namespaces arrays sequences peg peg.private ; IN: peg.tests -{ 0 1 2 } [ - 0 next-id set-global get-next-id get-next-id get-next-id -] unit-test - { f } [ "endbegin" "begin" token parse ] unit-test diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 16cf40f884..b3200ec5eb 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. USING: kernel sequences strings namespaces math assocs shuffle - vectors arrays combinators.lib memoize math.parser match + vectors arrays combinators.lib math.parser match unicode.categories sequences.lib compiler.units parser words ; IN: peg @@ -10,70 +10,14 @@ TUPLE: parse-result remaining ast ; GENERIC: compile ( parser -- quot ) -: (parse) ( state parser -- result ) +: parse ( state parser -- result ) compile call ; - -<PRIVATE - -SYMBOL: packrat-cache SYMBOL: ignore -SYMBOL: not-in-cache - -: not-in-cache? ( result -- ? ) - not-in-cache = ; : <parse-result> ( remaining ast -- parse-result ) parse-result construct-boa ; -SYMBOL: next-id - -: get-next-id ( -- number ) - next-id get-global 0 or dup 1+ next-id set-global ; - -TUPLE: parser id ; - -: init-parser ( parser -- parser ) - get-next-id parser construct-boa over set-delegate ; - -: from ( slice-or-string -- index ) - dup slice? [ slice-from ] [ drop 0 ] if ; - -: get-cached ( input parser -- result ) - [ from ] dip parser-id packrat-cache get at at* [ - drop not-in-cache - ] unless ; - -: put-cached ( result input parser -- ) - parser-id dup packrat-cache get at [ - nip - ] [ - H{ } clone dup >r swap packrat-cache get set-at r> - ] if* - [ from ] dip set-at ; - -PRIVATE> - -: parse ( input parser -- result ) - packrat-cache get [ - 2dup get-cached dup not-in-cache? [ -! "cache missed: " write over parser-id number>string write " - " write nl ! pick . - drop - #! Protect against left recursion blowing the callstack - #! by storing a failed parse in the cache. - [ f ] dipd [ put-cached ] 2keep - [ (parse) dup ] 2keep put-cached - ] [ -! "cache hit: " write over parser-id number>string write " - " write nl ! pick . - 2nip - ] if - ] [ - (parse) - ] if ; - -: packrat-parse ( input parser -- result ) - H{ } clone packrat-cache [ parse ] with-variable ; - <PRIVATE TUPLE: token-parser symbol ; @@ -295,17 +239,17 @@ M: delay-parser compile ( parser -- quot ) PRIVATE> -MEMO: token ( string -- parser ) - token-parser construct-boa init-parser ; +: token ( string -- parser ) + token-parser construct-boa ; : satisfy ( quot -- parser ) - satisfy-parser construct-boa init-parser ; + satisfy-parser construct-boa ; -MEMO: range ( min max -- parser ) - range-parser construct-boa init-parser ; +: range ( min max -- parser ) + range-parser construct-boa ; : seq ( seq -- parser ) - seq-parser construct-boa init-parser ; + seq-parser construct-boa ; : 2seq ( parser1 parser2 -- parser ) 2array seq ; @@ -320,7 +264,7 @@ MEMO: range ( min max -- parser ) { } make seq ; inline : choice ( seq -- parser ) - choice-parser construct-boa init-parser ; + choice-parser construct-boa ; : 2choice ( parser1 parser2 -- parser ) 2array choice ; @@ -334,32 +278,32 @@ MEMO: range ( min max -- parser ) : choice* ( quot -- paser ) { } make choice ; inline -MEMO: repeat0 ( parser -- parser ) - repeat0-parser construct-boa init-parser ; +: repeat0 ( parser -- parser ) + repeat0-parser construct-boa ; -MEMO: repeat1 ( parser -- parser ) - repeat1-parser construct-boa init-parser ; +: repeat1 ( parser -- parser ) + repeat1-parser construct-boa ; -MEMO: optional ( parser -- parser ) - optional-parser construct-boa init-parser ; +: optional ( parser -- parser ) + optional-parser construct-boa ; -MEMO: ensure ( parser -- parser ) - ensure-parser construct-boa init-parser ; +: ensure ( parser -- parser ) + ensure-parser construct-boa ; -MEMO: ensure-not ( parser -- parser ) - ensure-not-parser construct-boa init-parser ; +: ensure-not ( parser -- parser ) + ensure-not-parser construct-boa ; : action ( parser quot -- parser ) - action-parser construct-boa init-parser ; + action-parser construct-boa ; -MEMO: sp ( parser -- parser ) - sp-parser construct-boa init-parser ; +: sp ( parser -- parser ) + sp-parser construct-boa ; -MEMO: hide ( parser -- parser ) +: hide ( parser -- parser ) [ drop ignore ] action ; -MEMO: delay ( quot -- parser ) - delay-parser construct-boa init-parser ; +: delay ( quot -- parser ) + delay-parser construct-boa ; : PEG: (:) [