From 37ade561a95a7bf10883336b5c6b9fd11acf9236 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 25 Jun 2008 12:29:16 +1200 Subject: [PATCH 01/25] Fix unary expression in js grammar --- extra/peg/javascript/parser/parser.factor | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/extra/peg/javascript/parser/parser.factor b/extra/peg/javascript/parser/parser.factor index b7df9908da..002804dcd8 100644 --- a/extra/peg/javascript/parser/parser.factor +++ b/extra/peg/javascript/parser/parser.factor @@ -64,14 +64,14 @@ MulExpr = MulExpr:x "*" Unary:y => [[ x y "*" ast-binop | MulExpr:x "/" Unary:y => [[ x y "/" ast-binop boa ]] | MulExpr:x "%" Unary:y => [[ x y "%" ast-binop boa ]] | Unary -Unary = "-" Postfix:p => [[ p "-" ast-unop boa ]] - | "+" Postfix:p => [[ p ]] - | "++" Postfix:p => [[ p "++" ast-preop boa ]] - | "--" Postfix:p => [[ p "--" ast-preop boa ]] - | "!" Postfix:p => [[ p "!" ast-unop boa ]] - | "typeof" Postfix:p => [[ p "typeof" ast-unop boa ]] - | "void" Postfix:p => [[ p "void" ast-unop boa ]] - | "delete" Postfix:p => [[ p "delete" ast-unop boa ]] +Unary = "-" Unary:p => [[ p "-" ast-unop boa ]] + | "+" Unary:p => [[ p ]] + | "++" Unary:p => [[ p "++" ast-preop boa ]] + | "--" Unary:p => [[ p "--" ast-preop boa ]] + | "!" Unary:p => [[ p "!" ast-unop boa ]] + | "typeof" Unary:p => [[ p "typeof" ast-unop boa ]] + | "void" Unary:p => [[ p "void" ast-unop boa ]] + | "delete" Unary:p => [[ p "delete" ast-unop boa ]] | Postfix Postfix = PrimExpr:p SpacesNoNl "++" => [[ p "++" ast-postop boa ]] | PrimExpr:p SpacesNoNl "--" => [[ p "--" ast-postop boa ]] From 00827d3b12bcca1f7e7706914592da5cc4d4202a Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 25 Jun 2008 13:14:15 +1200 Subject: [PATCH 02/25] Throw error on failed parse, returning relevant error information --- extra/peg/peg.factor | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 54c25778de..0d0d8ed72c 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -9,20 +9,31 @@ IN: peg USE: prettyprint TUPLE: parse-result remaining ast ; - +TUPLE: parse-error details ; +TUPLE: error-details remaining message ; TUPLE: parser id compiled ; - M: parser equal? [ id>> ] bi@ = ; M: parser hashcode* id>> hashcode* ; -C: parser +C: parse-result +C: error-details +C: parser +SYMBOL: errors + +: ( -- parse-error ) + V{ } clone parse-error boa ; + +: add-error ( remaining message -- ) + errors get [ + [ ] [ details>> ] bi* push + ] [ + 2drop + ] if* ; + SYMBOL: ignore -: ( remaining ast -- parse-result ) - parse-result boa ; - SYMBOL: packrat SYMBOL: pos SYMBOL: input @@ -207,6 +218,7 @@ C: peg-head input set 0 pos set f lrstack set + errors set H{ } clone heads set H{ } clone packrat set ] H{ } make-assoc swap bind ; inline @@ -257,7 +269,7 @@ SYMBOL: delayed ] with-compilation-unit ; : compiled-parse ( state word -- result ) - swap [ execute ] with-packrat ; inline + swap [ execute [ errors get throw ] unless* ] with-packrat ; inline : parse ( input parser -- result ) dup word? [ compile ] unless compiled-parse ; @@ -288,7 +300,7 @@ TUPLE: token-parser symbol ; dup >r ?head-slice [ r> ] [ - r> 2drop f + drop input-slice "Expected token '" r> append "'" append add-error f ] if ; M: token-parser (compile) ( parser -- quot ) From e14bb84a5a7fe860c3550bf7de9427917914e875 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 25 Jun 2008 19:37:58 +1200 Subject: [PATCH 03/25] More error handling for pegs --- extra/peg/peg.factor | 51 +++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 0d0d8ed72c..a0f5fc05e8 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -1,36 +1,47 @@ ! Copyright (C) 2007, 2008 Chris Double. ! See http://factorcode.org/license.txt for BSD license. USING: kernel sequences strings fry namespaces math assocs shuffle debugger io - vectors arrays math.parser math.order - unicode.categories compiler.units parser + vectors arrays math.parser math.order vectors combinators combinators.lib + sets unicode.categories compiler.units parser words quotations effects memoize accessors locals effects splitting ; IN: peg USE: prettyprint TUPLE: parse-result remaining ast ; -TUPLE: parse-error details ; -TUPLE: error-details remaining message ; +TUPLE: parse-error position messages ; TUPLE: parser id compiled ; M: parser equal? [ id>> ] bi@ = ; M: parser hashcode* id>> hashcode* ; C: parse-result -C: error-details +C: parse-error C: parser -SYMBOL: errors +SYMBOL: error-stack -: ( -- parse-error ) - V{ } clone parse-error boa ; +: (merge-errors) ( a b -- c ) + { + { [ over position>> not ] [ nip ] } + { [ dup position>> not ] [ drop ] } + [ 2dup [ position>> ] bi@ <=> { + { +lt+ [ nip ] } + { +gt+ [ drop ] } + { +eq+ [ messages>> over messages>> union [ position>> ] dip ] } + } case + ] + } cond ; + +: merge-errors ( -- ) + error-stack get dup length 1 > [ + dup pop over pop swap (merge-errors) swap push + ] [ + drop + ] if ; : add-error ( remaining message -- ) - errors get [ - [ ] [ details>> ] bi* push - ] [ - 2drop - ] if* ; + error-stack get push ; SYMBOL: ignore @@ -218,7 +229,7 @@ C: peg-head input set 0 pos set f lrstack set - errors set + V{ } clone error-stack set H{ } clone heads set H{ } clone packrat set ] H{ } make-assoc swap bind ; inline @@ -269,7 +280,7 @@ SYMBOL: delayed ] with-compilation-unit ; : compiled-parse ( state word -- result ) - swap [ execute [ errors get throw ] unless* ] with-packrat ; inline + swap [ execute [ error-stack get throw ] unless* ] with-packrat ; inline : parse ( input parser -- result ) dup word? [ compile ] unless compiled-parse ; @@ -298,9 +309,9 @@ TUPLE: token-parser symbol ; : parse-token ( input string -- result ) #! Parse the string, returning a parse result dup >r ?head-slice [ - r> + r> f f add-error ] [ - drop input-slice "Expected token '" r> append "'" append add-error f + drop input-slice input-from "Expected token '" r> append "'" append 1vector add-error f ] if ; M: token-parser (compile) ( parser -- quot ) @@ -366,7 +377,8 @@ TUPLE: seq-parser parsers ; M: seq-parser (compile) ( parser -- quot ) [ [ input-slice V{ } clone ] % - parsers>> [ compiled-parser 1quotation , \ parse-seq-element , ] each + parsers>> unclip compiled-parser 1quotation , \ parse-seq-element , [ + compiled-parser 1quotation [ merge-errors ] compose , \ parse-seq-element , ] each ] [ ] make ; TUPLE: choice-parser parsers ; @@ -374,7 +386,8 @@ TUPLE: choice-parser parsers ; M: choice-parser (compile) ( parser -- quot ) [ f , - parsers>> [ compiled-parser 1quotation , \ unless* , ] each + parsers>> [ compiled-parser ] map + unclip 1quotation , \ unless* , [ 1quotation [ merge-errors ] compose , \ unless* , ] each ] [ ] make ; TUPLE: repeat0-parser p1 ; From 9c96edb805ecd81cc0c2c60f93aa918f739940e6 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Mon, 30 Jun 2008 11:29:57 +1200 Subject: [PATCH 04/25] Fix 'For' statement in JavaScript parser --- extra/peg/javascript/parser/parser.factor | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extra/peg/javascript/parser/parser.factor b/extra/peg/javascript/parser/parser.factor index 002804dcd8..de6e2bae32 100644 --- a/extra/peg/javascript/parser/parser.factor +++ b/extra/peg/javascript/parser/parser.factor @@ -105,7 +105,7 @@ Binding = Name:n "=" Expr:v => [[ n v ast-var | Name:n => [[ n "undefined" ast-get boa ast-var boa ]] Block = "{" SrcElems:ss "}" => [[ ss ]] Bindings = (Binding ("," Binding => [[ second ]])* => [[ first2 swap prefix ]])? -For1 = "var" Binding => [[ second ]] +For1 = "var" Bindings => [[ second ]] | Expr | Spaces => [[ "undefined" ast-get boa ]] For2 = Expr From cf00bc8a0c0d5e3d211e81a099a57ba3374cabac Mon Sep 17 00:00:00 2001 From: Chris Double Date: Mon, 30 Jun 2008 11:54:47 +1200 Subject: [PATCH 05/25] Add flags to regexp tokenizer in JavaScript --- extra/peg/javascript/ast/ast.factor | 2 +- extra/peg/javascript/parser/parser.factor | 12 ++++++------ extra/peg/javascript/tokenizer/tokenizer.factor | 3 ++- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/extra/peg/javascript/ast/ast.factor b/extra/peg/javascript/ast/ast.factor index b857dc51bb..47ab6da864 100644 --- a/extra/peg/javascript/ast/ast.factor +++ b/extra/peg/javascript/ast/ast.factor @@ -7,7 +7,7 @@ TUPLE: ast-keyword value ; TUPLE: ast-name value ; TUPLE: ast-number value ; TUPLE: ast-string value ; -TUPLE: ast-regexp value ; +TUPLE: ast-regexp body flags ; TUPLE: ast-cond-expr condition then else ; TUPLE: ast-set lhs rhs ; TUPLE: ast-get value ; diff --git a/extra/peg/javascript/parser/parser.factor b/extra/peg/javascript/parser/parser.factor index de6e2bae32..41387d0a5c 100644 --- a/extra/peg/javascript/parser/parser.factor +++ b/extra/peg/javascript/parser/parser.factor @@ -26,9 +26,9 @@ End = !(.) Space = " " | "\t" | "\n" Spaces = Space* => [[ ignore ]] Name = . ?[ ast-name? ]? => [[ value>> ]] -Number = . ?[ ast-number? ]? => [[ value>> ]] -String = . ?[ ast-string? ]? => [[ value>> ]] -RegExp = . ?[ ast-regexp? ]? => [[ value>> ]] +Number = . ?[ ast-number? ]? +String = . ?[ ast-string? ]? +RegExp = . ?[ ast-regexp? ]? SpacesNoNl = (!(nl) Space)* => [[ ignore ]] Expr = OrExpr:e "?" Expr:t ":" Expr:f => [[ e t f ast-cond-expr boa ]] @@ -85,9 +85,9 @@ PrimExpr = PrimExpr:p "[" Expr:i "]" => [[ i p ast-getp PrimExprHd = "(" Expr:e ")" => [[ e ]] | "this" => [[ ast-this boa ]] | Name => [[ ast-get boa ]] - | Number => [[ ast-number boa ]] - | String => [[ ast-string boa ]] - | RegExp => [[ ast-regexp boa ]] + | Number + | String + | RegExp | "function" FuncRest:fr => [[ fr ]] | "new" PrimExpr:n "(" Args:as ")" => [[ n as ast-new boa ]] | "new" PrimExpr:n => [[ n f ast-new boa ]] diff --git a/extra/peg/javascript/tokenizer/tokenizer.factor b/extra/peg/javascript/tokenizer/tokenizer.factor index 195184a16c..825c8f03d1 100644 --- a/extra/peg/javascript/tokenizer/tokenizer.factor +++ b/extra/peg/javascript/tokenizer/tokenizer.factor @@ -57,8 +57,9 @@ StringChars3 = (EscapeChar | !("'") .)* => [[ >string ]] Str = '"""' StringChars1:cs '"""' => [[ cs ast-string boa ]] | '"' StringChars2:cs '"' => [[ cs ast-string boa ]] | "'" StringChars3:cs "'" => [[ cs ast-string boa ]] +RegExpFlags = NameRest* RegExpBody = (!("/" | "\n" | "\r") .)* => [[ >string ]] -RegExp = "/" RegExpBody:r "/" => [[ r ast-regexp boa ]] +RegExp = "/" RegExpBody:b "/" RegExpFlags:fl => [[ b fl ast-regexp boa ]] Special = "(" | ")" | "{" | "}" | "[" | "]" | "," | ";" | "?" | ":" | "!==" | "!=" | "===" | "==" | "=" | ">=" | ">" | "<=" | "<" | "++" | "+=" | "+" | "--" | "-=" From acb6d3a312dff4450f37a4ffafc1132010a92578 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Mon, 30 Jun 2008 14:32:20 +1200 Subject: [PATCH 06/25] Fix peg.ebnf tests. Handle \ in EBNF --- extra/peg/ebnf/ebnf-tests.factor | 60 ++++++++++--------- extra/peg/ebnf/ebnf.factor | 1 + .../peg/javascript/tokenizer/tokenizer.factor | 9 ++- 3 files changed, 41 insertions(+), 29 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 2269af6625..a2807d20db 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -132,21 +132,21 @@ IN: peg.ebnf.tests "Z" [EBNF foo=[A-Z] EBNF] call ast>> ] unit-test -{ f } [ +[ "0" [EBNF foo=[A-Z] EBNF] call -] unit-test +] must-fail { CHAR: 0 } [ "0" [EBNF foo=[^A-Z] EBNF] call ast>> ] unit-test -{ f } [ +[ "A" [EBNF foo=[^A-Z] EBNF] call -] unit-test +] must-fail -{ f } [ +[ "Z" [EBNF foo=[^A-Z] EBNF] call -] unit-test +] must-fail { V{ "1" "+" "foo" } } [ "1+1" [EBNF foo='1' '+' '1' [[ drop "foo" ]] EBNF] call ast>> @@ -176,17 +176,17 @@ IN: peg.ebnf.tests { 1 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF] call ast>> ] unit-test -{ f } [ +[ { "a" 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF] call -] unit-test +] must-fail { 3 } [ { 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF] call ast>> ] unit-test -{ f } [ +[ "ab" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call -] unit-test +] must-fail { V{ "a" " " "b" } } [ "a b" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call ast>> @@ -229,9 +229,9 @@ IN: peg.ebnf.tests "a\nb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call ast>> ] unit-test -{ f } [ +[ "axb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call -] unit-test +] must-fail { V{ V{ 49 } "+" V{ 49 } } } [ #! Test direct left recursion. @@ -314,41 +314,41 @@ main = Primary "abc" [EBNF a="a" "b" foo=a "c" EBNF] call ast>> ] unit-test -{ f } [ +[ "a bc" [EBNF a="a" "b" foo=(a "c") EBNF] call -] unit-test +] must-fail -{ f } [ +[ "a bc" [EBNF a="a" "b" foo=a "c" EBNF] call -] unit-test +] must-fail -{ f } [ +[ "a bc" [EBNF a="a" "b" foo={a "c"} EBNF] call -] unit-test +] must-fail -{ f } [ +[ "ab c" [EBNF a="a" "b" foo=a "c" EBNF] call -] unit-test +] must-fail { V{ V{ "a" "b" } "c" } } [ "ab c" [EBNF a="a" "b" foo={a "c"} EBNF] call ast>> ] unit-test -{ f } [ +[ "ab c" [EBNF a="a" "b" foo=(a "c") EBNF] call -] unit-test +] must-fail -{ f } [ +[ "a b c" [EBNF a="a" "b" foo=a "c" EBNF] call -] unit-test +] must-fail -{ f } [ +[ "a b c" [EBNF a="a" "b" foo=(a "c") EBNF] call -] unit-test +] must-fail -{ f } [ +[ "a b c" [EBNF a="a" "b" foo={a "c"} EBNF] call -] unit-test +] must-fail { V{ V{ V{ "a" "b" } "c" } V{ V{ "a" "b" } "c" } } } [ "ab cab c" [EBNF a="a" "b" foo={a "c"}* EBNF] call ast>> @@ -515,4 +515,8 @@ Tok = Spaces (Number | Special ) { "++" } [ "++--" [EBNF tokenizer=("++" | "--") main="++" EBNF] call ast>> +] unit-test + +{ "\\" } [ + "\\" [EBNF foo="\\" EBNF] call ast>> ] unit-test \ No newline at end of file diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 3d48665c8c..610cffd273 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -99,6 +99,7 @@ PEG: escaper ( string -- ast ) "\\t" token [ drop "\t" ] action , "\\n" token [ drop "\n" ] action , "\\r" token [ drop "\r" ] action , + "\\\\" token [ drop "\\" ] action , ] choice* any-char-parser 2array choice repeat0 ; : replace-escapes ( string -- string ) diff --git a/extra/peg/javascript/tokenizer/tokenizer.factor b/extra/peg/javascript/tokenizer/tokenizer.factor index 825c8f03d1..256e478571 100644 --- a/extra/peg/javascript/tokenizer/tokenizer.factor +++ b/extra/peg/javascript/tokenizer/tokenizer.factor @@ -58,7 +58,14 @@ Str = '"""' StringChars1:cs '"""' => [[ cs ast-string boa ]] | '"' StringChars2:cs '"' => [[ cs ast-string boa ]] | "'" StringChars3:cs "'" => [[ cs ast-string boa ]] RegExpFlags = NameRest* -RegExpBody = (!("/" | "\n" | "\r") .)* => [[ >string ]] +NonTerminator = !("\n" | "\r") . +BackslashSequence = "\\" NonTerminator +RegExpFirstChar = !("*" | "\\" | "/") NonTerminator + | BackslashSequence +RegExpChar = !("\\" | "/") NonTerminator + | BackslashSequence +RegExpChars = RegExpChar* +RegExpBody = RegExpFirstChar RegExpChars RegExp = "/" RegExpBody:b "/" RegExpFlags:fl => [[ b fl ast-regexp boa ]] Special = "(" | ")" | "{" | "}" | "[" | "]" | "," | ";" | "?" | ":" | "!==" | "!=" | "===" | "==" | "=" | ">=" From 4394cb08f69896f86b0712931610b6465d9c9b58 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Mon, 30 Jun 2008 15:20:12 +1200 Subject: [PATCH 07/25] RegExp fix for javascript tokenizer --- extra/peg/javascript/tokenizer/tokenizer-tests.factor | 4 ++++ extra/peg/javascript/tokenizer/tokenizer.factor | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/extra/peg/javascript/tokenizer/tokenizer-tests.factor b/extra/peg/javascript/tokenizer/tokenizer-tests.factor index 509ff4a0fe..a61125d08c 100644 --- a/extra/peg/javascript/tokenizer/tokenizer-tests.factor +++ b/extra/peg/javascript/tokenizer/tokenizer-tests.factor @@ -21,3 +21,7 @@ IN: peg.javascript.tokenizer.tests } [ "123; 'hello'; foo(x);" tokenize-javascript ast>> ] unit-test + +{ V{ T{ ast-regexp f "<(w+)[^>]*?)/>" "g" } } } [ + "/<(\\w+)[^>]*?)\\/>/g" tokenize-javascript ast>> +] unit-test \ No newline at end of file diff --git a/extra/peg/javascript/tokenizer/tokenizer.factor b/extra/peg/javascript/tokenizer/tokenizer.factor index 256e478571..f65b0b2ad6 100644 --- a/extra/peg/javascript/tokenizer/tokenizer.factor +++ b/extra/peg/javascript/tokenizer/tokenizer.factor @@ -57,15 +57,15 @@ StringChars3 = (EscapeChar | !("'") .)* => [[ >string ]] Str = '"""' StringChars1:cs '"""' => [[ cs ast-string boa ]] | '"' StringChars2:cs '"' => [[ cs ast-string boa ]] | "'" StringChars3:cs "'" => [[ cs ast-string boa ]] -RegExpFlags = NameRest* +RegExpFlags = NameRest* => [[ >string ]] NonTerminator = !("\n" | "\r") . -BackslashSequence = "\\" NonTerminator +BackslashSequence = "\\" NonTerminator => [[ second ]] RegExpFirstChar = !("*" | "\\" | "/") NonTerminator | BackslashSequence RegExpChar = !("\\" | "/") NonTerminator | BackslashSequence RegExpChars = RegExpChar* -RegExpBody = RegExpFirstChar RegExpChars +RegExpBody = RegExpFirstChar RegExpChars => [[ first2 swap prefix >string ]] RegExp = "/" RegExpBody:b "/" RegExpFlags:fl => [[ b fl ast-regexp boa ]] Special = "(" | ")" | "{" | "}" | "[" | "]" | "," | ";" | "?" | ":" | "!==" | "!=" | "===" | "==" | "=" | ">=" From 7404c5dc01026b19b1f69bf7d8e4181758cdfc20 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Mon, 30 Jun 2008 16:04:57 +1200 Subject: [PATCH 08/25] Add ShiftExpr to JavaScript parser --- extra/peg/javascript/parser/parser.factor | 14 +++++++++----- extra/peg/javascript/tokenizer/tokenizer.factor | 6 +++--- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/extra/peg/javascript/parser/parser.factor b/extra/peg/javascript/parser/parser.factor index 41387d0a5c..e491c35d2b 100644 --- a/extra/peg/javascript/parser/parser.factor +++ b/extra/peg/javascript/parser/parser.factor @@ -51,11 +51,15 @@ EqExpr = EqExpr:x "==" RelExpr:y => [[ x y "==" ast-binop | EqExpr:x "===" RelExpr:y => [[ x y "===" ast-binop boa ]] | EqExpr:x "!==" RelExpr:y => [[ x y "!==" ast-binop boa ]] | RelExpr -RelExpr = RelExpr:x ">" AddExpr:y => [[ x y ">" ast-binop boa ]] - | RelExpr:x ">=" AddExpr:y => [[ x y ">=" ast-binop boa ]] - | RelExpr:x "<" AddExpr:y => [[ x y "<" ast-binop boa ]] - | RelExpr:x "<=" AddExpr:y => [[ x y "<=" ast-binop boa ]] - | RelExpr:x "instanceof" AddExpr:y => [[ x y "instanceof" ast-binop boa ]] +RelExpr = RelExpr:x ">" ShiftExpr:y => [[ x y ">" ast-binop boa ]] + | RelExpr:x ">=" ShiftExpr:y => [[ x y ">=" ast-binop boa ]] + | RelExpr:x "<" ShiftExpr:y => [[ x y "<" ast-binop boa ]] + | RelExpr:x "<=" ShiftExpr:y => [[ x y "<=" ast-binop boa ]] + | RelExpr:x "instanceof" ShiftExpr:y => [[ x y "instanceof" ast-binop boa ]] + | ShiftExpr +ShiftExpr = ShiftExpr:x "<<" AddExpr:y => [[ x y "<<" ast-binop boa ]] + | ShiftExpr:x ">>>" AddExpr:y => [[ x y ">>>" ast-binop boa ]] + | ShiftExpr:x ">>" AddExpr:y => [[ x y ">>" ast-binop boa ]] | AddExpr AddExpr = AddExpr:x "+" MulExpr:y => [[ x y "+" ast-binop boa ]] | AddExpr:x "-" MulExpr:y => [[ x y "-" ast-binop boa ]] diff --git a/extra/peg/javascript/tokenizer/tokenizer.factor b/extra/peg/javascript/tokenizer/tokenizer.factor index f65b0b2ad6..0698c8427e 100644 --- a/extra/peg/javascript/tokenizer/tokenizer.factor +++ b/extra/peg/javascript/tokenizer/tokenizer.factor @@ -69,9 +69,9 @@ RegExpBody = RegExpFirstChar RegExpChars => [[ first2 swap prefix >strin RegExp = "/" RegExpBody:b "/" RegExpFlags:fl => [[ b fl ast-regexp boa ]] Special = "(" | ")" | "{" | "}" | "[" | "]" | "," | ";" | "?" | ":" | "!==" | "!=" | "===" | "==" | "=" | ">=" - | ">" | "<=" | "<" | "++" | "+=" | "+" | "--" | "-=" - | "-" | "*=" | "*" | "/=" | "/" | "%=" | "%" | "&&=" - | "&&" | "||=" | "||" | "." | "!" + | ">>>" | ">>" | ">" | "<=" | "<<" | "<" | "++" | "+=" + | "+" | "--" | "-=" | "-" | "*=" | "*" | "/=" | "/" + | "%=" | "%" | "&&=" | "&&" | "||=" | "||" | "." | "!" Tok = Spaces (Name | Keyword | Number | Str | RegExp | Special ) Toks = Tok* Spaces ;EBNF From bf664e7ec895b07ae1c7f3fc00ca54e67de5c5b3 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Mon, 30 Jun 2008 16:16:13 +1200 Subject: [PATCH 09/25] Add ShiftExpr to JavaScript parser --- extra/peg/javascript/parser/parser.factor | 1 + 1 file changed, 1 insertion(+) diff --git a/extra/peg/javascript/parser/parser.factor b/extra/peg/javascript/parser/parser.factor index e491c35d2b..39bab79ea9 100644 --- a/extra/peg/javascript/parser/parser.factor +++ b/extra/peg/javascript/parser/parser.factor @@ -56,6 +56,7 @@ RelExpr = RelExpr:x ">" ShiftExpr:y => [[ x y ">" ast-bino | RelExpr:x "<" ShiftExpr:y => [[ x y "<" ast-binop boa ]] | RelExpr:x "<=" ShiftExpr:y => [[ x y "<=" ast-binop boa ]] | RelExpr:x "instanceof" ShiftExpr:y => [[ x y "instanceof" ast-binop boa ]] + | RelExpr:x "in" ShiftExpr:y => [[ x y "in" ast-binop boa ]] | ShiftExpr ShiftExpr = ShiftExpr:x "<<" AddExpr:y => [[ x y "<<" ast-binop boa ]] | ShiftExpr:x ">>>" AddExpr:y => [[ x y ">>>" ast-binop boa ]] From 87bbe8cae162c828e04a7af15ac5f3fa2d0f4b4e Mon Sep 17 00:00:00 2001 From: Chris Double Date: Mon, 30 Jun 2008 18:24:59 +1200 Subject: [PATCH 10/25] Get for(x in y) { } working in js parser --- extra/peg/javascript/parser/parser.factor | 32 ++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/extra/peg/javascript/parser/parser.factor b/extra/peg/javascript/parser/parser.factor index 39bab79ea9..2736496cc7 100644 --- a/extra/peg/javascript/parser/parser.factor +++ b/extra/peg/javascript/parser/parser.factor @@ -42,15 +42,35 @@ Expr = OrExpr:e "?" Expr:t ":" Expr:f => [[ e t f ast-cond-exp | OrExpr:e "||=" Expr:rhs => [[ e rhs "||" ast-mset boa ]] | OrExpr:e => [[ e ]] +ExprNoIn = OrExprNoIn:e "?" ExprNoIn:t ":" ExprNoIn:f => [[ e t f ast-cond-expr boa ]] + | OrExprNoIn:e "=" ExprNoIn:rhs => [[ e rhs ast-set boa ]] + | OrExprNoIn:e "+=" ExprNoIn:rhs => [[ e rhs "+" ast-mset boa ]] + | OrExprNoIn:e "-=" ExprNoIn:rhs => [[ e rhs "-" ast-mset boa ]] + | OrExprNoIn:e "*=" ExprNoIn:rhs => [[ e rhs "*" ast-mset boa ]] + | OrExprNoIn:e "/=" ExprNoIn:rhs => [[ e rhs "/" ast-mset boa ]] + | OrExprNoIn:e "%=" ExprNoIn:rhs => [[ e rhs "%" ast-mset boa ]] + | OrExprNoIn:e "&&=" ExprNoIn:rhs => [[ e rhs "&&" ast-mset boa ]] + | OrExprNoIn:e "||=" ExprNoIn:rhs => [[ e rhs "||" ast-mset boa ]] + | OrExprNoIn:e => [[ e ]] + OrExpr = OrExpr:x "||" AndExpr:y => [[ x y "||" ast-binop boa ]] | AndExpr +OrExprNoIn = OrExprNoIn:x "||" AndExprNoIn:y => [[ x y "||" ast-binop boa ]] + | AndExprNoIn AndExpr = AndExpr:x "&&" EqExpr:y => [[ x y "&&" ast-binop boa ]] | EqExpr +AndExprNoIn = AndExprNoIn:x "&&" EqExprNoIn:y => [[ x y "&&" ast-binop boa ]] + | EqExprNoIn EqExpr = EqExpr:x "==" RelExpr:y => [[ x y "==" ast-binop boa ]] | EqExpr:x "!=" RelExpr:y => [[ x y "!=" ast-binop boa ]] | EqExpr:x "===" RelExpr:y => [[ x y "===" ast-binop boa ]] | EqExpr:x "!==" RelExpr:y => [[ x y "!==" ast-binop boa ]] | RelExpr +EqExprNoIn = EqExprNoIn:x "==" RelExprNoIn:y => [[ x y "==" ast-binop boa ]] + | EqExprNoIn:x "!=" RelExprNoIn:y => [[ x y "!=" ast-binop boa ]] + | EqExprNoIn:x "===" RelExprNoIn:y => [[ x y "===" ast-binop boa ]] + | EqExprNoIn:x "!==" RelExprNoIn:y => [[ x y "!==" ast-binop boa ]] + | RelExprNoIn RelExpr = RelExpr:x ">" ShiftExpr:y => [[ x y ">" ast-binop boa ]] | RelExpr:x ">=" ShiftExpr:y => [[ x y ">=" ast-binop boa ]] | RelExpr:x "<" ShiftExpr:y => [[ x y "<" ast-binop boa ]] @@ -58,6 +78,12 @@ RelExpr = RelExpr:x ">" ShiftExpr:y => [[ x y ">" ast-bino | RelExpr:x "instanceof" ShiftExpr:y => [[ x y "instanceof" ast-binop boa ]] | RelExpr:x "in" ShiftExpr:y => [[ x y "in" ast-binop boa ]] | ShiftExpr +RelExprNoIn = RelExprNoIn:x ">" ShiftExpr:y => [[ x y ">" ast-binop boa ]] + | RelExprNoIn:x ">=" ShiftExpr:y => [[ x y ">=" ast-binop boa ]] + | RelExprNoIn:x "<" ShiftExpr:y => [[ x y "<" ast-binop boa ]] + | RelExprNoIn:x "<=" ShiftExpr:y => [[ x y "<=" ast-binop boa ]] + | RelExprNoIn:x "instanceof" ShiftExpr:y => [[ x y "instanceof" ast-binop boa ]] + | ShiftExpr ShiftExpr = ShiftExpr:x "<<" AddExpr:y => [[ x y "<<" ast-binop boa ]] | ShiftExpr:x ">>>" AddExpr:y => [[ x y ">>>" ast-binop boa ]] | ShiftExpr:x ">>" AddExpr:y => [[ x y ">>" ast-binop boa ]] @@ -98,7 +124,7 @@ PrimExprHd = "(" Expr:e ")" => [[ e ]] | "new" PrimExpr:n => [[ n f ast-new boa ]] | "[" Args:es "]" => [[ es ast-array boa ]] | Json -JsonBindings = (JsonBinding ("," JsonBinding => [[ second ]])* => [[ first2 swap prefix ]])? +JsonBindings = (JsonBinding ("," JsonBinding => [[ second ]])* => [[ first2 swap prefix ]])? Json = "{" JsonBindings:bs "}" => [[ bs ast-json boa ]] JsonBinding = JsonPropName:n ":" Expr:v => [[ n v ast-binding boa ]] JsonPropName = Name | Number | String | RegExp @@ -111,14 +137,14 @@ Binding = Name:n "=" Expr:v => [[ n v ast-var Block = "{" SrcElems:ss "}" => [[ ss ]] Bindings = (Binding ("," Binding => [[ second ]])* => [[ first2 swap prefix ]])? For1 = "var" Bindings => [[ second ]] - | Expr + | ExprNoIn | Spaces => [[ "undefined" ast-get boa ]] For2 = Expr | Spaces => [[ "true" ast-get boa ]] For3 = Expr | Spaces => [[ "undefined" ast-get boa ]] ForIn1 = "var" Name:n => [[ n "undefined" ast-get boa ast-var boa ]] - | Expr + | PrimExprHd Switch1 = "case" Expr:c ":" SrcElems:cs => [[ c cs ast-case boa ]] | "default" ":" SrcElems:cs => [[ cs ast-default boa ]] SwitchBody = Switch1* From 8f718fa41eab364708867cc60d2a8f9644b1b765 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Mon, 30 Jun 2008 19:05:18 +1200 Subject: [PATCH 11/25] Parse more valid JavaScript --- extra/peg/javascript/parser/parser.factor | 28 +++++++++++++++++-- .../peg/javascript/tokenizer/tokenizer.factor | 12 ++++---- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/extra/peg/javascript/parser/parser.factor b/extra/peg/javascript/parser/parser.factor index 2736496cc7..45da7c3bb4 100644 --- a/extra/peg/javascript/parser/parser.factor +++ b/extra/peg/javascript/parser/parser.factor @@ -40,6 +40,12 @@ Expr = OrExpr:e "?" Expr:t ":" Expr:f => [[ e t f ast-cond-exp | OrExpr:e "%=" Expr:rhs => [[ e rhs "%" ast-mset boa ]] | OrExpr:e "&&=" Expr:rhs => [[ e rhs "&&" ast-mset boa ]] | OrExpr:e "||=" Expr:rhs => [[ e rhs "||" ast-mset boa ]] + | OrExpr:e "^=" Expr:rhs => [[ e rhs "^" ast-mset boa ]] + | OrExpr:e "&=" Expr:rhs => [[ e rhs "&" ast-mset boa ]] + | OrExpr:e "|=" Expr:rhs => [[ e rhs "|" ast-mset boa ]] + | OrExpr:e "<<=" Expr:rhs => [[ e rhs "<<" ast-mset boa ]] + | OrExpr:e ">>=" Expr:rhs => [[ e rhs ">>" ast-mset boa ]] + | OrExpr:e ">>>=" Expr:rhs => [[ e rhs ">>>" ast-mset boa ]] | OrExpr:e => [[ e ]] ExprNoIn = OrExprNoIn:e "?" ExprNoIn:t ":" ExprNoIn:f => [[ e t f ast-cond-expr boa ]] @@ -51,15 +57,33 @@ ExprNoIn = OrExprNoIn:e "?" ExprNoIn:t ":" ExprNoIn:f => [[ e t f as | OrExprNoIn:e "%=" ExprNoIn:rhs => [[ e rhs "%" ast-mset boa ]] | OrExprNoIn:e "&&=" ExprNoIn:rhs => [[ e rhs "&&" ast-mset boa ]] | OrExprNoIn:e "||=" ExprNoIn:rhs => [[ e rhs "||" ast-mset boa ]] + | OrExprNoIn:e "^=" ExprNoIn:rhs => [[ e rhs "^" ast-mset boa ]] + | OrExprNoIn:e "&=" ExprNoIn:rhs => [[ e rhs "&" ast-mset boa ]] + | OrExprNoIn:e "|=" ExprNoIn:rhs => [[ e rhs "|" ast-mset boa ]] + | OrExprNoIn:e "<<=" ExprNoIn:rhs => [[ e rhs "<<" ast-mset boa ]] + | OrExprNoIn:e ">>=" ExprNoIn:rhs => [[ e rhs ">>" ast-mset boa ]] + | OrExprNoIn:e ">>>=" ExprNoIn:rhs => [[ e rhs ">>>" ast-mset boa ]] | OrExprNoIn:e => [[ e ]] OrExpr = OrExpr:x "||" AndExpr:y => [[ x y "||" ast-binop boa ]] | AndExpr OrExprNoIn = OrExprNoIn:x "||" AndExprNoIn:y => [[ x y "||" ast-binop boa ]] | AndExprNoIn -AndExpr = AndExpr:x "&&" EqExpr:y => [[ x y "&&" ast-binop boa ]] +AndExpr = AndExpr:x "&&" BitOrExpr:y => [[ x y "&&" ast-binop boa ]] + | BitOrExpr +AndExprNoIn = AndExprNoIn:x "&&" BitOrExprNoIn:y => [[ x y "&&" ast-binop boa ]] + | BitOrExprNoIn +BitOrExpr = BitOrExpr:x "|" BitXORExpr:y => [[ x y "|" ast-binop boa ]] + | BitXORExpr +BitOrExprNoIn = BitOrExprNoIn:x "|" BitXORExprNoIn:y => [[ x y "|" ast-binop boa ]] + | BitXORExprNoIn +BitXORExpr = BitXORExpr:x "^" BitANDExpr:y => [[ x y "^" ast-binop boa ]] + | BitANDExpr +BitXORExprNoIn = BitXORExprNoIn:x "^" BitANDExprNoIn:y => [[ x y "^" ast-binop boa ]] + | BitANDExprNoIn +BitANDExpr = BitANDExpr:x "&" EqExpr:y => [[ x y "&" ast-binop boa ]] | EqExpr -AndExprNoIn = AndExprNoIn:x "&&" EqExprNoIn:y => [[ x y "&&" ast-binop boa ]] +BitANDExprNoIn = BitANDExprNoIn:x "&" EqExprNoIn:y => [[ x y "&" ast-binop boa ]] | EqExprNoIn EqExpr = EqExpr:x "==" RelExpr:y => [[ x y "==" ast-binop boa ]] | EqExpr:x "!=" RelExpr:y => [[ x y "!=" ast-binop boa ]] diff --git a/extra/peg/javascript/tokenizer/tokenizer.factor b/extra/peg/javascript/tokenizer/tokenizer.factor index 0698c8427e..30a3b5e7a5 100644 --- a/extra/peg/javascript/tokenizer/tokenizer.factor +++ b/extra/peg/javascript/tokenizer/tokenizer.factor @@ -67,11 +67,13 @@ RegExpChar = !("\\" | "/") NonTerminator RegExpChars = RegExpChar* RegExpBody = RegExpFirstChar RegExpChars => [[ first2 swap prefix >string ]] RegExp = "/" RegExpBody:b "/" RegExpFlags:fl => [[ b fl ast-regexp boa ]] -Special = "(" | ")" | "{" | "}" | "[" | "]" | "," | ";" - | "?" | ":" | "!==" | "!=" | "===" | "==" | "=" | ">=" - | ">>>" | ">>" | ">" | "<=" | "<<" | "<" | "++" | "+=" - | "+" | "--" | "-=" | "-" | "*=" | "*" | "/=" | "/" - | "%=" | "%" | "&&=" | "&&" | "||=" | "||" | "." | "!" +Special = "(" | ")" | "{" | "}" | "[" | "]" | "," | ";" + | "?" | ":" | "!==" | "!=" | "===" | "==" | "=" | ">=" + | ">>>=" | ">>>" | ">>=" | ">>" | ">" | "<=" | "<<=" | "<<" + | "<" | "++" | "+=" | "+" | "--" | "-=" | "-" | "*=" + | "*" | "/=" | "/" | "%=" | "%" | "&&=" | "&&" | "||=" + | "||" | "." | "!" | "&=" | "&" | "|=" | "|" | "^=" + | "^" Tok = Spaces (Name | Keyword | Number | Str | RegExp | Special ) Toks = Tok* Spaces ;EBNF From c8511b483fa911f63e58f4ed171df76186632346 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Mon, 30 Jun 2008 19:25:23 +1200 Subject: [PATCH 12/25] Add support for 'with' in js parser. Now parses jquery successfully --- extra/peg/javascript/ast/ast.factor | 1 + extra/peg/javascript/parser/parser.factor | 1 + 2 files changed, 2 insertions(+) diff --git a/extra/peg/javascript/ast/ast.factor b/extra/peg/javascript/ast/ast.factor index 47ab6da864..9f67af86aa 100644 --- a/extra/peg/javascript/ast/ast.factor +++ b/extra/peg/javascript/ast/ast.factor @@ -38,5 +38,6 @@ TUPLE: ast-continue ; TUPLE: ast-throw e ; TUPLE: ast-try t e c f ; TUPLE: ast-return e ; +TUPLE: ast-with expr body ; TUPLE: ast-case c cs ; TUPLE: ast-default cs ; diff --git a/extra/peg/javascript/parser/parser.factor b/extra/peg/javascript/parser/parser.factor index 45da7c3bb4..7ace528150 100644 --- a/extra/peg/javascript/parser/parser.factor +++ b/extra/peg/javascript/parser/parser.factor @@ -189,6 +189,7 @@ Stmt = Block | "try" Block:t "catch" "(" Name:e ")" Block:c Finally:f => [[ t e c f ast-try boa ]] | "return" Expr:e Sc => [[ e ast-return boa ]] | "return" Sc => [[ "undefined" ast-get boa ast-return boa ]] + | "with" "(" Expr:e ")" Stmt:b => [[ e b ast-with boa ]] | Expr:e Sc => [[ e ]] | ";" => [[ "undefined" ast-get boa ]] SrcElem = "function" Name:n FuncRest:f => [[ n f ast-var boa ]] From b5cef674b1f99dbb3d763cd162f1891857c40c76 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Thu, 3 Jul 2008 16:52:22 +1200 Subject: [PATCH 13/25] Pegs throw exceptions on error now --- extra/peg/parsers/parsers-tests.factor | 65 ++++++++++++-------------- extra/peg/peg-tests.factor | 60 ++++++++++++------------ extra/peg/peg.factor | 2 +- 3 files changed, 62 insertions(+), 65 deletions(-) diff --git a/extra/peg/parsers/parsers-tests.factor b/extra/peg/parsers/parsers-tests.factor index e80baf3c4f..0cf3ad8b17 100644 --- a/extra/peg/parsers/parsers-tests.factor +++ b/extra/peg/parsers/parsers-tests.factor @@ -1,54 +1,51 @@ -USING: kernel peg peg.parsers tools.test ; +USING: kernel peg peg.parsers tools.test accessors ; IN: peg.parsers.tests -[ V{ "a" } ] -[ "a" "a" token "," token list-of parse parse-result-ast ] unit-test +{ V{ "a" } } +[ "a" "a" token "," token list-of parse ast>> ] unit-test -[ V{ "a" "a" "a" "a" } ] -[ "a,a,a,a" "a" token "," token list-of parse parse-result-ast ] unit-test +{ V{ "a" "a" "a" "a" } } +[ "a,a,a,a" "a" token "," token list-of parse ast>> ] unit-test -[ f ] -[ "a" "a" token "," token list-of-many parse ] unit-test +[ "a" "a" token "," token list-of-many parse ] must-fail -[ V{ "a" "a" "a" "a" } ] -[ "a,a,a,a" "a" token "," token list-of-many parse parse-result-ast ] unit-test +{ V{ "a" "a" "a" "a" } } +[ "a,a,a,a" "a" token "," token list-of-many parse ast>> ] unit-test -[ f ] -[ "aaa" "a" token 4 exactly-n parse ] unit-test +[ "aaa" "a" token 4 exactly-n parse ] must-fail -[ V{ "a" "a" "a" "a" } ] -[ "aaaa" "a" token 4 exactly-n parse parse-result-ast ] unit-test +{ V{ "a" "a" "a" "a" } } +[ "aaaa" "a" token 4 exactly-n parse ast>> ] unit-test -[ f ] -[ "aaa" "a" token 4 at-least-n parse ] unit-test +[ "aaa" "a" token 4 at-least-n parse ] must-fail -[ V{ "a" "a" "a" "a" } ] -[ "aaaa" "a" token 4 at-least-n parse parse-result-ast ] unit-test +{ V{ "a" "a" "a" "a" } } +[ "aaaa" "a" token 4 at-least-n parse ast>> ] unit-test -[ V{ "a" "a" "a" "a" "a" } ] -[ "aaaaa" "a" token 4 at-least-n parse parse-result-ast ] unit-test +{ V{ "a" "a" "a" "a" "a" } } +[ "aaaaa" "a" token 4 at-least-n parse ast>> ] unit-test -[ V{ "a" "a" "a" "a" } ] -[ "aaaa" "a" token 4 at-most-n parse parse-result-ast ] unit-test +{ V{ "a" "a" "a" "a" } } +[ "aaaa" "a" token 4 at-most-n parse ast>> ] unit-test -[ V{ "a" "a" "a" "a" } ] -[ "aaaaa" "a" token 4 at-most-n parse parse-result-ast ] unit-test +{ V{ "a" "a" "a" "a" } } +[ "aaaaa" "a" token 4 at-most-n parse ast>> ] unit-test -[ V{ "a" "a" "a" } ] -[ "aaa" "a" token 3 4 from-m-to-n parse parse-result-ast ] unit-test +{ V{ "a" "a" "a" } } +[ "aaa" "a" token 3 4 from-m-to-n parse ast>> ] unit-test -[ V{ "a" "a" "a" "a" } ] -[ "aaaa" "a" token 3 4 from-m-to-n parse parse-result-ast ] unit-test +{ V{ "a" "a" "a" "a" } } +[ "aaaa" "a" token 3 4 from-m-to-n parse ast>> ] unit-test -[ V{ "a" "a" "a" "a" } ] -[ "aaaaa" "a" token 3 4 from-m-to-n parse parse-result-ast ] unit-test +{ V{ "a" "a" "a" "a" } } +[ "aaaaa" "a" token 3 4 from-m-to-n parse ast>> ] unit-test -[ 97 ] -[ "a" any-char parse parse-result-ast ] unit-test +{ 97 } +[ "a" any-char parse ast>> ] unit-test -[ V{ } ] -[ "" epsilon parse parse-result-ast ] unit-test +{ V{ } } +[ "" epsilon parse ast>> ] unit-test { "a" } [ - "a" "a" token just parse parse-result-ast + "a" "a" token just parse ast>> ] unit-test \ No newline at end of file diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor index 1beeb51678..466da83b6e 100644 --- a/extra/peg/peg-tests.factor +++ b/extra/peg/peg-tests.factor @@ -5,9 +5,9 @@ USING: kernel tools.test strings namespaces arrays sequences peg peg.private accessors words math accessors ; IN: peg.tests -{ f } [ +[ "endbegin" "begin" token parse -] unit-test +] must-fail { "begin" "end" } [ "beginend" "begin" token parse @@ -15,13 +15,13 @@ IN: peg.tests >string ] unit-test -{ f } [ +[ "" CHAR: a CHAR: z range parse -] unit-test +] must-fail -{ f } [ +[ "1bcd" CHAR: a CHAR: z range parse -] unit-test +] must-fail { CHAR: a } [ "abcd" CHAR: a CHAR: z range parse ast>> @@ -31,9 +31,9 @@ IN: peg.tests "zbcd" CHAR: a CHAR: z range parse ast>> ] unit-test -{ f } [ +[ "bad" "a" token "b" token 2array seq parse -] unit-test +] must-fail { V{ "g" "o" } } [ "good" "g" token "o" token 2array seq parse ast>> @@ -47,13 +47,13 @@ IN: peg.tests "bbcd" "a" token "b" token 2array choice parse ast>> ] unit-test -{ f } [ +[ "cbcd" "a" token "b" token 2array choice parse -] unit-test +] must-fail -{ f } [ +[ "" "a" token "b" token 2array choice parse -] unit-test +] must-fail { 0 } [ "" "a" token repeat0 parse ast>> length @@ -67,13 +67,13 @@ IN: peg.tests "aaab" "a" token repeat0 parse ast>> ] unit-test -{ f } [ +[ "" "a" token repeat1 parse -] unit-test +] must-fail -{ f } [ +[ "b" "a" token repeat1 parse -] unit-test +] must-fail { V{ "a" "a" "a" } } [ "aaab" "a" token repeat1 parse ast>> @@ -87,17 +87,17 @@ IN: peg.tests "b" "a" token optional "b" token 2array seq parse ast>> ] unit-test -{ f } [ +[ "cb" "a" token optional "b" token 2array seq parse -] unit-test +] must-fail { V{ CHAR: a CHAR: b } } [ "ab" "a" token ensure CHAR: a CHAR: z range dup 3array seq parse ast>> ] unit-test -{ f } [ +[ "bb" "a" token ensure CHAR: a CHAR: z range 2array seq parse -] unit-test +] must-fail { t } [ "a+b" @@ -117,11 +117,11 @@ IN: peg.tests parse [ t ] [ f ] if ] unit-test -{ f } [ +[ "a++b" "a" token "+" token "++" token 2array choice "b" token 3array seq parse [ t ] [ f ] if -] unit-test +] must-fail { 1 } [ "a" "a" token [ drop 1 ] action parse ast>> @@ -131,13 +131,13 @@ IN: peg.tests "aa" "a" token [ drop 1 ] action dup 2array seq parse ast>> ] unit-test -{ f } [ +[ "b" "a" token [ drop 1 ] action parse -] unit-test +] must-fail -{ f } [ +[ "b" [ CHAR: a = ] satisfy parse -] unit-test +] must-fail { CHAR: a } [ "a" [ CHAR: a = ] satisfy parse ast>> @@ -155,9 +155,9 @@ IN: peg.tests "[a]" "[" token hide "a" token "]" token hide 3array seq parse ast>> ] unit-test -{ f } [ +[ "a]" "[" token hide "a" token "]" token hide 3array seq parse -] unit-test +] must-fail { V{ "1" "-" "1" } V{ "1" "+" "1" } } [ @@ -185,9 +185,9 @@ IN: peg.tests dupd 0 swap set-nth compile word? ] unit-test -{ f } [ +[ "A" [ drop t ] satisfy [ 66 >= ] semantic parse -] unit-test +] must-fail { CHAR: B } [ "B" [ drop t ] satisfy [ 66 >= ] semantic parse ast>> diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index a0f5fc05e8..a9695f90d8 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -280,7 +280,7 @@ SYMBOL: delayed ] with-compilation-unit ; : compiled-parse ( state word -- result ) - swap [ execute [ error-stack get throw ] unless* ] with-packrat ; inline + swap [ execute [ error-stack get first throw ] unless* ] with-packrat ; inline : parse ( input parser -- result ) dup word? [ compile ] unless compiled-parse ; From e47f944ccab3571c3fbc37700a5adf0954472f8b Mon Sep 17 00:00:00 2001 From: Chris Double Date: Thu, 3 Jul 2008 17:38:28 +1200 Subject: [PATCH 14/25] Print error message nicely --- extra/peg/peg.factor | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index a9695f90d8..d388bbd124 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -19,6 +19,10 @@ C: parse-result C: parse-error C: parser +M: parse-error error. + "Peg parsing error at character position " write dup position>> number>string write + "." print "Expected " write messages>> [ " or " write ] [ write ] interleave nl ; + SYMBOL: error-stack : (merge-errors) ( a b -- c ) @@ -311,7 +315,7 @@ TUPLE: token-parser symbol ; dup >r ?head-slice [ r> f f add-error ] [ - drop input-slice input-from "Expected token '" r> append "'" append 1vector add-error f + drop input-slice input-from "token '" r> append "'" append 1vector add-error f ] if ; M: token-parser (compile) ( parser -- quot ) From 8aa7bc6d78a0c6d64b56a0a5fa78253961665671 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Fri, 4 Jul 2008 12:32:02 +1200 Subject: [PATCH 15/25] [EBNF ... EBNF] now does an implicit call --- extra/peg/ebnf/ebnf-tests.factor | 126 +++++++++++++++---------------- extra/peg/ebnf/ebnf.factor | 3 +- 2 files changed, 65 insertions(+), 64 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index a2807d20db..ba34248159 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -113,142 +113,142 @@ IN: peg.ebnf.tests ] unit-test { V{ "a" "b" } } [ - "ab" [EBNF foo='a' 'b' EBNF] call ast>> + "ab" [EBNF foo='a' 'b' EBNF] ast>> ] unit-test { V{ 1 "b" } } [ - "ab" [EBNF foo=('a')[[ drop 1 ]] 'b' EBNF] call ast>> + "ab" [EBNF foo=('a')[[ drop 1 ]] 'b' EBNF] ast>> ] unit-test { V{ 1 2 } } [ - "ab" [EBNF foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]] EBNF] call ast>> + "ab" [EBNF foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]] EBNF] ast>> ] unit-test { CHAR: A } [ - "A" [EBNF foo=[A-Z] EBNF] call ast>> + "A" [EBNF foo=[A-Z] EBNF] ast>> ] unit-test { CHAR: Z } [ - "Z" [EBNF foo=[A-Z] EBNF] call ast>> + "Z" [EBNF foo=[A-Z] EBNF] ast>> ] unit-test [ - "0" [EBNF foo=[A-Z] EBNF] call + "0" [EBNF foo=[A-Z] EBNF] ] must-fail { CHAR: 0 } [ - "0" [EBNF foo=[^A-Z] EBNF] call ast>> + "0" [EBNF foo=[^A-Z] EBNF] ast>> ] unit-test [ - "A" [EBNF foo=[^A-Z] EBNF] call + "A" [EBNF foo=[^A-Z] EBNF] ] must-fail [ - "Z" [EBNF foo=[^A-Z] EBNF] call + "Z" [EBNF foo=[^A-Z] EBNF] ] must-fail { V{ "1" "+" "foo" } } [ - "1+1" [EBNF foo='1' '+' '1' [[ drop "foo" ]] EBNF] call ast>> + "1+1" [EBNF foo='1' '+' '1' [[ drop "foo" ]] EBNF] ast>> ] unit-test { "foo" } [ - "1+1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] EBNF] call ast>> + "1+1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] EBNF] ast>> ] unit-test { "foo" } [ - "1+1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF] call ast>> + "1+1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF] ast>> ] unit-test { "bar" } [ - "1-1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF] call ast>> + "1-1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF] ast>> ] unit-test { 6 } [ - "4+2" [EBNF num=[0-9] => [[ digit> ]] foo=num:x '+' num:y => [[ x y + ]] EBNF] call ast>> + "4+2" [EBNF num=[0-9] => [[ digit> ]] foo=num:x '+' num:y => [[ x y + ]] EBNF] ast>> ] unit-test { 6 } [ - "4+2" [EBNF foo=[0-9]:x '+' [0-9]:y => [[ x digit> y digit> + ]] EBNF] call ast>> + "4+2" [EBNF foo=[0-9]:x '+' [0-9]:y => [[ x digit> y digit> + ]] EBNF] ast>> ] unit-test { 10 } [ - { 1 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF] call ast>> + { 1 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF] ast>> ] unit-test [ - { "a" 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF] call + { "a" 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF] ] must-fail { 3 } [ - { 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF] call ast>> + { 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF] ast>> ] unit-test [ - "ab" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call + "ab" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] ] must-fail { V{ "a" " " "b" } } [ - "a b" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call ast>> + "a b" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] ast>> ] unit-test { V{ "a" "\t" "b" } } [ - "a\tb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call ast>> + "a\tb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] ast>> ] unit-test { V{ "a" "\n" "b" } } [ - "a\nb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call ast>> + "a\nb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] ast>> ] unit-test { V{ "a" f "b" } } [ - "ab" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call ast>> + "ab" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] ast>> ] unit-test { V{ "a" " " "b" } } [ - "a b" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call ast>> + "a b" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] ast>> ] unit-test { V{ "a" "\t" "b" } } [ - "a\tb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call ast>> + "a\tb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] ast>> ] unit-test { V{ "a" "\n" "b" } } [ - "a\nb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call ast>> + "a\nb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] ast>> ] unit-test { V{ "a" "b" } } [ - "ab" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call ast>> + "ab" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] ast>> ] unit-test { V{ "a" "b" } } [ - "a\tb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call ast>> + "a\tb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] ast>> ] unit-test { V{ "a" "b" } } [ - "a\nb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call ast>> + "a\nb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] ast>> ] unit-test [ - "axb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call + "axb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] ] must-fail { V{ V{ 49 } "+" V{ 49 } } } [ #! Test direct left recursion. #! Using packrat, so first part of expr fails, causing 2nd choice to be used - "1+1" [EBNF num=([0-9])+ expr=expr "+" num | num EBNF] call ast>> + "1+1" [EBNF num=([0-9])+ expr=expr "+" num | num EBNF] ast>> ] unit-test { V{ V{ V{ 49 } "+" V{ 49 } } "+" V{ 49 } } } [ #! Test direct left recursion. #! Using packrat, so first part of expr fails, causing 2nd choice to be used - "1+1+1" [EBNF num=([0-9])+ expr=expr "+" num | num EBNF] call ast>> + "1+1+1" [EBNF num=([0-9])+ expr=expr "+" num | num EBNF] ast>> ] unit-test { V{ V{ V{ 49 } "+" V{ 49 } } "+" V{ 49 } } } [ #! Test indirect left recursion. #! Using packrat, so first part of expr fails, causing 2nd choice to be used - "1+1+1" [EBNF num=([0-9])+ x=expr expr=x "+" num | num EBNF] call ast>> + "1+1+1" [EBNF num=([0-9])+ x=expr expr=x "+" num | num EBNF] ast>> ] unit-test { t } [ @@ -303,85 +303,85 @@ main = Primary 'ebnf' compile must-infer { V{ V{ "a" "b" } "c" } } [ - "abc" [EBNF a="a" "b" foo=(a "c") EBNF] call ast>> + "abc" [EBNF a="a" "b" foo=(a "c") EBNF] ast>> ] unit-test { V{ V{ "a" "b" } "c" } } [ - "abc" [EBNF a="a" "b" foo={a "c"} EBNF] call ast>> + "abc" [EBNF a="a" "b" foo={a "c"} EBNF] ast>> ] unit-test { V{ V{ "a" "b" } "c" } } [ - "abc" [EBNF a="a" "b" foo=a "c" EBNF] call ast>> + "abc" [EBNF a="a" "b" foo=a "c" EBNF] ast>> ] unit-test [ - "a bc" [EBNF a="a" "b" foo=(a "c") EBNF] call + "a bc" [EBNF a="a" "b" foo=(a "c") EBNF] ] must-fail [ - "a bc" [EBNF a="a" "b" foo=a "c" EBNF] call + "a bc" [EBNF a="a" "b" foo=a "c" EBNF] ] must-fail [ - "a bc" [EBNF a="a" "b" foo={a "c"} EBNF] call + "a bc" [EBNF a="a" "b" foo={a "c"} EBNF] ] must-fail [ - "ab c" [EBNF a="a" "b" foo=a "c" EBNF] call + "ab c" [EBNF a="a" "b" foo=a "c" EBNF] ] must-fail { V{ V{ "a" "b" } "c" } } [ - "ab c" [EBNF a="a" "b" foo={a "c"} EBNF] call ast>> + "ab c" [EBNF a="a" "b" foo={a "c"} EBNF] ast>> ] unit-test [ - "ab c" [EBNF a="a" "b" foo=(a "c") EBNF] call + "ab c" [EBNF a="a" "b" foo=(a "c") EBNF] ] must-fail [ - "a b c" [EBNF a="a" "b" foo=a "c" EBNF] call + "a b c" [EBNF a="a" "b" foo=a "c" EBNF] ] must-fail [ - "a b c" [EBNF a="a" "b" foo=(a "c") EBNF] call + "a b c" [EBNF a="a" "b" foo=(a "c") EBNF] ] must-fail [ - "a b c" [EBNF a="a" "b" foo={a "c"} EBNF] call + "a b c" [EBNF a="a" "b" foo={a "c"} EBNF] ] must-fail { V{ V{ V{ "a" "b" } "c" } V{ V{ "a" "b" } "c" } } } [ - "ab cab c" [EBNF a="a" "b" foo={a "c"}* EBNF] call ast>> + "ab cab c" [EBNF a="a" "b" foo={a "c"}* EBNF] ast>> ] unit-test { V{ } } [ - "ab cab c" [EBNF a="a" "b" foo=(a "c")* EBNF] call ast>> + "ab cab c" [EBNF a="a" "b" foo=(a "c")* EBNF] ast>> ] unit-test { V{ V{ V{ "a" "b" } "c" } V{ V{ "a" "b" } "c" } } } [ - "ab c ab c" [EBNF a="a" "b" foo={a "c"}* EBNF] call ast>> + "ab c ab c" [EBNF a="a" "b" foo={a "c"}* EBNF] ast>> ] unit-test { V{ } } [ - "ab c ab c" [EBNF a="a" "b" foo=(a "c")* EBNF] call ast>> + "ab c ab c" [EBNF a="a" "b" foo=(a "c")* EBNF] ast>> ] unit-test { V{ "a" "a" "a" } } [ - "aaa" [EBNF a=('a')* b=!('b') a:x => [[ x ]] EBNF] call ast>> + "aaa" [EBNF a=('a')* b=!('b') a:x => [[ x ]] EBNF] ast>> ] unit-test { t } [ - "aaa" [EBNF a=('a')* b=!('b') a:x => [[ x ]] EBNF] call ast>> - "aaa" [EBNF a=('a')* b=!('b') (a):x => [[ x ]] EBNF] call ast>> = + "aaa" [EBNF a=('a')* b=!('b') a:x => [[ x ]] EBNF] ast>> + "aaa" [EBNF a=('a')* b=!('b') (a):x => [[ x ]] EBNF] ast>> = ] unit-test { V{ "a" "a" "a" } } [ - "aaa" [EBNF a=('a')* b=a:x => [[ x ]] EBNF] call ast>> + "aaa" [EBNF a=('a')* b=a:x => [[ x ]] EBNF] ast>> ] unit-test { t } [ - "aaa" [EBNF a=('a')* b=a:x => [[ x ]] EBNF] call ast>> - "aaa" [EBNF a=('a')* b=(a):x => [[ x ]] EBNF] call ast>> = + "aaa" [EBNF a=('a')* b=a:x => [[ x ]] EBNF] ast>> + "aaa" [EBNF a=('a')* b=(a):x => [[ x ]] EBNF] ast>> = ] unit-test { t } [ @@ -445,11 +445,11 @@ foo= 'd' ] unit-test { t } [ - "USING: kernel peg.ebnf ; [EBNF foo='a' '\n' => [[ drop \"\n\" ]] EBNF]" eval drop t + "USING: kernel peg.ebnf ; \"a\\n\" [EBNF foo='a' '\n' => [[ drop \"\n\" ]] EBNF]" eval drop t ] unit-test [ - "USING: peg.ebnf ; [EBNF foo='a' foo='b' EBNF]" eval drop + "USING: peg.ebnf ; \"ab\" [EBNF foo='a' foo='b' EBNF]" eval drop ] must-fail { t } [ @@ -460,7 +460,7 @@ foo= 'd' #! Tokenizer tests { V{ "a" CHAR: b } } [ - "ab" [EBNF tokenizer=default foo="a" . EBNF] call ast>> + "ab" [EBNF tokenizer=default foo="a" . EBNF] ast>> ] unit-test TUPLE: ast-number value ; @@ -488,7 +488,7 @@ Tok = Spaces (Number | Special ) tokenizer = foo=. tokenizer=default baz=. main = bar foo foo baz - EBNF] call ast>> + EBNF] ast>> ] unit-test { V{ CHAR: 5 "+" CHAR: 2 } } [ @@ -499,7 +499,7 @@ Tok = Spaces (Number | Special ) spaces=space* => [[ ignore ]] tokenizer=spaces (number | operator) main= . . . - EBNF] call ast>> + EBNF] ast>> ] unit-test { V{ CHAR: 5 "+" CHAR: 2 } } [ @@ -510,13 +510,13 @@ Tok = Spaces (Number | Special ) spaces=space* => [[ ignore ]] tokenizer=spaces (number | operator) main= . . . - EBNF] call ast>> + EBNF] ast>> ] unit-test { "++" } [ - "++--" [EBNF tokenizer=("++" | "--") main="++" EBNF] call ast>> + "++--" [EBNF tokenizer=("++" | "--") main="++" EBNF] ast>> ] unit-test { "\\" } [ - "\\" [EBNF foo="\\" EBNF] call ast>> + "\\" [EBNF foo="\\" EBNF] ast>> ] unit-test \ No newline at end of file diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 610cffd273..2a6b55ad9d 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -523,7 +523,8 @@ M: ebnf-non-terminal (transform) ( ast -- parser ) parse-result-ast transform dup dup parser [ main swap at compile ] with-variable [ compiled-parse ] curry [ with-scope ] curry ; -: [EBNF "EBNF]" reset-tokenizer parse-multiline-string ebnf>quot nip parsed reset-tokenizer ; parsing +: [EBNF "EBNF]" reset-tokenizer parse-multiline-string ebnf>quot nip + parsed \ call parsed reset-tokenizer ; parsing : EBNF: reset-tokenizer CREATE-WORD dup ";EBNF" parse-multiline-string From 4c1fe8f0b30b7adabb819cbb74fddce6f75bdf9f Mon Sep 17 00:00:00 2001 From: Chris Double Date: Fri, 4 Jul 2008 12:40:42 +1200 Subject: [PATCH 16/25] Add syntax to return a parser object --- extra/peg/ebnf/ebnf.factor | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 2a6b55ad9d..ff4bd2db61 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -518,11 +518,16 @@ M: ebnf-non-terminal (transform) ( ast -- parser ) "Could not parse EBNF" throw ] if ; +: parse-ebnf ( string -- hashtable ) + 'ebnf' parse check-parse-result ast>> transform ; + : ebnf>quot ( string -- hashtable quot ) - 'ebnf' parse check-parse-result - parse-result-ast transform dup dup parser [ main swap at compile ] with-variable + parse-ebnf dup dup parser [ main swap at compile ] with-variable [ compiled-parse ] curry [ with-scope ] curry ; +: " reset-tokenizer parse-multiline-string parse-ebnf main swap at + parsed reset-tokenizer ; parsing + : [EBNF "EBNF]" reset-tokenizer parse-multiline-string ebnf>quot nip parsed \ call parsed reset-tokenizer ; parsing From 72bd6b4dc852cc46b9c9a73946f19e78f7fd5e82 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Fri, 4 Jul 2008 14:20:19 +1200 Subject: [PATCH 17/25] Fix peg tests --- extra/peg/ebnf/ebnf-tests.factor | 44 ++++++++++++------------ extra/peg/ebnf/ebnf.factor | 4 +-- extra/peg/parsers/parsers-tests.factor | 28 ++++++++-------- extra/peg/parsers/parsers.factor | 14 ++++---- extra/peg/peg-tests.factor | 46 +++++++++++++------------- extra/peg/peg.factor | 5 ++- extra/peg/pl0/pl0-tests.factor | 18 +++++----- extra/peg/search/search.factor | 11 +++--- 8 files changed, 85 insertions(+), 85 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index ba34248159..ef90929b79 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -7,11 +7,11 @@ USING: kernel tools.test peg peg.ebnf words math math.parser IN: peg.ebnf.tests { T{ ebnf-non-terminal f "abc" } } [ - "abc" 'non-terminal' parse ast>> + "abc" 'non-terminal' parse ] unit-test { T{ ebnf-terminal f "55" } } [ - "'55'" 'terminal' parse ast>> + "'55'" 'terminal' parse ] unit-test { @@ -22,7 +22,7 @@ IN: peg.ebnf.tests } } } [ - "digit = '1' | '2'" 'rule' parse ast>> + "digit = '1' | '2'" 'rule' parse ] unit-test { @@ -33,7 +33,7 @@ IN: peg.ebnf.tests } } } [ - "digit = '1' '2'" 'rule' parse ast>> + "digit = '1' '2'" 'rule' parse ] unit-test { @@ -46,7 +46,7 @@ IN: peg.ebnf.tests } } } [ - "one two | three" 'choice' parse ast>> + "one two | three" 'choice' parse ] unit-test { @@ -61,7 +61,7 @@ IN: peg.ebnf.tests } } } [ - "one {two | three}" 'choice' parse ast>> + "one {two | three}" 'choice' parse ] unit-test { @@ -81,7 +81,7 @@ IN: peg.ebnf.tests } } } [ - "one ((two | three) four)*" 'choice' parse ast>> + "one ((two | three) four)*" 'choice' parse ] unit-test { @@ -93,23 +93,23 @@ IN: peg.ebnf.tests } } } [ - "one ( two )? three" 'choice' parse ast>> + "one ( two )? three" 'choice' parse ] unit-test { "foo" } [ - "\"foo\"" 'identifier' parse ast>> + "\"foo\"" 'identifier' parse ] unit-test { "foo" } [ - "'foo'" 'identifier' parse ast>> + "'foo'" 'identifier' parse ] unit-test { "foo" } [ - "foo" 'non-terminal' parse ast>> ebnf-non-terminal-symbol + "foo" 'non-terminal' parse ebnf-non-terminal-symbol ] unit-test { "foo" } [ - "foo]" 'non-terminal' parse ast>> ebnf-non-terminal-symbol + "foo]" 'non-terminal' parse ebnf-non-terminal-symbol ] unit-test { V{ "a" "b" } } [ @@ -252,7 +252,7 @@ IN: peg.ebnf.tests ] unit-test { t } [ - "abcd='9' | ('8'):x => [[ x ]]" 'ebnf' parse parse-result-remaining empty? + "abcd='9' | ('8'):x => [[ x ]]" 'ebnf' (parse) remaining>> empty? ] unit-test EBNF: primary @@ -385,29 +385,29 @@ main = Primary ] unit-test { t } [ - "number=(digit)+:n 'a'" 'ebnf' parse remaining>> length zero? + "number=(digit)+:n 'a'" 'ebnf' (parse) remaining>> length zero? ] unit-test { t } [ - "number=(digit)+ 'a'" 'ebnf' parse remaining>> length zero? + "number=(digit)+ 'a'" 'ebnf' (parse) remaining>> length zero? ] unit-test { t } [ - "number=digit+ 'a'" 'ebnf' parse remaining>> length zero? + "number=digit+ 'a'" 'ebnf' (parse) remaining>> length zero? ] unit-test { t } [ - "number=digit+:n 'a'" 'ebnf' parse remaining>> length zero? + "number=digit+:n 'a'" 'ebnf' (parse) remaining>> length zero? ] unit-test { t } [ - "foo=(name):n !(keyword) => [[ n ]]" 'rule' parse ast>> - "foo=name:n !(keyword) => [[ n ]]" 'rule' parse ast>> = + "foo=(name):n !(keyword) => [[ n ]]" 'rule' parse + "foo=name:n !(keyword) => [[ n ]]" 'rule' parse = ] unit-test { t } [ - "foo=!(keyword) (name):n => [[ n ]]" 'rule' parse ast>> - "foo=!(keyword) name:n => [[ n ]]" 'rule' parse ast>> = + "foo=!(keyword) (name):n => [[ n ]]" 'rule' parse + "foo=!(keyword) name:n => [[ n ]]" 'rule' parse = ] unit-test << @@ -455,7 +455,7 @@ foo= 'd' { t } [ #! Rule lookup occurs in a namespace. This causes an incorrect duplicate rule #! if a var in a namespace is set. This unit test is to remind me to fix this. - [ "fail" "foo" set "foo='a'" 'ebnf' parse ast>> transform drop t ] with-scope + [ "fail" "foo" set "foo='a'" 'ebnf' parse transform drop t ] with-scope ] unit-test #! Tokenizer tests diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index ff4bd2db61..2a57015fa6 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -504,7 +504,7 @@ M: ebnf-non-terminal (transform) ( ast -- parser ) ] [ ] make box ; : transform-ebnf ( string -- object ) - 'ebnf' parse parse-result-ast transform ; + 'ebnf' parse transform ; : check-parse-result ( result -- result ) dup [ @@ -519,7 +519,7 @@ M: ebnf-non-terminal (transform) ( ast -- parser ) ] if ; : parse-ebnf ( string -- hashtable ) - 'ebnf' parse check-parse-result ast>> transform ; + 'ebnf' (parse) check-parse-result ast>> transform ; : ebnf>quot ( string -- hashtable quot ) parse-ebnf dup dup parser [ main swap at compile ] with-variable diff --git a/extra/peg/parsers/parsers-tests.factor b/extra/peg/parsers/parsers-tests.factor index 0cf3ad8b17..20d19c9a64 100644 --- a/extra/peg/parsers/parsers-tests.factor +++ b/extra/peg/parsers/parsers-tests.factor @@ -2,50 +2,50 @@ USING: kernel peg peg.parsers tools.test accessors ; IN: peg.parsers.tests { V{ "a" } } -[ "a" "a" token "," token list-of parse ast>> ] unit-test +[ "a" "a" token "," token list-of parse ] unit-test { V{ "a" "a" "a" "a" } } -[ "a,a,a,a" "a" token "," token list-of parse ast>> ] unit-test +[ "a,a,a,a" "a" token "," token list-of parse ] unit-test [ "a" "a" token "," token list-of-many parse ] must-fail { V{ "a" "a" "a" "a" } } -[ "a,a,a,a" "a" token "," token list-of-many parse ast>> ] unit-test +[ "a,a,a,a" "a" token "," token list-of-many parse ] unit-test [ "aaa" "a" token 4 exactly-n parse ] must-fail { V{ "a" "a" "a" "a" } } -[ "aaaa" "a" token 4 exactly-n parse ast>> ] unit-test +[ "aaaa" "a" token 4 exactly-n parse ] unit-test [ "aaa" "a" token 4 at-least-n parse ] must-fail { V{ "a" "a" "a" "a" } } -[ "aaaa" "a" token 4 at-least-n parse ast>> ] unit-test +[ "aaaa" "a" token 4 at-least-n parse ] unit-test { V{ "a" "a" "a" "a" "a" } } -[ "aaaaa" "a" token 4 at-least-n parse ast>> ] unit-test +[ "aaaaa" "a" token 4 at-least-n parse ] unit-test { V{ "a" "a" "a" "a" } } -[ "aaaa" "a" token 4 at-most-n parse ast>> ] unit-test +[ "aaaa" "a" token 4 at-most-n parse ] unit-test { V{ "a" "a" "a" "a" } } -[ "aaaaa" "a" token 4 at-most-n parse ast>> ] unit-test +[ "aaaaa" "a" token 4 at-most-n parse ] unit-test { V{ "a" "a" "a" } } -[ "aaa" "a" token 3 4 from-m-to-n parse ast>> ] unit-test +[ "aaa" "a" token 3 4 from-m-to-n parse ] unit-test { V{ "a" "a" "a" "a" } } -[ "aaaa" "a" token 3 4 from-m-to-n parse ast>> ] unit-test +[ "aaaa" "a" token 3 4 from-m-to-n parse ] unit-test { V{ "a" "a" "a" "a" } } -[ "aaaaa" "a" token 3 4 from-m-to-n parse ast>> ] unit-test +[ "aaaaa" "a" token 3 4 from-m-to-n parse ] unit-test { 97 } -[ "a" any-char parse ast>> ] unit-test +[ "a" any-char parse ] unit-test { V{ } } -[ "" epsilon parse ast>> ] unit-test +[ "" epsilon parse ] unit-test { "a" } [ - "a" "a" token just parse ast>> + "a" "a" token just parse ] unit-test \ No newline at end of file diff --git a/extra/peg/parsers/parsers.factor b/extra/peg/parsers/parsers.factor index da44c12e8f..351e3b5fc1 100755 --- a/extra/peg/parsers/parsers.factor +++ b/extra/peg/parsers/parsers.factor @@ -3,7 +3,7 @@ USING: kernel sequences strings namespaces math assocs shuffle vectors arrays math.parser unicode.categories sequences.deep peg peg.private - peg.search math.ranges words memoize ; + peg.search math.ranges words ; IN: peg.parsers TUPLE: just-parser p1 ; @@ -19,7 +19,7 @@ TUPLE: just-parser p1 ; M: just-parser (compile) ( parser -- quot ) just-parser-p1 compiled-parser just-pattern curry ; -MEMO: just ( parser -- parser ) +: just ( parser -- parser ) just-parser boa init-parser ; : 1token ( ch -- parser ) 1string token ; @@ -45,10 +45,10 @@ MEMO: just ( parser -- parser ) PRIVATE> -MEMO: exactly-n ( parser n -- parser' ) +: exactly-n ( parser n -- parser' ) swap seq ; -MEMO: at-most-n ( parser n -- parser' ) +: at-most-n ( parser n -- parser' ) dup zero? [ 2drop epsilon ] [ @@ -56,15 +56,15 @@ MEMO: at-most-n ( parser n -- parser' ) -rot 1- at-most-n 2choice ] if ; -MEMO: at-least-n ( parser n -- parser' ) +: at-least-n ( parser n -- parser' ) dupd exactly-n swap repeat0 2seq [ flatten-vectors ] action ; -MEMO: from-m-to-n ( parser m n -- parser' ) +: from-m-to-n ( parser m n -- parser' ) >r [ exactly-n ] 2keep r> swap - at-most-n 2seq [ flatten-vectors ] action ; -MEMO: pack ( begin body end -- parser ) +: pack ( begin body end -- parser ) >r >r hide r> r> hide 3seq [ first ] action ; : surrounded-by ( parser begin end -- parser' ) diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor index 466da83b6e..f9e4a0d4a6 100644 --- a/extra/peg/peg-tests.factor +++ b/extra/peg/peg-tests.factor @@ -10,7 +10,7 @@ IN: peg.tests ] must-fail { "begin" "end" } [ - "beginend" "begin" token parse + "beginend" "begin" token (parse) { ast>> remaining>> } get-slots >string ] unit-test @@ -24,11 +24,11 @@ IN: peg.tests ] must-fail { CHAR: a } [ - "abcd" CHAR: a CHAR: z range parse ast>> + "abcd" CHAR: a CHAR: z range parse ] unit-test { CHAR: z } [ - "zbcd" CHAR: a CHAR: z range parse ast>> + "zbcd" CHAR: a CHAR: z range parse ] unit-test [ @@ -36,15 +36,15 @@ IN: peg.tests ] must-fail { V{ "g" "o" } } [ - "good" "g" token "o" token 2array seq parse ast>> + "good" "g" token "o" token 2array seq parse ] unit-test { "a" } [ - "abcd" "a" token "b" token 2array choice parse ast>> + "abcd" "a" token "b" token 2array choice parse ] unit-test { "b" } [ - "bbcd" "a" token "b" token 2array choice parse ast>> + "bbcd" "a" token "b" token 2array choice parse ] unit-test [ @@ -56,15 +56,15 @@ IN: peg.tests ] must-fail { 0 } [ - "" "a" token repeat0 parse ast>> length + "" "a" token repeat0 parse length ] unit-test { 0 } [ - "b" "a" token repeat0 parse ast>> length + "b" "a" token repeat0 parse length ] unit-test { V{ "a" "a" "a" } } [ - "aaab" "a" token repeat0 parse ast>> + "aaab" "a" token repeat0 parse ] unit-test [ @@ -76,15 +76,15 @@ IN: peg.tests ] must-fail { V{ "a" "a" "a" } } [ - "aaab" "a" token repeat1 parse ast>> + "aaab" "a" token repeat1 parse ] unit-test { V{ "a" "b" } } [ - "ab" "a" token optional "b" token 2array seq parse ast>> + "ab" "a" token optional "b" token 2array seq parse ] unit-test { V{ f "b" } } [ - "b" "a" token optional "b" token 2array seq parse ast>> + "b" "a" token optional "b" token 2array seq parse ] unit-test [ @@ -92,7 +92,7 @@ IN: peg.tests ] must-fail { V{ CHAR: a CHAR: b } } [ - "ab" "a" token ensure CHAR: a CHAR: z range dup 3array seq parse ast>> + "ab" "a" token ensure CHAR: a CHAR: z range dup 3array seq parse ] unit-test [ @@ -124,11 +124,11 @@ IN: peg.tests ] must-fail { 1 } [ - "a" "a" token [ drop 1 ] action parse ast>> + "a" "a" token [ drop 1 ] action parse ] unit-test { V{ 1 1 } } [ - "aa" "a" token [ drop 1 ] action dup 2array seq parse ast>> + "aa" "a" token [ drop 1 ] action dup 2array seq parse ] unit-test [ @@ -140,19 +140,19 @@ IN: peg.tests ] must-fail { CHAR: a } [ - "a" [ CHAR: a = ] satisfy parse ast>> + "a" [ CHAR: a = ] satisfy parse ] unit-test { "a" } [ - " a" "a" token sp parse ast>> + " a" "a" token sp parse ] unit-test { "a" } [ - "a" "a" token sp parse ast>> + "a" "a" token sp parse ] unit-test { V{ "a" } } [ - "[a]" "[" token hide "a" token "]" token hide 3array seq parse ast>> + "[a]" "[" token hide "a" token "]" token hide 3array seq parse ] unit-test [ @@ -165,8 +165,8 @@ IN: peg.tests [ "1" token , "-" token , "1" token , ] seq* , [ "1" token , "+" token , "1" token , ] seq* , ] choice* - "1-1" over parse ast>> swap - "1+1" swap parse ast>> + "1-1" over parse swap + "1+1" swap parse ] unit-test : expr ( -- parser ) @@ -175,7 +175,7 @@ IN: peg.tests [ expr ] delay "+" token "1" token 3seq "1" token 2choice ; { V{ V{ "1" "+" "1" } "+" "1" } } [ - "1+1+1" expr parse ast>> + "1+1+1" expr parse ] unit-test { t } [ @@ -190,6 +190,6 @@ IN: peg.tests ] must-fail { CHAR: B } [ - "B" [ drop t ] satisfy [ 66 >= ] semantic parse ast>> + "B" [ drop t ] satisfy [ 66 >= ] semantic parse ] unit-test diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index d388bbd124..0847c57299 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -286,9 +286,12 @@ SYMBOL: delayed : compiled-parse ( state word -- result ) swap [ execute [ error-stack get first throw ] unless* ] with-packrat ; inline -: parse ( input parser -- result ) +: (parse) ( input parser -- result ) dup word? [ compile ] unless compiled-parse ; +: parse ( input parser -- ast ) + (parse) ast>> ; + > empty? + "CONST foo = 1;" "block" \ pl0 rule (parse) remaining>> empty? ] unit-test { t } [ - "VAR foo;" "block" \ pl0 rule parse remaining>> empty? + "VAR foo;" "block" \ pl0 rule (parse) remaining>> empty? ] unit-test { t } [ - "VAR foo,bar , baz;" "block" \ pl0 rule parse remaining>> empty? + "VAR foo,bar , baz;" "block" \ pl0 rule (parse) remaining>> empty? ] unit-test { t } [ - "foo := 5" "statement" \ pl0 rule parse remaining>> empty? + "foo := 5" "statement" \ pl0 rule (parse) remaining>> empty? ] unit-test { t } [ - "BEGIN foo := 5 END" "statement" \ pl0 rule parse remaining>> empty? + "BEGIN foo := 5 END" "statement" \ pl0 rule (parse) remaining>> empty? ] unit-test { t } [ - "IF 1=1 THEN foo := 5" "statement" \ pl0 rule parse remaining>> empty? + "IF 1=1 THEN foo := 5" "statement" \ pl0 rule (parse) remaining>> empty? ] unit-test { t } [ - "WHILE 1=1 DO foo := 5" "statement" \ pl0 rule parse remaining>> empty? + "WHILE 1=1 DO foo := 5" "statement" \ pl0 rule (parse) remaining>> empty? ] unit-test { t } [ - "WHILE ODD 1 DO foo := 5" "statement" \ pl0 rule parse remaining>> empty? + "WHILE ODD 1 DO foo := 5" "statement" \ pl0 rule (parse) remaining>> empty? ] unit-test { t } [ - "PROCEDURE square; BEGIN squ:=x*x END" "block" \ pl0 rule parse remaining>> empty? + "PROCEDURE square; BEGIN squ:=x*x END" "block" \ pl0 rule (parse) remaining>> empty? ] unit-test { t } [ diff --git a/extra/peg/search/search.factor b/extra/peg/search/search.factor index 7ab7e83d12..04e4affe39 100755 --- a/extra/peg/search/search.factor +++ b/extra/peg/search/search.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2006 Chris Double. ! See http://factorcode.org/license.txt for BSD license. USING: kernel math io io.streams.string sequences strings -combinators peg memoize arrays ; +combinators peg memoize arrays continuations ; IN: peg.search : tree-write ( object -- ) @@ -16,15 +16,12 @@ MEMO: any-char-parser ( -- parser ) [ drop t ] satisfy ; : search ( string parser -- seq ) - any-char-parser [ drop f ] action 2array choice repeat0 parse dup [ - parse-result-ast sift - ] [ - drop { } - ] if ; + any-char-parser [ drop f ] action 2array choice repeat0 + [ parse sift ] [ 3drop { } ] recover ; : (replace) ( string parser -- seq ) - any-char-parser 2array choice repeat0 parse parse-result-ast sift ; + any-char-parser 2array choice repeat0 parse sift ; : replace ( string parser -- result ) [ (replace) [ tree-write ] each ] with-string-writer ; From f3145c5961dab694f51ba8a1845362d5dcb6a1f9 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Fri, 4 Jul 2008 15:48:52 +1200 Subject: [PATCH 18/25] [EBNF and EBNF: now return ast --- extra/peg/ebnf/ebnf-tests.factor | 112 +++++++++--------- extra/peg/ebnf/ebnf.factor | 2 +- extra/peg/javascript/javascript.factor | 6 +- .../peg/javascript/parser/parser-tests.factor | 10 +- .../tokenizer/tokenizer-tests.factor | 4 +- 5 files changed, 65 insertions(+), 69 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index ef90929b79..7f14293a15 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -113,23 +113,23 @@ IN: peg.ebnf.tests ] unit-test { V{ "a" "b" } } [ - "ab" [EBNF foo='a' 'b' EBNF] ast>> + "ab" [EBNF foo='a' 'b' EBNF] ] unit-test { V{ 1 "b" } } [ - "ab" [EBNF foo=('a')[[ drop 1 ]] 'b' EBNF] ast>> + "ab" [EBNF foo=('a')[[ drop 1 ]] 'b' EBNF] ] unit-test { V{ 1 2 } } [ - "ab" [EBNF foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]] EBNF] ast>> + "ab" [EBNF foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]] EBNF] ] unit-test { CHAR: A } [ - "A" [EBNF foo=[A-Z] EBNF] ast>> + "A" [EBNF foo=[A-Z] EBNF] ] unit-test { CHAR: Z } [ - "Z" [EBNF foo=[A-Z] EBNF] ast>> + "Z" [EBNF foo=[A-Z] EBNF] ] unit-test [ @@ -137,7 +137,7 @@ IN: peg.ebnf.tests ] must-fail { CHAR: 0 } [ - "0" [EBNF foo=[^A-Z] EBNF] ast>> + "0" [EBNF foo=[^A-Z] EBNF] ] unit-test [ @@ -149,31 +149,31 @@ IN: peg.ebnf.tests ] must-fail { V{ "1" "+" "foo" } } [ - "1+1" [EBNF foo='1' '+' '1' [[ drop "foo" ]] EBNF] ast>> + "1+1" [EBNF foo='1' '+' '1' [[ drop "foo" ]] EBNF] ] unit-test { "foo" } [ - "1+1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] EBNF] ast>> + "1+1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] EBNF] ] unit-test { "foo" } [ - "1+1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF] ast>> + "1+1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF] ] unit-test { "bar" } [ - "1-1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF] ast>> + "1-1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF] ] unit-test { 6 } [ - "4+2" [EBNF num=[0-9] => [[ digit> ]] foo=num:x '+' num:y => [[ x y + ]] EBNF] ast>> + "4+2" [EBNF num=[0-9] => [[ digit> ]] foo=num:x '+' num:y => [[ x y + ]] EBNF] ] unit-test { 6 } [ - "4+2" [EBNF foo=[0-9]:x '+' [0-9]:y => [[ x digit> y digit> + ]] EBNF] ast>> + "4+2" [EBNF foo=[0-9]:x '+' [0-9]:y => [[ x digit> y digit> + ]] EBNF] ] unit-test { 10 } [ - { 1 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF] ast>> + { 1 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF] ] unit-test [ @@ -181,7 +181,7 @@ IN: peg.ebnf.tests ] must-fail { 3 } [ - { 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF] ast>> + { 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF] ] unit-test [ @@ -189,44 +189,44 @@ IN: peg.ebnf.tests ] must-fail { V{ "a" " " "b" } } [ - "a b" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] ast>> + "a b" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] ] unit-test { V{ "a" "\t" "b" } } [ - "a\tb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] ast>> + "a\tb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] ] unit-test { V{ "a" "\n" "b" } } [ - "a\nb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] ast>> + "a\nb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] ] unit-test { V{ "a" f "b" } } [ - "ab" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] ast>> + "ab" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] ] unit-test { V{ "a" " " "b" } } [ - "a b" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] ast>> + "a b" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] ] unit-test { V{ "a" "\t" "b" } } [ - "a\tb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] ast>> + "a\tb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] ] unit-test { V{ "a" "\n" "b" } } [ - "a\nb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] ast>> + "a\nb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] ] unit-test { V{ "a" "b" } } [ - "ab" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] ast>> + "ab" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] ] unit-test { V{ "a" "b" } } [ - "a\tb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] ast>> + "a\tb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] ] unit-test { V{ "a" "b" } } [ - "a\nb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] ast>> + "a\nb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] ] unit-test [ @@ -236,19 +236,19 @@ IN: peg.ebnf.tests { V{ V{ 49 } "+" V{ 49 } } } [ #! Test direct left recursion. #! Using packrat, so first part of expr fails, causing 2nd choice to be used - "1+1" [EBNF num=([0-9])+ expr=expr "+" num | num EBNF] ast>> + "1+1" [EBNF num=([0-9])+ expr=expr "+" num | num EBNF] ] unit-test { V{ V{ V{ 49 } "+" V{ 49 } } "+" V{ 49 } } } [ #! Test direct left recursion. #! Using packrat, so first part of expr fails, causing 2nd choice to be used - "1+1+1" [EBNF num=([0-9])+ expr=expr "+" num | num EBNF] ast>> + "1+1+1" [EBNF num=([0-9])+ expr=expr "+" num | num EBNF] ] unit-test { V{ V{ V{ 49 } "+" V{ 49 } } "+" V{ 49 } } } [ #! Test indirect left recursion. #! Using packrat, so first part of expr fails, causing 2nd choice to be used - "1+1+1" [EBNF num=([0-9])+ x=expr expr=x "+" num | num EBNF] ast>> + "1+1+1" [EBNF num=([0-9])+ x=expr expr=x "+" num | num EBNF] ] unit-test { t } [ @@ -281,37 +281,37 @@ main = Primary ;EBNF { "this" } [ - "this" primary ast>> + "this" primary ] unit-test { V{ "this" "." "x" } } [ - "this.x" primary ast>> + "this.x" primary ] unit-test { V{ V{ "this" "." "x" } "." "y" } } [ - "this.x.y" primary ast>> + "this.x.y" primary ] unit-test { V{ V{ "this" "." "x" } "." "m" "(" ")" } } [ - "this.x.m()" primary ast>> + "this.x.m()" primary ] unit-test { V{ V{ V{ "x" "[" "i" "]" } "[" "j" "]" } "." "y" } } [ - "x[i][j].y" primary ast>> + "x[i][j].y" primary ] unit-test 'ebnf' compile must-infer { V{ V{ "a" "b" } "c" } } [ - "abc" [EBNF a="a" "b" foo=(a "c") EBNF] ast>> + "abc" [EBNF a="a" "b" foo=(a "c") EBNF] ] unit-test { V{ V{ "a" "b" } "c" } } [ - "abc" [EBNF a="a" "b" foo={a "c"} EBNF] ast>> + "abc" [EBNF a="a" "b" foo={a "c"} EBNF] ] unit-test { V{ V{ "a" "b" } "c" } } [ - "abc" [EBNF a="a" "b" foo=a "c" EBNF] ast>> + "abc" [EBNF a="a" "b" foo=a "c" EBNF] ] unit-test [ @@ -331,7 +331,7 @@ main = Primary ] must-fail { V{ V{ "a" "b" } "c" } } [ - "ab c" [EBNF a="a" "b" foo={a "c"} EBNF] ast>> + "ab c" [EBNF a="a" "b" foo={a "c"} EBNF] ] unit-test [ @@ -351,37 +351,37 @@ main = Primary ] must-fail { V{ V{ V{ "a" "b" } "c" } V{ V{ "a" "b" } "c" } } } [ - "ab cab c" [EBNF a="a" "b" foo={a "c"}* EBNF] ast>> + "ab cab c" [EBNF a="a" "b" foo={a "c"}* EBNF] ] unit-test { V{ } } [ - "ab cab c" [EBNF a="a" "b" foo=(a "c")* EBNF] ast>> + "ab cab c" [EBNF a="a" "b" foo=(a "c")* EBNF] ] unit-test { V{ V{ V{ "a" "b" } "c" } V{ V{ "a" "b" } "c" } } } [ - "ab c ab c" [EBNF a="a" "b" foo={a "c"}* EBNF] ast>> + "ab c ab c" [EBNF a="a" "b" foo={a "c"}* EBNF] ] unit-test { V{ } } [ - "ab c ab c" [EBNF a="a" "b" foo=(a "c")* EBNF] ast>> + "ab c ab c" [EBNF a="a" "b" foo=(a "c")* EBNF] ] unit-test { V{ "a" "a" "a" } } [ - "aaa" [EBNF a=('a')* b=!('b') a:x => [[ x ]] EBNF] ast>> + "aaa" [EBNF a=('a')* b=!('b') a:x => [[ x ]] EBNF] ] unit-test { t } [ - "aaa" [EBNF a=('a')* b=!('b') a:x => [[ x ]] EBNF] ast>> - "aaa" [EBNF a=('a')* b=!('b') (a):x => [[ x ]] EBNF] ast>> = + "aaa" [EBNF a=('a')* b=!('b') a:x => [[ x ]] EBNF] + "aaa" [EBNF a=('a')* b=!('b') (a):x => [[ x ]] EBNF] = ] unit-test { V{ "a" "a" "a" } } [ - "aaa" [EBNF a=('a')* b=a:x => [[ x ]] EBNF] ast>> + "aaa" [EBNF a=('a')* b=a:x => [[ x ]] EBNF] ] unit-test { t } [ - "aaa" [EBNF a=('a')* b=a:x => [[ x ]] EBNF] ast>> - "aaa" [EBNF a=('a')* b=(a):x => [[ x ]] EBNF] ast>> = + "aaa" [EBNF a=('a')* b=a:x => [[ x ]] EBNF] + "aaa" [EBNF a=('a')* b=(a):x => [[ x ]] EBNF] = ] unit-test { t } [ @@ -429,19 +429,19 @@ foo= 'd' ;EBNF { "a" } [ - "a" parser1 ast>> + "a" parser1 ] unit-test { V{ "a" "b" } } [ - "ab" parser2 ast>> + "ab" parser2 ] unit-test { V{ "a" "c" } } [ - "ac" parser3 ast>> + "ac" parser3 ] unit-test { V{ CHAR: a "d" } } [ - "ad" parser4 ast>> + "ad" parser4 ] unit-test { t } [ @@ -460,7 +460,7 @@ foo= 'd' #! Tokenizer tests { V{ "a" CHAR: b } } [ - "ab" [EBNF tokenizer=default foo="a" . EBNF] ast>> + "ab" [EBNF tokenizer=default foo="a" . EBNF] ] unit-test TUPLE: ast-number value ; @@ -488,7 +488,7 @@ Tok = Spaces (Number | Special ) tokenizer = foo=. tokenizer=default baz=. main = bar foo foo baz - EBNF] ast>> + EBNF] ] unit-test { V{ CHAR: 5 "+" CHAR: 2 } } [ @@ -499,7 +499,7 @@ Tok = Spaces (Number | Special ) spaces=space* => [[ ignore ]] tokenizer=spaces (number | operator) main= . . . - EBNF] ast>> + EBNF] ] unit-test { V{ CHAR: 5 "+" CHAR: 2 } } [ @@ -510,13 +510,13 @@ Tok = Spaces (Number | Special ) spaces=space* => [[ ignore ]] tokenizer=spaces (number | operator) main= . . . - EBNF] ast>> + EBNF] ] unit-test { "++" } [ - "++--" [EBNF tokenizer=("++" | "--") main="++" EBNF] ast>> + "++--" [EBNF tokenizer=("++" | "--") main="++" EBNF] ] unit-test { "\\" } [ - "\\" [EBNF foo="\\" EBNF] ast>> + "\\" [EBNF foo="\\" EBNF] ] unit-test \ No newline at end of file diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 2a57015fa6..2a75fcccc0 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -523,7 +523,7 @@ M: ebnf-non-terminal (transform) ( ast -- parser ) : ebnf>quot ( string -- hashtable quot ) parse-ebnf dup dup parser [ main swap at compile ] with-variable - [ compiled-parse ] curry [ with-scope ] curry ; + [ compiled-parse ] curry [ with-scope ast>> ] curry ; : " reset-tokenizer parse-multiline-string parse-ebnf main swap at parsed reset-tokenizer ; parsing diff --git a/extra/peg/javascript/javascript.factor b/extra/peg/javascript/javascript.factor index 8fe0538eae..4a919cf39f 100644 --- a/extra/peg/javascript/javascript.factor +++ b/extra/peg/javascript/javascript.factor @@ -4,8 +4,4 @@ USING: kernel accessors peg.javascript.tokenizer peg.javascript.parser ; IN: peg.javascript : parse-javascript ( string -- ast ) - javascript [ - ast>> - ] [ - "Unable to parse JavaScript" throw - ] if* ; + javascript ; diff --git a/extra/peg/javascript/parser/parser-tests.factor b/extra/peg/javascript/parser/parser-tests.factor index fd0e27b6d4..769dc41f78 100644 --- a/extra/peg/javascript/parser/parser-tests.factor +++ b/extra/peg/javascript/parser/parser-tests.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. ! USING: kernel tools.test peg peg.javascript.ast peg.javascript.parser - accessors multiline sequences math ; + accessors multiline sequences math peg.ebnf ; IN: peg.javascript.parser.tests \ javascript must-infer @@ -23,14 +23,14 @@ IN: peg.javascript.parser.tests } } } [ - "123; 'hello'; foo(x);" javascript ast>> + "123; 'hello'; foo(x);" javascript ] unit-test { t } [ <" var x=5 var y=10 -"> javascript remaining>> length zero? +"> main \ javascript rule (parse) remaining>> length zero? ] unit-test @@ -41,7 +41,7 @@ function foldl(f, initial, seq) { initial = f(initial, seq[i]); return initial; } -"> javascript remaining>> length zero? +"> main \ javascript rule (parse) remaining>> length zero? ] unit-test { t } [ @@ -52,6 +52,6 @@ ParseState.prototype.from = function(index) { r.length = this.length - index; return r; } -"> javascript remaining>> length zero? +"> main \ javascript rule (parse) remaining>> length zero? ] unit-test diff --git a/extra/peg/javascript/tokenizer/tokenizer-tests.factor b/extra/peg/javascript/tokenizer/tokenizer-tests.factor index a61125d08c..f0080a31b2 100644 --- a/extra/peg/javascript/tokenizer/tokenizer-tests.factor +++ b/extra/peg/javascript/tokenizer/tokenizer-tests.factor @@ -19,9 +19,9 @@ IN: peg.javascript.tokenizer.tests ";" } } [ - "123; 'hello'; foo(x);" tokenize-javascript ast>> + "123; 'hello'; foo(x);" tokenize-javascript ] unit-test { V{ T{ ast-regexp f "<(w+)[^>]*?)/>" "g" } } } [ - "/<(\\w+)[^>]*?)\\/>/g" tokenize-javascript ast>> + "/<(\\w+)[^>]*?)\\/>/g" tokenize-javascript ] unit-test \ No newline at end of file From 7f4fe7669861497137569e36130854dc77b5b872 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Fri, 4 Jul 2008 15:55:23 +1200 Subject: [PATCH 19/25] More peg test fixes --- extra/peg/expr/expr-tests.factor | 10 +++++----- extra/peg/expr/expr.factor | 4 ---- extra/peg/pl0/pl0-tests.factor | 4 ++-- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/extra/peg/expr/expr-tests.factor b/extra/peg/expr/expr-tests.factor index b6f3163bf4..59c70cd358 100644 --- a/extra/peg/expr/expr-tests.factor +++ b/extra/peg/expr/expr-tests.factor @@ -5,21 +5,21 @@ USING: kernel tools.test peg peg.expr multiline sequences ; IN: peg.expr.tests { 5 } [ - "2+3" eval-expr + "2+3" expr ] unit-test { 6 } [ - "2*3" eval-expr + "2*3" expr ] unit-test { 14 } [ - "2+3*4" eval-expr + "2+3*4" expr ] unit-test { 17 } [ - "2+3*4+3" eval-expr + "2+3*4+3" expr ] unit-test { 23 } [ - "2+3*(4+3)" eval-expr + "2+3*(4+3)" expr ] unit-test diff --git a/extra/peg/expr/expr.factor b/extra/peg/expr/expr.factor index e2df60ea9a..8b10b4fc0c 100644 --- a/extra/peg/expr/expr.factor +++ b/extra/peg/expr/expr.factor @@ -18,7 +18,3 @@ exp = exp "+" fac => [[ first3 nip + ]] | exp "-" fac => [[ first3 nip - ]] | fac ;EBNF - -: eval-expr ( string -- number ) - expr ast>> ; - diff --git a/extra/peg/pl0/pl0-tests.factor b/extra/peg/pl0/pl0-tests.factor index 4ba550a26c..e84d37e5d4 100644 --- a/extra/peg/pl0/pl0-tests.factor +++ b/extra/peg/pl0/pl0-tests.factor @@ -58,7 +58,7 @@ BEGIN x := x + 1; END END. -"> pl0 remaining>> empty? +"> main \ pl0 rule (parse) remaining>> empty? ] unit-test { f } [ @@ -124,5 +124,5 @@ BEGIN y := 36; CALL gcd; END. - "> pl0 remaining>> empty? + "> main \ pl0 rule (parse) remaining>> empty? ] unit-test \ No newline at end of file From d92c19f694057eccc8aa8656a5a73ef46f26c3ab Mon Sep 17 00:00:00 2001 From: Chris Double Date: Tue, 8 Jul 2008 16:10:06 +1200 Subject: [PATCH 20/25] Remove delegate usage from pegs --- extra/peg/peg.factor | 106 ++++++++++++++++++++++--------------------- 1 file changed, 54 insertions(+), 52 deletions(-) diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 0847c57299..3882315dc9 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -10,14 +10,13 @@ USE: prettyprint TUPLE: parse-result remaining ast ; TUPLE: parse-error position messages ; -TUPLE: parser id compiled ; -M: parser equal? [ id>> ] bi@ = ; +TUPLE: parser peg compiled id ; +M: parser equal? [ id>> ] bi@ = ; M: parser hashcode* id>> hashcode* ; -C: parse-result -C: parse-error -C: parser +C: parse-result +C: parse-error M: parse-error error. "Peg parsing error at character position " write dup position>> number>string write @@ -59,11 +58,16 @@ SYMBOL: heads : failed? ( obj -- ? ) fail = ; -: delegates ( -- cache ) - \ delegates get-global [ H{ } clone dup \ delegates set-global ] unless* ; +: peg-cache ( -- cache ) + #! Holds a hashtable mapping a peg tuple to + #! the parser tuple for that peg. The parser tuple + #! holds a unique id and the compiled form of that peg. + \ peg-cache get-global [ + H{ } clone dup \ peg-cache set-global + ] unless* ; : reset-pegs ( -- ) - H{ } clone \ delegates set-global ; + H{ } clone \ peg-cache set-global ; reset-pegs @@ -239,7 +243,7 @@ C: peg-head ] H{ } make-assoc swap bind ; inline -GENERIC: (compile) ( parser -- quot ) +GENERIC: (compile) ( peg -- quot ) : execute-parser ( word -- result ) pos get apply-rule dup failed? [ @@ -251,7 +255,7 @@ GENERIC: (compile) ( parser -- quot ) : parser-body ( parser -- quot ) #! Return the body of the word that is the compiled version #! of the parser. - gensym 2dup swap (compile) 0 1 define-declared swap dupd "peg" set-word-prop + gensym 2dup swap peg>> (compile) 0 1 define-declared swap dupd "peg" set-word-prop [ execute-parser ] curry ; : compiled-parser ( parser -- word ) @@ -304,12 +308,13 @@ SYMBOL: id 1 id set-global 0 ] if* ; -: init-parser ( parser -- parser ) - #! Set the delegate for the parser. Equivalent parsers - #! get a delegate with the same id. - dup clone delegates [ - drop next-id f - ] cache over set-delegate ; +: wrap-peg ( peg -- parser ) + #! Wrap a parser tuple around the peg object. + #! Look for an existing parser tuple for that + #! peg object. + peg-cache [ + f next-id parser boa + ] cache ; TUPLE: token-parser symbol ; @@ -321,7 +326,7 @@ TUPLE: token-parser symbol ; drop input-slice input-from "token '" r> append "'" append 1vector add-error f ] if ; -M: token-parser (compile) ( parser -- quot ) +M: token-parser (compile) ( peg -- quot ) symbol>> '[ input-slice , parse-token ] ; TUPLE: satisfy-parser quot ; @@ -338,7 +343,7 @@ TUPLE: satisfy-parser quot ; ] if ; inline -M: satisfy-parser (compile) ( parser -- quot ) +M: satisfy-parser (compile) ( peg -- quot ) quot>> '[ input-slice , parse-satisfy ] ; TUPLE: range-parser min max ; @@ -354,7 +359,7 @@ TUPLE: range-parser min max ; ] if ] if ; -M: range-parser (compile) ( parser -- quot ) +M: range-parser (compile) ( peg -- quot ) [ min>> ] [ max>> ] bi '[ input-slice , , parse-range ] ; TUPLE: seq-parser parsers ; @@ -381,7 +386,7 @@ TUPLE: seq-parser parsers ; 2drop f ] if ; inline -M: seq-parser (compile) ( parser -- quot ) +M: seq-parser (compile) ( peg -- quot ) [ [ input-slice V{ } clone ] % parsers>> unclip compiled-parser 1quotation , \ parse-seq-element , [ @@ -390,7 +395,7 @@ M: seq-parser (compile) ( parser -- quot ) TUPLE: choice-parser parsers ; -M: choice-parser (compile) ( parser -- quot ) +M: choice-parser (compile) ( peg -- quot ) [ f , parsers>> [ compiled-parser ] map @@ -408,7 +413,7 @@ TUPLE: repeat0-parser p1 ; nip ] if* ; inline -M: repeat0-parser (compile) ( parser -- quot ) +M: repeat0-parser (compile) ( peg -- quot ) p1>> compiled-parser 1quotation '[ input-slice V{ } clone , swap (repeat) ] ; @@ -422,7 +427,7 @@ TUPLE: repeat1-parser p1 ; f ] if* ; -M: repeat1-parser (compile) ( parser -- quot ) +M: repeat1-parser (compile) ( peg -- quot ) p1>> compiled-parser 1quotation '[ input-slice V{ } clone , swap (repeat) repeat1-empty-check ] ; @@ -432,7 +437,7 @@ TUPLE: optional-parser p1 ; : check-optional ( result -- result ) [ input-slice f ] unless* ; -M: optional-parser (compile) ( parser -- quot ) +M: optional-parser (compile) ( peg -- quot ) p1>> compiled-parser 1quotation '[ @ check-optional ] ; TUPLE: semantic-parser p1 quot ; @@ -444,7 +449,7 @@ TUPLE: semantic-parser p1 quot ; drop ] if ; inline -M: semantic-parser (compile) ( parser -- quot ) +M: semantic-parser (compile) ( peg -- quot ) [ p1>> compiled-parser 1quotation ] [ quot>> ] bi '[ @ , check-semantic ] ; @@ -453,7 +458,7 @@ TUPLE: ensure-parser p1 ; : check-ensure ( old-input result -- result ) [ ignore ] [ drop f ] if ; -M: ensure-parser (compile) ( parser -- quot ) +M: ensure-parser (compile) ( peg -- quot ) p1>> compiled-parser 1quotation '[ input-slice @ check-ensure ] ; TUPLE: ensure-not-parser p1 ; @@ -461,7 +466,7 @@ TUPLE: ensure-not-parser p1 ; : check-ensure-not ( old-input result -- result ) [ drop f ] [ ignore ] if ; -M: ensure-not-parser (compile) ( parser -- quot ) +M: ensure-not-parser (compile) ( peg -- quot ) p1>> compiled-parser 1quotation '[ input-slice @ check-ensure-not ] ; TUPLE: action-parser p1 quot ; @@ -473,7 +478,7 @@ TUPLE: action-parser p1 quot ; drop ] if ; inline -M: action-parser (compile) ( parser -- quot ) +M: action-parser (compile) ( peg -- quot ) [ p1>> compiled-parser 1quotation ] [ quot>> ] bi '[ @ , check-action ] ; : left-trim-slice ( string -- string ) @@ -485,14 +490,14 @@ M: action-parser (compile) ( parser -- quot ) TUPLE: sp-parser p1 ; -M: sp-parser (compile) ( parser -- quot ) +M: sp-parser (compile) ( peg -- quot ) p1>> compiled-parser 1quotation '[ input-slice left-trim-slice input-from pos set @ ] ; TUPLE: delay-parser quot ; -M: delay-parser (compile) ( parser -- quot ) +M: delay-parser (compile) ( peg -- quot ) #! For efficiency we memoize the quotation. #! This way it is run only once and the #! parser constructed once at run time. @@ -500,29 +505,26 @@ M: delay-parser (compile) ( parser -- quot ) TUPLE: box-parser quot ; -M: box-parser (compile) ( parser -- quot ) +M: box-parser (compile) ( peg -- quot ) #! Calls the quotation at compile time #! to produce the parser to be compiled. #! This differs from 'delay' which calls - #! it at run time. Due to using the runtime - #! environment at compile time, this parser - #! must not be cached, so we clear out the - #! delgates cache. - f >>compiled quot>> call compiled-parser 1quotation ; + #! it at run time. + quot>> call compiled-parser 1quotation ; PRIVATE> : token ( string -- parser ) - token-parser boa init-parser ; + token-parser boa wrap-peg ; : satisfy ( quot -- parser ) - satisfy-parser boa init-parser ; + satisfy-parser boa wrap-peg ; : range ( min max -- parser ) - range-parser boa init-parser ; + range-parser boa wrap-peg ; : seq ( seq -- parser ) - seq-parser boa init-parser ; + seq-parser boa wrap-peg ; : 2seq ( parser1 parser2 -- parser ) 2array seq ; @@ -537,7 +539,7 @@ PRIVATE> { } make seq ; inline : choice ( seq -- parser ) - choice-parser boa init-parser ; + choice-parser boa wrap-peg ; : 2choice ( parser1 parser2 -- parser ) 2array choice ; @@ -552,38 +554,38 @@ PRIVATE> { } make choice ; inline : repeat0 ( parser -- parser ) - repeat0-parser boa init-parser ; + repeat0-parser boa wrap-peg ; : repeat1 ( parser -- parser ) - repeat1-parser boa init-parser ; + repeat1-parser boa wrap-peg ; : optional ( parser -- parser ) - optional-parser boa init-parser ; + optional-parser boa wrap-peg ; : semantic ( parser quot -- parser ) - semantic-parser boa init-parser ; + semantic-parser boa wrap-peg ; : ensure ( parser -- parser ) - ensure-parser boa init-parser ; + ensure-parser boa wrap-peg ; : ensure-not ( parser -- parser ) - ensure-not-parser boa init-parser ; + ensure-not-parser boa wrap-peg ; : action ( parser quot -- parser ) - action-parser boa init-parser ; + action-parser boa wrap-peg ; : sp ( parser -- parser ) - sp-parser boa init-parser ; + sp-parser boa wrap-peg ; : hide ( parser -- parser ) [ drop ignore ] action ; : delay ( quot -- parser ) - delay-parser boa init-parser ; + delay-parser boa wrap-peg ; : box ( quot -- parser ) #! because a box has its quotation run at compile time - #! it must always have a new parser delgate created, + #! it must always have a new parser wrapper created, #! not a cached one. This is because the same box, #! compiled twice can have a different compiled word #! due to running at compile time. @@ -593,7 +595,7 @@ PRIVATE> #! parse. The action adds an indirection with a parser type #! that gets memoized and fixes this. Need to rethink how #! to fix boxes so this isn't needed... - box-parser boa next-id f over set-delegate [ ] action ; + box-parser boa f next-id parser boa [ ] action ; ERROR: parse-failed input word ; From ec896eeba8c32a974e84ab431e6673b6f591d438 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Tue, 8 Jul 2008 16:56:12 +1200 Subject: [PATCH 21/25] peg fixes --- extra/peg/parsers/parsers.factor | 2 +- extra/peg/peg-tests.factor | 2 +- extra/peg/peg.factor | 11 +++++++---- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/extra/peg/parsers/parsers.factor b/extra/peg/parsers/parsers.factor index 351e3b5fc1..f6c2820ac2 100755 --- a/extra/peg/parsers/parsers.factor +++ b/extra/peg/parsers/parsers.factor @@ -20,7 +20,7 @@ M: just-parser (compile) ( parser -- quot ) just-parser-p1 compiled-parser just-pattern curry ; : just ( parser -- parser ) - just-parser boa init-parser ; + just-parser boa wrap-peg ; : 1token ( ch -- parser ) 1string token ; diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor index f9e4a0d4a6..62e041441f 100644 --- a/extra/peg/peg-tests.factor +++ b/extra/peg/peg-tests.factor @@ -181,7 +181,7 @@ IN: peg.tests { t } [ #! Ensure a circular parser doesn't loop infinitely [ f , "a" token , ] seq* - dup parsers>> + dup peg>> parsers>> dupd 0 swap set-nth compile word? ] unit-test diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 3882315dc9..871db21084 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -245,12 +245,15 @@ C: peg-head GENERIC: (compile) ( peg -- quot ) -: execute-parser ( word -- result ) - pos get apply-rule dup failed? [ +: process-parser-result ( result -- result ) + dup failed? [ drop f ] [ input-slice swap - ] if ; inline + ] if ; + +: execute-parser ( word -- result ) + pos get apply-rule process-parser-result ; inline : parser-body ( parser -- quot ) #! Return the body of the word that is the compiled version @@ -323,7 +326,7 @@ TUPLE: token-parser symbol ; dup >r ?head-slice [ r> f f add-error ] [ - drop input-slice input-from "token '" r> append "'" append 1vector add-error f + drop pos get "token '" r> append "'" append 1vector add-error f ] if ; M: token-parser (compile) ( peg -- quot ) From 4135f81514c91257c901a1c2819c204955714d10 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 9 Jul 2008 11:45:51 +1200 Subject: [PATCH 22/25] Fix comment in peg eval-rule --- extra/peg/peg.factor | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 871db21084..4cfa94ce48 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -109,7 +109,7 @@ C: peg-head : eval-rule ( rule -- ast ) #! Evaluate a rule, return an ast resulting from it. #! Return fail if the rule failed. The rule has - #! stack effect ( input -- parse-result ) + #! stack effect ( -- parse-result ) pos get swap execute process-rule-result ; inline : memo ( pos rule -- memo-entry ) From 9e78bb70f2216c8582827a9a880b2fca8ca32e1d Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 9 Jul 2008 12:07:17 +1200 Subject: [PATCH 23/25] packrat refactoring --- extra/peg/peg.factor | 72 +++++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 4cfa94ce48..9540b1fd70 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -48,12 +48,27 @@ SYMBOL: error-stack SYMBOL: ignore -SYMBOL: packrat +: packrat ( id -- cache ) + #! The packrat cache is a mapping of parser-id->cache. + #! For each parser it maps to a cache holding a mapping + #! of position->result. The packrat cache therefore keeps + #! track of all parses that have occurred at each position + #! of the input string and the results obtained from that + #! parser. + \ packrat get [ drop H{ } clone ] cache ; + SYMBOL: pos SYMBOL: input SYMBOL: fail SYMBOL: lrstack -SYMBOL: heads + +: heads ( -- cache ) + #! A mapping from position->peg-head. It maps a + #! position in the input string being parsed to + #! the head of the left recursion which is currently + #! being grown. It is 'f' at any position where + #! left recursion growth is not underway. + \ heads get ; : failed? ( obj -- ? ) fail = ; @@ -71,19 +86,20 @@ SYMBOL: heads reset-pegs +#! An entry in the table of memoized parse results +#! ast = an AST produced from the parse +#! or the symbol 'fail' +#! or a left-recursion object +#! pos = the position in the input string of this entry TUPLE: memo-entry ans pos ; -C: memo-entry -TUPLE: left-recursion seed rule head next ; -C: left-recursion - +TUPLE: left-recursion seed rule head next ; TUPLE: peg-head rule involved-set eval-set ; -C: peg-head -: rule-parser ( rule -- parser ) +: rule-id ( word -- id ) #! A rule is the parser compiled down to a word. It has - #! a "peg" property containing the original parser. - "peg" word-prop ; + #! a "peg-id" property containing the id of the original parser. + "peg-id" word-prop ; : input-slice ( -- slice ) #! Return a slice of the input from the current parse position @@ -94,11 +110,6 @@ C: peg-head #! input slice is based on. dup slice? [ slice-from ] [ drop 0 ] if ; -: input-cache ( parser -- cache ) - #! From the packrat cache, obtain the cache for the parser - #! that maps the position to the parser result. - id>> packrat get [ drop H{ } clone ] cache ; - : process-rule-result ( p result -- result ) [ nip [ ast>> ] [ remaining>> ] bi input-from pos set @@ -114,11 +125,13 @@ C: peg-head : memo ( pos rule -- memo-entry ) #! Return the result from the memo cache. - rule-parser input-cache at ; + rule-id packrat at +! " memo result " write dup . + ; : set-memo ( memo-entry pos rule -- ) #! Store an entry in the cache - rule-parser input-cache set-at ; + rule-id packrat set-at ; : update-m ( ast m -- ) swap >>ans pos get >>pos drop ; @@ -141,9 +154,9 @@ C: peg-head ] if ; inline : grow-lr ( h p r m -- ast ) - >r >r [ heads get set-at ] 2keep r> r> + >r >r [ heads set-at ] 2keep r> r> pick over >r >r (grow-lr) r> r> - swap heads get delete-at + swap heads delete-at dup pos>> pos set ans>> ; inline @@ -156,7 +169,7 @@ C: peg-head :: setup-lr ( r l -- ) l head>> [ - r V{ } clone V{ } clone l (>>head) + r V{ } clone V{ } clone peg-head boa l (>>head) ] unless r l lrstack get (setup-lr) ; @@ -179,11 +192,11 @@ C: peg-head :: recall ( r p -- memo-entry ) [let* | m [ p r memo ] - h [ p heads get at ] + h [ p heads at ] | h [ m r h involved-set>> h rule>> suffix member? not and [ - fail p + fail p memo-entry boa ] [ r h eval-set>> member? [ h [ r swap remove ] change-eval-set drop @@ -201,8 +214,8 @@ C: peg-head :: apply-non-memo-rule ( r p -- ast ) [let* | - lr [ fail r f lrstack get ] - m [ lr lrstack set lr p dup p r set-memo ] + lr [ fail r f lrstack get left-recursion boa ] + m [ lr lrstack set lr p memo-entry boa dup p r set-memo ] ans [ r eval-rule ] | lrstack get next>> lrstack set @@ -224,10 +237,15 @@ C: peg-head nip ] if ; +USE: prettyprint + : apply-rule ( r p -- ast ) +! 2dup [ rule-id ] dip 2array "apply-rule: " write . 2dup recall [ +! " memoed" print nip apply-memo-rule ] [ +! " not memoed" print apply-non-memo-rule ] if* ; inline @@ -238,8 +256,8 @@ C: peg-head 0 pos set f lrstack set V{ } clone error-stack set - H{ } clone heads set - H{ } clone packrat set + H{ } clone \ heads set + H{ } clone \ packrat set ] H{ } make-assoc swap bind ; inline @@ -258,7 +276,7 @@ GENERIC: (compile) ( peg -- quot ) : parser-body ( parser -- quot ) #! Return the body of the word that is the compiled version #! of the parser. - gensym 2dup swap peg>> (compile) 0 1 define-declared swap dupd "peg" set-word-prop + gensym 2dup swap peg>> (compile) 0 1 define-declared swap dupd id>> "peg-id" set-word-prop [ execute-parser ] curry ; : compiled-parser ( parser -- word ) From 2ed0d561aef1338abf2f0ad1f34990e0360c66fe Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 9 Jul 2008 14:26:11 +1200 Subject: [PATCH 24/25] Store peg rules by their id rather than word in left recursion handling --- extra/peg/peg.factor | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 9540b1fd70..11d36f032c 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -93,8 +93,8 @@ reset-pegs #! pos = the position in the input string of this entry TUPLE: memo-entry ans pos ; -TUPLE: left-recursion seed rule head next ; -TUPLE: peg-head rule involved-set eval-set ; +TUPLE: left-recursion seed rule-id head next ; +TUPLE: peg-head rule-id involved-set eval-set ; : rule-id ( word -- id ) #! A rule is the parser compiled down to a word. It has @@ -123,15 +123,15 @@ TUPLE: peg-head rule involved-set eval-set ; #! stack effect ( -- parse-result ) pos get swap execute process-rule-result ; inline -: memo ( pos rule -- memo-entry ) +: memo ( pos id -- memo-entry ) #! Return the result from the memo cache. - rule-id packrat at + packrat at ! " memo result " write dup . ; -: set-memo ( memo-entry pos rule -- ) +: set-memo ( memo-entry pos id -- ) #! Store an entry in the cache - rule-id packrat set-at ; + packrat set-at ; : update-m ( ast m -- ) swap >>ans pos get >>pos drop ; @@ -163,13 +163,13 @@ TUPLE: peg-head rule involved-set eval-set ; :: (setup-lr) ( r l s -- ) s head>> l head>> eq? [ l head>> s (>>head) - l head>> [ s rule>> suffix ] change-involved-set drop + l head>> [ s rule-id>> suffix ] change-involved-set drop r l s next>> (setup-lr) ] unless ; :: setup-lr ( r l -- ) l head>> [ - r V{ } clone V{ } clone peg-head boa l (>>head) + r rule-id V{ } clone V{ } clone peg-head boa l (>>head) ] unless r l lrstack get (setup-lr) ; @@ -177,7 +177,7 @@ TUPLE: peg-head rule involved-set eval-set ; [let* | h [ m ans>> head>> ] | - h rule>> r eq? [ + h rule-id>> r rule-id eq? [ m ans>> seed>> m (>>ans) m ans>> failed? [ fail @@ -191,15 +191,15 @@ TUPLE: peg-head rule involved-set eval-set ; :: recall ( r p -- memo-entry ) [let* | - m [ p r memo ] + m [ p r rule-id memo ] h [ p heads at ] | h [ - m r h involved-set>> h rule>> suffix member? not and [ + m r rule-id h involved-set>> h rule-id>> suffix member? not and [ fail p memo-entry boa ] [ - r h eval-set>> member? [ - h [ r swap remove ] change-eval-set drop + r rule-id h eval-set>> member? [ + h [ r rule-id swap remove ] change-eval-set drop r eval-rule m update-m m @@ -214,8 +214,8 @@ TUPLE: peg-head rule involved-set eval-set ; :: apply-non-memo-rule ( r p -- ast ) [let* | - lr [ fail r f lrstack get left-recursion boa ] - m [ lr lrstack set lr p memo-entry boa dup p r set-memo ] + lr [ fail r rule-id f lrstack get left-recursion boa ] + m [ lr lrstack set lr p memo-entry boa dup p r rule-id set-memo ] ans [ r eval-rule ] | lrstack get next>> lrstack set From b720bc16a8e25fab65a9e3dcccfb044b8096c883 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Thu, 10 Jul 2008 17:17:36 +1200 Subject: [PATCH 25/25] Fix peg equals? method --- extra/peg/peg-tests.factor | 1 + extra/peg/peg.factor | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/extra/peg/peg-tests.factor b/extra/peg/peg-tests.factor index 62e041441f..b11b1011c3 100644 --- a/extra/peg/peg-tests.factor +++ b/extra/peg/peg-tests.factor @@ -193,3 +193,4 @@ IN: peg.tests "B" [ drop t ] satisfy [ 66 >= ] semantic parse ] unit-test +{ f } [ \ + T{ parser f f f } equal? ] unit-test \ No newline at end of file diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 11d36f032c..868072efa5 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: kernel sequences strings fry namespaces math assocs shuffle debugger io vectors arrays math.parser math.order vectors combinators combinators.lib - sets unicode.categories compiler.units parser + combinators.short-circuit classes sets unicode.categories compiler.units parser words quotations effects memoize accessors locals effects splitting ; IN: peg @@ -12,7 +12,7 @@ TUPLE: parse-result remaining ast ; TUPLE: parse-error position messages ; TUPLE: parser peg compiled id ; -M: parser equal? [ id>> ] bi@ = ; +M: parser equal? { [ [ class ] bi@ = ] [ [ id>> ] bi@ = ] } 2&& ; M: parser hashcode* id>> hashcode* ; C: parse-result