Merge branch 'master' of git://double.co.nz/git/factor

db4
Slava Pestov 2008-07-10 00:18:30 -05:00
commit 8a9fd1c2b5
16 changed files with 520 additions and 396 deletions


@ -7,11 +7,11 @@ USING: kernel tools.test peg peg.ebnf words math math.parser
IN: peg.ebnf.tests IN: peg.ebnf.tests
{ T{ ebnf-non-terminal f "abc" } } [ { T{ ebnf-non-terminal f "abc" } } [
"abc" 'non-terminal' parse ast>> "abc" 'non-terminal' parse
] unit-test ] unit-test
{ T{ ebnf-terminal f "55" } } [ { T{ ebnf-terminal f "55" } } [
"'55'" 'terminal' parse ast>> "'55'" 'terminal' parse
] unit-test ] unit-test
{ {
@ -22,7 +22,7 @@ IN: peg.ebnf.tests
} }
} }
} [ } [
"digit = '1' | '2'" 'rule' parse ast>> "digit = '1' | '2'" 'rule' parse
] unit-test ] unit-test
{ {
@ -33,7 +33,7 @@ IN: peg.ebnf.tests
} }
} }
} [ } [
"digit = '1' '2'" 'rule' parse ast>> "digit = '1' '2'" 'rule' parse
] unit-test ] unit-test
{ {
@ -46,7 +46,7 @@ IN: peg.ebnf.tests
} }
} }
} [ } [
"one two | three" 'choice' parse ast>> "one two | three" 'choice' parse
] unit-test ] unit-test
{ {
@ -61,7 +61,7 @@ IN: peg.ebnf.tests
} }
} }
} [ } [
"one {two | three}" 'choice' parse ast>> "one {two | three}" 'choice' parse
] unit-test ] unit-test
{ {
@ -81,7 +81,7 @@ IN: peg.ebnf.tests
} }
} }
} [ } [
"one ((two | three) four)*" 'choice' parse ast>> "one ((two | three) four)*" 'choice' parse
] unit-test ] unit-test
{ {
@ -93,166 +93,166 @@ IN: peg.ebnf.tests
} }
} }
} [ } [
"one ( two )? three" 'choice' parse ast>> "one ( two )? three" 'choice' parse
] unit-test ] unit-test
{ "foo" } [ { "foo" } [
"\"foo\"" 'identifier' parse ast>> "\"foo\"" 'identifier' parse
] unit-test ] unit-test
{ "foo" } [ { "foo" } [
"'foo'" 'identifier' parse ast>> "'foo'" 'identifier' parse
] unit-test ] unit-test
{ "foo" } [ { "foo" } [
"foo" 'non-terminal' parse ast>> ebnf-non-terminal-symbol "foo" 'non-terminal' parse ebnf-non-terminal-symbol
] unit-test ] unit-test
{ "foo" } [ { "foo" } [
"foo]" 'non-terminal' parse ast>> ebnf-non-terminal-symbol "foo]" 'non-terminal' parse ebnf-non-terminal-symbol
] unit-test ] unit-test
{ V{ "a" "b" } } [ { V{ "a" "b" } } [
"ab" [EBNF foo='a' 'b' EBNF] call ast>> "ab" [EBNF foo='a' 'b' EBNF]
] unit-test ] unit-test
{ V{ 1 "b" } } [ { V{ 1 "b" } } [
"ab" [EBNF foo=('a')[[ drop 1 ]] 'b' EBNF] call ast>> "ab" [EBNF foo=('a')[[ drop 1 ]] 'b' EBNF]
] unit-test ] unit-test
{ V{ 1 2 } } [ { V{ 1 2 } } [
"ab" [EBNF foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]] EBNF] call ast>> "ab" [EBNF foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]] EBNF]
] unit-test ] unit-test
{ CHAR: A } [ { CHAR: A } [
"A" [EBNF foo=[A-Z] EBNF] call ast>> "A" [EBNF foo=[A-Z] EBNF]
] unit-test ] unit-test
{ CHAR: Z } [ { CHAR: Z } [
"Z" [EBNF foo=[A-Z] EBNF] call ast>> "Z" [EBNF foo=[A-Z] EBNF]
] unit-test ] unit-test
{ f } [ [
"0" [EBNF foo=[A-Z] EBNF] call "0" [EBNF foo=[A-Z] EBNF]
] unit-test ] must-fail
{ CHAR: 0 } [ { CHAR: 0 } [
"0" [EBNF foo=[^A-Z] EBNF] call ast>> "0" [EBNF foo=[^A-Z] EBNF]
] unit-test ] unit-test
{ f } [ [
"A" [EBNF foo=[^A-Z] EBNF] call "A" [EBNF foo=[^A-Z] EBNF]
] unit-test ] must-fail
{ f } [ [
"Z" [EBNF foo=[^A-Z] EBNF] call "Z" [EBNF foo=[^A-Z] EBNF]
] unit-test ] must-fail
{ V{ "1" "+" "foo" } } [ { V{ "1" "+" "foo" } } [
"1+1" [EBNF foo='1' '+' '1' [[ drop "foo" ]] EBNF] call ast>> "1+1" [EBNF foo='1' '+' '1' [[ drop "foo" ]] EBNF]
] unit-test ] unit-test
{ "foo" } [ { "foo" } [
"1+1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] EBNF] call ast>> "1+1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] EBNF]
] unit-test ] unit-test
{ "foo" } [ { "foo" } [
"1+1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF] call ast>> "1+1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF]
] unit-test ] unit-test
{ "bar" } [ { "bar" } [
"1-1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF] call ast>> "1-1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF]
] unit-test ] unit-test
{ 6 } [ { 6 } [
"4+2" [EBNF num=[0-9] => [[ digit> ]] foo=num:x '+' num:y => [[ x y + ]] EBNF] call ast>> "4+2" [EBNF num=[0-9] => [[ digit> ]] foo=num:x '+' num:y => [[ x y + ]] EBNF]
] unit-test ] unit-test
{ 6 } [ { 6 } [
"4+2" [EBNF foo=[0-9]:x '+' [0-9]:y => [[ x digit> y digit> + ]] EBNF] call ast>> "4+2" [EBNF foo=[0-9]:x '+' [0-9]:y => [[ x digit> y digit> + ]] EBNF]
] unit-test ] unit-test
{ 10 } [ { 10 } [
{ 1 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF] call ast>> { 1 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF]
] unit-test ] unit-test
{ f } [ [
{ "a" 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF] call { "a" 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF]
] unit-test ] must-fail
{ 3 } [ { 3 } [
{ 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF] call ast>> { 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF]
] unit-test ] unit-test
{ f } [ [
"ab" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call "ab" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF]
] unit-test ] must-fail
{ V{ "a" " " "b" } } [ { V{ "a" " " "b" } } [
"a b" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call ast>> "a b" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF]
] unit-test ] unit-test
{ V{ "a" "\t" "b" } } [ { V{ "a" "\t" "b" } } [
"a\tb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call ast>> "a\tb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF]
] unit-test ] unit-test
{ V{ "a" "\n" "b" } } [ { V{ "a" "\n" "b" } } [
"a\nb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF] call ast>> "a\nb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF]
] unit-test ] unit-test
{ V{ "a" f "b" } } [ { V{ "a" f "b" } } [
"ab" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call ast>> "ab" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF]
] unit-test ] unit-test
{ V{ "a" " " "b" } } [ { V{ "a" " " "b" } } [
"a b" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call ast>> "a b" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF]
] unit-test ] unit-test
{ V{ "a" "\t" "b" } } [ { V{ "a" "\t" "b" } } [
"a\tb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call ast>> "a\tb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF]
] unit-test ] unit-test
{ V{ "a" "\n" "b" } } [ { V{ "a" "\n" "b" } } [
"a\nb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF] call ast>> "a\nb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF]
] unit-test ] unit-test
{ V{ "a" "b" } } [ { V{ "a" "b" } } [
"ab" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call ast>> "ab" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF]
] unit-test ] unit-test
{ V{ "a" "b" } } [ { V{ "a" "b" } } [
"a\tb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call ast>> "a\tb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF]
] unit-test ] unit-test
{ V{ "a" "b" } } [ { V{ "a" "b" } } [
"a\nb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call ast>> "a\nb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF]
] unit-test ] unit-test
{ f } [ [
"axb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF] call "axb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF]
] unit-test ] must-fail
{ V{ V{ 49 } "+" V{ 49 } } } [ { V{ V{ 49 } "+" V{ 49 } } } [
#! Test direct left recursion. #! Test direct left recursion.
#! Using packrat, so first part of expr fails, causing 2nd choice to be used #! Using packrat, so first part of expr fails, causing 2nd choice to be used
"1+1" [EBNF num=([0-9])+ expr=expr "+" num | num EBNF] call ast>> "1+1" [EBNF num=([0-9])+ expr=expr "+" num | num EBNF]
] unit-test ] unit-test
{ V{ V{ V{ 49 } "+" V{ 49 } } "+" V{ 49 } } } [ { V{ V{ V{ 49 } "+" V{ 49 } } "+" V{ 49 } } } [
#! Test direct left recursion. #! Test direct left recursion.
#! Using packrat, so first part of expr fails, causing 2nd choice to be used #! Using packrat, so first part of expr fails, causing 2nd choice to be used
"1+1+1" [EBNF num=([0-9])+ expr=expr "+" num | num EBNF] call ast>> "1+1+1" [EBNF num=([0-9])+ expr=expr "+" num | num EBNF]
] unit-test ] unit-test
{ V{ V{ V{ 49 } "+" V{ 49 } } "+" V{ 49 } } } [ { V{ V{ V{ 49 } "+" V{ 49 } } "+" V{ 49 } } } [
#! Test indirect left recursion. #! Test indirect left recursion.
#! Using packrat, so first part of expr fails, causing 2nd choice to be used #! Using packrat, so first part of expr fails, causing 2nd choice to be used
"1+1+1" [EBNF num=([0-9])+ x=expr expr=x "+" num | num EBNF] call ast>> "1+1+1" [EBNF num=([0-9])+ x=expr expr=x "+" num | num EBNF]
] unit-test ] unit-test
{ t } [ { t } [
"abcd='9' | ('8'):x => [[ x ]]" 'ebnf' parse parse-result-remaining empty? "abcd='9' | ('8'):x => [[ x ]]" 'ebnf' (parse) remaining>> empty?
] unit-test ] unit-test
EBNF: primary EBNF: primary
@ -281,133 +281,133 @@ main = Primary
;EBNF ;EBNF
{ "this" } [ { "this" } [
"this" primary ast>> "this" primary
] unit-test ] unit-test
{ V{ "this" "." "x" } } [ { V{ "this" "." "x" } } [
"this.x" primary ast>> "this.x" primary
] unit-test ] unit-test
{ V{ V{ "this" "." "x" } "." "y" } } [ { V{ V{ "this" "." "x" } "." "y" } } [
"this.x.y" primary ast>> "this.x.y" primary
] unit-test ] unit-test
{ V{ V{ "this" "." "x" } "." "m" "(" ")" } } [ { V{ V{ "this" "." "x" } "." "m" "(" ")" } } [
"this.x.m()" primary ast>> "this.x.m()" primary
] unit-test ] unit-test
{ V{ V{ V{ "x" "[" "i" "]" } "[" "j" "]" } "." "y" } } [ { V{ V{ V{ "x" "[" "i" "]" } "[" "j" "]" } "." "y" } } [
"x[i][j].y" primary ast>> "x[i][j].y" primary
] unit-test ] unit-test
'ebnf' compile must-infer 'ebnf' compile must-infer
{ V{ V{ "a" "b" } "c" } } [ { V{ V{ "a" "b" } "c" } } [
"abc" [EBNF a="a" "b" foo=(a "c") EBNF] call ast>> "abc" [EBNF a="a" "b" foo=(a "c") EBNF]
] unit-test ] unit-test
{ V{ V{ "a" "b" } "c" } } [ { V{ V{ "a" "b" } "c" } } [
"abc" [EBNF a="a" "b" foo={a "c"} EBNF] call ast>> "abc" [EBNF a="a" "b" foo={a "c"} EBNF]
] unit-test ] unit-test
{ V{ V{ "a" "b" } "c" } } [ { V{ V{ "a" "b" } "c" } } [
"abc" [EBNF a="a" "b" foo=a "c" EBNF] call ast>> "abc" [EBNF a="a" "b" foo=a "c" EBNF]
] unit-test ] unit-test
{ f } [ [
"a bc" [EBNF a="a" "b" foo=(a "c") EBNF] call "a bc" [EBNF a="a" "b" foo=(a "c") EBNF]
] unit-test ] must-fail
{ f } [ [
"a bc" [EBNF a="a" "b" foo=a "c" EBNF] call "a bc" [EBNF a="a" "b" foo=a "c" EBNF]
] unit-test ] must-fail
{ f } [ [
"a bc" [EBNF a="a" "b" foo={a "c"} EBNF] call "a bc" [EBNF a="a" "b" foo={a "c"} EBNF]
] unit-test ] must-fail
{ f } [ [
"ab c" [EBNF a="a" "b" foo=a "c" EBNF] call "ab c" [EBNF a="a" "b" foo=a "c" EBNF]
] unit-test ] must-fail
{ V{ V{ "a" "b" } "c" } } [ { V{ V{ "a" "b" } "c" } } [
"ab c" [EBNF a="a" "b" foo={a "c"} EBNF] call ast>> "ab c" [EBNF a="a" "b" foo={a "c"} EBNF]
] unit-test ] unit-test
{ f } [ [
"ab c" [EBNF a="a" "b" foo=(a "c") EBNF] call "ab c" [EBNF a="a" "b" foo=(a "c") EBNF]
] unit-test ] must-fail
{ f } [ [
"a b c" [EBNF a="a" "b" foo=a "c" EBNF] call "a b c" [EBNF a="a" "b" foo=a "c" EBNF]
] unit-test ] must-fail
{ f } [ [
"a b c" [EBNF a="a" "b" foo=(a "c") EBNF] call "a b c" [EBNF a="a" "b" foo=(a "c") EBNF]
] unit-test ] must-fail
{ f } [ [
"a b c" [EBNF a="a" "b" foo={a "c"} EBNF] call "a b c" [EBNF a="a" "b" foo={a "c"} EBNF]
] unit-test ] must-fail
{ V{ V{ V{ "a" "b" } "c" } V{ V{ "a" "b" } "c" } } } [ { V{ V{ V{ "a" "b" } "c" } V{ V{ "a" "b" } "c" } } } [
"ab cab c" [EBNF a="a" "b" foo={a "c"}* EBNF] call ast>> "ab cab c" [EBNF a="a" "b" foo={a "c"}* EBNF]
] unit-test ] unit-test
{ V{ } } [ { V{ } } [
"ab cab c" [EBNF a="a" "b" foo=(a "c")* EBNF] call ast>> "ab cab c" [EBNF a="a" "b" foo=(a "c")* EBNF]
] unit-test ] unit-test
{ V{ V{ V{ "a" "b" } "c" } V{ V{ "a" "b" } "c" } } } [ { V{ V{ V{ "a" "b" } "c" } V{ V{ "a" "b" } "c" } } } [
"ab c ab c" [EBNF a="a" "b" foo={a "c"}* EBNF] call ast>> "ab c ab c" [EBNF a="a" "b" foo={a "c"}* EBNF]
] unit-test ] unit-test
{ V{ } } [ { V{ } } [
"ab c ab c" [EBNF a="a" "b" foo=(a "c")* EBNF] call ast>> "ab c ab c" [EBNF a="a" "b" foo=(a "c")* EBNF]
] unit-test ] unit-test
{ V{ "a" "a" "a" } } [ { V{ "a" "a" "a" } } [
"aaa" [EBNF a=('a')* b=!('b') a:x => [[ x ]] EBNF] call ast>> "aaa" [EBNF a=('a')* b=!('b') a:x => [[ x ]] EBNF]
] unit-test ] unit-test
{ t } [ { t } [
"aaa" [EBNF a=('a')* b=!('b') a:x => [[ x ]] EBNF] call ast>> "aaa" [EBNF a=('a')* b=!('b') a:x => [[ x ]] EBNF]
"aaa" [EBNF a=('a')* b=!('b') (a):x => [[ x ]] EBNF] call ast>> = "aaa" [EBNF a=('a')* b=!('b') (a):x => [[ x ]] EBNF] =
] unit-test ] unit-test
{ V{ "a" "a" "a" } } [ { V{ "a" "a" "a" } } [
"aaa" [EBNF a=('a')* b=a:x => [[ x ]] EBNF] call ast>> "aaa" [EBNF a=('a')* b=a:x => [[ x ]] EBNF]
] unit-test ] unit-test
{ t } [ { t } [
"aaa" [EBNF a=('a')* b=a:x => [[ x ]] EBNF] call ast>> "aaa" [EBNF a=('a')* b=a:x => [[ x ]] EBNF]
"aaa" [EBNF a=('a')* b=(a):x => [[ x ]] EBNF] call ast>> = "aaa" [EBNF a=('a')* b=(a):x => [[ x ]] EBNF] =
] unit-test ] unit-test
{ t } [ { t } [
"number=(digit)+:n 'a'" 'ebnf' parse remaining>> length zero? "number=(digit)+:n 'a'" 'ebnf' (parse) remaining>> length zero?
] unit-test ] unit-test
{ t } [ { t } [
"number=(digit)+ 'a'" 'ebnf' parse remaining>> length zero? "number=(digit)+ 'a'" 'ebnf' (parse) remaining>> length zero?
] unit-test ] unit-test
{ t } [ { t } [
"number=digit+ 'a'" 'ebnf' parse remaining>> length zero? "number=digit+ 'a'" 'ebnf' (parse) remaining>> length zero?
] unit-test ] unit-test
{ t } [ { t } [
"number=digit+:n 'a'" 'ebnf' parse remaining>> length zero? "number=digit+:n 'a'" 'ebnf' (parse) remaining>> length zero?
] unit-test ] unit-test
{ t } [ { t } [
"foo=(name):n !(keyword) => [[ n ]]" 'rule' parse ast>> "foo=(name):n !(keyword) => [[ n ]]" 'rule' parse
"foo=name:n !(keyword) => [[ n ]]" 'rule' parse ast>> = "foo=name:n !(keyword) => [[ n ]]" 'rule' parse =
] unit-test ] unit-test
{ t } [ { t } [
"foo=!(keyword) (name):n => [[ n ]]" 'rule' parse ast>> "foo=!(keyword) (name):n => [[ n ]]" 'rule' parse
"foo=!(keyword) name:n => [[ n ]]" 'rule' parse ast>> = "foo=!(keyword) name:n => [[ n ]]" 'rule' parse =
] unit-test ] unit-test
<< <<
@ -429,38 +429,38 @@ foo=<foreign any-char> 'd'
;EBNF ;EBNF
{ "a" } [ { "a" } [
"a" parser1 ast>> "a" parser1
] unit-test ] unit-test
{ V{ "a" "b" } } [ { V{ "a" "b" } } [
"ab" parser2 ast>> "ab" parser2
] unit-test ] unit-test
{ V{ "a" "c" } } [ { V{ "a" "c" } } [
"ac" parser3 ast>> "ac" parser3
] unit-test ] unit-test
{ V{ CHAR: a "d" } } [ { V{ CHAR: a "d" } } [
"ad" parser4 ast>> "ad" parser4
] unit-test ] unit-test
{ t } [ { t } [
"USING: kernel peg.ebnf ; [EBNF foo='a' '\n' => [[ drop \"\n\" ]] EBNF]" eval drop t "USING: kernel peg.ebnf ; \"a\\n\" [EBNF foo='a' '\n' => [[ drop \"\n\" ]] EBNF]" eval drop t
] unit-test ] unit-test
[ [
"USING: peg.ebnf ; [EBNF foo='a' foo='b' EBNF]" eval drop "USING: peg.ebnf ; \"ab\" [EBNF foo='a' foo='b' EBNF]" eval drop
] must-fail ] must-fail
{ t } [ { t } [
#! Rule lookup occurs in a namespace. This causes an incorrect duplicate rule #! Rule lookup occurs in a namespace. This causes an incorrect duplicate rule
#! if a var in a namespace is set. This unit test is to remind me to fix this. #! if a var in a namespace is set. This unit test is to remind me to fix this.
[ "fail" "foo" set "foo='a'" 'ebnf' parse ast>> transform drop t ] with-scope [ "fail" "foo" set "foo='a'" 'ebnf' parse transform drop t ] with-scope
] unit-test ] unit-test
#! Tokenizer tests #! Tokenizer tests
{ V{ "a" CHAR: b } } [ { V{ "a" CHAR: b } } [
"ab" [EBNF tokenizer=default foo="a" . EBNF] call ast>> "ab" [EBNF tokenizer=default foo="a" . EBNF]
] unit-test ] unit-test
TUPLE: ast-number value ; TUPLE: ast-number value ;
@ -488,7 +488,7 @@ Tok = Spaces (Number | Special )
tokenizer = <foreign a-tokenizer Tok> foo=. tokenizer = <foreign a-tokenizer Tok> foo=.
tokenizer=default baz=. tokenizer=default baz=.
main = bar foo foo baz main = bar foo foo baz
EBNF] call ast>> EBNF]
] unit-test ] unit-test
{ V{ CHAR: 5 "+" CHAR: 2 } } [ { V{ CHAR: 5 "+" CHAR: 2 } } [
@ -499,7 +499,7 @@ Tok = Spaces (Number | Special )
spaces=space* => [[ ignore ]] spaces=space* => [[ ignore ]]
tokenizer=spaces (number | operator) tokenizer=spaces (number | operator)
main= . . . main= . . .
EBNF] call ast>> EBNF]
] unit-test ] unit-test
{ V{ CHAR: 5 "+" CHAR: 2 } } [ { V{ CHAR: 5 "+" CHAR: 2 } } [
@ -510,9 +510,13 @@ Tok = Spaces (Number | Special )
spaces=space* => [[ ignore ]] spaces=space* => [[ ignore ]]
tokenizer=spaces (number | operator) tokenizer=spaces (number | operator)
main= . . . main= . . .
EBNF] call ast>> EBNF]
] unit-test ] unit-test
{ "++" } [ { "++" } [
"++--" [EBNF tokenizer=("++" | "--") main="++" EBNF] call ast>> "++--" [EBNF tokenizer=("++" | "--") main="++" EBNF]
] unit-test
{ "\\" } [
"\\" [EBNF foo="\\" EBNF]
] unit-test ] unit-test
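Taken together, the rewritten tests above show the calling-convention change in this commit: an [EBNF ... EBNF] form now parses its input and leaves the AST directly (the trailing call ast>> is gone), and a failed parse throws instead of returning f, hence the conversions to must-fail. A minimal sketch, using only forms that already appear in these tests:

USING: peg.ebnf ;

"ab" [EBNF foo='a' 'b' EBNF]    ! leaves V{ "a" "b" } -- no call, no ast>>
! "0" [EBNF foo=[A-Z] EBNF]     ! would now throw a parse error rather than return f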


@ -99,6 +99,7 @@ PEG: escaper ( string -- ast )
"\\t" token [ drop "\t" ] action , "\\t" token [ drop "\t" ] action ,
"\\n" token [ drop "\n" ] action , "\\n" token [ drop "\n" ] action ,
"\\r" token [ drop "\r" ] action , "\\r" token [ drop "\r" ] action ,
"\\\\" token [ drop "\\" ] action ,
] choice* any-char-parser 2array choice repeat0 ; ] choice* any-char-parser 2array choice repeat0 ;
: replace-escapes ( string -- string ) : replace-escapes ( string -- string )
@ -503,7 +504,7 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
] [ ] make box ; ] [ ] make box ;
: transform-ebnf ( string -- object ) : transform-ebnf ( string -- object )
'ebnf' parse parse-result-ast transform ; 'ebnf' parse transform ;
: check-parse-result ( result -- result ) : check-parse-result ( result -- result )
dup [ dup [
@ -517,12 +518,18 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
"Could not parse EBNF" throw "Could not parse EBNF" throw
] if ; ] if ;
: ebnf>quot ( string -- hashtable quot ) : parse-ebnf ( string -- hashtable )
'ebnf' parse check-parse-result 'ebnf' (parse) check-parse-result ast>> transform ;
parse-result-ast transform dup dup parser [ main swap at compile ] with-variable
[ compiled-parse ] curry [ with-scope ] curry ;
: [EBNF "EBNF]" reset-tokenizer parse-multiline-string ebnf>quot nip parsed reset-tokenizer ; parsing : ebnf>quot ( string -- hashtable quot )
parse-ebnf dup dup parser [ main swap at compile ] with-variable
[ compiled-parse ] curry [ with-scope ast>> ] curry ;
: <EBNF "EBNF>" reset-tokenizer parse-multiline-string parse-ebnf main swap at
parsed reset-tokenizer ; parsing
: [EBNF "EBNF]" reset-tokenizer parse-multiline-string ebnf>quot nip
parsed \ call parsed reset-tokenizer ; parsing
: EBNF: : EBNF:
reset-tokenizer CREATE-WORD dup ";EBNF" parse-multiline-string reset-tokenizer CREATE-WORD dup ";EBNF" parse-multiline-string
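A hedged usage sketch of the entry points defined above, with made-up rule and word names (decimal, num): EBNF: defines a word that parses its input and returns the AST, [EBNF ... EBNF] does the same inline (its definition now appends \ call parsed, so no explicit call is needed at the use site), and the new <EBNF ... EBNF> form appears to leave the parser for the main rule as a literal rather than invoking it.

USING: peg.ebnf ;

EBNF: decimal
num = ([0-9])+
;EBNF

"123" decimal                   ! => V{ 49 50 51 }
"ab" [EBNF foo='a' 'b' EBNF]    ! => V{ "a" "b" }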


@ -5,21 +5,21 @@ USING: kernel tools.test peg peg.expr multiline sequences ;
IN: peg.expr.tests IN: peg.expr.tests
{ 5 } [ { 5 } [
"2+3" eval-expr "2+3" expr
] unit-test ] unit-test
{ 6 } [ { 6 } [
"2*3" eval-expr "2*3" expr
] unit-test ] unit-test
{ 14 } [ { 14 } [
"2+3*4" eval-expr "2+3*4" expr
] unit-test ] unit-test
{ 17 } [ { 17 } [
"2+3*4+3" eval-expr "2+3*4+3" expr
] unit-test ] unit-test
{ 23 } [ { 23 } [
"2+3*(4+3)" eval-expr "2+3*(4+3)" expr
] unit-test ] unit-test


@ -18,7 +18,3 @@ exp = exp "+" fac => [[ first3 nip + ]]
| exp "-" fac => [[ first3 nip - ]] | exp "-" fac => [[ first3 nip - ]]
| fac | fac
;EBNF ;EBNF
: eval-expr ( string -- number )
expr ast>> ;
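The eval-expr helper removed here only unwrapped the parse result; with EBNF:-defined words now returning the AST themselves, callers use expr directly, as the updated tests above already do:

USING: peg.expr ;

"2+3*(4+3)" expr    ! => 23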


@ -7,7 +7,7 @@ TUPLE: ast-keyword value ;
TUPLE: ast-name value ; TUPLE: ast-name value ;
TUPLE: ast-number value ; TUPLE: ast-number value ;
TUPLE: ast-string value ; TUPLE: ast-string value ;
TUPLE: ast-regexp value ; TUPLE: ast-regexp body flags ;
TUPLE: ast-cond-expr condition then else ; TUPLE: ast-cond-expr condition then else ;
TUPLE: ast-set lhs rhs ; TUPLE: ast-set lhs rhs ;
TUPLE: ast-get value ; TUPLE: ast-get value ;
@ -38,5 +38,6 @@ TUPLE: ast-continue ;
TUPLE: ast-throw e ; TUPLE: ast-throw e ;
TUPLE: ast-try t e c f ; TUPLE: ast-try t e c f ;
TUPLE: ast-return e ; TUPLE: ast-return e ;
TUPLE: ast-with expr body ;
TUPLE: ast-case c cs ; TUPLE: ast-case c cs ;
TUPLE: ast-default cs ; TUPLE: ast-default cs ;


@ -4,8 +4,4 @@ USING: kernel accessors peg.javascript.tokenizer peg.javascript.parser ;
IN: peg.javascript IN: peg.javascript
: parse-javascript ( string -- ast ) : parse-javascript ( string -- ast )
javascript [ javascript ;
ast>>
] [
"Unable to parse JavaScript" throw
] if* ;
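With parse failures now signalled by an exception from within the parser itself, the wrapper no longer needs its own error branch and simply delegates to the javascript word. Usage is unchanged; a sketch (the resulting tree has the shape shown in the parser tests below):

USING: peg.javascript ;

"123; 'hello'; foo(x);" parse-javascript    ! => the AST tree, or a parse error is thrown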


@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
! !
USING: kernel tools.test peg peg.javascript.ast peg.javascript.parser USING: kernel tools.test peg peg.javascript.ast peg.javascript.parser
accessors multiline sequences math ; accessors multiline sequences math peg.ebnf ;
IN: peg.javascript.parser.tests IN: peg.javascript.parser.tests
\ javascript must-infer \ javascript must-infer
@ -23,14 +23,14 @@ IN: peg.javascript.parser.tests
} }
} }
} [ } [
"123; 'hello'; foo(x);" javascript ast>> "123; 'hello'; foo(x);" javascript
] unit-test ] unit-test
{ t } [ { t } [
<" <"
var x=5 var x=5
var y=10 var y=10
"> javascript remaining>> length zero? "> main \ javascript rule (parse) remaining>> length zero?
] unit-test ] unit-test
@ -41,7 +41,7 @@ function foldl(f, initial, seq) {
initial = f(initial, seq[i]); initial = f(initial, seq[i]);
return initial; return initial;
} }
"> javascript remaining>> length zero? "> main \ javascript rule (parse) remaining>> length zero?
] unit-test ] unit-test
{ t } [ { t } [
@ -52,6 +52,6 @@ ParseState.prototype.from = function(index) {
r.length = this.length - index; r.length = this.length - index;
return r; return r;
} }
"> javascript remaining>> length zero? "> main \ javascript rule (parse) remaining>> length zero?
] unit-test ] unit-test


@ -26,9 +26,9 @@ End = !(.)
Space = " " | "\t" | "\n" Space = " " | "\t" | "\n"
Spaces = Space* => [[ ignore ]] Spaces = Space* => [[ ignore ]]
Name = . ?[ ast-name? ]? => [[ value>> ]] Name = . ?[ ast-name? ]? => [[ value>> ]]
Number = . ?[ ast-number? ]? => [[ value>> ]] Number = . ?[ ast-number? ]?
String = . ?[ ast-string? ]? => [[ value>> ]] String = . ?[ ast-string? ]?
RegExp = . ?[ ast-regexp? ]? => [[ value>> ]] RegExp = . ?[ ast-regexp? ]?
SpacesNoNl = (!(nl) Space)* => [[ ignore ]] SpacesNoNl = (!(nl) Space)* => [[ ignore ]]
Expr = OrExpr:e "?" Expr:t ":" Expr:f => [[ e t f ast-cond-expr boa ]] Expr = OrExpr:e "?" Expr:t ":" Expr:f => [[ e t f ast-cond-expr boa ]]
@ -40,22 +40,77 @@ Expr = OrExpr:e "?" Expr:t ":" Expr:f => [[ e t f ast-cond-exp
| OrExpr:e "%=" Expr:rhs => [[ e rhs "%" ast-mset boa ]] | OrExpr:e "%=" Expr:rhs => [[ e rhs "%" ast-mset boa ]]
| OrExpr:e "&&=" Expr:rhs => [[ e rhs "&&" ast-mset boa ]] | OrExpr:e "&&=" Expr:rhs => [[ e rhs "&&" ast-mset boa ]]
| OrExpr:e "||=" Expr:rhs => [[ e rhs "||" ast-mset boa ]] | OrExpr:e "||=" Expr:rhs => [[ e rhs "||" ast-mset boa ]]
| OrExpr:e "^=" Expr:rhs => [[ e rhs "^" ast-mset boa ]]
| OrExpr:e "&=" Expr:rhs => [[ e rhs "&" ast-mset boa ]]
| OrExpr:e "|=" Expr:rhs => [[ e rhs "|" ast-mset boa ]]
| OrExpr:e "<<=" Expr:rhs => [[ e rhs "<<" ast-mset boa ]]
| OrExpr:e ">>=" Expr:rhs => [[ e rhs ">>" ast-mset boa ]]
| OrExpr:e ">>>=" Expr:rhs => [[ e rhs ">>>" ast-mset boa ]]
| OrExpr:e => [[ e ]] | OrExpr:e => [[ e ]]
ExprNoIn = OrExprNoIn:e "?" ExprNoIn:t ":" ExprNoIn:f => [[ e t f ast-cond-expr boa ]]
| OrExprNoIn:e "=" ExprNoIn:rhs => [[ e rhs ast-set boa ]]
| OrExprNoIn:e "+=" ExprNoIn:rhs => [[ e rhs "+" ast-mset boa ]]
| OrExprNoIn:e "-=" ExprNoIn:rhs => [[ e rhs "-" ast-mset boa ]]
| OrExprNoIn:e "*=" ExprNoIn:rhs => [[ e rhs "*" ast-mset boa ]]
| OrExprNoIn:e "/=" ExprNoIn:rhs => [[ e rhs "/" ast-mset boa ]]
| OrExprNoIn:e "%=" ExprNoIn:rhs => [[ e rhs "%" ast-mset boa ]]
| OrExprNoIn:e "&&=" ExprNoIn:rhs => [[ e rhs "&&" ast-mset boa ]]
| OrExprNoIn:e "||=" ExprNoIn:rhs => [[ e rhs "||" ast-mset boa ]]
| OrExprNoIn:e "^=" ExprNoIn:rhs => [[ e rhs "^" ast-mset boa ]]
| OrExprNoIn:e "&=" ExprNoIn:rhs => [[ e rhs "&" ast-mset boa ]]
| OrExprNoIn:e "|=" ExprNoIn:rhs => [[ e rhs "|" ast-mset boa ]]
| OrExprNoIn:e "<<=" ExprNoIn:rhs => [[ e rhs "<<" ast-mset boa ]]
| OrExprNoIn:e ">>=" ExprNoIn:rhs => [[ e rhs ">>" ast-mset boa ]]
| OrExprNoIn:e ">>>=" ExprNoIn:rhs => [[ e rhs ">>>" ast-mset boa ]]
| OrExprNoIn:e => [[ e ]]
OrExpr = OrExpr:x "||" AndExpr:y => [[ x y "||" ast-binop boa ]] OrExpr = OrExpr:x "||" AndExpr:y => [[ x y "||" ast-binop boa ]]
| AndExpr | AndExpr
AndExpr = AndExpr:x "&&" EqExpr:y => [[ x y "&&" ast-binop boa ]] OrExprNoIn = OrExprNoIn:x "||" AndExprNoIn:y => [[ x y "||" ast-binop boa ]]
| AndExprNoIn
AndExpr = AndExpr:x "&&" BitOrExpr:y => [[ x y "&&" ast-binop boa ]]
| BitOrExpr
AndExprNoIn = AndExprNoIn:x "&&" BitOrExprNoIn:y => [[ x y "&&" ast-binop boa ]]
| BitOrExprNoIn
BitOrExpr = BitOrExpr:x "|" BitXORExpr:y => [[ x y "|" ast-binop boa ]]
| BitXORExpr
BitOrExprNoIn = BitOrExprNoIn:x "|" BitXORExprNoIn:y => [[ x y "|" ast-binop boa ]]
| BitXORExprNoIn
BitXORExpr = BitXORExpr:x "^" BitANDExpr:y => [[ x y "^" ast-binop boa ]]
| BitANDExpr
BitXORExprNoIn = BitXORExprNoIn:x "^" BitANDExprNoIn:y => [[ x y "^" ast-binop boa ]]
| BitANDExprNoIn
BitANDExpr = BitANDExpr:x "&" EqExpr:y => [[ x y "&" ast-binop boa ]]
| EqExpr | EqExpr
BitANDExprNoIn = BitANDExprNoIn:x "&" EqExprNoIn:y => [[ x y "&" ast-binop boa ]]
| EqExprNoIn
EqExpr = EqExpr:x "==" RelExpr:y => [[ x y "==" ast-binop boa ]] EqExpr = EqExpr:x "==" RelExpr:y => [[ x y "==" ast-binop boa ]]
| EqExpr:x "!=" RelExpr:y => [[ x y "!=" ast-binop boa ]] | EqExpr:x "!=" RelExpr:y => [[ x y "!=" ast-binop boa ]]
| EqExpr:x "===" RelExpr:y => [[ x y "===" ast-binop boa ]] | EqExpr:x "===" RelExpr:y => [[ x y "===" ast-binop boa ]]
| EqExpr:x "!==" RelExpr:y => [[ x y "!==" ast-binop boa ]] | EqExpr:x "!==" RelExpr:y => [[ x y "!==" ast-binop boa ]]
| RelExpr | RelExpr
RelExpr = RelExpr:x ">" AddExpr:y => [[ x y ">" ast-binop boa ]] EqExprNoIn = EqExprNoIn:x "==" RelExprNoIn:y => [[ x y "==" ast-binop boa ]]
| RelExpr:x ">=" AddExpr:y => [[ x y ">=" ast-binop boa ]] | EqExprNoIn:x "!=" RelExprNoIn:y => [[ x y "!=" ast-binop boa ]]
| RelExpr:x "<" AddExpr:y => [[ x y "<" ast-binop boa ]] | EqExprNoIn:x "===" RelExprNoIn:y => [[ x y "===" ast-binop boa ]]
| RelExpr:x "<=" AddExpr:y => [[ x y "<=" ast-binop boa ]] | EqExprNoIn:x "!==" RelExprNoIn:y => [[ x y "!==" ast-binop boa ]]
| RelExpr:x "instanceof" AddExpr:y => [[ x y "instanceof" ast-binop boa ]] | RelExprNoIn
RelExpr = RelExpr:x ">" ShiftExpr:y => [[ x y ">" ast-binop boa ]]
| RelExpr:x ">=" ShiftExpr:y => [[ x y ">=" ast-binop boa ]]
| RelExpr:x "<" ShiftExpr:y => [[ x y "<" ast-binop boa ]]
| RelExpr:x "<=" ShiftExpr:y => [[ x y "<=" ast-binop boa ]]
| RelExpr:x "instanceof" ShiftExpr:y => [[ x y "instanceof" ast-binop boa ]]
| RelExpr:x "in" ShiftExpr:y => [[ x y "in" ast-binop boa ]]
| ShiftExpr
RelExprNoIn = RelExprNoIn:x ">" ShiftExpr:y => [[ x y ">" ast-binop boa ]]
| RelExprNoIn:x ">=" ShiftExpr:y => [[ x y ">=" ast-binop boa ]]
| RelExprNoIn:x "<" ShiftExpr:y => [[ x y "<" ast-binop boa ]]
| RelExprNoIn:x "<=" ShiftExpr:y => [[ x y "<=" ast-binop boa ]]
| RelExprNoIn:x "instanceof" ShiftExpr:y => [[ x y "instanceof" ast-binop boa ]]
| ShiftExpr
ShiftExpr = ShiftExpr:x "<<" AddExpr:y => [[ x y "<<" ast-binop boa ]]
| ShiftExpr:x ">>>" AddExpr:y => [[ x y ">>>" ast-binop boa ]]
| ShiftExpr:x ">>" AddExpr:y => [[ x y ">>" ast-binop boa ]]
| AddExpr | AddExpr
AddExpr = AddExpr:x "+" MulExpr:y => [[ x y "+" ast-binop boa ]] AddExpr = AddExpr:x "+" MulExpr:y => [[ x y "+" ast-binop boa ]]
| AddExpr:x "-" MulExpr:y => [[ x y "-" ast-binop boa ]] | AddExpr:x "-" MulExpr:y => [[ x y "-" ast-binop boa ]]
@ -64,14 +119,14 @@ MulExpr = MulExpr:x "*" Unary:y => [[ x y "*" ast-binop
| MulExpr:x "/" Unary:y => [[ x y "/" ast-binop boa ]] | MulExpr:x "/" Unary:y => [[ x y "/" ast-binop boa ]]
| MulExpr:x "%" Unary:y => [[ x y "%" ast-binop boa ]] | MulExpr:x "%" Unary:y => [[ x y "%" ast-binop boa ]]
| Unary | Unary
Unary = "-" Postfix:p => [[ p "-" ast-unop boa ]] Unary = "-" Unary:p => [[ p "-" ast-unop boa ]]
| "+" Postfix:p => [[ p ]] | "+" Unary:p => [[ p ]]
| "++" Postfix:p => [[ p "++" ast-preop boa ]] | "++" Unary:p => [[ p "++" ast-preop boa ]]
| "--" Postfix:p => [[ p "--" ast-preop boa ]] | "--" Unary:p => [[ p "--" ast-preop boa ]]
| "!" Postfix:p => [[ p "!" ast-unop boa ]] | "!" Unary:p => [[ p "!" ast-unop boa ]]
| "typeof" Postfix:p => [[ p "typeof" ast-unop boa ]] | "typeof" Unary:p => [[ p "typeof" ast-unop boa ]]
| "void" Postfix:p => [[ p "void" ast-unop boa ]] | "void" Unary:p => [[ p "void" ast-unop boa ]]
| "delete" Postfix:p => [[ p "delete" ast-unop boa ]] | "delete" Unary:p => [[ p "delete" ast-unop boa ]]
| Postfix | Postfix
Postfix = PrimExpr:p SpacesNoNl "++" => [[ p "++" ast-postop boa ]] Postfix = PrimExpr:p SpacesNoNl "++" => [[ p "++" ast-postop boa ]]
| PrimExpr:p SpacesNoNl "--" => [[ p "--" ast-postop boa ]] | PrimExpr:p SpacesNoNl "--" => [[ p "--" ast-postop boa ]]
@ -85,15 +140,15 @@ PrimExpr = PrimExpr:p "[" Expr:i "]" => [[ i p ast-getp
PrimExprHd = "(" Expr:e ")" => [[ e ]] PrimExprHd = "(" Expr:e ")" => [[ e ]]
| "this" => [[ ast-this boa ]] | "this" => [[ ast-this boa ]]
| Name => [[ ast-get boa ]] | Name => [[ ast-get boa ]]
| Number => [[ ast-number boa ]] | Number
| String => [[ ast-string boa ]] | String
| RegExp => [[ ast-regexp boa ]] | RegExp
| "function" FuncRest:fr => [[ fr ]] | "function" FuncRest:fr => [[ fr ]]
| "new" PrimExpr:n "(" Args:as ")" => [[ n as ast-new boa ]] | "new" PrimExpr:n "(" Args:as ")" => [[ n as ast-new boa ]]
| "new" PrimExpr:n => [[ n f ast-new boa ]] | "new" PrimExpr:n => [[ n f ast-new boa ]]
| "[" Args:es "]" => [[ es ast-array boa ]] | "[" Args:es "]" => [[ es ast-array boa ]]
| Json | Json
JsonBindings = (JsonBinding ("," JsonBinding => [[ second ]])* => [[ first2 swap prefix ]])? JsonBindings = (JsonBinding ("," JsonBinding => [[ second ]])* => [[ first2 swap prefix ]])?
Json = "{" JsonBindings:bs "}" => [[ bs ast-json boa ]] Json = "{" JsonBindings:bs "}" => [[ bs ast-json boa ]]
JsonBinding = JsonPropName:n ":" Expr:v => [[ n v ast-binding boa ]] JsonBinding = JsonPropName:n ":" Expr:v => [[ n v ast-binding boa ]]
JsonPropName = Name | Number | String | RegExp JsonPropName = Name | Number | String | RegExp
@ -105,15 +160,15 @@ Binding = Name:n "=" Expr:v => [[ n v ast-var
| Name:n => [[ n "undefined" ast-get boa ast-var boa ]] | Name:n => [[ n "undefined" ast-get boa ast-var boa ]]
Block = "{" SrcElems:ss "}" => [[ ss ]] Block = "{" SrcElems:ss "}" => [[ ss ]]
Bindings = (Binding ("," Binding => [[ second ]])* => [[ first2 swap prefix ]])? Bindings = (Binding ("," Binding => [[ second ]])* => [[ first2 swap prefix ]])?
For1 = "var" Binding => [[ second ]] For1 = "var" Bindings => [[ second ]]
| Expr | ExprNoIn
| Spaces => [[ "undefined" ast-get boa ]] | Spaces => [[ "undefined" ast-get boa ]]
For2 = Expr For2 = Expr
| Spaces => [[ "true" ast-get boa ]] | Spaces => [[ "true" ast-get boa ]]
For3 = Expr For3 = Expr
| Spaces => [[ "undefined" ast-get boa ]] | Spaces => [[ "undefined" ast-get boa ]]
ForIn1 = "var" Name:n => [[ n "undefined" ast-get boa ast-var boa ]] ForIn1 = "var" Name:n => [[ n "undefined" ast-get boa ast-var boa ]]
| Expr | PrimExprHd
Switch1 = "case" Expr:c ":" SrcElems:cs => [[ c cs ast-case boa ]] Switch1 = "case" Expr:c ":" SrcElems:cs => [[ c cs ast-case boa ]]
| "default" ":" SrcElems:cs => [[ cs ast-default boa ]] | "default" ":" SrcElems:cs => [[ cs ast-default boa ]]
SwitchBody = Switch1* SwitchBody = Switch1*
@ -134,6 +189,7 @@ Stmt = Block
| "try" Block:t "catch" "(" Name:e ")" Block:c Finally:f => [[ t e c f ast-try boa ]] | "try" Block:t "catch" "(" Name:e ")" Block:c Finally:f => [[ t e c f ast-try boa ]]
| "return" Expr:e Sc => [[ e ast-return boa ]] | "return" Expr:e Sc => [[ e ast-return boa ]]
| "return" Sc => [[ "undefined" ast-get boa ast-return boa ]] | "return" Sc => [[ "undefined" ast-get boa ast-return boa ]]
| "with" "(" Expr:e ")" Stmt:b => [[ e b ast-with boa ]]
| Expr:e Sc => [[ e ]] | Expr:e Sc => [[ e ]]
| ";" => [[ "undefined" ast-get boa ]] | ";" => [[ "undefined" ast-get boa ]]
SrcElem = "function" Name:n FuncRest:f => [[ n f ast-var boa ]] SrcElem = "function" Name:n FuncRest:f => [[ n f ast-var boa ]]


@ -19,5 +19,9 @@ IN: peg.javascript.tokenizer.tests
";" ";"
} }
} [ } [
"123; 'hello'; foo(x);" tokenize-javascript ast>> "123; 'hello'; foo(x);" tokenize-javascript
] unit-test ] unit-test
{ V{ T{ ast-regexp f "<(w+)[^>]*?)/>" "g" } } } [
"/<(\\w+)[^>]*?)\\/>/g" tokenize-javascript
] unit-test


@ -57,13 +57,23 @@ StringChars3 = (EscapeChar | !("'") .)* => [[ >string ]]
Str = '"""' StringChars1:cs '"""' => [[ cs ast-string boa ]] Str = '"""' StringChars1:cs '"""' => [[ cs ast-string boa ]]
| '"' StringChars2:cs '"' => [[ cs ast-string boa ]] | '"' StringChars2:cs '"' => [[ cs ast-string boa ]]
| "'" StringChars3:cs "'" => [[ cs ast-string boa ]] | "'" StringChars3:cs "'" => [[ cs ast-string boa ]]
RegExpBody = (!("/" | "\n" | "\r") .)* => [[ >string ]] RegExpFlags = NameRest* => [[ >string ]]
RegExp = "/" RegExpBody:r "/" => [[ r ast-regexp boa ]] NonTerminator = !("\n" | "\r") .
Special = "(" | ")" | "{" | "}" | "[" | "]" | "," | ";" BackslashSequence = "\\" NonTerminator => [[ second ]]
| "?" | ":" | "!==" | "!=" | "===" | "==" | "=" | ">=" RegExpFirstChar = !("*" | "\\" | "/") NonTerminator
| ">" | "<=" | "<" | "++" | "+=" | "+" | "--" | "-=" | BackslashSequence
| "-" | "*=" | "*" | "/=" | "/" | "%=" | "%" | "&&=" RegExpChar = !("\\" | "/") NonTerminator
| "&&" | "||=" | "||" | "." | "!" | BackslashSequence
RegExpChars = RegExpChar*
RegExpBody = RegExpFirstChar RegExpChars => [[ first2 swap prefix >string ]]
RegExp = "/" RegExpBody:b "/" RegExpFlags:fl => [[ b fl ast-regexp boa ]]
Special = "(" | ")" | "{" | "}" | "[" | "]" | "," | ";"
| "?" | ":" | "!==" | "!=" | "===" | "==" | "=" | ">="
| ">>>=" | ">>>" | ">>=" | ">>" | ">" | "<=" | "<<=" | "<<"
| "<" | "++" | "+=" | "+" | "--" | "-=" | "-" | "*="
| "*" | "/=" | "/" | "%=" | "%" | "&&=" | "&&" | "||="
| "||" | "." | "!" | "&=" | "&" | "|=" | "|" | "^="
| "^"
Tok = Spaces (Name | Keyword | Number | Str | RegExp | Special ) Tok = Spaces (Name | Keyword | Number | Str | RegExp | Special )
Toks = Tok* Spaces Toks = Tok* Spaces
;EBNF ;EBNF
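The new regexp rules match the tokenizer test added above: BackslashSequence keeps only the character after the backslash and RegExpFlags collects any trailing name characters, so, per that test:

USING: peg.javascript.tokenizer ;

"/<(\\w+)[^>]*?)\\/>/g" tokenize-javascript
! => V{ T{ ast-regexp f "<(w+)[^>]*?)/>" "g" } }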


@ -1,54 +1,51 @@
USING: kernel peg peg.parsers tools.test ; USING: kernel peg peg.parsers tools.test accessors ;
IN: peg.parsers.tests IN: peg.parsers.tests
[ V{ "a" } ] { V{ "a" } }
[ "a" "a" token "," token list-of parse parse-result-ast ] unit-test [ "a" "a" token "," token list-of parse ] unit-test
[ V{ "a" "a" "a" "a" } ] { V{ "a" "a" "a" "a" } }
[ "a,a,a,a" "a" token "," token list-of parse parse-result-ast ] unit-test [ "a,a,a,a" "a" token "," token list-of parse ] unit-test
[ f ] [ "a" "a" token "," token list-of-many parse ] must-fail
[ "a" "a" token "," token list-of-many parse ] unit-test
[ V{ "a" "a" "a" "a" } ] { V{ "a" "a" "a" "a" } }
[ "a,a,a,a" "a" token "," token list-of-many parse parse-result-ast ] unit-test [ "a,a,a,a" "a" token "," token list-of-many parse ] unit-test
[ f ] [ "aaa" "a" token 4 exactly-n parse ] must-fail
[ "aaa" "a" token 4 exactly-n parse ] unit-test
[ V{ "a" "a" "a" "a" } ] { V{ "a" "a" "a" "a" } }
[ "aaaa" "a" token 4 exactly-n parse parse-result-ast ] unit-test [ "aaaa" "a" token 4 exactly-n parse ] unit-test
[ f ] [ "aaa" "a" token 4 at-least-n parse ] must-fail
[ "aaa" "a" token 4 at-least-n parse ] unit-test
[ V{ "a" "a" "a" "a" } ] { V{ "a" "a" "a" "a" } }
[ "aaaa" "a" token 4 at-least-n parse parse-result-ast ] unit-test [ "aaaa" "a" token 4 at-least-n parse ] unit-test
[ V{ "a" "a" "a" "a" "a" } ] { V{ "a" "a" "a" "a" "a" } }
[ "aaaaa" "a" token 4 at-least-n parse parse-result-ast ] unit-test [ "aaaaa" "a" token 4 at-least-n parse ] unit-test
[ V{ "a" "a" "a" "a" } ] { V{ "a" "a" "a" "a" } }
[ "aaaa" "a" token 4 at-most-n parse parse-result-ast ] unit-test [ "aaaa" "a" token 4 at-most-n parse ] unit-test
[ V{ "a" "a" "a" "a" } ] { V{ "a" "a" "a" "a" } }
[ "aaaaa" "a" token 4 at-most-n parse parse-result-ast ] unit-test [ "aaaaa" "a" token 4 at-most-n parse ] unit-test
[ V{ "a" "a" "a" } ] { V{ "a" "a" "a" } }
[ "aaa" "a" token 3 4 from-m-to-n parse parse-result-ast ] unit-test [ "aaa" "a" token 3 4 from-m-to-n parse ] unit-test
[ V{ "a" "a" "a" "a" } ] { V{ "a" "a" "a" "a" } }
[ "aaaa" "a" token 3 4 from-m-to-n parse parse-result-ast ] unit-test [ "aaaa" "a" token 3 4 from-m-to-n parse ] unit-test
[ V{ "a" "a" "a" "a" } ] { V{ "a" "a" "a" "a" } }
[ "aaaaa" "a" token 3 4 from-m-to-n parse parse-result-ast ] unit-test [ "aaaaa" "a" token 3 4 from-m-to-n parse ] unit-test
[ 97 ] { 97 }
[ "a" any-char parse parse-result-ast ] unit-test [ "a" any-char parse ] unit-test
[ V{ } ] { V{ } }
[ "" epsilon parse parse-result-ast ] unit-test [ "" epsilon parse ] unit-test
{ "a" } [ { "a" } [
"a" "a" token just parse parse-result-ast "a" "a" token just parse
] unit-test ] unit-test


@ -3,7 +3,7 @@
USING: kernel sequences strings namespaces math assocs shuffle USING: kernel sequences strings namespaces math assocs shuffle
vectors arrays math.parser vectors arrays math.parser
unicode.categories sequences.deep peg peg.private unicode.categories sequences.deep peg peg.private
peg.search math.ranges words memoize ; peg.search math.ranges words ;
IN: peg.parsers IN: peg.parsers
TUPLE: just-parser p1 ; TUPLE: just-parser p1 ;
@ -19,8 +19,8 @@ TUPLE: just-parser p1 ;
M: just-parser (compile) ( parser -- quot ) M: just-parser (compile) ( parser -- quot )
just-parser-p1 compiled-parser just-pattern curry ; just-parser-p1 compiled-parser just-pattern curry ;
MEMO: just ( parser -- parser ) : just ( parser -- parser )
just-parser boa init-parser ; just-parser boa wrap-peg ;
: 1token ( ch -- parser ) 1string token ; : 1token ( ch -- parser ) 1string token ;
@ -45,10 +45,10 @@ MEMO: just ( parser -- parser )
PRIVATE> PRIVATE>
MEMO: exactly-n ( parser n -- parser' ) : exactly-n ( parser n -- parser' )
swap <repetition> seq ; swap <repetition> seq ;
MEMO: at-most-n ( parser n -- parser' ) : at-most-n ( parser n -- parser' )
dup zero? [ dup zero? [
2drop epsilon 2drop epsilon
] [ ] [
@ -56,15 +56,15 @@ MEMO: at-most-n ( parser n -- parser' )
-rot 1- at-most-n 2choice -rot 1- at-most-n 2choice
] if ; ] if ;
MEMO: at-least-n ( parser n -- parser' ) : at-least-n ( parser n -- parser' )
dupd exactly-n swap repeat0 2seq dupd exactly-n swap repeat0 2seq
[ flatten-vectors ] action ; [ flatten-vectors ] action ;
MEMO: from-m-to-n ( parser m n -- parser' ) : from-m-to-n ( parser m n -- parser' )
>r [ exactly-n ] 2keep r> swap - at-most-n 2seq >r [ exactly-n ] 2keep r> swap - at-most-n 2seq
[ flatten-vectors ] action ; [ flatten-vectors ] action ;
MEMO: pack ( begin body end -- parser ) : pack ( begin body end -- parser )
>r >r hide r> r> hide 3seq [ first ] action ; >r >r hide r> r> hide 3seq [ first ] action ;
: surrounded-by ( parser begin end -- parser' ) : surrounded-by ( parser begin end -- parser' )
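Dropping MEMO: from these combinators is consistent with the peg changes below: structurally equal pegs are now deduplicated through the global peg-cache when they are wrapped into parser tuples, so memoizing the constructor words themselves is redundant. Call sites are unchanged; for example, per the updated tests above:

USING: peg peg.parsers ;

"a,a,a,a" "a" token "," token list-of parse    ! => V{ "a" "a" "a" "a" }
"aaaaa" "a" token 3 4 from-m-to-n parse        ! => V{ "a" "a" "a" "a" }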


@ -5,99 +5,99 @@ USING: kernel tools.test strings namespaces arrays sequences
peg peg.private accessors words math accessors ; peg peg.private accessors words math accessors ;
IN: peg.tests IN: peg.tests
{ f } [ [
"endbegin" "begin" token parse "endbegin" "begin" token parse
] unit-test ] must-fail
{ "begin" "end" } [ { "begin" "end" } [
"beginend" "begin" token parse "beginend" "begin" token (parse)
{ ast>> remaining>> } get-slots { ast>> remaining>> } get-slots
>string >string
] unit-test ] unit-test
{ f } [ [
"" CHAR: a CHAR: z range parse "" CHAR: a CHAR: z range parse
] unit-test ] must-fail
{ f } [ [
"1bcd" CHAR: a CHAR: z range parse "1bcd" CHAR: a CHAR: z range parse
] unit-test ] must-fail
{ CHAR: a } [ { CHAR: a } [
"abcd" CHAR: a CHAR: z range parse ast>> "abcd" CHAR: a CHAR: z range parse
] unit-test ] unit-test
{ CHAR: z } [ { CHAR: z } [
"zbcd" CHAR: a CHAR: z range parse ast>> "zbcd" CHAR: a CHAR: z range parse
] unit-test ] unit-test
{ f } [ [
"bad" "a" token "b" token 2array seq parse "bad" "a" token "b" token 2array seq parse
] unit-test ] must-fail
{ V{ "g" "o" } } [ { V{ "g" "o" } } [
"good" "g" token "o" token 2array seq parse ast>> "good" "g" token "o" token 2array seq parse
] unit-test ] unit-test
{ "a" } [ { "a" } [
"abcd" "a" token "b" token 2array choice parse ast>> "abcd" "a" token "b" token 2array choice parse
] unit-test ] unit-test
{ "b" } [ { "b" } [
"bbcd" "a" token "b" token 2array choice parse ast>> "bbcd" "a" token "b" token 2array choice parse
] unit-test ] unit-test
{ f } [ [
"cbcd" "a" token "b" token 2array choice parse "cbcd" "a" token "b" token 2array choice parse
] unit-test ] must-fail
{ f } [ [
"" "a" token "b" token 2array choice parse "" "a" token "b" token 2array choice parse
] must-fail
{ 0 } [
"" "a" token repeat0 parse length
] unit-test ] unit-test
{ 0 } [ { 0 } [
"" "a" token repeat0 parse ast>> length "b" "a" token repeat0 parse length
] unit-test
{ 0 } [
"b" "a" token repeat0 parse ast>> length
] unit-test ] unit-test
{ V{ "a" "a" "a" } } [ { V{ "a" "a" "a" } } [
"aaab" "a" token repeat0 parse ast>> "aaab" "a" token repeat0 parse
] unit-test ] unit-test
{ f } [ [
"" "a" token repeat1 parse "" "a" token repeat1 parse
] unit-test ] must-fail
{ f } [ [
"b" "a" token repeat1 parse "b" "a" token repeat1 parse
] unit-test ] must-fail
{ V{ "a" "a" "a" } } [ { V{ "a" "a" "a" } } [
"aaab" "a" token repeat1 parse ast>> "aaab" "a" token repeat1 parse
] unit-test ] unit-test
{ V{ "a" "b" } } [ { V{ "a" "b" } } [
"ab" "a" token optional "b" token 2array seq parse ast>> "ab" "a" token optional "b" token 2array seq parse
] unit-test ] unit-test
{ V{ f "b" } } [ { V{ f "b" } } [
"b" "a" token optional "b" token 2array seq parse ast>> "b" "a" token optional "b" token 2array seq parse
] unit-test ] unit-test
{ f } [ [
"cb" "a" token optional "b" token 2array seq parse "cb" "a" token optional "b" token 2array seq parse
] unit-test ] must-fail
{ V{ CHAR: a CHAR: b } } [ { V{ CHAR: a CHAR: b } } [
"ab" "a" token ensure CHAR: a CHAR: z range dup 3array seq parse ast>> "ab" "a" token ensure CHAR: a CHAR: z range dup 3array seq parse
] unit-test ] unit-test
{ f } [ [
"bb" "a" token ensure CHAR: a CHAR: z range 2array seq parse "bb" "a" token ensure CHAR: a CHAR: z range 2array seq parse
] unit-test ] must-fail
{ t } [ { t } [
"a+b" "a+b"
@ -117,47 +117,47 @@ IN: peg.tests
parse [ t ] [ f ] if parse [ t ] [ f ] if
] unit-test ] unit-test
{ f } [ [
"a++b" "a++b"
"a" token "+" token "++" token 2array choice "b" token 3array seq "a" token "+" token "++" token 2array choice "b" token 3array seq
parse [ t ] [ f ] if parse [ t ] [ f ] if
] unit-test ] must-fail
{ 1 } [ { 1 } [
"a" "a" token [ drop 1 ] action parse ast>> "a" "a" token [ drop 1 ] action parse
] unit-test ] unit-test
{ V{ 1 1 } } [ { V{ 1 1 } } [
"aa" "a" token [ drop 1 ] action dup 2array seq parse ast>> "aa" "a" token [ drop 1 ] action dup 2array seq parse
] unit-test ] unit-test
{ f } [ [
"b" "a" token [ drop 1 ] action parse "b" "a" token [ drop 1 ] action parse
] unit-test ] must-fail
{ f } [ [
"b" [ CHAR: a = ] satisfy parse "b" [ CHAR: a = ] satisfy parse
] unit-test ] must-fail
{ CHAR: a } [ { CHAR: a } [
"a" [ CHAR: a = ] satisfy parse ast>> "a" [ CHAR: a = ] satisfy parse
] unit-test ] unit-test
{ "a" } [ { "a" } [
" a" "a" token sp parse ast>> " a" "a" token sp parse
] unit-test ] unit-test
{ "a" } [ { "a" } [
"a" "a" token sp parse ast>> "a" "a" token sp parse
] unit-test ] unit-test
{ V{ "a" } } [ { V{ "a" } } [
"[a]" "[" token hide "a" token "]" token hide 3array seq parse ast>> "[a]" "[" token hide "a" token "]" token hide 3array seq parse
] unit-test ] unit-test
{ f } [ [
"a]" "[" token hide "a" token "]" token hide 3array seq parse "a]" "[" token hide "a" token "]" token hide 3array seq parse
] unit-test ] must-fail
{ V{ "1" "-" "1" } V{ "1" "+" "1" } } [ { V{ "1" "-" "1" } V{ "1" "+" "1" } } [
@ -165,8 +165,8 @@ IN: peg.tests
[ "1" token , "-" token , "1" token , ] seq* , [ "1" token , "-" token , "1" token , ] seq* ,
[ "1" token , "+" token , "1" token , ] seq* , [ "1" token , "+" token , "1" token , ] seq* ,
] choice* ] choice*
"1-1" over parse ast>> swap "1-1" over parse swap
"1+1" swap parse ast>> "1+1" swap parse
] unit-test ] unit-test
: expr ( -- parser ) : expr ( -- parser )
@ -175,21 +175,22 @@ IN: peg.tests
[ expr ] delay "+" token "1" token 3seq "1" token 2choice ; [ expr ] delay "+" token "1" token 3seq "1" token 2choice ;
{ V{ V{ "1" "+" "1" } "+" "1" } } [ { V{ V{ "1" "+" "1" } "+" "1" } } [
"1+1+1" expr parse ast>> "1+1+1" expr parse
] unit-test ] unit-test
{ t } [ { t } [
#! Ensure a circular parser doesn't loop infinitely #! Ensure a circular parser doesn't loop infinitely
[ f , "a" token , ] seq* [ f , "a" token , ] seq*
dup parsers>> dup peg>> parsers>>
dupd 0 swap set-nth compile word? dupd 0 swap set-nth compile word?
] unit-test ] unit-test
{ f } [ [
"A" [ drop t ] satisfy [ 66 >= ] semantic parse "A" [ drop t ] satisfy [ 66 >= ] semantic parse
] unit-test ] must-fail
{ CHAR: B } [ { CHAR: B } [
"B" [ drop t ] satisfy [ 66 >= ] semantic parse ast>> "B" [ drop t ] satisfy [ 66 >= ] semantic parse
] unit-test ] unit-test
{ f } [ \ + T{ parser f f f } equal? ] unit-test


@ -1,59 +1,105 @@
! Copyright (C) 2007, 2008 Chris Double. ! Copyright (C) 2007, 2008 Chris Double.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences strings fry namespaces math assocs shuffle debugger io USING: kernel sequences strings fry namespaces math assocs shuffle debugger io
vectors arrays math.parser math.order vectors arrays math.parser math.order vectors combinators combinators.lib
unicode.categories compiler.units parser combinators.short-circuit classes sets unicode.categories compiler.units parser
words quotations effects memoize accessors locals effects splitting ; words quotations effects memoize accessors locals effects splitting ;
IN: peg IN: peg
USE: prettyprint USE: prettyprint
TUPLE: parse-result remaining ast ; TUPLE: parse-result remaining ast ;
TUPLE: parse-error position messages ;
TUPLE: parser peg compiled id ;
TUPLE: parser id compiled ; M: parser equal? { [ [ class ] bi@ = ] [ [ id>> ] bi@ = ] } 2&& ;
M: parser equal? [ id>> ] bi@ = ;
M: parser hashcode* id>> hashcode* ; M: parser hashcode* id>> hashcode* ;
C: <parser> parser C: <parse-result> parse-result
C: <parse-error> parse-error
M: parse-error error.
"Peg parsing error at character position " write dup position>> number>string write
"." print "Expected " write messages>> [ " or " write ] [ write ] interleave nl ;
SYMBOL: error-stack
: (merge-errors) ( a b -- c )
{
{ [ over position>> not ] [ nip ] }
{ [ dup position>> not ] [ drop ] }
[ 2dup [ position>> ] bi@ <=> {
{ +lt+ [ nip ] }
{ +gt+ [ drop ] }
{ +eq+ [ messages>> over messages>> union [ position>> ] dip <parse-error> ] }
} case
]
} cond ;
: merge-errors ( -- )
error-stack get dup length 1 > [
dup pop over pop swap (merge-errors) swap push
] [
drop
] if ;
: add-error ( remaining message -- )
<parse-error> error-stack get push ;
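A sketch of how this error machinery reaches callers, assuming the behaviour wired up later in this diff: parsers record candidate failures with add-error, the compiled seq and choice parsers interleave merge-errors, and compiled-parse throws the surviving parse-error when a parse produces no result, so error. reports the failure position and the expected alternatives:

USING: arrays continuations debugger kernel peg ;

[ "cb" "a" token "b" token 2array choice parse ]
[ error. ] recover
! prints something like:
!   Peg parsing error at character position 0.
!   Expected token 'a' or token 'b'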
SYMBOL: ignore SYMBOL: ignore
: <parse-result> ( remaining ast -- parse-result ) : packrat ( id -- cache )
parse-result boa ; #! The packrat cache is a mapping of parser-id->cache.
#! For each parser it maps to a cache holding a mapping
#! of position->result. The packrat cache therefore keeps
#! track of all parses that have occurred at each position
#! of the input string and the results obtained from that
#! parser.
\ packrat get [ drop H{ } clone ] cache ;
SYMBOL: packrat
SYMBOL: pos SYMBOL: pos
SYMBOL: input SYMBOL: input
SYMBOL: fail SYMBOL: fail
SYMBOL: lrstack SYMBOL: lrstack
SYMBOL: heads
: heads ( -- cache )
#! A mapping from position->peg-head. It maps a
#! position in the input string being parsed to
#! the head of the left recursion which is currently
#! being grown. It is 'f' at any position where
#! left recursion growth is not underway.
\ heads get ;
: failed? ( obj -- ? ) : failed? ( obj -- ? )
fail = ; fail = ;
: delegates ( -- cache ) : peg-cache ( -- cache )
\ delegates get-global [ H{ } clone dup \ delegates set-global ] unless* ; #! Holds a hashtable mapping a peg tuple to
#! the parser tuple for that peg. The parser tuple
#! holds a unique id and the compiled form of that peg.
\ peg-cache get-global [
H{ } clone dup \ peg-cache set-global
] unless* ;
: reset-pegs ( -- ) : reset-pegs ( -- )
H{ } clone \ delegates set-global ; H{ } clone \ peg-cache set-global ;
reset-pegs reset-pegs
#! An entry in the table of memoized parse results
#! ast = an AST produced from the parse
#! or the symbol 'fail'
#! or a left-recursion object
#! pos = the position in the input string of this entry
TUPLE: memo-entry ans pos ; TUPLE: memo-entry ans pos ;
C: <memo-entry> memo-entry
TUPLE: left-recursion seed rule head next ; TUPLE: left-recursion seed rule-id head next ;
C: <left-recursion> left-recursion TUPLE: peg-head rule-id involved-set eval-set ;
TUPLE: peg-head rule involved-set eval-set ;
C: <head> peg-head
: rule-parser ( rule -- parser ) : rule-id ( word -- id )
#! A rule is the parser compiled down to a word. It has #! A rule is the parser compiled down to a word. It has
#! a "peg" property containing the original parser. #! a "peg-id" property containing the id of the original parser.
"peg" word-prop ; "peg-id" word-prop ;
: input-slice ( -- slice ) : input-slice ( -- slice )
#! Return a slice of the input from the current parse position #! Return a slice of the input from the current parse position
@ -64,11 +110,6 @@ C: <head> peg-head
#! input slice is based on. #! input slice is based on.
dup slice? [ slice-from ] [ drop 0 ] if ; dup slice? [ slice-from ] [ drop 0 ] if ;
: input-cache ( parser -- cache )
#! From the packrat cache, obtain the cache for the parser
#! that maps the position to the parser result.
id>> packrat get [ drop H{ } clone ] cache ;
: process-rule-result ( p result -- result ) : process-rule-result ( p result -- result )
[ [
nip [ ast>> ] [ remaining>> ] bi input-from pos set nip [ ast>> ] [ remaining>> ] bi input-from pos set
@ -79,16 +120,18 @@ C: <head> peg-head
: eval-rule ( rule -- ast ) : eval-rule ( rule -- ast )
#! Evaluate a rule, return an ast resulting from it. #! Evaluate a rule, return an ast resulting from it.
#! Return fail if the rule failed. The rule has #! Return fail if the rule failed. The rule has
#! stack effect ( input -- parse-result ) #! stack effect ( -- parse-result )
pos get swap execute process-rule-result ; inline pos get swap execute process-rule-result ; inline
: memo ( pos rule -- memo-entry ) : memo ( pos id -- memo-entry )
#! Return the result from the memo cache. #! Return the result from the memo cache.
rule-parser input-cache at ; packrat at
! " memo result " write dup .
;
: set-memo ( memo-entry pos rule -- ) : set-memo ( memo-entry pos id -- )
#! Store an entry in the cache #! Store an entry in the cache
rule-parser input-cache set-at ; packrat set-at ;
: update-m ( ast m -- ) : update-m ( ast m -- )
swap >>ans pos get >>pos drop ; swap >>ans pos get >>pos drop ;
@ -111,22 +154,22 @@ C: <head> peg-head
] if ; inline ] if ; inline
: grow-lr ( h p r m -- ast ) : grow-lr ( h p r m -- ast )
>r >r [ heads get set-at ] 2keep r> r> >r >r [ heads set-at ] 2keep r> r>
pick over >r >r (grow-lr) r> r> pick over >r >r (grow-lr) r> r>
swap heads get delete-at swap heads delete-at
dup pos>> pos set ans>> dup pos>> pos set ans>>
; inline ; inline
:: (setup-lr) ( r l s -- ) :: (setup-lr) ( r l s -- )
s head>> l head>> eq? [ s head>> l head>> eq? [
l head>> s (>>head) l head>> s (>>head)
l head>> [ s rule>> suffix ] change-involved-set drop l head>> [ s rule-id>> suffix ] change-involved-set drop
r l s next>> (setup-lr) r l s next>> (setup-lr)
] unless ; ] unless ;
:: setup-lr ( r l -- ) :: setup-lr ( r l -- )
l head>> [ l head>> [
r V{ } clone V{ } clone <head> l (>>head) r rule-id V{ } clone V{ } clone peg-head boa l (>>head)
] unless ] unless
r l lrstack get (setup-lr) ; r l lrstack get (setup-lr) ;
@ -134,7 +177,7 @@ C: <head> peg-head
[let* | [let* |
h [ m ans>> head>> ] h [ m ans>> head>> ]
| |
h rule>> r eq? [ h rule-id>> r rule-id eq? [
m ans>> seed>> m (>>ans) m ans>> seed>> m (>>ans)
m ans>> failed? [ m ans>> failed? [
fail fail
@ -148,15 +191,15 @@ C: <head> peg-head
:: recall ( r p -- memo-entry ) :: recall ( r p -- memo-entry )
[let* | [let* |
m [ p r memo ] m [ p r rule-id memo ]
h [ p heads get at ] h [ p heads at ]
| |
h [ h [
m r h involved-set>> h rule>> suffix member? not and [ m r rule-id h involved-set>> h rule-id>> suffix member? not and [
fail p <memo-entry> fail p memo-entry boa
] [ ] [
r h eval-set>> member? [ r rule-id h eval-set>> member? [
h [ r swap remove ] change-eval-set drop h [ r rule-id swap remove ] change-eval-set drop
r eval-rule r eval-rule
m update-m m update-m
m m
@ -171,8 +214,8 @@ C: <head> peg-head
:: apply-non-memo-rule ( r p -- ast ) :: apply-non-memo-rule ( r p -- ast )
[let* | [let* |
lr [ fail r f lrstack get <left-recursion> ] lr [ fail r rule-id f lrstack get left-recursion boa ]
m [ lr lrstack set lr p <memo-entry> dup p r set-memo ] m [ lr lrstack set lr p memo-entry boa dup p r rule-id set-memo ]
ans [ r eval-rule ] ans [ r eval-rule ]
| |
lrstack get next>> lrstack set lrstack get next>> lrstack set
@ -194,10 +237,15 @@ C: <head> peg-head
nip nip
] if ; ] if ;
USE: prettyprint
: apply-rule ( r p -- ast ) : apply-rule ( r p -- ast )
! 2dup [ rule-id ] dip 2array "apply-rule: " write .
2dup recall [ 2dup recall [
! " memoed" print
nip apply-memo-rule nip apply-memo-rule
] [ ] [
! " not memoed" print
apply-non-memo-rule apply-non-memo-rule
] if* ; inline ] if* ; inline
@ -207,24 +255,28 @@ C: <head> peg-head
input set input set
0 pos set 0 pos set
f lrstack set f lrstack set
H{ } clone heads set V{ } clone error-stack set
H{ } clone packrat set H{ } clone \ heads set
H{ } clone \ packrat set
] H{ } make-assoc swap bind ; inline ] H{ } make-assoc swap bind ; inline
GENERIC: (compile) ( parser -- quot ) GENERIC: (compile) ( peg -- quot )
: execute-parser ( word -- result ) : process-parser-result ( result -- result )
pos get apply-rule dup failed? [ dup failed? [
drop f drop f
] [ ] [
input-slice swap <parse-result> input-slice swap <parse-result>
] if ; inline ] if ;
: execute-parser ( word -- result )
pos get apply-rule process-parser-result ; inline
: parser-body ( parser -- quot ) : parser-body ( parser -- quot )
#! Return the body of the word that is the compiled version #! Return the body of the word that is the compiled version
#! of the parser. #! of the parser.
gensym 2dup swap (compile) 0 1 <effect> define-declared swap dupd "peg" set-word-prop gensym 2dup swap peg>> (compile) 0 1 <effect> define-declared swap dupd id>> "peg-id" set-word-prop
[ execute-parser ] curry ; [ execute-parser ] curry ;
: compiled-parser ( parser -- word ) : compiled-parser ( parser -- word )
@ -257,11 +309,14 @@ SYMBOL: delayed
] with-compilation-unit ; ] with-compilation-unit ;
: compiled-parse ( state word -- result ) : compiled-parse ( state word -- result )
swap [ execute ] with-packrat ; inline swap [ execute [ error-stack get first throw ] unless* ] with-packrat ; inline
: parse ( input parser -- result ) : (parse) ( input parser -- result )
dup word? [ compile ] unless compiled-parse ; dup word? [ compile ] unless compiled-parse ;
: parse ( input parser -- ast )
(parse) ast>> ;
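The split matches the test changes earlier in this commit: parse is the convenience word that returns only the AST (throwing on failure), while (parse) still exposes the full parse-result for callers that need the unconsumed input:

USING: accessors peg strings ;

"beginend" "begin" token parse                          ! => "begin"
"beginend" "begin" token (parse) remaining>> >string    ! => "end"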
<PRIVATE <PRIVATE
SYMBOL: id SYMBOL: id
@ -274,24 +329,25 @@ SYMBOL: id
1 id set-global 0 1 id set-global 0
] if* ; ] if* ;
: init-parser ( parser -- parser ) : wrap-peg ( peg -- parser )
#! Set the delegate for the parser. Equivalent parsers #! Wrap a parser tuple around the peg object.
#! get a delegate with the same id. #! Look for an existing parser tuple for that
dup clone delegates [ #! peg object.
drop next-id f <parser> peg-cache [
] cache over set-delegate ; f next-id parser boa
] cache ;
TUPLE: token-parser symbol ; TUPLE: token-parser symbol ;
: parse-token ( input string -- result ) : parse-token ( input string -- result )
#! Parse the string, returning a parse result #! Parse the string, returning a parse result
dup >r ?head-slice [ dup >r ?head-slice [
r> <parse-result> r> <parse-result> f f add-error
] [ ] [
r> 2drop f drop pos get "token '" r> append "'" append 1vector add-error f
] if ; ] if ;
M: token-parser (compile) ( parser -- quot ) M: token-parser (compile) ( peg -- quot )
symbol>> '[ input-slice , parse-token ] ; symbol>> '[ input-slice , parse-token ] ;
TUPLE: satisfy-parser quot ; TUPLE: satisfy-parser quot ;
@ -308,7 +364,7 @@ TUPLE: satisfy-parser quot ;
] if ; inline ] if ; inline
M: satisfy-parser (compile) ( parser -- quot ) M: satisfy-parser (compile) ( peg -- quot )
quot>> '[ input-slice , parse-satisfy ] ; quot>> '[ input-slice , parse-satisfy ] ;
TUPLE: range-parser min max ; TUPLE: range-parser min max ;
@ -324,7 +380,7 @@ TUPLE: range-parser min max ;
] if ] if
] if ; ] if ;
M: range-parser (compile) ( parser -- quot ) M: range-parser (compile) ( peg -- quot )
[ min>> ] [ max>> ] bi '[ input-slice , , parse-range ] ; [ min>> ] [ max>> ] bi '[ input-slice , , parse-range ] ;
TUPLE: seq-parser parsers ; TUPLE: seq-parser parsers ;
@ -351,18 +407,20 @@ TUPLE: seq-parser parsers ;
2drop f 2drop f
] if ; inline ] if ; inline
M: seq-parser (compile) ( parser -- quot ) M: seq-parser (compile) ( peg -- quot )
[ [
[ input-slice V{ } clone <parse-result> ] % [ input-slice V{ } clone <parse-result> ] %
parsers>> [ compiled-parser 1quotation , \ parse-seq-element , ] each parsers>> unclip compiled-parser 1quotation , \ parse-seq-element , [
compiled-parser 1quotation [ merge-errors ] compose , \ parse-seq-element , ] each
] [ ] make ; ] [ ] make ;
TUPLE: choice-parser parsers ; TUPLE: choice-parser parsers ;
M: choice-parser (compile) ( parser -- quot ) M: choice-parser (compile) ( peg -- quot )
[ [
f , f ,
parsers>> [ compiled-parser 1quotation , \ unless* , ] each parsers>> [ compiled-parser ] map
unclip 1quotation , \ unless* , [ 1quotation [ merge-errors ] compose , \ unless* , ] each
] [ ] make ; ] [ ] make ;
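A hedged usage sketch (not from the commit): choice tries each alternative in
order and keeps the first success; the compiled quotation above additionally
merges the errors recorded by the alternatives that failed.

    USING: peg prettyprint ;

    "b" "a" token "b" token 2choice parse .   ! "b"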
TUPLE: repeat0-parser p1 ; TUPLE: repeat0-parser p1 ;
@ -376,7 +434,7 @@ TUPLE: repeat0-parser p1 ;
nip nip
] if* ; inline ] if* ; inline
M: repeat0-parser (compile) ( parser -- quot ) M: repeat0-parser (compile) ( peg -- quot )
p1>> compiled-parser 1quotation '[ p1>> compiled-parser 1quotation '[
input-slice V{ } clone <parse-result> , swap (repeat) input-slice V{ } clone <parse-result> , swap (repeat)
] ; ] ;
@ -390,7 +448,7 @@ TUPLE: repeat1-parser p1 ;
f f
] if* ; ] if* ;
M: repeat1-parser (compile) ( parser -- quot ) M: repeat1-parser (compile) ( peg -- quot )
p1>> compiled-parser 1quotation '[ p1>> compiled-parser 1quotation '[
input-slice V{ } clone <parse-result> , swap (repeat) repeat1-empty-check input-slice V{ } clone <parse-result> , swap (repeat) repeat1-empty-check
] ; ] ;
@ -400,7 +458,7 @@ TUPLE: optional-parser p1 ;
: check-optional ( result -- result ) : check-optional ( result -- result )
[ input-slice f <parse-result> ] unless* ; [ input-slice f <parse-result> ] unless* ;
M: optional-parser (compile) ( parser -- quot ) M: optional-parser (compile) ( peg -- quot )
p1>> compiled-parser 1quotation '[ @ check-optional ] ; p1>> compiled-parser 1quotation '[ @ check-optional ] ;
TUPLE: semantic-parser p1 quot ; TUPLE: semantic-parser p1 quot ;
@ -412,7 +470,7 @@ TUPLE: semantic-parser p1 quot ;
drop drop
] if ; inline ] if ; inline
M: semantic-parser (compile) ( parser -- quot ) M: semantic-parser (compile) ( peg -- quot )
[ p1>> compiled-parser 1quotation ] [ quot>> ] bi [ p1>> compiled-parser 1quotation ] [ quot>> ] bi
'[ @ , check-semantic ] ; '[ @ , check-semantic ] ;
@ -421,7 +479,7 @@ TUPLE: ensure-parser p1 ;
: check-ensure ( old-input result -- result ) : check-ensure ( old-input result -- result )
[ ignore <parse-result> ] [ drop f ] if ; [ ignore <parse-result> ] [ drop f ] if ;
M: ensure-parser (compile) ( parser -- quot ) M: ensure-parser (compile) ( peg -- quot )
p1>> compiled-parser 1quotation '[ input-slice @ check-ensure ] ; p1>> compiled-parser 1quotation '[ input-slice @ check-ensure ] ;
TUPLE: ensure-not-parser p1 ; TUPLE: ensure-not-parser p1 ;
@ -429,7 +487,7 @@ TUPLE: ensure-not-parser p1 ;
: check-ensure-not ( old-input result -- result ) : check-ensure-not ( old-input result -- result )
[ drop f ] [ ignore <parse-result> ] if ; [ drop f ] [ ignore <parse-result> ] if ;
M: ensure-not-parser (compile) ( parser -- quot ) M: ensure-not-parser (compile) ( peg -- quot )
p1>> compiled-parser 1quotation '[ input-slice @ check-ensure-not ] ; p1>> compiled-parser 1quotation '[ input-slice @ check-ensure-not ] ;
TUPLE: action-parser p1 quot ; TUPLE: action-parser p1 quot ;
@ -441,7 +499,7 @@ TUPLE: action-parser p1 quot ;
drop drop
] if ; inline ] if ; inline
M: action-parser (compile) ( parser -- quot ) M: action-parser (compile) ( peg -- quot )
[ p1>> compiled-parser 1quotation ] [ quot>> ] bi '[ @ , check-action ] ; [ p1>> compiled-parser 1quotation ] [ quot>> ] bi '[ @ , check-action ] ;
: left-trim-slice ( string -- string ) : left-trim-slice ( string -- string )
@ -453,14 +511,14 @@ M: action-parser (compile) ( parser -- quot )
TUPLE: sp-parser p1 ; TUPLE: sp-parser p1 ;
M: sp-parser (compile) ( parser -- quot ) M: sp-parser (compile) ( peg -- quot )
p1>> compiled-parser 1quotation '[ p1>> compiled-parser 1quotation '[
input-slice left-trim-slice input-from pos set @ input-slice left-trim-slice input-from pos set @
] ; ] ;
TUPLE: delay-parser quot ; TUPLE: delay-parser quot ;
M: delay-parser (compile) ( parser -- quot ) M: delay-parser (compile) ( peg -- quot )
#! For efficiency we memoize the quotation. #! For efficiency we memoize the quotation.
#! This way it is run only once and the #! This way it is run only once and the
#! parser constructed once at run time. #! parser constructed once at run time.
@ -468,29 +526,26 @@ M: delay-parser (compile) ( parser -- quot )
TUPLE: box-parser quot ; TUPLE: box-parser quot ;
M: box-parser (compile) ( parser -- quot ) M: box-parser (compile) ( peg -- quot )
#! Calls the quotation at compile time #! Calls the quotation at compile time
#! to produce the parser to be compiled. #! to produce the parser to be compiled.
#! This differs from 'delay' which calls #! This differs from 'delay' which calls
#! it at run time. Due to using the runtime #! it at run time.
#! environment at compile time, this parser quot>> call compiled-parser 1quotation ;
#! must not be cached, so we clear out the
#! delgates cache.
f >>compiled quot>> call compiled-parser 1quotation ;
PRIVATE> PRIVATE>
: token ( string -- parser ) : token ( string -- parser )
token-parser boa init-parser ; token-parser boa wrap-peg ;
: satisfy ( quot -- parser ) : satisfy ( quot -- parser )
satisfy-parser boa init-parser ; satisfy-parser boa wrap-peg ;
: range ( min max -- parser ) : range ( min max -- parser )
range-parser boa init-parser ; range-parser boa wrap-peg ;
: seq ( seq -- parser ) : seq ( seq -- parser )
seq-parser boa init-parser ; seq-parser boa wrap-peg ;
: 2seq ( parser1 parser2 -- parser ) : 2seq ( parser1 parser2 -- parser )
2array seq ; 2array seq ;
@ -505,7 +560,7 @@ PRIVATE>
{ } make seq ; inline { } make seq ; inline
: choice ( seq -- parser ) : choice ( seq -- parser )
choice-parser boa init-parser ; choice-parser boa wrap-peg ;
: 2choice ( parser1 parser2 -- parser ) : 2choice ( parser1 parser2 -- parser )
2array choice ; 2array choice ;
@ -520,38 +575,38 @@ PRIVATE>
{ } make choice ; inline { } make choice ; inline
: repeat0 ( parser -- parser ) : repeat0 ( parser -- parser )
repeat0-parser boa init-parser ; repeat0-parser boa wrap-peg ;
: repeat1 ( parser -- parser ) : repeat1 ( parser -- parser )
repeat1-parser boa init-parser ; repeat1-parser boa wrap-peg ;
: optional ( parser -- parser ) : optional ( parser -- parser )
optional-parser boa init-parser ; optional-parser boa wrap-peg ;
: semantic ( parser quot -- parser ) : semantic ( parser quot -- parser )
semantic-parser boa init-parser ; semantic-parser boa wrap-peg ;
: ensure ( parser -- parser ) : ensure ( parser -- parser )
ensure-parser boa init-parser ; ensure-parser boa wrap-peg ;
: ensure-not ( parser -- parser ) : ensure-not ( parser -- parser )
ensure-not-parser boa init-parser ; ensure-not-parser boa wrap-peg ;
: action ( parser quot -- parser ) : action ( parser quot -- parser )
action-parser boa init-parser ; action-parser boa wrap-peg ;
: sp ( parser -- parser ) : sp ( parser -- parser )
sp-parser boa init-parser ; sp-parser boa wrap-peg ;
: hide ( parser -- parser ) : hide ( parser -- parser )
[ drop ignore ] action ; [ drop ignore ] action ;
: delay ( quot -- parser ) : delay ( quot -- parser )
delay-parser boa init-parser ; delay-parser boa wrap-peg ;
: box ( quot -- parser ) : box ( quot -- parser )
#! because a box has its quotation run at compile time #! because a box has its quotation run at compile time
#! it must always have a new parser delgate created, #! it must always have a new parser wrapper created,
#! not a cached one. This is because the same box, #! not a cached one. This is because the same box,
#! compiled twice can have a different compiled word #! compiled twice can have a different compiled word
#! due to running at compile time. #! due to running at compile time.
@ -561,7 +616,7 @@ PRIVATE>
#! parse. The action adds an indirection with a parser type #! parse. The action adds an indirection with a parser type
#! that gets memoized and fixes this. Need to rethink how #! that gets memoized and fixes this. Need to rethink how
#! to fix boxes so this isn't needed... #! to fix boxes so this isn't needed...
box-parser boa next-id f <parser> over set-delegate [ ] action ; box-parser boa f next-id parser boa [ ] action ;
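A hedged usage sketch for the constructors above (not part of the commit;
>string comes from the strings vocabulary):

    USING: peg prettyprint strings ;

    : letters ( -- parser )
        CHAR: a CHAR: z range repeat1 [ >string ] action ;

    "abc" letters parse .   ! "abc"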
ERROR: parse-failed input word ; ERROR: parse-failed input word ;
@ -6,39 +6,39 @@ USING: kernel tools.test peg peg.ebnf peg.pl0
IN: peg.pl0.tests IN: peg.pl0.tests
{ t } [ { t } [
"CONST foo = 1;" "block" \ pl0 rule parse remaining>> empty? "CONST foo = 1;" "block" \ pl0 rule (parse) remaining>> empty?
] unit-test ] unit-test
{ t } [ { t } [
"VAR foo;" "block" \ pl0 rule parse remaining>> empty? "VAR foo;" "block" \ pl0 rule (parse) remaining>> empty?
] unit-test ] unit-test
{ t } [ { t } [
"VAR foo,bar , baz;" "block" \ pl0 rule parse remaining>> empty? "VAR foo,bar , baz;" "block" \ pl0 rule (parse) remaining>> empty?
] unit-test ] unit-test
{ t } [ { t } [
"foo := 5" "statement" \ pl0 rule parse remaining>> empty? "foo := 5" "statement" \ pl0 rule (parse) remaining>> empty?
] unit-test ] unit-test
{ t } [ { t } [
"BEGIN foo := 5 END" "statement" \ pl0 rule parse remaining>> empty? "BEGIN foo := 5 END" "statement" \ pl0 rule (parse) remaining>> empty?
] unit-test ] unit-test
{ t } [ { t } [
"IF 1=1 THEN foo := 5" "statement" \ pl0 rule parse remaining>> empty? "IF 1=1 THEN foo := 5" "statement" \ pl0 rule (parse) remaining>> empty?
] unit-test ] unit-test
{ t } [ { t } [
"WHILE 1=1 DO foo := 5" "statement" \ pl0 rule parse remaining>> empty? "WHILE 1=1 DO foo := 5" "statement" \ pl0 rule (parse) remaining>> empty?
] unit-test ] unit-test
{ t } [ { t } [
"WHILE ODD 1 DO foo := 5" "statement" \ pl0 rule parse remaining>> empty? "WHILE ODD 1 DO foo := 5" "statement" \ pl0 rule (parse) remaining>> empty?
] unit-test ] unit-test
{ t } [ { t } [
"PROCEDURE square; BEGIN squ:=x*x END" "block" \ pl0 rule parse remaining>> empty? "PROCEDURE square; BEGIN squ:=x*x END" "block" \ pl0 rule (parse) remaining>> empty?
] unit-test ] unit-test
{ t } [ { t } [
@ -58,7 +58,7 @@ BEGIN
x := x + 1; x := x + 1;
END END
END. END.
"> pl0 remaining>> empty? "> main \ pl0 rule (parse) remaining>> empty?
] unit-test ] unit-test
{ f } [ { f } [
@ -124,5 +124,5 @@ BEGIN
y := 36; y := 36;
CALL gcd; CALL gcd;
END. END.
"> pl0 remaining>> empty? "> main \ pl0 rule (parse) remaining>> empty?
] unit-test ] unit-test
@ -1,7 +1,7 @@
! Copyright (C) 2006 Chris Double. ! Copyright (C) 2006 Chris Double.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel math io io.streams.string sequences strings USING: kernel math io io.streams.string sequences strings
combinators peg memoize arrays ; combinators peg memoize arrays continuations ;
IN: peg.search IN: peg.search
: tree-write ( object -- ) : tree-write ( object -- )
@ -16,15 +16,12 @@ MEMO: any-char-parser ( -- parser )
[ drop t ] satisfy ; [ drop t ] satisfy ;
: search ( string parser -- seq ) : search ( string parser -- seq )
any-char-parser [ drop f ] action 2array choice repeat0 parse dup [ any-char-parser [ drop f ] action 2array choice repeat0
parse-result-ast sift [ parse sift ] [ 3drop { } ] recover ;
] [
drop { }
] if ;
: (replace) ( string parser -- seq ) : (replace) ( string parser -- seq )
any-char-parser 2array choice repeat0 parse parse-result-ast sift ; any-char-parser 2array choice repeat0 parse sift ;
: replace ( string parser -- result ) : replace ( string parser -- result )
[ (replace) [ tree-write ] each ] with-string-writer ; [ (replace) [ tree-write ] each ] with-string-writer ;
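A hedged sketch of search (not part of the commit): it scans the whole input,
collecting the AST of every match of the given parser and skipping everything
else; the exact sequence type of the result is an assumption.

    USING: peg peg.search prettyprint strings ;

    "ab 12 cd 34" CHAR: 0 CHAR: 9 range repeat1 [ >string ] action search .
    ! V{ "12" "34" }

replace works the same way but keeps the unmatched characters and writes the
parser's action results in place of the matches.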