Merge branch 'master' of git://double.co.nz/git/factor
commit
18a9bc3355
|
@ -1,7 +1,7 @@
|
||||||
! Copyright (C) 2007 Chris Double.
|
! Copyright (C) 2007 Chris Double.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
!
|
!
|
||||||
USING: kernel tools.test peg peg.ebnf compiler.units ;
|
USING: kernel tools.test peg peg.ebnf ;
|
||||||
IN: peg.ebnf.tests
|
IN: peg.ebnf.tests
|
||||||
|
|
||||||
{ T{ ebnf-non-terminal f "abc" } } [
|
{ T{ ebnf-non-terminal f "abc" } } [
|
||||||
|
@ -109,13 +109,37 @@ IN: peg.ebnf.tests
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
{ V{ "a" "b" } } [
|
{ V{ "a" "b" } } [
|
||||||
"foo='a' 'b'" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast
|
"ab" [EBNF foo='a' 'b' EBNF] call parse-result-ast
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
{ V{ 1 "b" } } [
|
{ V{ 1 "b" } } [
|
||||||
"foo=('a')[[ drop 1 ]] 'b'" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast
|
"ab" [EBNF foo=('a')[[ drop 1 ]] 'b' EBNF] call parse-result-ast
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
{ V{ 1 2 } } [
|
{ V{ 1 2 } } [
|
||||||
"foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]]" ebnf>quot with-compilation-unit "ab" foo parse parse-result-ast
|
"ab" [EBNF foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]] EBNF] call parse-result-ast
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ CHAR: A } [
|
||||||
|
"A" [EBNF foo=[A-Z] EBNF] call parse-result-ast
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ CHAR: Z } [
|
||||||
|
"Z" [EBNF foo=[A-Z] EBNF] call parse-result-ast
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ f } [
|
||||||
|
"0" [EBNF foo=[A-Z] EBNF] call
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ CHAR: 0 } [
|
||||||
|
"0" [EBNF foo=[^A-Z] EBNF] call parse-result-ast
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ f } [
|
||||||
|
"A" [EBNF foo=[^A-Z] EBNF] call
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ f } [
|
||||||
|
"Z" [EBNF foo=[^A-Z] EBNF] call
|
||||||
] unit-test
|
] unit-test
|
|
@ -1,6 +1,6 @@
|
||||||
! Copyright (C) 2007 Chris Double.
|
! Copyright (C) 2007 Chris Double.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: kernel parser words arrays strings math.parser sequences
|
USING: kernel compiler.units parser words arrays strings math.parser sequences
|
||||||
quotations vectors namespaces math assocs continuations peg
|
quotations vectors namespaces math assocs continuations peg
|
||||||
peg.parsers unicode.categories multiline combinators.lib
|
peg.parsers unicode.categories multiline combinators.lib
|
||||||
splitting ;
|
splitting ;
|
||||||
|
@ -9,6 +9,8 @@ IN: peg.ebnf
|
||||||
TUPLE: ebnf-non-terminal symbol ;
|
TUPLE: ebnf-non-terminal symbol ;
|
||||||
TUPLE: ebnf-terminal symbol ;
|
TUPLE: ebnf-terminal symbol ;
|
||||||
TUPLE: ebnf-any-character ;
|
TUPLE: ebnf-any-character ;
|
||||||
|
TUPLE: ebnf-range pattern ;
|
||||||
|
TUPLE: ebnf-ensure group ;
|
||||||
TUPLE: ebnf-ensure-not group ;
|
TUPLE: ebnf-ensure-not group ;
|
||||||
TUPLE: ebnf-choice options ;
|
TUPLE: ebnf-choice options ;
|
||||||
TUPLE: ebnf-sequence elements ;
|
TUPLE: ebnf-sequence elements ;
|
||||||
|
@ -22,6 +24,8 @@ TUPLE: ebnf rules ;
|
||||||
C: <ebnf-non-terminal> ebnf-non-terminal
|
C: <ebnf-non-terminal> ebnf-non-terminal
|
||||||
C: <ebnf-terminal> ebnf-terminal
|
C: <ebnf-terminal> ebnf-terminal
|
||||||
C: <ebnf-any-character> ebnf-any-character
|
C: <ebnf-any-character> ebnf-any-character
|
||||||
|
C: <ebnf-range> ebnf-range
|
||||||
|
C: <ebnf-ensure> ebnf-ensure
|
||||||
C: <ebnf-ensure-not> ebnf-ensure-not
|
C: <ebnf-ensure-not> ebnf-ensure-not
|
||||||
C: <ebnf-choice> ebnf-choice
|
C: <ebnf-choice> ebnf-choice
|
||||||
C: <ebnf-sequence> ebnf-sequence
|
C: <ebnf-sequence> ebnf-sequence
|
||||||
|
@ -32,84 +36,6 @@ C: <ebnf-rule> ebnf-rule
|
||||||
C: <ebnf-action> ebnf-action
|
C: <ebnf-action> ebnf-action
|
||||||
C: <ebnf> ebnf
|
C: <ebnf> ebnf
|
||||||
|
|
||||||
SYMBOL: parsers
|
|
||||||
SYMBOL: non-terminals
|
|
||||||
|
|
||||||
: reset-parser-generation ( -- )
|
|
||||||
V{ } clone parsers set
|
|
||||||
H{ } clone non-terminals set ;
|
|
||||||
|
|
||||||
: store-parser ( parser -- number )
|
|
||||||
parsers get [ push ] keep length 1- ;
|
|
||||||
|
|
||||||
: get-parser ( index -- parser )
|
|
||||||
parsers get nth ;
|
|
||||||
|
|
||||||
: non-terminal-index ( name -- number )
|
|
||||||
dup non-terminals get at [
|
|
||||||
nip
|
|
||||||
] [
|
|
||||||
f store-parser [ swap non-terminals get set-at ] keep
|
|
||||||
] if* ;
|
|
||||||
|
|
||||||
GENERIC: (generate-parser) ( ast -- id )
|
|
||||||
|
|
||||||
: generate-parser ( ast -- id )
|
|
||||||
(generate-parser) ;
|
|
||||||
|
|
||||||
M: ebnf-terminal (generate-parser) ( ast -- id )
|
|
||||||
ebnf-terminal-symbol token sp store-parser ;
|
|
||||||
|
|
||||||
M: ebnf-non-terminal (generate-parser) ( ast -- id )
|
|
||||||
[
|
|
||||||
ebnf-non-terminal-symbol dup non-terminal-index ,
|
|
||||||
parsers get , \ nth , [ search ] [ 2drop f ] recover , \ or ,
|
|
||||||
] [ ] make delay sp store-parser ;
|
|
||||||
|
|
||||||
M: ebnf-any-character (generate-parser) ( ast -- id )
|
|
||||||
drop [ drop t ] satisfy store-parser ;
|
|
||||||
|
|
||||||
M: ebnf-choice (generate-parser) ( ast -- id )
|
|
||||||
ebnf-choice-options [
|
|
||||||
generate-parser get-parser
|
|
||||||
] map choice store-parser ;
|
|
||||||
|
|
||||||
M: ebnf-sequence (generate-parser) ( ast -- id )
|
|
||||||
ebnf-sequence-elements [
|
|
||||||
generate-parser get-parser
|
|
||||||
] map seq store-parser ;
|
|
||||||
|
|
||||||
M: ebnf-ensure-not (generate-parser) ( ast -- id )
|
|
||||||
ebnf-ensure-not-group generate-parser get-parser ensure-not store-parser ;
|
|
||||||
|
|
||||||
M: ebnf-repeat0 (generate-parser) ( ast -- id )
|
|
||||||
ebnf-repeat0-group generate-parser get-parser repeat0 store-parser ;
|
|
||||||
|
|
||||||
M: ebnf-repeat1 (generate-parser) ( ast -- id )
|
|
||||||
ebnf-repeat1-group generate-parser get-parser repeat1 store-parser ;
|
|
||||||
|
|
||||||
M: ebnf-optional (generate-parser) ( ast -- id )
|
|
||||||
ebnf-optional-elements generate-parser get-parser optional store-parser ;
|
|
||||||
|
|
||||||
M: ebnf-rule (generate-parser) ( ast -- id )
|
|
||||||
dup ebnf-rule-symbol non-terminal-index swap
|
|
||||||
ebnf-rule-elements generate-parser get-parser ! nt-id body
|
|
||||||
swap [ parsers get set-nth ] keep ;
|
|
||||||
|
|
||||||
M: ebnf-action (generate-parser) ( ast -- id )
|
|
||||||
[ ebnf-action-parser generate-parser get-parser ] keep
|
|
||||||
ebnf-action-code string-lines parse-lines action store-parser ;
|
|
||||||
|
|
||||||
M: vector (generate-parser) ( ast -- id )
|
|
||||||
[ generate-parser ] map peek ;
|
|
||||||
|
|
||||||
M: ebnf (generate-parser) ( ast -- id )
|
|
||||||
ebnf-rules [
|
|
||||||
generate-parser
|
|
||||||
] map peek ;
|
|
||||||
|
|
||||||
DEFER: 'rhs'
|
|
||||||
|
|
||||||
: syntax ( string -- parser )
|
: syntax ( string -- parser )
|
||||||
#! Parses the string, ignoring white space, and
|
#! Parses the string, ignoring white space, and
|
||||||
#! does not put the result in the AST.
|
#! does not put the result in the AST.
|
||||||
|
@ -149,6 +75,7 @@ DEFER: 'rhs'
|
||||||
[ dup CHAR: [ = ]
|
[ dup CHAR: [ = ]
|
||||||
[ dup CHAR: . = ]
|
[ dup CHAR: . = ]
|
||||||
[ dup CHAR: ! = ]
|
[ dup CHAR: ! = ]
|
||||||
|
[ dup CHAR: & = ]
|
||||||
[ dup CHAR: * = ]
|
[ dup CHAR: * = ]
|
||||||
[ dup CHAR: + = ]
|
[ dup CHAR: + = ]
|
||||||
[ dup CHAR: ? = ]
|
[ dup CHAR: ? = ]
|
||||||
|
@ -164,6 +91,14 @@ DEFER: 'rhs'
|
||||||
#! A parser to match the symbol for any character match.
|
#! A parser to match the symbol for any character match.
|
||||||
[ CHAR: . = ] satisfy [ drop <ebnf-any-character> ] action ;
|
[ CHAR: . = ] satisfy [ drop <ebnf-any-character> ] action ;
|
||||||
|
|
||||||
|
: 'range-parser' ( -- parser )
|
||||||
|
#! Match the syntax for declaring character ranges
|
||||||
|
[
|
||||||
|
[ "[" syntax , "[" token ensure-not , ] seq* hide ,
|
||||||
|
[ CHAR: ] = not ] satisfy repeat1 ,
|
||||||
|
"]" syntax ,
|
||||||
|
] seq* [ first >string <ebnf-range> ] action ;
|
||||||
|
|
||||||
: 'element' ( -- parser )
|
: 'element' ( -- parser )
|
||||||
#! An element of a rule. It can be a terminal or a
|
#! An element of a rule. It can be a terminal or a
|
||||||
#! non-terminal but must not be followed by a "=".
|
#! non-terminal but must not be followed by a "=".
|
||||||
|
@ -173,6 +108,7 @@ DEFER: 'rhs'
|
||||||
[
|
[
|
||||||
'non-terminal' ,
|
'non-terminal' ,
|
||||||
'terminal' ,
|
'terminal' ,
|
||||||
|
'range-parser' ,
|
||||||
'any-character' ,
|
'any-character' ,
|
||||||
] choice* ,
|
] choice* ,
|
||||||
"=" syntax ensure-not ,
|
"=" syntax ensure-not ,
|
||||||
|
@ -194,7 +130,6 @@ DEFER: 'choice'
|
||||||
"*" token sp ensure-not ,
|
"*" token sp ensure-not ,
|
||||||
"+" token sp ensure-not ,
|
"+" token sp ensure-not ,
|
||||||
"?" token sp ensure-not ,
|
"?" token sp ensure-not ,
|
||||||
"[[" token sp ensure-not ,
|
|
||||||
] seq* hide grouped ;
|
] seq* hide grouped ;
|
||||||
|
|
||||||
: 'repeat0' ( -- parser )
|
: 'repeat0' ( -- parser )
|
||||||
|
@ -212,13 +147,6 @@ DEFER: 'choice'
|
||||||
[ drop t ] satisfy ,
|
[ drop t ] satisfy ,
|
||||||
] seq* [ first ] action repeat0 [ >string ] action ;
|
] seq* [ first ] action repeat0 [ >string ] action ;
|
||||||
|
|
||||||
: 'action' ( -- parser )
|
|
||||||
[
|
|
||||||
"(" [ 'choice' sp ] delay ")" syntax-pack ,
|
|
||||||
"[[" 'factor-code' "]]" syntax-pack ,
|
|
||||||
] seq* [ first2 <ebnf-action> ] action ;
|
|
||||||
|
|
||||||
|
|
||||||
: 'ensure-not' ( -- parser )
|
: 'ensure-not' ( -- parser )
|
||||||
#! Parses the '!' syntax to ensure that
|
#! Parses the '!' syntax to ensure that
|
||||||
#! something that matches the following elements do
|
#! something that matches the following elements do
|
||||||
|
@ -228,17 +156,37 @@ DEFER: 'choice'
|
||||||
'group' sp ,
|
'group' sp ,
|
||||||
] seq* [ first <ebnf-ensure-not> ] action ;
|
] seq* [ first <ebnf-ensure-not> ] action ;
|
||||||
|
|
||||||
: 'sequence' ( -- parser )
|
: 'ensure' ( -- parser )
|
||||||
|
#! Parses the '&' syntax to ensure that
|
||||||
|
#! something that matches the following elements does
|
||||||
|
#! exist in the parse stream.
|
||||||
|
[
|
||||||
|
"&" syntax ,
|
||||||
|
'group' sp ,
|
||||||
|
] seq* [ first <ebnf-ensure> ] action ;
|
||||||
|
|
||||||
|
: ('sequence') ( -- parser )
|
||||||
#! A sequence of terminals and non-terminals, including
|
#! A sequence of terminals and non-terminals, including
|
||||||
#! groupings of those.
|
#! groupings of those.
|
||||||
[
|
[
|
||||||
'ensure-not' sp ,
|
'ensure-not' sp ,
|
||||||
|
'ensure' sp ,
|
||||||
'element' sp ,
|
'element' sp ,
|
||||||
'group' sp ,
|
'group' sp ,
|
||||||
'repeat0' sp ,
|
'repeat0' sp ,
|
||||||
'repeat1' sp ,
|
'repeat1' sp ,
|
||||||
'optional' sp ,
|
'optional' sp ,
|
||||||
'action' sp ,
|
] choice* ;
|
||||||
|
|
||||||
|
: 'sequence' ( -- parser )
|
||||||
|
#! A sequence of terminals and non-terminals, including
|
||||||
|
#! groupings of those.
|
||||||
|
[
|
||||||
|
[
|
||||||
|
('sequence') ,
|
||||||
|
"[[" 'factor-code' "]]" syntax-pack ,
|
||||||
|
] seq* [ first2 <ebnf-action> ] action ,
|
||||||
|
('sequence') ,
|
||||||
] choice* repeat1 [
|
] choice* repeat1 [
|
||||||
dup length 1 = [ first ] [ <ebnf-sequence> ] if
|
dup length 1 = [ first ] [ <ebnf-sequence> ] if
|
||||||
] action ;
|
] action ;
|
||||||
|
@ -258,25 +206,84 @@ DEFER: 'choice'
|
||||||
: 'ebnf' ( -- parser )
|
: 'ebnf' ( -- parser )
|
||||||
'rule' sp repeat1 [ <ebnf> ] action ;
|
'rule' sp repeat1 [ <ebnf> ] action ;
|
||||||
|
|
||||||
: ebnf>quot ( string -- quot )
|
GENERIC: (transform) ( ast -- parser )
|
||||||
'ebnf' parse [
|
|
||||||
parse-result-ast [
|
SYMBOL: parser
|
||||||
reset-parser-generation
|
SYMBOL: main
|
||||||
generate-parser drop
|
|
||||||
[
|
: transform ( ast -- object )
|
||||||
non-terminals get
|
H{ } clone dup dup [ parser set swap (transform) main set ] bind ;
|
||||||
[
|
|
||||||
get-parser [
|
M: ebnf (transform) ( ast -- parser )
|
||||||
swap , \ in , \ get , \ create ,
|
ebnf-rules [ (transform) ] map peek ;
|
||||||
1quotation , \ define ,
|
|
||||||
] [
|
M: ebnf-rule (transform) ( ast -- parser )
|
||||||
drop
|
dup ebnf-rule-elements (transform) [
|
||||||
] if*
|
swap ebnf-rule-symbol set
|
||||||
] assoc-each
|
] keep ;
|
||||||
] [ ] make
|
|
||||||
] with-scope
|
M: ebnf-sequence (transform) ( ast -- parser )
|
||||||
] [
|
ebnf-sequence-elements [ (transform) ] map seq ;
|
||||||
f
|
|
||||||
] if* ;
|
M: ebnf-choice (transform) ( ast -- parser )
|
||||||
|
ebnf-choice-options [ (transform) ] map choice ;
|
||||||
|
|
||||||
|
M: ebnf-any-character (transform) ( ast -- parser )
|
||||||
|
drop any-char ;
|
||||||
|
|
||||||
|
M: ebnf-range (transform) ( ast -- parser )
|
||||||
|
ebnf-range-pattern range-pattern ;
|
||||||
|
|
||||||
|
M: ebnf-ensure (transform) ( ast -- parser )
|
||||||
|
ebnf-ensure-group (transform) ensure ;
|
||||||
|
|
||||||
|
M: ebnf-ensure-not (transform) ( ast -- parser )
|
||||||
|
ebnf-ensure-not-group (transform) ensure-not ;
|
||||||
|
|
||||||
|
M: ebnf-repeat0 (transform) ( ast -- parser )
|
||||||
|
ebnf-repeat0-group (transform) repeat0 ;
|
||||||
|
|
||||||
|
M: ebnf-repeat1 (transform) ( ast -- parser )
|
||||||
|
ebnf-repeat1-group (transform) repeat1 ;
|
||||||
|
|
||||||
|
M: ebnf-optional (transform) ( ast -- parser )
|
||||||
|
ebnf-optional-elements (transform) optional ;
|
||||||
|
|
||||||
|
M: ebnf-action (transform) ( ast -- parser )
|
||||||
|
[ ebnf-action-parser (transform) ] keep
|
||||||
|
ebnf-action-code string-lines [ parse-lines ] with-compilation-unit action ;
|
||||||
|
|
||||||
|
M: ebnf-terminal (transform) ( ast -- parser )
|
||||||
|
ebnf-terminal-symbol token sp ;
|
||||||
|
|
||||||
|
M: ebnf-non-terminal (transform) ( ast -- parser )
|
||||||
|
ebnf-non-terminal-symbol [
|
||||||
|
, parser get , \ at ,
|
||||||
|
] [ ] make delay sp ;
|
||||||
|
|
||||||
|
: transform-ebnf ( string -- object )
|
||||||
|
'ebnf' parse parse-result-ast transform ;
|
||||||
|
|
||||||
|
: check-parse-result ( result -- result )
|
||||||
|
dup [
|
||||||
|
dup parse-result-remaining empty? [
|
||||||
|
[
|
||||||
|
"Unable to fully parse EBNF. Left to parse was: " %
|
||||||
|
parse-result-remaining %
|
||||||
|
] "" make throw
|
||||||
|
] unless
|
||||||
|
] [
|
||||||
|
"Could not parse EBNF" throw
|
||||||
|
] if ;
|
||||||
|
|
||||||
|
: ebnf>quot ( string -- hashtable quot )
|
||||||
|
'ebnf' parse check-parse-result
|
||||||
|
parse-result-ast transform dup main swap at compile ;
|
||||||
|
|
||||||
|
: [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing
|
||||||
|
|
||||||
|
: EBNF:
|
||||||
|
CREATE-WORD dup
|
||||||
|
";EBNF" parse-multiline-string
|
||||||
|
ebnf>quot swapd define "ebnf-parser" set-word-prop ; parsing
|
||||||
|
|
||||||
: <EBNF "EBNF>" parse-multiline-string ebnf>quot call ; parsing
|
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
! Copyright (C) 2007 Chris Double.
|
||||||
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
|
!
|
||||||
|
USING: kernel tools.test peg peg.expr multiline sequences ;
|
||||||
|
IN: peg.expr.tests
|
||||||
|
|
||||||
|
{ 5 } [
|
||||||
|
"2+3" eval-expr
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ 6 } [
|
||||||
|
"2*3" eval-expr
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ 14 } [
|
||||||
|
"2+3*4" eval-expr
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ 17 } [
|
||||||
|
"2+3*4+3" eval-expr
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ 23 } [
|
||||||
|
"2+3*(4+3)" eval-expr
|
||||||
|
] unit-test
|
|
@ -9,22 +9,21 @@ IN: peg.expr
|
||||||
#! { operator rhs } in to a tree structure of the correct precedence.
|
#! { operator rhs } in to a tree structure of the correct precedence.
|
||||||
swap [ first2 swap call ] reduce ;
|
swap [ first2 swap call ] reduce ;
|
||||||
|
|
||||||
<EBNF
|
EBNF: expr
|
||||||
|
times = "*" [[ drop [ * ] ]]
|
||||||
|
divide = "/" [[ drop [ / ] ]]
|
||||||
|
add = "+" [[ drop [ + ] ]]
|
||||||
|
subtract = "-" [[ drop [ - ] ]]
|
||||||
|
|
||||||
times = ("*") [[ drop [ * ] ]]
|
digit = [0-9] [[ digit> ]]
|
||||||
divide = ("/") [[ drop [ / ] ]]
|
number = (digit)+ [[ unclip [ swap 10 * + ] reduce ]]
|
||||||
add = ("+") [[ drop [ + ] ]]
|
|
||||||
subtract = ("-") [[ drop [ - ] ]]
|
|
||||||
|
|
||||||
digit = "0" | "1" | "2" | "3" | "4" |
|
|
||||||
"5" | "6" | "7" | "8" | "9"
|
|
||||||
number = ((digit)+) [[ concat string>number ]]
|
|
||||||
|
|
||||||
value = number | ("(" expr ")") [[ second ]]
|
value = number | ("(" expr ")") [[ second ]]
|
||||||
product = (value ((times | divide) value)*) [[ first2 operator-fold ]]
|
product = (value ((times | divide) value)*) [[ first2 operator-fold ]]
|
||||||
sum = (product ((add | subtract) product)*) [[ first2 operator-fold ]]
|
sum = (product ((add | subtract) product)*) [[ first2 operator-fold ]]
|
||||||
expr = sum
|
expr = sum
|
||||||
EBNF>
|
;EBNF
|
||||||
|
|
||||||
: eval-expr ( string -- number )
|
: eval-expr ( string -- number )
|
||||||
expr parse parse-result-ast ;
|
expr parse-result-ast ;
|
||||||
|
|
||||||
|
|
|
@ -159,3 +159,21 @@ HELP: 'string'
|
||||||
} { $description
|
} { $description
|
||||||
"Returns a parser that matches an string composed of a \", anything that is not \", and another \"."
|
"Returns a parser that matches an string composed of a \", anything that is not \", and another \"."
|
||||||
} { $see-also 'integer' } ;
|
} { $see-also 'integer' } ;
|
||||||
|
|
||||||
|
HELP: range-pattern
|
||||||
|
{ $values
|
||||||
|
{ "pattern" "a string" }
|
||||||
|
{ "parser" "a parser" }
|
||||||
|
} { $description
|
||||||
|
"Returns a parser that matches a single character based on the set "
|
||||||
|
"of characters in the pattern string."
|
||||||
|
"Any single character in the pattern matches that character. "
|
||||||
|
"If the pattern begins with a ^ then the set is negated "
|
||||||
|
"(the element matches any character not in the set). Any pair "
|
||||||
|
"of characters separated with a dash (-) represents the "
|
||||||
|
"range of characters from the first to the second, inclusive."
|
||||||
|
{ $examples
|
||||||
|
{ $example "USING: peg peg.parsers prettyprint ;" "\"a\" \"_a-zA-Z\" range-pattern parse parse-result-ast 1string ." "\"a\"" }
|
||||||
|
{ $example "USING: peg peg.parsers prettyprint ;" "\"0\" \"^0-9\" range-pattern parse ." "f" }
|
||||||
|
}
|
||||||
|
} ;
|
||||||
|
|
|
@ -1,8 +1,9 @@
|
||||||
! Copyright (C) 2007, 2008 Chris Double, Doug Coleman.
|
! Copyright (C) 2007, 2008 Chris Double, Doug Coleman.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: kernel sequences strings namespaces math assocs shuffle
|
USING: kernel sequences strings namespaces math assocs shuffle
|
||||||
vectors arrays combinators.lib memoize math.parser match
|
vectors arrays combinators.lib math.parser match
|
||||||
unicode.categories sequences.deep peg peg.private ;
|
unicode.categories sequences.deep peg peg.private
|
||||||
|
peg.search math.ranges ;
|
||||||
IN: peg.parsers
|
IN: peg.parsers
|
||||||
|
|
||||||
TUPLE: just-parser p1 ;
|
TUPLE: just-parser p1 ;
|
||||||
|
@ -18,26 +19,26 @@ TUPLE: just-parser p1 ;
|
||||||
M: just-parser compile ( parser -- quot )
|
M: just-parser compile ( parser -- quot )
|
||||||
just-parser-p1 compile just-pattern append ;
|
just-parser-p1 compile just-pattern append ;
|
||||||
|
|
||||||
MEMO: just ( parser -- parser )
|
: just ( parser -- parser )
|
||||||
just-parser construct-boa init-parser ;
|
just-parser construct-boa ;
|
||||||
|
|
||||||
MEMO: 1token ( ch -- parser ) 1string token ;
|
: 1token ( ch -- parser ) 1string token ;
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
MEMO: (list-of) ( items separator repeat1? -- parser )
|
: (list-of) ( items separator repeat1? -- parser )
|
||||||
>r over 2seq r> [ repeat1 ] [ repeat0 ] if [ concat ] action 2seq
|
>r over 2seq r> [ repeat1 ] [ repeat0 ] if [ concat ] action 2seq
|
||||||
[ unclip 1vector swap first append ] action ;
|
[ unclip 1vector swap first append ] action ;
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
MEMO: list-of ( items separator -- parser )
|
: list-of ( items separator -- parser )
|
||||||
hide f (list-of) ;
|
hide f (list-of) ;
|
||||||
|
|
||||||
MEMO: list-of-many ( items separator -- parser )
|
: list-of-many ( items separator -- parser )
|
||||||
hide t (list-of) ;
|
hide t (list-of) ;
|
||||||
|
|
||||||
MEMO: epsilon ( -- parser ) V{ } token ;
|
: epsilon ( -- parser ) V{ } token ;
|
||||||
|
|
||||||
MEMO: any-char ( -- parser ) [ drop t ] satisfy ;
|
: any-char ( -- parser ) [ drop t ] satisfy ;
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
|
||||||
|
@ -46,10 +47,10 @@ MEMO: any-char ( -- parser ) [ drop t ] satisfy ;
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
MEMO: exactly-n ( parser n -- parser' )
|
: exactly-n ( parser n -- parser' )
|
||||||
swap <repetition> seq ;
|
swap <repetition> seq ;
|
||||||
|
|
||||||
MEMO: at-most-n ( parser n -- parser' )
|
: at-most-n ( parser n -- parser' )
|
||||||
dup zero? [
|
dup zero? [
|
||||||
2drop epsilon
|
2drop epsilon
|
||||||
] [
|
] [
|
||||||
|
@ -57,29 +58,56 @@ MEMO: at-most-n ( parser n -- parser' )
|
||||||
-rot 1- at-most-n 2choice
|
-rot 1- at-most-n 2choice
|
||||||
] if ;
|
] if ;
|
||||||
|
|
||||||
MEMO: at-least-n ( parser n -- parser' )
|
: at-least-n ( parser n -- parser' )
|
||||||
dupd exactly-n swap repeat0 2seq
|
dupd exactly-n swap repeat0 2seq
|
||||||
[ flatten-vectors ] action ;
|
[ flatten-vectors ] action ;
|
||||||
|
|
||||||
MEMO: from-m-to-n ( parser m n -- parser' )
|
: from-m-to-n ( parser m n -- parser' )
|
||||||
>r [ exactly-n ] 2keep r> swap - at-most-n 2seq
|
>r [ exactly-n ] 2keep r> swap - at-most-n 2seq
|
||||||
[ flatten-vectors ] action ;
|
[ flatten-vectors ] action ;
|
||||||
|
|
||||||
MEMO: pack ( begin body end -- parser )
|
: pack ( begin body end -- parser )
|
||||||
>r >r hide r> r> hide 3seq [ first ] action ;
|
>r >r hide r> r> hide 3seq [ first ] action ;
|
||||||
|
|
||||||
MEMO: surrounded-by ( parser begin end -- parser' )
|
: surrounded-by ( parser begin end -- parser' )
|
||||||
[ token ] 2apply swapd pack ;
|
[ token ] 2apply swapd pack ;
|
||||||
|
|
||||||
MEMO: 'digit' ( -- parser )
|
: 'digit' ( -- parser )
|
||||||
[ digit? ] satisfy [ digit> ] action ;
|
[ digit? ] satisfy [ digit> ] action ;
|
||||||
|
|
||||||
MEMO: 'integer' ( -- parser )
|
: 'integer' ( -- parser )
|
||||||
'digit' repeat1 [ 10 digits>integer ] action ;
|
'digit' repeat1 [ 10 digits>integer ] action ;
|
||||||
|
|
||||||
MEMO: 'string' ( -- parser )
|
: 'string' ( -- parser )
|
||||||
[
|
[
|
||||||
[ CHAR: " = ] satisfy hide ,
|
[ CHAR: " = ] satisfy hide ,
|
||||||
[ CHAR: " = not ] satisfy repeat0 ,
|
[ CHAR: " = not ] satisfy repeat0 ,
|
||||||
[ CHAR: " = ] satisfy hide ,
|
[ CHAR: " = ] satisfy hide ,
|
||||||
] { } make seq [ first >string ] action ;
|
] { } make seq [ first >string ] action ;
|
||||||
|
|
||||||
|
: (range-pattern) ( pattern -- string )
|
||||||
|
#! Given a range pattern, produce a string containing
|
||||||
|
#! all characters within that range.
|
||||||
|
[
|
||||||
|
any-char ,
|
||||||
|
[ CHAR: - = ] satisfy hide ,
|
||||||
|
any-char ,
|
||||||
|
] seq* [
|
||||||
|
first2 [a,b] >string
|
||||||
|
] action
|
||||||
|
replace ;
|
||||||
|
|
||||||
|
: range-pattern ( pattern -- parser )
|
||||||
|
#! 'pattern' is a set of characters describing the
|
||||||
|
#! parser to be produced. Any single character in
|
||||||
|
#! the pattern matches that character. If the pattern
|
||||||
|
#! begins with a ^ then the set is negated (the element
|
||||||
|
#! matches any character not in the set). Any pair of
|
||||||
|
#! characters separated with a dash (-) represents the
|
||||||
|
#! range of characters from the first to the second,
|
||||||
|
#! inclusive.
|
||||||
|
dup first CHAR: ^ = [
|
||||||
|
1 tail (range-pattern) [ member? not ] curry satisfy
|
||||||
|
] [
|
||||||
|
(range-pattern) [ member? ] curry satisfy
|
||||||
|
] if ;
|
||||||
|
|
|
@ -4,10 +4,6 @@
|
||||||
USING: kernel tools.test strings namespaces arrays sequences peg peg.private ;
|
USING: kernel tools.test strings namespaces arrays sequences peg peg.private ;
|
||||||
IN: peg.tests
|
IN: peg.tests
|
||||||
|
|
||||||
{ 0 1 2 } [
|
|
||||||
0 next-id set-global get-next-id get-next-id get-next-id
|
|
||||||
] unit-test
|
|
||||||
|
|
||||||
{ f } [
|
{ f } [
|
||||||
"endbegin" "begin" token parse
|
"endbegin" "begin" token parse
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
! Copyright (C) 2007 Chris Double.
|
! Copyright (C) 2007 Chris Double.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: kernel sequences strings namespaces math assocs shuffle
|
USING: kernel sequences strings namespaces math assocs shuffle
|
||||||
vectors arrays combinators.lib memoize math.parser match
|
vectors arrays combinators.lib math.parser match
|
||||||
unicode.categories sequences.lib compiler.units parser
|
unicode.categories sequences.lib compiler.units parser
|
||||||
words ;
|
words ;
|
||||||
IN: peg
|
IN: peg
|
||||||
|
@ -10,70 +10,14 @@ TUPLE: parse-result remaining ast ;
|
||||||
|
|
||||||
GENERIC: compile ( parser -- quot )
|
GENERIC: compile ( parser -- quot )
|
||||||
|
|
||||||
: (parse) ( state parser -- result )
|
: parse ( state parser -- result )
|
||||||
compile call ;
|
compile call ;
|
||||||
|
|
||||||
|
|
||||||
<PRIVATE
|
|
||||||
|
|
||||||
SYMBOL: packrat-cache
|
|
||||||
SYMBOL: ignore
|
SYMBOL: ignore
|
||||||
SYMBOL: not-in-cache
|
|
||||||
|
|
||||||
: not-in-cache? ( result -- ? )
|
|
||||||
not-in-cache = ;
|
|
||||||
|
|
||||||
: <parse-result> ( remaining ast -- parse-result )
|
: <parse-result> ( remaining ast -- parse-result )
|
||||||
parse-result construct-boa ;
|
parse-result construct-boa ;
|
||||||
|
|
||||||
SYMBOL: next-id
|
|
||||||
|
|
||||||
: get-next-id ( -- number )
|
|
||||||
next-id get-global 0 or dup 1+ next-id set-global ;
|
|
||||||
|
|
||||||
TUPLE: parser id ;
|
|
||||||
|
|
||||||
: init-parser ( parser -- parser )
|
|
||||||
get-next-id parser construct-boa over set-delegate ;
|
|
||||||
|
|
||||||
: from ( slice-or-string -- index )
|
|
||||||
dup slice? [ slice-from ] [ drop 0 ] if ;
|
|
||||||
|
|
||||||
: get-cached ( input parser -- result )
|
|
||||||
[ from ] dip parser-id packrat-cache get at at* [
|
|
||||||
drop not-in-cache
|
|
||||||
] unless ;
|
|
||||||
|
|
||||||
: put-cached ( result input parser -- )
|
|
||||||
parser-id dup packrat-cache get at [
|
|
||||||
nip
|
|
||||||
] [
|
|
||||||
H{ } clone dup >r swap packrat-cache get set-at r>
|
|
||||||
] if*
|
|
||||||
[ from ] dip set-at ;
|
|
||||||
|
|
||||||
PRIVATE>
|
|
||||||
|
|
||||||
: parse ( input parser -- result )
|
|
||||||
packrat-cache get [
|
|
||||||
2dup get-cached dup not-in-cache? [
|
|
||||||
! "cache missed: " write over parser-id number>string write " - " write nl ! pick .
|
|
||||||
drop
|
|
||||||
#! Protect against left recursion blowing the callstack
|
|
||||||
#! by storing a failed parse in the cache.
|
|
||||||
[ f ] dipd [ put-cached ] 2keep
|
|
||||||
[ (parse) dup ] 2keep put-cached
|
|
||||||
] [
|
|
||||||
! "cache hit: " write over parser-id number>string write " - " write nl ! pick .
|
|
||||||
2nip
|
|
||||||
] if
|
|
||||||
] [
|
|
||||||
(parse)
|
|
||||||
] if ;
|
|
||||||
|
|
||||||
: packrat-parse ( input parser -- result )
|
|
||||||
H{ } clone packrat-cache [ parse ] with-variable ;
|
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
|
||||||
TUPLE: token-parser symbol ;
|
TUPLE: token-parser symbol ;
|
||||||
|
@ -295,17 +239,17 @@ M: delay-parser compile ( parser -- quot )
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
MEMO: token ( string -- parser )
|
: token ( string -- parser )
|
||||||
token-parser construct-boa init-parser ;
|
token-parser construct-boa ;
|
||||||
|
|
||||||
: satisfy ( quot -- parser )
|
: satisfy ( quot -- parser )
|
||||||
satisfy-parser construct-boa init-parser ;
|
satisfy-parser construct-boa ;
|
||||||
|
|
||||||
MEMO: range ( min max -- parser )
|
: range ( min max -- parser )
|
||||||
range-parser construct-boa init-parser ;
|
range-parser construct-boa ;
|
||||||
|
|
||||||
: seq ( seq -- parser )
|
: seq ( seq -- parser )
|
||||||
seq-parser construct-boa init-parser ;
|
seq-parser construct-boa ;
|
||||||
|
|
||||||
: 2seq ( parser1 parser2 -- parser )
|
: 2seq ( parser1 parser2 -- parser )
|
||||||
2array seq ;
|
2array seq ;
|
||||||
|
@ -320,7 +264,7 @@ MEMO: range ( min max -- parser )
|
||||||
{ } make seq ; inline
|
{ } make seq ; inline
|
||||||
|
|
||||||
: choice ( seq -- parser )
|
: choice ( seq -- parser )
|
||||||
choice-parser construct-boa init-parser ;
|
choice-parser construct-boa ;
|
||||||
|
|
||||||
: 2choice ( parser1 parser2 -- parser )
|
: 2choice ( parser1 parser2 -- parser )
|
||||||
2array choice ;
|
2array choice ;
|
||||||
|
@ -334,32 +278,32 @@ MEMO: range ( min max -- parser )
|
||||||
: choice* ( quot -- paser )
|
: choice* ( quot -- paser )
|
||||||
{ } make choice ; inline
|
{ } make choice ; inline
|
||||||
|
|
||||||
MEMO: repeat0 ( parser -- parser )
|
: repeat0 ( parser -- parser )
|
||||||
repeat0-parser construct-boa init-parser ;
|
repeat0-parser construct-boa ;
|
||||||
|
|
||||||
MEMO: repeat1 ( parser -- parser )
|
: repeat1 ( parser -- parser )
|
||||||
repeat1-parser construct-boa init-parser ;
|
repeat1-parser construct-boa ;
|
||||||
|
|
||||||
MEMO: optional ( parser -- parser )
|
: optional ( parser -- parser )
|
||||||
optional-parser construct-boa init-parser ;
|
optional-parser construct-boa ;
|
||||||
|
|
||||||
MEMO: ensure ( parser -- parser )
|
: ensure ( parser -- parser )
|
||||||
ensure-parser construct-boa init-parser ;
|
ensure-parser construct-boa ;
|
||||||
|
|
||||||
MEMO: ensure-not ( parser -- parser )
|
: ensure-not ( parser -- parser )
|
||||||
ensure-not-parser construct-boa init-parser ;
|
ensure-not-parser construct-boa ;
|
||||||
|
|
||||||
: action ( parser quot -- parser )
|
: action ( parser quot -- parser )
|
||||||
action-parser construct-boa init-parser ;
|
action-parser construct-boa ;
|
||||||
|
|
||||||
MEMO: sp ( parser -- parser )
|
: sp ( parser -- parser )
|
||||||
sp-parser construct-boa init-parser ;
|
sp-parser construct-boa ;
|
||||||
|
|
||||||
MEMO: hide ( parser -- parser )
|
: hide ( parser -- parser )
|
||||||
[ drop ignore ] action ;
|
[ drop ignore ] action ;
|
||||||
|
|
||||||
MEMO: delay ( quot -- parser )
|
: delay ( quot -- parser )
|
||||||
delay-parser construct-boa init-parser ;
|
delay-parser construct-boa ;
|
||||||
|
|
||||||
: PEG:
|
: PEG:
|
||||||
(:) [
|
(:) [
|
||||||
|
|
|
@ -4,14 +4,6 @@
|
||||||
USING: kernel tools.test peg peg.pl0 multiline sequences ;
|
USING: kernel tools.test peg peg.pl0 multiline sequences ;
|
||||||
IN: peg.pl0.tests
|
IN: peg.pl0.tests
|
||||||
|
|
||||||
{ "abc" } [
|
|
||||||
"abc" ident parse parse-result-ast
|
|
||||||
] unit-test
|
|
||||||
|
|
||||||
{ 55 } [
|
|
||||||
"55abc" number parse parse-result-ast
|
|
||||||
] unit-test
|
|
||||||
|
|
||||||
{ t } [
|
{ t } [
|
||||||
<"
|
<"
|
||||||
VAR x, squ;
|
VAR x, squ;
|
||||||
|
@ -29,7 +21,7 @@ BEGIN
|
||||||
x := x + 1;
|
x := x + 1;
|
||||||
END
|
END
|
||||||
END.
|
END.
|
||||||
"> program parse parse-result-remaining empty?
|
"> pl0 parse-result-remaining empty?
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
{ f } [
|
{ f } [
|
||||||
|
@ -95,5 +87,5 @@ BEGIN
|
||||||
y := 36;
|
y := 36;
|
||||||
CALL gcd;
|
CALL gcd;
|
||||||
END.
|
END.
|
||||||
"> program parse parse-result-remaining empty?
|
"> pl0 parse-result-remaining empty?
|
||||||
] unit-test
|
] unit-test
|
|
@ -1,31 +1,26 @@
|
||||||
! Copyright (C) 2007 Chris Double.
|
! Copyright (C) 2007 Chris Double.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: kernel arrays strings math.parser sequences
|
USING: kernel arrays strings math.parser sequences
|
||||||
peg peg.ebnf peg.parsers memoize namespaces ;
|
peg peg.ebnf peg.parsers memoize namespaces math ;
|
||||||
IN: peg.pl0
|
IN: peg.pl0
|
||||||
|
|
||||||
#! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0
|
#! Grammar for PL/0 based on http://en.wikipedia.org/wiki/PL/0
|
||||||
MEMO: ident ( -- parser )
|
|
||||||
[
|
|
||||||
CHAR: a CHAR: z range ,
|
|
||||||
CHAR: A CHAR: Z range ,
|
|
||||||
] choice* repeat1 [ >string ] action ;
|
|
||||||
|
|
||||||
MEMO: number ( -- parser )
|
EBNF: pl0
|
||||||
CHAR: 0 CHAR: 9 range repeat1 [ string>number ] action ;
|
block = ( "CONST" ident "=" number ( "," ident "=" number )* ";" )?
|
||||||
|
( "VAR" ident ( "," ident )* ";" )?
|
||||||
<EBNF
|
( "PROCEDURE" ident ";" ( block ";" )? )* statement
|
||||||
program = block "."
|
statement = ( ident ":=" expression | "CALL" ident |
|
||||||
block = [ "CONST" ident "=" number { "," ident "=" number } ";" ]
|
"BEGIN" statement (";" statement )* "END" |
|
||||||
[ "VAR" ident { "," ident } ";" ]
|
|
||||||
{ "PROCEDURE" ident ";" [ block ";" ] } statement
|
|
||||||
statement = [ ident ":=" expression | "CALL" ident |
|
|
||||||
"BEGIN" statement {";" statement } "END" |
|
|
||||||
"IF" condition "THEN" statement |
|
"IF" condition "THEN" statement |
|
||||||
"WHILE" condition "DO" statement ]
|
"WHILE" condition "DO" statement )?
|
||||||
condition = "ODD" expression |
|
condition = "ODD" expression |
|
||||||
expression ("=" | "#" | "<=" | "<" | ">=" | ">") expression
|
expression ("=" | "#" | "<=" | "<" | ">=" | ">") expression
|
||||||
expression = ["+" | "-"] term {("+" | "-") term }
|
expression = ("+" | "-")? term (("+" | "-") term )*
|
||||||
term = factor {("*" | "/") factor }
|
term = factor (("*" | "/") factor )*
|
||||||
factor = ident | number | "(" expression ")"
|
factor = ident | number | "(" expression ")"
|
||||||
EBNF>
|
ident = (([a-zA-Z])+) [[ >string ]]
|
||||||
|
digit = ([0-9]) [[ digit> ]]
|
||||||
|
number = ((digit)+) [[ unclip [ swap 10 * + ] reduce ]]
|
||||||
|
program = block "."
|
||||||
|
;EBNF
|
||||||
|
|
Loading…
Reference in New Issue