Merge branch 'master' of git://double.co.nz/git/factor
commit
4ed86f1d1f
|
@ -144,30 +144,65 @@ IN: peg.ebnf.tests
|
||||||
"Z" [EBNF foo=[^A-Z] EBNF] call
|
"Z" [EBNF foo=[^A-Z] EBNF] call
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
[
|
{ V{ V{ 49 } "+" V{ 49 } } } [
|
||||||
#! Test direct left recursion. Currently left recursion should cause a
|
#! Test direct left recursion.
|
||||||
#! failure of that parser.
|
|
||||||
#! Not using packrat, so recursion causes data stack overflow
|
|
||||||
"1+1" [EBNF num=([0-9])+ expr=expr "+" num | num EBNF] call
|
|
||||||
] must-fail
|
|
||||||
|
|
||||||
{ V{ 49 } } [
|
|
||||||
#! Test direct left recursion. Currently left recursion should cause a
|
|
||||||
#! failure of that parser.
|
|
||||||
#! Using packrat, so first part of expr fails, causing 2nd choice to be used
|
#! Using packrat, so first part of expr fails, causing 2nd choice to be used
|
||||||
"1+1" [ [EBNF num=([0-9])+ expr=expr "+" num | num EBNF] call ] with-packrat parse-result-ast
|
"1+1" [EBNF num=([0-9])+ expr=expr "+" num | num EBNF] call parse-result-ast
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
[
|
{ V{ V{ V{ 49 } "+" V{ 49 } } "+" V{ 49 } } } [
|
||||||
#! Test indirect left recursion. Currently left recursion should cause a
|
#! Test direct left recursion.
|
||||||
#! failure of that parser.
|
|
||||||
#! Not using packrat, so recursion causes data stack overflow
|
|
||||||
"1+1" [EBNF num=([0-9])+ x=expr expr=x "+" num | num EBNF] call
|
|
||||||
] must-fail
|
|
||||||
|
|
||||||
{ V{ 49 } } [
|
|
||||||
#! Test indirect left recursion. Currently left recursion should cause a
|
|
||||||
#! failure of that parser.
|
|
||||||
#! Using packrat, so first part of expr fails, causing 2nd choice to be used
|
#! Using packrat, so first part of expr fails, causing 2nd choice to be used
|
||||||
"1+1" [ [EBNF num=([0-9])+ x=expr expr=x "+" num | num EBNF] call ] with-packrat parse-result-ast
|
"1+1+1" [EBNF num=([0-9])+ expr=expr "+" num | num EBNF] call parse-result-ast
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ V{ V{ V{ 49 } "+" V{ 49 } } "+" V{ 49 } } } [
|
||||||
|
#! Test indirect left recursion.
|
||||||
|
#! Using packrat, so first part of expr fails, causing 2nd choice to be used
|
||||||
|
"1+1+1" [EBNF num=([0-9])+ x=expr expr=x "+" num | num EBNF] call parse-result-ast
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
EBNF: primary
|
||||||
|
Primary = PrimaryNoNewArray
|
||||||
|
PrimaryNoNewArray = ClassInstanceCreationExpression
|
||||||
|
| MethodInvocation
|
||||||
|
| FieldAccess
|
||||||
|
| ArrayAccess
|
||||||
|
| "this"
|
||||||
|
ClassInstanceCreationExpression = "new" ClassOrInterfaceType "(" ")"
|
||||||
|
| Primary "." "new" Identifier "(" ")"
|
||||||
|
MethodInvocation = Primary "." MethodName "(" ")"
|
||||||
|
| MethodName "(" ")"
|
||||||
|
FieldAccess = Primary "." Identifier
|
||||||
|
| "super" "." Identifier
|
||||||
|
ArrayAccess = Primary "[" Expression "]"
|
||||||
|
| ExpressionName "[" Expression "]"
|
||||||
|
ClassOrInterfaceType = ClassName | InterfaceTypeName
|
||||||
|
ClassName = "C" | "D"
|
||||||
|
InterfaceTypeName = "I" | "J"
|
||||||
|
Identifier = "x" | "y" | ClassOrInterfaceType
|
||||||
|
MethodName = "m" | "n"
|
||||||
|
ExpressionName = Identifier
|
||||||
|
Expression = "i" | "j"
|
||||||
|
main = Primary
|
||||||
|
;EBNF
|
||||||
|
|
||||||
|
{ "this" } [
|
||||||
|
"this" primary parse-result-ast
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ V{ "this" "." "x" } } [
|
||||||
|
"this.x" primary parse-result-ast
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ V{ V{ "this" "." "x" } "." "y" } } [
|
||||||
|
"this.x.y" primary parse-result-ast
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ V{ V{ "this" "." "x" } "." "m" "(" ")" } } [
|
||||||
|
"this.x.m()" primary parse-result-ast
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
{ V{ V{ V{ "x" "[" "i" "]" } "[" "j" "]" } "." "y" } } [
|
||||||
|
"x[i][j].y" primary parse-result-ast
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
|
@ -266,7 +266,7 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
|
||||||
] [ ] make delay sp ;
|
] [ ] make delay sp ;
|
||||||
|
|
||||||
: transform-ebnf ( string -- object )
|
: transform-ebnf ( string -- object )
|
||||||
'ebnf' packrat-parse parse-result-ast transform ;
|
'ebnf' parse parse-result-ast transform ;
|
||||||
|
|
||||||
: check-parse-result ( result -- result )
|
: check-parse-result ( result -- result )
|
||||||
dup [
|
dup [
|
||||||
|
@ -281,8 +281,8 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
|
||||||
] if ;
|
] if ;
|
||||||
|
|
||||||
: ebnf>quot ( string -- hashtable quot )
|
: ebnf>quot ( string -- hashtable quot )
|
||||||
'ebnf' packrat-parse check-parse-result
|
'ebnf' parse check-parse-result
|
||||||
parse-result-ast transform dup main swap at compile 1quotation ;
|
parse-result-ast transform dup main swap at compile [ parse ] curry ;
|
||||||
|
|
||||||
: [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing
|
: [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,6 @@ USING: kernel sequences strings namespaces math assocs shuffle
|
||||||
IN: peg.parsers
|
IN: peg.parsers
|
||||||
|
|
||||||
TUPLE: just-parser p1 ;
|
TUPLE: just-parser p1 ;
|
||||||
M: just-parser equal? 2drop f ;
|
|
||||||
|
|
||||||
: just-pattern
|
: just-pattern
|
||||||
[
|
[
|
||||||
|
@ -21,7 +20,7 @@ M: just-parser (compile) ( parser -- quot )
|
||||||
just-parser-p1 compiled-parser just-pattern curry ;
|
just-parser-p1 compiled-parser just-pattern curry ;
|
||||||
|
|
||||||
MEMO: just ( parser -- parser )
|
MEMO: just ( parser -- parser )
|
||||||
just-parser construct-boa ;
|
just-parser construct-boa init-parser ;
|
||||||
|
|
||||||
: 1token ( ch -- parser ) 1string token ;
|
: 1token ( ch -- parser ) 1string token ;
|
||||||
|
|
||||||
|
|
|
@ -12,41 +12,7 @@ HELP: parse
|
||||||
{ $description
|
{ $description
|
||||||
"Given the input string, parse it using the given parser. The result is a <parse-result> object if "
|
"Given the input string, parse it using the given parser. The result is a <parse-result> object if "
|
||||||
"the parse was successful, otherwise it is f." }
|
"the parse was successful, otherwise it is f." }
|
||||||
{ $see-also compile with-packrat packrat-parse } ;
|
{ $see-also compile } ;
|
||||||
|
|
||||||
HELP: with-packrat
|
|
||||||
{ $values
|
|
||||||
{ "quot" "a quotation with stack effect ( input -- result )" }
|
|
||||||
{ "result" "the result of the quotation" }
|
|
||||||
}
|
|
||||||
{ $description
|
|
||||||
"Calls the quotation with a packrat cache in scope. Usually the quotation will "
|
|
||||||
"call " { $link parse } " or call a word produced by " { $link compile } "."
|
|
||||||
"The cache is used to avoid the possible exponential time performace that pegs "
|
|
||||||
"can have, instead giving linear time at the cost of increased memory usage. "
|
|
||||||
"Use of this packrat option also allows direct and indirect recursion to "
|
|
||||||
"be handled in the parser without entering an infinite loop." }
|
|
||||||
{ $see-also compile parse packrat-parse packrat-call } ;
|
|
||||||
|
|
||||||
HELP: packrat-parse
|
|
||||||
{ $values
|
|
||||||
{ "input" "a string" }
|
|
||||||
{ "parser" "a parser" }
|
|
||||||
{ "result" "a parse-result or f" }
|
|
||||||
}
|
|
||||||
{ $description
|
|
||||||
"Compiles and calls the parser with a packrat cache in scope." }
|
|
||||||
{ $see-also compile parse packrat-call with-packrat } ;
|
|
||||||
|
|
||||||
HELP: packrat-call
|
|
||||||
{ $values
|
|
||||||
{ "input" "a string" }
|
|
||||||
{ "quot" "a quotation with stack effect ( input -- result )" }
|
|
||||||
{ "result" "a parse-result or f" }
|
|
||||||
}
|
|
||||||
{ $description
|
|
||||||
"Calls the compiled parser with a packrat cache in scope." }
|
|
||||||
{ $see-also compile packrat-call packrat-parse with-packrat } ;
|
|
||||||
|
|
||||||
HELP: compile
|
HELP: compile
|
||||||
{ $values
|
{ $values
|
||||||
|
@ -54,11 +20,12 @@ HELP: compile
|
||||||
{ "word" "a word" }
|
{ "word" "a word" }
|
||||||
}
|
}
|
||||||
{ $description
|
{ $description
|
||||||
"Compile the parser to a word. The word will have stack effect ( input -- result )."
|
"Compile the parser to a word. The word will have stack effect ( -- result )."
|
||||||
"The mapping from parser to compiled word is kept in a cache. If you later change "
|
"The mapping from parser to compiled word is kept in a cache. If you later change "
|
||||||
"the definition of a parser you'll need to clear this cache with "
|
"the definition of a parser you'll need to clear this cache with "
|
||||||
{ $link reset-compiled-parsers } " before using " { $link compile } " on that parser again." }
|
{ $link reset-compiled-parsers } " before using " { $link compile } " on that parser again."
|
||||||
{ $see-also compile with-packrat reset-compiled-parsers packrat-call packrat-parse } ;
|
}
|
||||||
|
{ $see-also parse } ;
|
||||||
|
|
||||||
HELP: reset-compiled-parsers
|
HELP: reset-compiled-parsers
|
||||||
{ $description
|
{ $description
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
! Copyright (C) 2007 Chris Double.
|
! Copyright (C) 2007 Chris Double.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
!
|
!
|
||||||
USING: kernel tools.test strings namespaces arrays sequences peg peg.private ;
|
USING: kernel tools.test strings namespaces arrays sequences peg peg.private accessors words ;
|
||||||
IN: peg.tests
|
IN: peg.tests
|
||||||
|
|
||||||
{ f } [
|
{ f } [
|
||||||
|
@ -168,31 +168,18 @@ IN: peg.tests
|
||||||
"1+1" swap parse parse-result-ast
|
"1+1" swap parse parse-result-ast
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
{ V{ "1" "-" "1" } V{ "1" "+" "1" } } [
|
|
||||||
[
|
|
||||||
[
|
|
||||||
[ "1" token , "-" token , "1" token , ] seq* ,
|
|
||||||
[ "1" token , "+" token , "1" token , ] seq* ,
|
|
||||||
] choice*
|
|
||||||
"1-1" over parse parse-result-ast swap
|
|
||||||
] with-packrat
|
|
||||||
[
|
|
||||||
"1+1" swap parse parse-result-ast
|
|
||||||
] with-packrat
|
|
||||||
] unit-test
|
|
||||||
|
|
||||||
: expr ( -- parser )
|
: expr ( -- parser )
|
||||||
#! Test direct left recursion. Currently left recursion should cause a
|
#! Test direct left recursion. Currently left recursion should cause a
|
||||||
#! failure of that parser.
|
#! failure of that parser.
|
||||||
[ expr ] delay "+" token "1" token 3seq "1" token 2choice ;
|
[ expr ] delay "+" token "1" token 3seq "1" token 2choice ;
|
||||||
|
|
||||||
[
|
{ V{ V{ "1" "+" "1" } "+" "1" } } [
|
||||||
#! Not using packrat, so recursion causes data stack overflow
|
"1+1+1" expr parse parse-result-ast
|
||||||
"1+1" expr parse parse-result-ast
|
|
||||||
] must-fail
|
|
||||||
|
|
||||||
{ "1" } [
|
|
||||||
#! Using packrat, so expr fails, causing the 2nd choice to be used.
|
|
||||||
"1+1" expr [ parse ] with-packrat parse-result-ast
|
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
|
{ t } [
|
||||||
|
#! Ensure a circular parser doesn't loop infinitely
|
||||||
|
[ f , "a" token , ] seq*
|
||||||
|
dup parsers>>
|
||||||
|
dupd 0 swap set-nth compile word?
|
||||||
|
] unit-test
|
|
@ -7,6 +7,8 @@ USING: kernel sequences strings namespaces math assocs shuffle
|
||||||
combinators.cleave locals ;
|
combinators.cleave locals ;
|
||||||
IN: peg
|
IN: peg
|
||||||
|
|
||||||
|
USE: prettyprint
|
||||||
|
|
||||||
TUPLE: parse-result remaining ast ;
|
TUPLE: parse-result remaining ast ;
|
||||||
|
|
||||||
SYMBOL: ignore
|
SYMBOL: ignore
|
||||||
|
@ -15,6 +17,183 @@ SYMBOL: ignore
|
||||||
parse-result construct-boa ;
|
parse-result construct-boa ;
|
||||||
|
|
||||||
SYMBOL: packrat
|
SYMBOL: packrat
|
||||||
|
SYMBOL: pos
|
||||||
|
SYMBOL: input
|
||||||
|
SYMBOL: fail
|
||||||
|
SYMBOL: lrstack
|
||||||
|
SYMBOL: heads
|
||||||
|
|
||||||
|
TUPLE: memo-entry ans pos ;
|
||||||
|
C: <memo-entry> memo-entry
|
||||||
|
|
||||||
|
TUPLE: left-recursion seed rule head next ;
|
||||||
|
C: <left-recursion> left-recursion
|
||||||
|
|
||||||
|
TUPLE: peg-head rule involved-set eval-set ;
|
||||||
|
C: <head> peg-head
|
||||||
|
|
||||||
|
: rule-parser ( rule -- parser )
|
||||||
|
#! A rule is the parser compiled down to a word. It has
|
||||||
|
#! a "peg" property containing the original parser.
|
||||||
|
"peg" word-prop ;
|
||||||
|
|
||||||
|
: input-slice ( -- slice )
|
||||||
|
#! Return a slice of the input from the current parse position
|
||||||
|
input get pos get tail-slice ;
|
||||||
|
|
||||||
|
: input-from ( input -- n )
|
||||||
|
#! Return the index from the original string that the
|
||||||
|
#! input slice is based on.
|
||||||
|
dup slice? [ slice-from ] [ drop 0 ] if ;
|
||||||
|
|
||||||
|
: input-cache ( parser -- cache )
|
||||||
|
#! From the packrat cache, obtain the cache for the parser
|
||||||
|
#! that maps the position to the parser result.
|
||||||
|
id>> packrat get [ drop H{ } clone ] cache ;
|
||||||
|
|
||||||
|
: eval-rule ( rule -- ast )
|
||||||
|
#! Evaluate a rule, return an ast resulting from it.
|
||||||
|
#! Return fail if the rule failed. The rule has
|
||||||
|
#! stack effect ( input -- parse-result )
|
||||||
|
pos get swap
|
||||||
|
execute
|
||||||
|
! drop f f <parse-result>
|
||||||
|
[
|
||||||
|
nip
|
||||||
|
[ ast>> ] [ remaining>> ] bi
|
||||||
|
input-from pos set
|
||||||
|
] [
|
||||||
|
pos set
|
||||||
|
fail
|
||||||
|
] if* ; inline
|
||||||
|
|
||||||
|
: memo ( pos rule -- memo-entry )
|
||||||
|
#! Return the result from the memo cache.
|
||||||
|
rule-parser input-cache at ;
|
||||||
|
|
||||||
|
: set-memo ( memo-entry pos rule -- )
|
||||||
|
#! Store an entry in the cache
|
||||||
|
rule-parser input-cache set-at ;
|
||||||
|
|
||||||
|
:: (grow-lr) ( r p m h -- )
|
||||||
|
p pos set
|
||||||
|
h involved-set>> clone h (>>eval-set)
|
||||||
|
r eval-rule
|
||||||
|
dup fail = pos get m pos>> <= or [
|
||||||
|
drop
|
||||||
|
] [
|
||||||
|
m (>>ans)
|
||||||
|
pos get m (>>pos)
|
||||||
|
r p m h (grow-lr)
|
||||||
|
] if ; inline
|
||||||
|
|
||||||
|
:: grow-lr ( r p m h -- ast )
|
||||||
|
h p heads get set-at
|
||||||
|
r p m h (grow-lr)
|
||||||
|
p heads get delete-at
|
||||||
|
m pos>> pos set m ans>>
|
||||||
|
; inline
|
||||||
|
|
||||||
|
:: (setup-lr) ( r l s -- )
|
||||||
|
s head>> l head>> eq? [
|
||||||
|
l head>> s (>>head)
|
||||||
|
l head>> [ s rule>> add ] change-involved-set drop
|
||||||
|
r l s next>> (setup-lr)
|
||||||
|
] unless ;
|
||||||
|
|
||||||
|
:: setup-lr ( r l -- )
|
||||||
|
l head>> [
|
||||||
|
r V{ } clone V{ } clone <head> l (>>head)
|
||||||
|
] unless
|
||||||
|
r l lrstack get (setup-lr) ;
|
||||||
|
|
||||||
|
:: lr-answer ( r p m -- ast )
|
||||||
|
[let* |
|
||||||
|
h [ m ans>> head>> ]
|
||||||
|
|
|
||||||
|
h rule>> r eq? [
|
||||||
|
m ans>> seed>> m (>>ans)
|
||||||
|
m ans>> fail = [
|
||||||
|
fail
|
||||||
|
] [
|
||||||
|
r p m h grow-lr
|
||||||
|
] if
|
||||||
|
] [
|
||||||
|
m ans>> seed>>
|
||||||
|
] if
|
||||||
|
] ; inline
|
||||||
|
|
||||||
|
:: recall ( r p -- memo-entry )
|
||||||
|
[let* |
|
||||||
|
m [ p r memo ]
|
||||||
|
h [ p heads get at ]
|
||||||
|
|
|
||||||
|
h [
|
||||||
|
m r h involved-set>> h rule>> add member? not and [
|
||||||
|
fail p <memo-entry>
|
||||||
|
] [
|
||||||
|
r h eval-set>> member? [
|
||||||
|
h [ r swap remove ] change-eval-set drop
|
||||||
|
r eval-rule
|
||||||
|
m (>>ans)
|
||||||
|
pos get m (>>pos)
|
||||||
|
m
|
||||||
|
] [
|
||||||
|
m
|
||||||
|
] if
|
||||||
|
] if
|
||||||
|
] [
|
||||||
|
m
|
||||||
|
] if
|
||||||
|
] ; inline
|
||||||
|
|
||||||
|
:: apply-non-memo-rule ( r p -- ast )
|
||||||
|
[let* |
|
||||||
|
lr [ fail r f lrstack get <left-recursion> ]
|
||||||
|
m [ lr lrstack set lr p <memo-entry> dup p r set-memo ]
|
||||||
|
ans [ r eval-rule ]
|
||||||
|
|
|
||||||
|
lrstack get next>> lrstack set
|
||||||
|
pos get m (>>pos)
|
||||||
|
lr head>> [
|
||||||
|
ans lr (>>seed)
|
||||||
|
r p m lr-answer
|
||||||
|
] [
|
||||||
|
ans m (>>ans)
|
||||||
|
ans
|
||||||
|
] if
|
||||||
|
] ; inline
|
||||||
|
|
||||||
|
:: apply-memo-rule ( r m -- ast )
|
||||||
|
m pos>> pos set
|
||||||
|
m ans>> left-recursion? [
|
||||||
|
r m ans>> setup-lr
|
||||||
|
m ans>> seed>>
|
||||||
|
] [
|
||||||
|
m ans>>
|
||||||
|
] if ;
|
||||||
|
|
||||||
|
:: apply-rule ( r p -- ast )
|
||||||
|
[let* |
|
||||||
|
m [ r p recall ]
|
||||||
|
|
|
||||||
|
m [
|
||||||
|
r m apply-memo-rule
|
||||||
|
] [
|
||||||
|
r p apply-non-memo-rule
|
||||||
|
] if
|
||||||
|
] ; inline
|
||||||
|
|
||||||
|
: with-packrat ( input quot -- result )
|
||||||
|
#! Run the quotation with a packrat cache active.
|
||||||
|
swap [
|
||||||
|
input set
|
||||||
|
0 pos set
|
||||||
|
f lrstack set
|
||||||
|
H{ } clone heads set
|
||||||
|
H{ } clone packrat set
|
||||||
|
] H{ } make-assoc swap bind ;
|
||||||
|
|
||||||
|
|
||||||
: compiled-parsers ( -- cache )
|
: compiled-parsers ( -- cache )
|
||||||
\ compiled-parsers get-global [ H{ } clone dup \ compiled-parsers set-global ] unless* ;
|
\ compiled-parsers get-global [ H{ } clone dup \ compiled-parsers set-global ] unless* ;
|
||||||
|
@ -22,67 +201,77 @@ SYMBOL: packrat
|
||||||
: reset-compiled-parsers ( -- )
|
: reset-compiled-parsers ( -- )
|
||||||
H{ } clone \ compiled-parsers set-global ;
|
H{ } clone \ compiled-parsers set-global ;
|
||||||
|
|
||||||
|
reset-compiled-parsers
|
||||||
|
|
||||||
GENERIC: (compile) ( parser -- quot )
|
GENERIC: (compile) ( parser -- quot )
|
||||||
|
|
||||||
: input-from ( input -- n )
|
|
||||||
#! Return the index from the original string that the
|
|
||||||
#! input slice is based on.
|
|
||||||
dup slice? [ slice-from ] [ drop 0 ] if ;
|
|
||||||
|
|
||||||
: input-cache ( quot cache -- cache )
|
|
||||||
#! From the packrat cache, obtain the cache for the parser quotation
|
|
||||||
#! that maps the input string position to the parser result.
|
|
||||||
[ drop H{ } clone ] cache ;
|
|
||||||
|
|
||||||
:: cached-result ( n input-cache input quot -- result )
|
|
||||||
#! Get the cached result for input position n
|
|
||||||
#! from the input cache. If the item is not in the cache,
|
|
||||||
#! call 'quot' with 'input' on the stack to get the result
|
|
||||||
#! and store that in the cache and return it.
|
|
||||||
n input-cache [
|
|
||||||
drop
|
|
||||||
f n input-cache set-at
|
|
||||||
input quot call
|
|
||||||
] cache ; inline
|
|
||||||
|
|
||||||
:: run-packrat-parser ( input quot c -- result )
|
|
||||||
input input-from
|
|
||||||
quot c input-cache
|
|
||||||
input quot cached-result ; inline
|
|
||||||
|
|
||||||
: run-parser ( input quot -- result )
|
|
||||||
#! If a packrat cache is available, use memoization for
|
|
||||||
#! packrat parsing, otherwise do a standard peg call.
|
|
||||||
packrat get [ run-packrat-parser ] [ call ] if* ; inline
|
|
||||||
|
|
||||||
|
:: parser-body ( parser -- quot )
|
||||||
|
#! Return the body of the word that is the compiled version
|
||||||
|
#! of the parser.
|
||||||
|
[let* | rule [ parser (compile) define-temp dup parser "peg" set-word-prop ]
|
||||||
|
|
|
||||||
|
[
|
||||||
|
rule pos get apply-rule dup fail = [
|
||||||
|
drop f
|
||||||
|
] [
|
||||||
|
input-slice swap <parse-result>
|
||||||
|
] if
|
||||||
|
]
|
||||||
|
] ;
|
||||||
|
|
||||||
: compiled-parser ( parser -- word )
|
: compiled-parser ( parser -- word )
|
||||||
#! Look to see if the given parser has been compiled.
|
#! Look to see if the given parser has been compiled.
|
||||||
#! If not, compile it to a temporary word, cache it,
|
#! If not, compile it to a temporary word, cache it,
|
||||||
#! and return it. Otherwise return the existing one.
|
#! and return it. Otherwise return the existing one.
|
||||||
compiled-parsers [
|
#! Circular parsers are supported by getting the word
|
||||||
(compile) [ run-parser ] curry define-temp
|
#! name and storing it in the cache, before compiling,
|
||||||
] cache ;
|
#! so it is picked up when re-entered.
|
||||||
|
dup id>> compiled-parsers [
|
||||||
|
drop dup gensym swap 2dup id>> compiled-parsers set-at
|
||||||
|
2dup parser-body define
|
||||||
|
dupd "peg" set-word-prop
|
||||||
|
] cache nip ;
|
||||||
|
|
||||||
: compile ( parser -- word )
|
: compile ( parser -- word )
|
||||||
[ compiled-parser ] with-compilation-unit ;
|
[ compiled-parser ] with-compilation-unit ;
|
||||||
|
|
||||||
: parse ( state parser -- result )
|
: parse ( state parser -- result )
|
||||||
compile execute ; inline
|
dup word? [ compile ] unless
|
||||||
|
[ execute ] curry with-packrat ;
|
||||||
: with-packrat ( quot -- result )
|
|
||||||
#! Run the quotation with a packrat cache active.
|
|
||||||
[ H{ } clone packrat ] dip with-variable ; inline
|
|
||||||
|
|
||||||
: packrat-parse ( state parser -- result )
|
|
||||||
[ parse ] with-packrat ;
|
|
||||||
|
|
||||||
: packrat-call ( state quot -- result )
|
|
||||||
with-packrat ; inline
|
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
|
||||||
|
SYMBOL: id
|
||||||
|
|
||||||
|
: next-id ( -- n )
|
||||||
|
#! Return the next unique id for a parser
|
||||||
|
id get-global [
|
||||||
|
dup 1+ id set-global
|
||||||
|
] [
|
||||||
|
1 id set-global 0
|
||||||
|
] if* ;
|
||||||
|
|
||||||
|
TUPLE: parser id ;
|
||||||
|
M: parser equal? [ id>> ] 2apply = ;
|
||||||
|
C: <parser> parser
|
||||||
|
|
||||||
|
: delegates ( -- cache )
|
||||||
|
\ delegates get-global [ H{ } clone dup \ delegates set-global ] unless* ;
|
||||||
|
|
||||||
|
: reset-delegates ( -- )
|
||||||
|
H{ } clone \ delegates set-global ;
|
||||||
|
|
||||||
|
reset-delegates
|
||||||
|
|
||||||
|
: init-parser ( parser -- parser )
|
||||||
|
#! Set the delegate for the parser. Equivalent parsers
|
||||||
|
#! get a delegate with the same id.
|
||||||
|
dup clone delegates [
|
||||||
|
drop next-id <parser>
|
||||||
|
] cache over set-delegate ;
|
||||||
|
|
||||||
TUPLE: token-parser symbol ;
|
TUPLE: token-parser symbol ;
|
||||||
M: token-parser equal? 2drop f ;
|
|
||||||
|
|
||||||
MATCH-VARS: ?token ;
|
MATCH-VARS: ?token ;
|
||||||
|
|
||||||
|
@ -95,16 +284,15 @@ MATCH-VARS: ?token ;
|
||||||
] if ;
|
] if ;
|
||||||
|
|
||||||
M: token-parser (compile) ( parser -- quot )
|
M: token-parser (compile) ( parser -- quot )
|
||||||
symbol>> [ parse-token ] curry ;
|
[ \ input-slice , symbol>> , \ parse-token , ] [ ] make ;
|
||||||
|
|
||||||
TUPLE: satisfy-parser quot ;
|
TUPLE: satisfy-parser quot ;
|
||||||
M: satisfy-parser equal? 2drop f ;
|
|
||||||
|
|
||||||
MATCH-VARS: ?quot ;
|
MATCH-VARS: ?quot ;
|
||||||
|
|
||||||
: satisfy-pattern ( -- quot )
|
: satisfy-pattern ( -- quot )
|
||||||
[
|
[
|
||||||
dup empty? [
|
input-slice dup empty? [
|
||||||
drop f
|
drop f
|
||||||
] [
|
] [
|
||||||
unclip-slice dup ?quot call [
|
unclip-slice dup ?quot call [
|
||||||
|
@ -119,13 +307,12 @@ M: satisfy-parser (compile) ( parser -- quot )
|
||||||
quot>> \ ?quot satisfy-pattern match-replace ;
|
quot>> \ ?quot satisfy-pattern match-replace ;
|
||||||
|
|
||||||
TUPLE: range-parser min max ;
|
TUPLE: range-parser min max ;
|
||||||
M: range-parser equal? 2drop f ;
|
|
||||||
|
|
||||||
MATCH-VARS: ?min ?max ;
|
MATCH-VARS: ?min ?max ;
|
||||||
|
|
||||||
: range-pattern ( -- quot )
|
: range-pattern ( -- quot )
|
||||||
[
|
[
|
||||||
dup empty? [
|
input-slice dup empty? [
|
||||||
drop f
|
drop f
|
||||||
] [
|
] [
|
||||||
0 over nth dup
|
0 over nth dup
|
||||||
|
@ -141,12 +328,11 @@ M: range-parser (compile) ( parser -- quot )
|
||||||
T{ range-parser _ ?min ?max } range-pattern match-replace ;
|
T{ range-parser _ ?min ?max } range-pattern match-replace ;
|
||||||
|
|
||||||
TUPLE: seq-parser parsers ;
|
TUPLE: seq-parser parsers ;
|
||||||
M: seq-parser equal? 2drop f ;
|
|
||||||
|
|
||||||
: seq-pattern ( -- quot )
|
: seq-pattern ( -- quot )
|
||||||
[
|
[
|
||||||
dup [
|
dup [
|
||||||
dup remaining>> ?quot [
|
?quot [
|
||||||
[ remaining>> swap (>>remaining) ] 2keep
|
[ remaining>> swap (>>remaining) ] 2keep
|
||||||
ast>> dup ignore = [
|
ast>> dup ignore = [
|
||||||
drop
|
drop
|
||||||
|
@ -163,34 +349,27 @@ M: seq-parser equal? 2drop f ;
|
||||||
|
|
||||||
M: seq-parser (compile) ( parser -- quot )
|
M: seq-parser (compile) ( parser -- quot )
|
||||||
[
|
[
|
||||||
[ V{ } clone <parse-result> ] %
|
[ input-slice V{ } clone <parse-result> ] %
|
||||||
parsers>> [ compiled-parser \ ?quot seq-pattern match-replace % ] each
|
parsers>> [ compiled-parser \ ?quot seq-pattern match-replace % ] each
|
||||||
] [ ] make ;
|
] [ ] make ;
|
||||||
|
|
||||||
TUPLE: choice-parser parsers ;
|
TUPLE: choice-parser parsers ;
|
||||||
M: choice-parser equal? 2drop f ;
|
|
||||||
|
|
||||||
: choice-pattern ( -- quot )
|
: choice-pattern ( -- quot )
|
||||||
[
|
[
|
||||||
dup [
|
[ ?quot ] unless*
|
||||||
|
|
||||||
] [
|
|
||||||
drop dup ?quot
|
|
||||||
] if
|
|
||||||
] ;
|
] ;
|
||||||
|
|
||||||
M: choice-parser (compile) ( parser -- quot )
|
M: choice-parser (compile) ( parser -- quot )
|
||||||
[
|
[
|
||||||
f ,
|
f ,
|
||||||
parsers>> [ compiled-parser \ ?quot choice-pattern match-replace % ] each
|
parsers>> [ compiled-parser \ ?quot choice-pattern match-replace % ] each
|
||||||
\ nip ,
|
|
||||||
] [ ] make ;
|
] [ ] make ;
|
||||||
|
|
||||||
TUPLE: repeat0-parser p1 ;
|
TUPLE: repeat0-parser p1 ;
|
||||||
M: repeat0-parser equal? 2drop f ;
|
|
||||||
|
|
||||||
: (repeat0) ( quot result -- result )
|
: (repeat0) ( quot result -- result )
|
||||||
2dup remaining>> swap call [
|
over call [
|
||||||
[ remaining>> swap (>>remaining) ] 2keep
|
[ remaining>> swap (>>remaining) ] 2keep
|
||||||
ast>> swap [ ast>> push ] keep
|
ast>> swap [ ast>> push ] keep
|
||||||
(repeat0)
|
(repeat0)
|
||||||
|
@ -205,12 +384,11 @@ M: repeat0-parser equal? 2drop f ;
|
||||||
|
|
||||||
M: repeat0-parser (compile) ( parser -- quot )
|
M: repeat0-parser (compile) ( parser -- quot )
|
||||||
[
|
[
|
||||||
[ V{ } clone <parse-result> ] %
|
[ input-slice V{ } clone <parse-result> ] %
|
||||||
p1>> compiled-parser \ ?quot repeat0-pattern match-replace %
|
p1>> compiled-parser \ ?quot repeat0-pattern match-replace %
|
||||||
] [ ] make ;
|
] [ ] make ;
|
||||||
|
|
||||||
TUPLE: repeat1-parser p1 ;
|
TUPLE: repeat1-parser p1 ;
|
||||||
M: repeat1-parser equal? 2drop f ;
|
|
||||||
|
|
||||||
: repeat1-pattern ( -- quot )
|
: repeat1-pattern ( -- quot )
|
||||||
[
|
[
|
||||||
|
@ -225,27 +403,25 @@ M: repeat1-parser equal? 2drop f ;
|
||||||
|
|
||||||
M: repeat1-parser (compile) ( parser -- quot )
|
M: repeat1-parser (compile) ( parser -- quot )
|
||||||
[
|
[
|
||||||
[ V{ } clone <parse-result> ] %
|
[ input-slice V{ } clone <parse-result> ] %
|
||||||
p1>> compiled-parser \ ?quot repeat1-pattern match-replace %
|
p1>> compiled-parser \ ?quot repeat1-pattern match-replace %
|
||||||
] [ ] make ;
|
] [ ] make ;
|
||||||
|
|
||||||
TUPLE: optional-parser p1 ;
|
TUPLE: optional-parser p1 ;
|
||||||
M: optional-parser equal? 2drop f ;
|
|
||||||
|
|
||||||
: optional-pattern ( -- quot )
|
: optional-pattern ( -- quot )
|
||||||
[
|
[
|
||||||
dup ?quot swap f <parse-result> or
|
?quot [ input-slice f <parse-result> ] unless*
|
||||||
] ;
|
] ;
|
||||||
|
|
||||||
M: optional-parser (compile) ( parser -- quot )
|
M: optional-parser (compile) ( parser -- quot )
|
||||||
p1>> compiled-parser \ ?quot optional-pattern match-replace ;
|
p1>> compiled-parser \ ?quot optional-pattern match-replace ;
|
||||||
|
|
||||||
TUPLE: ensure-parser p1 ;
|
TUPLE: ensure-parser p1 ;
|
||||||
M: ensure-parser equal? 2drop f ;
|
|
||||||
|
|
||||||
: ensure-pattern ( -- quot )
|
: ensure-pattern ( -- quot )
|
||||||
[
|
[
|
||||||
dup ?quot [
|
input-slice ?quot [
|
||||||
ignore <parse-result>
|
ignore <parse-result>
|
||||||
] [
|
] [
|
||||||
drop f
|
drop f
|
||||||
|
@ -256,11 +432,10 @@ M: ensure-parser (compile) ( parser -- quot )
|
||||||
p1>> compiled-parser \ ?quot ensure-pattern match-replace ;
|
p1>> compiled-parser \ ?quot ensure-pattern match-replace ;
|
||||||
|
|
||||||
TUPLE: ensure-not-parser p1 ;
|
TUPLE: ensure-not-parser p1 ;
|
||||||
M: ensure-not-parser equal? 2drop f ;
|
|
||||||
|
|
||||||
: ensure-not-pattern ( -- quot )
|
: ensure-not-pattern ( -- quot )
|
||||||
[
|
[
|
||||||
dup ?quot [
|
input-slice ?quot [
|
||||||
drop f
|
drop f
|
||||||
] [
|
] [
|
||||||
ignore <parse-result>
|
ignore <parse-result>
|
||||||
|
@ -271,7 +446,6 @@ M: ensure-not-parser (compile) ( parser -- quot )
|
||||||
p1>> compiled-parser \ ?quot ensure-not-pattern match-replace ;
|
p1>> compiled-parser \ ?quot ensure-not-pattern match-replace ;
|
||||||
|
|
||||||
TUPLE: action-parser p1 quot ;
|
TUPLE: action-parser p1 quot ;
|
||||||
M: action-parser equal? 2drop f ;
|
|
||||||
|
|
||||||
MATCH-VARS: ?action ;
|
MATCH-VARS: ?action ;
|
||||||
|
|
||||||
|
@ -284,7 +458,7 @@ MATCH-VARS: ?action ;
|
||||||
] ;
|
] ;
|
||||||
|
|
||||||
M: action-parser (compile) ( parser -- quot )
|
M: action-parser (compile) ( parser -- quot )
|
||||||
{ [ p1>> ] [ quot>> ] } cleave [ compiled-parser ] dip
|
[ p1>> compiled-parser ] [ quot>> ] bi
|
||||||
2array { ?quot ?action } action-pattern match-replace ;
|
2array { ?quot ?action } action-pattern match-replace ;
|
||||||
|
|
||||||
: left-trim-slice ( string -- string )
|
: left-trim-slice ( string -- string )
|
||||||
|
@ -295,15 +469,13 @@ M: action-parser (compile) ( parser -- quot )
|
||||||
] unless ;
|
] unless ;
|
||||||
|
|
||||||
TUPLE: sp-parser p1 ;
|
TUPLE: sp-parser p1 ;
|
||||||
M: sp-parser equal? 2drop f ;
|
|
||||||
|
|
||||||
M: sp-parser (compile) ( parser -- quot )
|
M: sp-parser (compile) ( parser -- quot )
|
||||||
[
|
[
|
||||||
\ left-trim-slice , p1>> compiled-parser ,
|
\ input-slice , \ left-trim-slice , \ input-from , \ pos , \ set , p1>> compiled-parser ,
|
||||||
] [ ] make ;
|
] [ ] make ;
|
||||||
|
|
||||||
TUPLE: delay-parser quot ;
|
TUPLE: delay-parser quot ;
|
||||||
M: delay-parser equal? 2drop f ;
|
|
||||||
|
|
||||||
M: delay-parser (compile) ( parser -- quot )
|
M: delay-parser (compile) ( parser -- quot )
|
||||||
#! For efficiency we memoize the quotation.
|
#! For efficiency we memoize the quotation.
|
||||||
|
@ -317,71 +489,71 @@ M: delay-parser (compile) ( parser -- quot )
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
MEMO: token ( string -- parser )
|
: token ( string -- parser )
|
||||||
token-parser construct-boa ;
|
token-parser construct-boa init-parser ;
|
||||||
|
|
||||||
MEMO: satisfy ( quot -- parser )
|
: satisfy ( quot -- parser )
|
||||||
satisfy-parser construct-boa ;
|
satisfy-parser construct-boa init-parser ;
|
||||||
|
|
||||||
MEMO: range ( min max -- parser )
|
: range ( min max -- parser )
|
||||||
range-parser construct-boa ;
|
range-parser construct-boa init-parser ;
|
||||||
|
|
||||||
MEMO: seq ( seq -- parser )
|
: seq ( seq -- parser )
|
||||||
seq-parser construct-boa ;
|
seq-parser construct-boa init-parser ;
|
||||||
|
|
||||||
MEMO: 2seq ( parser1 parser2 -- parser )
|
: 2seq ( parser1 parser2 -- parser )
|
||||||
2array seq ;
|
2array seq ;
|
||||||
|
|
||||||
MEMO: 3seq ( parser1 parser2 parser3 -- parser )
|
: 3seq ( parser1 parser2 parser3 -- parser )
|
||||||
3array seq ;
|
3array seq ;
|
||||||
|
|
||||||
MEMO: 4seq ( parser1 parser2 parser3 parser4 -- parser )
|
: 4seq ( parser1 parser2 parser3 parser4 -- parser )
|
||||||
4array seq ;
|
4array seq ;
|
||||||
|
|
||||||
: seq* ( quot -- paser )
|
: seq* ( quot -- paser )
|
||||||
{ } make seq ; inline
|
{ } make seq ; inline
|
||||||
|
|
||||||
MEMO: choice ( seq -- parser )
|
: choice ( seq -- parser )
|
||||||
choice-parser construct-boa ;
|
choice-parser construct-boa init-parser ;
|
||||||
|
|
||||||
MEMO: 2choice ( parser1 parser2 -- parser )
|
: 2choice ( parser1 parser2 -- parser )
|
||||||
2array choice ;
|
2array choice ;
|
||||||
|
|
||||||
MEMO: 3choice ( parser1 parser2 parser3 -- parser )
|
: 3choice ( parser1 parser2 parser3 -- parser )
|
||||||
3array choice ;
|
3array choice ;
|
||||||
|
|
||||||
MEMO: 4choice ( parser1 parser2 parser3 parser4 -- parser )
|
: 4choice ( parser1 parser2 parser3 parser4 -- parser )
|
||||||
4array choice ;
|
4array choice ;
|
||||||
|
|
||||||
: choice* ( quot -- paser )
|
: choice* ( quot -- paser )
|
||||||
{ } make choice ; inline
|
{ } make choice ; inline
|
||||||
|
|
||||||
MEMO: repeat0 ( parser -- parser )
|
: repeat0 ( parser -- parser )
|
||||||
repeat0-parser construct-boa ;
|
repeat0-parser construct-boa init-parser ;
|
||||||
|
|
||||||
MEMO: repeat1 ( parser -- parser )
|
: repeat1 ( parser -- parser )
|
||||||
repeat1-parser construct-boa ;
|
repeat1-parser construct-boa init-parser ;
|
||||||
|
|
||||||
MEMO: optional ( parser -- parser )
|
: optional ( parser -- parser )
|
||||||
optional-parser construct-boa ;
|
optional-parser construct-boa init-parser ;
|
||||||
|
|
||||||
MEMO: ensure ( parser -- parser )
|
: ensure ( parser -- parser )
|
||||||
ensure-parser construct-boa ;
|
ensure-parser construct-boa init-parser ;
|
||||||
|
|
||||||
MEMO: ensure-not ( parser -- parser )
|
: ensure-not ( parser -- parser )
|
||||||
ensure-not-parser construct-boa ;
|
ensure-not-parser construct-boa init-parser ;
|
||||||
|
|
||||||
MEMO: action ( parser quot -- parser )
|
: action ( parser quot -- parser )
|
||||||
action-parser construct-boa ;
|
action-parser construct-boa init-parser ;
|
||||||
|
|
||||||
MEMO: sp ( parser -- parser )
|
: sp ( parser -- parser )
|
||||||
sp-parser construct-boa ;
|
sp-parser construct-boa init-parser ;
|
||||||
|
|
||||||
: hide ( parser -- parser )
|
: hide ( parser -- parser )
|
||||||
[ drop ignore ] action ;
|
[ drop ignore ] action ;
|
||||||
|
|
||||||
MEMO: delay ( quot -- parser )
|
: delay ( quot -- parser )
|
||||||
delay-parser construct-boa ;
|
delay-parser construct-boa init-parser ;
|
||||||
|
|
||||||
: PEG:
|
: PEG:
|
||||||
(:) [
|
(:) [
|
||||||
|
|
Loading…
Reference in New Issue