factor/extra/peg/peg.factor

587 lines
14 KiB
Factor
Raw Normal View History

2008-03-29 00:37:52 -04:00
! Copyright (C) 2007, 2008 Chris Double.
2007-11-19 22:36:38 -05:00
! See http://factorcode.org/license.txt for BSD license.
2008-06-18 01:35:19 -04:00
USING: kernel sequences strings fry namespaces math assocs shuffle debugger io
vectors arrays math.parser math.order
2008-04-07 23:16:51 -04:00
unicode.categories compiler.units parser
2008-04-05 00:25:04 -04:00
words quotations effects memoize accessors locals effects splitting ;
2007-11-19 22:36:38 -05:00
IN: peg
2008-03-28 06:20:43 -04:00
USE: prettyprint
2007-11-20 21:31:23 -05:00
! Outcome of a successful parse: 'remaining' is the input left over
! after the match, 'ast' is the value the parser produced.
TUPLE: parse-result remaining ast ;
2007-11-19 22:36:38 -05:00
! State shared by concrete parsers: 'id' is the number handed out by
! next-id, 'compiled' holds the word created by compiled-parser
! (f until the parser has been compiled).
TUPLE: parser id compiled ;
2008-03-30 00:48:01 -04:00
2008-03-29 21:36:58 -04:00
! Two parsers are equal when they carry the same id. equal? can be
! handed any object, so anything that is not a parser compares as
! unequal instead of raising an error from id>>.
M: parser equal?
    over parser? [ [ id>> ] bi@ = ] [ 2drop f ] if ;
2008-03-30 00:48:01 -04:00
! Hash on the id alone, matching equal? which compares ids.
M: parser hashcode* id>> hashcode* ;
! Constructor taking the slots in order: ( id compiled -- parser ).
C: <parser> parser
2007-11-26 18:22:33 -05:00
! Marker ast value: calc-seq-result leaves results whose ast is
! 'ignore' out of a sequence's collected output; 'hide' produces it.
SYMBOL: ignore
2007-11-20 21:31:23 -05:00
! Constructor taking the slots in order:
! ( remaining ast -- parse-result ).
C: <parse-result> parse-result
2007-11-19 22:36:38 -05:00
2008-03-26 18:23:58 -04:00
! Variables describing one parse. with-packrat binds fresh values
! for packrat, pos, input, lrstack and heads.
! packrat: hashtable from parser id to that parser's cache
! (see input-cache).
SYMBOL: packrat
2008-03-28 06:20:43 -04:00
! pos: current index into the input.
SYMBOL: pos
! input: the sequence being parsed.
SYMBOL: input
! fail: sentinel used as the ast of a rule that did not match.
SYMBOL: fail
! lrstack: most recent left-recursion record, chained through next>>.
SYMBOL: lrstack
! heads: hashtable from position to the peg-head being grown there.
SYMBOL: heads
2008-03-28 06:20:43 -04:00
2008-04-08 20:52:49 -04:00
: failed? ( obj -- ? )
#! True when obj is the 'fail' sentinel.
fail = ;
: delegates ( -- cache )
    #! Global cache consulted by init-parser. If the global is
    #! unset, a new hashtable is created, stored and returned.
    \ delegates get-global dup [
        drop H{ } clone dup \ delegates set-global
    ] unless ;
: reset-pegs ( -- )
#! Replace the global delegates cache with an empty hashtable.
H{ } clone \ delegates set-global ;
! Top-level call: the cache is emptied when this file is loaded.
reset-pegs
2008-03-28 06:20:43 -04:00
! Packrat cache entry. 'ans' is the stored answer: an ast, 'fail', or
! a left-recursion record while the rule is still being evaluated.
! 'pos' is the input position that goes with the answer.
TUPLE: memo-entry ans pos ;
C: <memo-entry> memo-entry
2008-03-27 22:51:18 -04:00
! Record for a rule whose evaluation is in progress. 'seed' is the
! answer so far (created as 'fail'), 'rule' is the rule word, 'head'
! is the peg-head once left recursion has been detected (f before
! that), 'next' is the record that was on lrstack before this one.
TUPLE: left-recursion seed rule head next ;
2008-03-28 08:17:54 -04:00
C: <left-recursion> left-recursion
! Head of a left recursion. 'rule' is the rule that started it,
! 'involved-set' holds the rules taking part in it, 'eval-set' the
! involved rules that may still be re-evaluated in the current
! growth step (see setup-growth and recall).
TUPLE: peg-head rule involved-set eval-set ;
! Note that the constructor is named <head>, not <peg-head>.
C: <head> peg-head
2008-03-28 06:20:43 -04:00
: rule-parser ( rule -- parser )
#! A rule is the parser compiled down to a word. It has
#! a "peg" property containing the original parser; this
#! word reads that property.
"peg" word-prop ;
: input-slice ( -- slice )
    #! The part of the input not consumed yet: everything from
    #! the current parse position to the end.
    pos get input get swap tail-slice ;
2008-03-27 22:51:18 -04:00
: input-from ( input -- n )
    #! Index in the original string at which an input slice
    #! starts. Anything that is not a slice starts at index 0.
    dup slice? not [ drop 0 ] [ slice-from ] if ;
2008-03-28 06:20:43 -04:00
: input-cache ( parser -- cache )
#! From the packrat cache, obtain the cache for the parser
#! that maps the position to the parser result. The packrat
#! table is keyed by parser id; an empty hashtable is created
#! and stored the first time an id is looked up.
id>> packrat get [ drop H{ } clone ] cache ;
2008-04-08 20:52:49 -04:00
: process-rule-result ( p result -- result )
    #! Turn what a rule word returned into an ast. On success
    #! pos moves to where the remaining input starts and the
    #! ast is returned. On failure (f) pos is set back to p and
    #! 'fail' is returned.
    dup [
        nip [ remaining>> input-from pos set ] [ ast>> ] bi
    ] [
        drop pos set fail
    ] if ;
2008-03-28 06:20:43 -04:00
: eval-rule ( rule -- ast )
#! Evaluate a rule, return an ast resulting from it.
#! Return fail if the rule failed. The rule word is executed
#! with only the saved position beneath it on the stack; rule
#! words are defined with the effect 0 1 <effect>. The position
#! is saved first so process-rule-result can restore it when
#! the rule returns f.
2008-04-08 20:52:49 -04:00
pos get swap execute process-rule-result ; inline
2008-03-20 10:05:21 -04:00
2008-03-28 06:20:43 -04:00
: memo ( pos rule -- memo-entry )
#! Return the result from the memo cache: whatever is stored
#! under pos in the cache of the rule's parser.
rule-parser input-cache at ;
2008-03-27 00:45:59 -04:00
2008-03-28 06:20:43 -04:00
: set-memo ( memo-entry pos rule -- )
#! Store an entry in the cache of the rule's parser, keyed
#! by pos.
rule-parser input-cache set-at ;
2008-04-08 20:52:49 -04:00
: update-m ( ast m -- )
    #! Record ast as the answer of memo entry m and stamp the
    #! entry with the current parse position.
    [ (>>ans) ] keep pos get swap (>>pos) ;
: stop-growth? ( ast m -- ? )
    #! True when the latest evaluation failed, or when the
    #! current position is not past the one stored in m.
    >r failed? r> pos>> pos get >= or ;
: setup-growth ( h p -- )
    #! Move the parse position to p and refill the head's
    #! eval-set with a copy of its involved-set.
    pos set [ involved-set>> clone ] keep (>>eval-set) ;
2008-04-10 22:46:11 -04:00
: (grow-lr) ( h p r m -- )
#! One growth step, repeated until stop-growth? says so:
#! reset pos and the head's eval-set, evaluate rule r again,
#! then either give up (dropping everything) or store the new
#! answer in m and recurse with the same h p r m.
>r >r [ setup-growth ] 2keep r> r>
>r dup eval-rule r> swap
dup pick stop-growth? [
4drop drop
2008-03-28 08:17:54 -04:00
] [
2008-04-10 22:46:11 -04:00
over update-m
(grow-lr)
2008-03-28 23:11:08 -04:00
] if ; inline
2008-03-28 08:17:54 -04:00
2008-04-10 22:46:11 -04:00
: grow-lr ( h p r m -- ast )
#! Register head h under position p in 'heads', run (grow-lr),
#! then delete the entry for p again. Afterwards pos is set
#! from m's pos and m's answer is returned.
>r >r [ heads get set-at ] 2keep r> r>
pick over >r >r (grow-lr) r> r>
swap heads get delete-at
dup pos>> pos set ans>>
2008-03-28 23:11:08 -04:00
; inline
2008-03-28 08:17:54 -04:00
! Walk the chain of left-recursion records starting at s and
! following next>>, stopping at the first record whose head is
! already l's head. Each record visited before that is given l's
! head, and its rule is appended to that head's involved-set.
:: (setup-lr) ( r l s -- )
s head>> l head>> eq? [
l head>> s (>>head)
l head>> [ s rule>> suffix ] change-involved-set drop
r l s next>> (setup-lr)
] unless ;
! Prepare left-recursion record l for rule r: if l has no head yet,
! give it a new one for r with empty involved and eval sets, then
! run (setup-lr) over the records on lrstack.
:: setup-lr ( r l -- )
l head>> [
r V{ } clone V{ } clone <head> l (>>head)
] unless
r l lrstack get (setup-lr) ;
! Pick the answer for rule r when m's ans is a left-recursion record
! that has a head. If r is not the head's rule, the record's seed is
! returned. If it is, the seed replaces the record as m's answer;
! a failed seed yields fail, any other seed is grown with grow-lr.
:: lr-answer ( r p m -- ast )
[let* |
h [ m ans>> head>> ]
|
h rule>> r eq? [
m ans>> seed>> m (>>ans)
2008-04-08 20:52:49 -04:00
m ans>> failed? [
fail
] [
2008-04-08 20:52:49 -04:00
h p r m grow-lr
] if
] [
m ans>> seed>>
] if
2008-03-28 23:11:08 -04:00
] ; inline
! Look up the memo entry m for rule r at position p, taking a head h
! registered for p in 'heads' into account.
! - No head at p: m is returned (f when nothing is memoized).
! - m exists and r is neither in h's involved-set nor h's rule:
!   a new failing entry for p is returned.
! - r is in h's eval-set: r is removed from the eval-set, evaluated
!   once, and m is updated with the result before being returned.
! - Otherwise m is returned unchanged.
! NOTE(review): Warth et al.'s RECALL tests for a *missing* memo
! entry in the second case; here the test requires m to be present.
! Confirm whether the difference is intentional.
:: recall ( r p -- memo-entry )
[let* |
m [ p r memo ]
h [ p heads get at ]
|
h [
m r h involved-set>> h rule>> suffix member? not and [
fail p <memo-entry>
] [
r h eval-set>> member? [
h [ r swap remove ] change-eval-set drop
r eval-rule
2008-04-08 20:52:49 -04:00
m update-m
m
] [
m
] if
] if
] [
m
] if
2008-03-28 23:11:08 -04:00
] ; inline
2008-03-28 06:20:43 -04:00
! Evaluate rule r at position p when recall returned no entry.
! A left-recursion record with seed 'fail' is put on lrstack and
! stored as the memo answer for ( p, r ) before r is evaluated, so a
! nested application of r at p finds it through the memo table.
! After the evaluation the record is taken off lrstack and m's pos
! is set to the current position. If the record acquired a head in
! the meantime, the result becomes its seed and lr-answer picks the
! final answer; otherwise the result is stored in m and returned.
:: apply-non-memo-rule ( r p -- ast )
2008-03-27 22:51:18 -04:00
[let* |
lr [ fail r f lrstack get <left-recursion> ]
m [ lr lrstack set lr p <memo-entry> dup p r set-memo ]
2008-03-28 06:20:43 -04:00
ans [ r eval-rule ]
2008-03-27 22:51:18 -04:00
|
lrstack get next>> lrstack set
2008-03-28 07:49:39 -04:00
pos get m (>>pos)
lr head>> [
ans lr (>>seed)
r p m lr-answer
2008-03-28 08:17:54 -04:00
] [
ans m (>>ans)
2008-03-28 08:17:54 -04:00
ans
] if
2008-03-28 23:11:08 -04:00
] ; inline
2008-03-27 22:51:18 -04:00
2008-04-04 09:07:17 -04:00
: apply-memo-rule ( r m -- ast )
#! Answer rule r from memo entry m. pos is set to the entry's
#! pos. If the stored answer is a left-recursion record,
#! setup-lr is run on it and its seed is returned; any other
#! stored answer is returned as is.
[ ans>> ] [ pos>> ] bi pos set
dup left-recursion? [
[ setup-lr ] keep seed>>
] [
2008-04-04 09:07:17 -04:00
nip
] if ;
2008-03-27 22:51:18 -04:00
2008-04-04 08:56:37 -04:00
: apply-rule ( r p -- ast )
    #! Apply rule r at position p through the memo table: use
    #! the entry recall returns, or evaluate the rule afresh
    #! when recall returns f.
    2dup recall dup [
        nip apply-memo-rule
    ] [
        drop apply-non-memo-rule
    ] if ; inline
2008-03-27 22:51:18 -04:00
2008-03-28 06:20:43 -04:00
: with-packrat ( input quot -- result )
#! Run the quotation with a packrat cache active. A namespace
#! is built holding the input, pos 0, lrstack f and empty
#! 'heads' and 'packrat' hashtables, and the quotation is run
#! with that namespace bound.
swap [
input set
0 pos set
f lrstack set
H{ } clone heads set
2008-03-28 06:20:43 -04:00
H{ } clone packrat set
2008-03-28 23:24:13 -04:00
] H{ } make-assoc swap bind ; inline
2008-03-27 22:51:18 -04:00
2008-03-28 06:20:43 -04:00
! Each parser class implements this to build the quotation that
! parser-body installs as the body of a generated word.
GENERIC: (compile) ( parser -- quot )
2008-03-27 00:45:59 -04:00
2008-04-08 20:52:49 -04:00
: execute-parser ( word -- result )
    #! Apply the rule word at the current position. A failure
    #! becomes f; a success becomes a parse-result pairing the
    #! input left at the new position with the ast.
    pos get apply-rule dup failed? not [
        >r input-slice r> <parse-result>
    ] [
        drop f
    ] if ; inline
2008-03-26 00:38:30 -04:00
2008-04-08 20:52:49 -04:00
: parser-body ( parser -- quot )
#! Return the body of the word that is the compiled version
#! of the parser. The parser's (compile) quotation is installed
#! in a separate gensym word, which gets the parser as its
#! "peg" property; the quotation returned here runs that word
#! through execute-parser.
2008-04-08 20:52:49 -04:00
gensym 2dup swap (compile) 0 1 <effect> define-declared swap dupd "peg" set-word-prop
[ execute-parser ] curry ;
2008-03-20 10:05:21 -04:00
: compiled-parser ( parser -- word )
#! Look to see if the given parser has been compiled.
2008-03-20 10:05:21 -04:00
#! If not, compile it to a temporary word, cache it,
#! and return it. Otherwise return the existing one.
2008-03-27 21:10:33 -04:00
#! Circular parsers are supported by getting the word
#! name and storing it in the cache, before compiling,
#! so it is picked up when re-entered.
#! The new word's body comes from parser-body, and the word
#! gets the parser as its "peg" property.
dup compiled>> [
nip
] [
gensym tuck >>compiled 2dup parser-body 0 1 <effect> define-declared dupd "peg" set-word-prop
] if* ;
2008-03-20 10:05:21 -04:00
2008-04-14 06:42:45 -04:00
! Hashtable filled while compiling: maps the placeholder word of
! each delay parser to the quotation that produces its parser.
SYMBOL: delayed
: fixup-delayed ( -- )
#! Work through all delayed parsers and recompile their
#! words to have the correct bodies. Each quotation is called
#! to obtain the parser, and the placeholder word is defined
#! to execute that parser's compiled word.
delayed get [
call compiled-parser 1quotation 0 1 <effect> define-declared
] assoc-each ;
: compile ( parser -- word )
#! Compile the parser into a word inside one compilation unit
#! and return that word. 'delayed' is bound to a fresh
#! hashtable for the duration, and fixup-delayed is run once
#! the parser itself has been compiled.
2008-04-14 06:42:45 -04:00
[
H{ } clone delayed [
compiled-parser fixup-delayed
] with-variable
] with-compilation-unit ;
2008-03-20 10:05:21 -04:00
2008-03-28 23:24:13 -04:00
: compiled-parse ( state word -- result )
#! Execute a compiled parser word inside with-packrat, with
#! 'state' as the input to parse.
swap [ execute ] with-packrat ; inline
2008-04-06 21:02:56 -04:00
: parse ( input parser -- result )
#! Parse input with the parser. The parser may already be a
#! compiled word; anything else is compiled first.
2008-03-28 23:24:13 -04:00
dup word? [ compile ] unless compiled-parse ;
2007-11-28 18:35:45 -05:00
<PRIVATE
! Global counter holding the id the next parser will receive.
SYMBOL: id

: next-id ( -- n )
    #! Return the next unique id for a parser and advance the
    #! counter. An unset counter counts as 0.
    id get-global 0 or dup 1+ id set-global ;
: init-parser ( parser -- parser )
#! Set the delegate for the parser. Equivalent parsers
#! get a delegate with the same id. A clone of the parser,
#! taken before the delegate is set, is the key looked up in
#! the delegates cache; a new <parser> with a fresh id is
#! created only when no equal key is cached.
dup clone delegates [
drop next-id f <parser>
] cache over set-delegate ;
2007-11-19 22:36:38 -05:00
! Parser matching a fixed string, held in 'symbol'.
TUPLE: token-parser symbol ;

: parse-token ( input string -- result )
    #! Succeeds when input begins with string: the result holds
    #! the input after the string, and the string as ast.
    #! Otherwise the result is f.
    [ ?head-slice ] keep swap [ <parse-result> ] [ 2drop f ] if ;

M: token-parser (compile) ( parser -- quot )
    #! The compiled body tries the token against the input at
    #! the current position.
    symbol>> '[ input-slice , parse-token ] ;
2008-03-28 06:20:43 -04:00
! Parser accepting one element for which 'quot' returns true.
TUPLE: satisfy-parser quot ;

: parse-satisfy ( input quot -- result )
    #! Empty input fails. Otherwise quot is called on the first
    #! element; if it returns true the result holds the rest of
    #! the input and that element as ast, else the result is f.
    over empty? [
        2drop f
    ] [
        >r unclip-slice dup r> call [
            <parse-result>
        ] [
            2drop f
        ] if
    ] if ; inline

M: satisfy-parser (compile) ( parser -- quot )
    #! The compiled body tests the element at the current
    #! position with the stored quotation.
    quot>> '[ input-slice , parse-satisfy ] ;
2007-11-26 21:08:16 -05:00
2007-11-19 22:36:38 -05:00
! Parser accepting one element that lies between 'min' and 'max'.
TUPLE: range-parser min max ;

: parse-range ( input min max -- result )
    #! Empty input fails. Otherwise the first element is tested
    #! with between?; on success the result holds the rest of
    #! the input and that element as ast, else the result is f.
    pick empty? [
        3drop f
    ] [
        >r >r dup first r> r> between? [
            unclip-slice <parse-result>
        ] [
            drop f
        ] if
    ] if ;

M: range-parser (compile) ( parser -- quot )
    #! The compiled body tests the element at the current
    #! position against the stored bounds.
    [ min>> ] [ max>> ] bi '[ input-slice , , parse-range ] ;
2007-11-19 22:36:38 -05:00
! Parser applying each parser in 'parsers', one after the other.
TUPLE: seq-parser parsers ;
2008-04-05 00:51:42 -04:00
: ignore? ( ast -- bool )
#! True when ast is the 'ignore' marker.
ignore = ;
2008-04-05 00:25:04 -04:00
2008-04-05 00:51:42 -04:00
: calc-seq-result ( prev-result current-result -- next-result )
#! Fold one element's result into the result accumulated so
#! far. When current-result is f the whole sequence fails (f).
#! Otherwise prev-result takes over current-result's remaining
#! input, and current-result's ast is pushed onto prev-result's
#! ast unless it is 'ignore'. prev-result is modified in place
#! and returned.
[
2008-04-05 00:51:42 -04:00
[ remaining>> swap (>>remaining) ] 2keep
ast>> dup ignore? [
drop
] [
2008-04-05 00:51:42 -04:00
swap [ ast>> push ] keep
] if
] [
drop f
] if* ;
: parse-seq-element ( result quot -- result )
#! Run one element of a sequence. If the result so far is
#! already f the quotation is not called and f is returned;
#! otherwise the quotation is called and its outcome is merged
#! into the result with calc-seq-result.
over [
call calc-seq-result
] [
2drop f
] if ; inline
2007-11-19 22:36:38 -05:00
2008-03-20 10:05:21 -04:00
M: seq-parser (compile) ( parser -- quot )
#! The generated quotation starts from a parse-result holding
#! the current input and an empty vector as ast, then, for each
#! parser in turn, applies its compiled word through
#! parse-seq-element.
[
2008-03-28 06:20:43 -04:00
[ input-slice V{ } clone <parse-result> ] %
2008-04-05 00:51:42 -04:00
parsers>> [ compiled-parser 1quotation , \ parse-seq-element , ] each
] [ ] make ;
2007-11-19 22:36:38 -05:00
2007-11-19 23:58:11 -05:00
! Parser trying each parser in 'parsers' until one succeeds.
TUPLE: choice-parser parsers ;
2008-03-20 10:05:21 -04:00
M: choice-parser (compile) ( parser -- quot )
#! The generated quotation starts with f and chains the
#! compiled words with unless*, so each alternative is run only
#! while the result so far is f; the first non-f result is the
#! outcome.
2008-03-28 06:20:43 -04:00
[
f ,
2008-04-05 00:54:18 -04:00
parsers>> [ compiled-parser 1quotation , \ unless* , ] each
] [ ] make ;
2007-11-19 23:58:11 -05:00
2007-11-20 21:01:44 -05:00
! Parser applying 'p1' repeatedly for as long as it succeeds.
TUPLE: repeat0-parser p1 ;
: (repeat) ( quot result -- result )
#! Call quot until it returns f. After every success the
#! accumulated result takes over the new remaining input and
#! the new ast is pushed onto its ast. The accumulated result
#! is returned once quot fails.
2008-03-28 06:20:43 -04:00
over call [
2008-03-25 23:08:14 -04:00
[ remaining>> swap (>>remaining) ] 2keep
ast>> swap [ ast>> push ] keep
(repeat)
] [
2007-11-20 21:01:44 -05:00
nip
] if* ; inline
2007-11-20 21:01:44 -05:00
2008-03-20 10:05:21 -04:00
M: repeat0-parser (compile) ( parser -- quot )
#! The accumulator starts as a parse-result holding the
#! current input and an empty vector, so the result is a
#! parse-result even when p1 never matches.
p1>> compiled-parser 1quotation '[
input-slice V{ } clone <parse-result> , swap (repeat)
] ;
2007-11-20 21:01:44 -05:00
! Parser applying 'p1' repeatedly; at least one match is required.
TUPLE: repeat1-parser p1 ;

: repeat1-empty-check ( result -- result )
    #! A repetition that collected nothing counts as a failure:
    #! f stays f, and a result whose ast is empty becomes f.
    dup [ dup ast>> empty? [ drop f ] when ] when ;

M: repeat1-parser (compile) ( parser -- quot )
    #! Same loop as repeat0, followed by the empty check.
    p1>> compiled-parser 1quotation '[
        input-slice V{ } clone <parse-result> , swap (repeat) repeat1-empty-check
    ] ;
2007-11-20 21:01:44 -05:00
2007-11-20 21:50:47 -05:00
! Parser that succeeds whether or not 'p1' matches.
TUPLE: optional-parser p1 ;
2008-04-05 01:05:09 -04:00
: check-optional ( result -- result )
#! Replace a failure (f) with a parse-result holding the
#! current input-slice and f as ast; a success passes through.
[ input-slice f <parse-result> ] unless* ;
2008-03-20 10:05:21 -04:00
M: optional-parser (compile) ( parser -- quot )
#! Run p1's compiled word, then apply check-optional.
2008-04-05 01:05:09 -04:00
p1>> compiled-parser 1quotation '[ @ check-optional ] ;
2007-11-20 21:50:47 -05:00
2008-03-30 23:34:59 -04:00
! Parser that succeeds when 'p1' matches and 'quot', called on the
! resulting ast, returns true.
TUPLE: semantic-parser p1 quot ;
2008-04-05 01:19:11 -04:00
: check-semantic ( result quot -- result )
#! A failed result (f) passes through without calling quot.
#! Otherwise quot is called on the ast; the result is kept if
#! quot returns true and replaced by f if not.
over [
over ast>> swap call [ drop f ] unless
] [
drop
] if ; inline
2008-03-30 23:34:59 -04:00
M: semantic-parser (compile) ( parser -- quot )
#! Run p1's compiled word, then apply check-semantic with the
#! stored quotation.
2008-04-05 01:19:11 -04:00
[ p1>> compiled-parser 1quotation ] [ quot>> ] bi
'[ @ , check-semantic ] ;
2008-03-30 23:34:59 -04:00
2007-11-20 22:06:02 -05:00
! Parser that succeeds when 'p1' matches, but hands back the input
! as it was before p1 ran.
TUPLE: ensure-parser p1 ;
2008-04-05 01:30:11 -04:00
: check-ensure ( old-input result -- result )
#! If result is true, answer a parse-result whose remaining
#! input is old-input and whose ast is 'ignore'; else f.
[ ignore <parse-result> ] [ drop f ] if ;
2008-03-20 10:05:21 -04:00
M: ensure-parser (compile) ( parser -- quot )
#! The input slice is captured before p1's compiled word runs
#! and passed to check-ensure as old-input.
2008-04-05 01:30:11 -04:00
p1>> compiled-parser 1quotation '[ input-slice @ check-ensure ] ;
2007-11-20 22:06:02 -05:00
2007-11-20 22:11:49 -05:00
! Parser that succeeds when 'p1' does NOT match, handing back the
! input as it was before p1 ran.
TUPLE: ensure-not-parser p1 ;
2008-04-05 01:30:11 -04:00
: check-ensure-not ( old-input result -- result )
#! If result is true, answer f; else answer a parse-result
#! whose remaining input is old-input and whose ast is 'ignore'.
[ drop f ] [ ignore <parse-result> ] if ;
2008-03-20 10:05:21 -04:00
M: ensure-not-parser (compile) ( parser -- quot )
#! The input slice is captured before p1's compiled word runs
#! and passed to check-ensure-not as old-input.
2008-04-05 01:30:11 -04:00
p1>> compiled-parser 1quotation '[ input-slice @ check-ensure-not ] ;
2007-11-20 22:11:49 -05:00
2007-11-20 22:21:23 -05:00
! Parser that runs 'p1' and replaces the ast of a successful result
! with whatever 'quot' returns for it.
TUPLE: action-parser p1 quot ;
2008-04-05 01:33:50 -04:00
: check-action ( result quot -- result )
#! A failed result (f) passes through without calling quot.
#! Otherwise quot is called on the ast and its value is stored
#! back as the result's ast.
over [
over ast>> swap call >>ast
] [
drop
] if ; inline
2008-03-20 10:05:21 -04:00
M: action-parser (compile) ( parser -- quot )
#! Run p1's compiled word, then apply check-action with the
#! stored quotation.
2008-04-05 01:36:17 -04:00
[ p1>> compiled-parser 1quotation ] [ quot>> ] bi '[ @ , check-action ] ;
2007-11-20 22:21:23 -05:00
2007-11-26 21:36:26 -05:00
: left-trim-slice ( string -- string )
    #! Skip leading whitespace: while the first element is
    #! blank, continue with the rest as a slice. A string that
    #! is empty or does not start with a blank is returned as is.
    dup empty? not [
        dup first blank? [ rest-slice left-trim-slice ] when
    ] when ;
! Parser that skips leading whitespace and then applies 'p1'.
TUPLE: sp-parser p1 ;
2008-03-20 10:05:21 -04:00
M: sp-parser (compile) ( parser -- quot )
#! The generated quotation trims blanks off the front of the
#! current input, sets pos to where the trimmed slice starts,
#! then runs p1's compiled word.
2008-04-05 01:36:17 -04:00
p1>> compiled-parser 1quotation '[
input-slice left-trim-slice input-from pos set @
] ;
2007-11-26 21:36:26 -05:00
2007-11-26 22:16:21 -05:00
! Parser whose real parser is produced by calling 'quot'.
TUPLE: delay-parser quot ;
2008-03-20 10:05:21 -04:00
M: delay-parser (compile) ( parser -- quot )
#! The body is a call to a placeholder gensym word. The word
#! is registered in 'delayed' together with the quotation;
#! fixup-delayed later defines it to run the parser that the
#! quotation returns.
2008-04-14 06:42:45 -04:00
quot>> gensym [ delayed get set-at ] keep 1quotation ;
2007-11-26 22:16:21 -05:00
2008-03-29 00:42:21 -04:00
! Parser whose real parser is produced by calling 'quot' while
! compiling.
TUPLE: box-parser quot ;
M: box-parser (compile) ( parser -- quot )
#! Calls the quotation at compile time
#! to produce the parser to be compiled.
#! This differs from 'delay' which calls
2008-03-30 07:53:33 -04:00
#! it later, from fixup-delayed. Due to using the runtime
#! environment at compile time, this parser
#! must not be cached, so its 'compiled' slot is
#! reset to f before the quotation is called.
f >>compiled quot>> call compiled-parser 1quotation ;
2008-03-29 00:42:21 -04:00
2007-11-26 18:22:33 -05:00
PRIVATE>
: token ( string -- parser )
#! Parser that matches the given string at the current position.
token-parser boa init-parser ;
2007-11-26 18:22:33 -05:00
: satisfy ( quot -- parser )
#! Parser that accepts one element if quot returns true for it.
satisfy-parser boa init-parser ;
2007-11-26 21:08:16 -05:00
: range ( min max -- parser )
#! Parser that accepts one element tested with between? against
#! min and max.
range-parser boa init-parser ;
2007-11-26 18:22:33 -05:00
: seq ( seq -- parser )
#! Parser that applies every parser of the sequence in order.
seq-parser boa init-parser ;
2007-11-26 18:22:33 -05:00
: 2seq ( parser1 parser2 -- parser )
#! seq of two parsers.
2008-02-26 16:17:17 -05:00
2array seq ;
: 3seq ( parser1 parser2 parser3 -- parser )
#! seq of three parsers.
2008-02-26 16:17:17 -05:00
3array seq ;
: 4seq ( parser1 parser2 parser3 parser4 -- parser )
#! seq of four parsers.
4array seq ;
2008-03-27 18:30:46 -04:00
: seq* ( quot -- parser )
    #! Run quot inside make to collect parsers into an array,
    #! then build a seq parser from them.
    { } make seq ; inline
: choice ( seq -- parser )
#! Parser that tries the parsers of the sequence in order and
#! answers with the first success.
choice-parser boa init-parser ;
2007-11-26 18:22:33 -05:00
: 2choice ( parser1 parser2 -- parser )
#! choice between two parsers.
2array choice ;
: 3choice ( parser1 parser2 parser3 -- parser )
#! choice between three parsers.
3array choice ;
: 4choice ( parser1 parser2 parser3 parser4 -- parser )
#! choice between four parsers.
4array choice ;
2008-03-27 18:30:46 -04:00
: choice* ( quot -- parser )
    #! Run quot inside make to collect parsers into an array,
    #! then build a choice parser from them.
    { } make choice ; inline
: repeat0 ( parser -- parser )
#! Parser that applies the given parser for as long as it
#! succeeds, collecting the asts in a vector.
repeat0-parser boa init-parser ;
2007-11-26 18:22:33 -05:00
: repeat1 ( parser -- parser )
#! Like repeat0, but fails when nothing was collected.
repeat1-parser boa init-parser ;
2007-11-26 18:22:33 -05:00
: optional ( parser -- parser )
#! Parser that succeeds with an ast of f when the given
#! parser fails.
optional-parser boa init-parser ;
2008-03-30 23:34:59 -04:00
: semantic ( parser quot -- parser )
#! Parser that succeeds only if quot returns true for the ast
#! produced by the given parser.
semantic-parser boa init-parser ;
2007-11-26 18:22:33 -05:00
: ensure ( parser -- parser )
#! Parser that succeeds if the given parser matches; its
#! result keeps the earlier input and has 'ignore' as ast.
ensure-parser boa init-parser ;
2007-11-26 18:22:33 -05:00
: ensure-not ( parser -- parser )
#! Parser that succeeds if the given parser does not match;
#! its result keeps the earlier input and has 'ignore' as ast.
ensure-not-parser boa init-parser ;
2007-11-26 18:22:33 -05:00
: action ( parser quot -- parser )
#! Parser that replaces the ast of a successful result with
#! the value quot returns for it.
action-parser boa init-parser ;
2007-11-26 21:36:26 -05:00
: sp ( parser -- parser )
#! Parser that skips leading whitespace before applying the
#! given parser.
sp-parser boa init-parser ;
2007-11-26 21:45:00 -05:00
: hide ( parser -- parser )
#! Wrap the parser in an action that replaces its ast with
#! 'ignore'.
2007-11-26 21:45:00 -05:00
[ drop ignore ] action ;
2007-11-26 22:16:21 -05:00
: delay ( quot -- parser )
#! Parser whose real parser is obtained by calling quot; see
#! the (compile) method of delay-parser.
delay-parser boa init-parser ;
2008-03-03 17:57:30 -05:00
2008-03-29 00:42:21 -04:00
: box ( quot -- parser )
2008-03-30 07:53:33 -04:00
#! Because a box has its quotation run at compile time
#! it must always have a new parser delegate created,
#! not a cached one. This is because the same box,
#! compiled twice can have a different compiled word
#! due to running at compile time.
#! Why the [ ] action at the end? Box parsers don't get
#! memoized during parsing due to all box parsers being
#! unique. This breaks left recursion detection during the
#! parse. The action adds an indirection with a parser type
#! that gets memoized and fixes this. Need to rethink how
#! to fix boxes so this isn't needed...
box-parser boa next-id f <parser> over set-delegate [ ] action ;
2008-03-29 00:42:21 -04:00
2008-06-18 01:35:19 -04:00
! Thrown by words defined with PEG: when the input cannot be parsed.
! 'input' is the input given, 'word' the word that was called.
ERROR: parse-failed input word ;
! Report the error: name the word, then print the offending input.
M: parse-failed error.
"The " write dup word>> pprint " word could not parse the following input:" print nl
input>> . ;
2008-03-03 17:57:30 -05:00
! Parsing word. Reads a word and a definition with (:), then appends
! code to the parse accumulator which, inside a compilation unit,
! calls the definition to obtain a parser, compiles it, and defines
! the word. The defined word runs the compiled parser on its input
! and returns the ast, or throws parse-failed with the input and the
! word when the parse returns f.
: PEG:
2008-06-18 01:35:19 -04:00
(:)
2008-06-18 02:18:39 -04:00
[let | def [ ] word [ ] |
2008-03-03 17:57:30 -05:00
[
2008-06-18 01:35:19 -04:00
[
2008-06-18 02:18:39 -04:00
[let | compiled-def [ def call compile ] |
2008-06-18 01:58:29 -04:00
[
dup compiled-def compiled-parse
[ ast>> ] [ word parse-failed ] ?if
]
word swap define
2008-06-18 01:35:19 -04:00
]
] with-compilation-unit
] over push-all
] ; parsing