First cut at peg style packrat parser
parent
0714bb7a86
commit
aacf88a72d
|
@ -0,0 +1,11 @@
|
|||
! Copyright (C) 2007 Chris Double.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: help.markup help.syntax peg ;
|
||||
|
||||
! Help entry for the token parser generator ( string -- parser ).
! NOTE(review): the expected-output string of the example is still the
! placeholder "result-here"; replace it with the real printed result.
HELP: token
{ $values
  { "string" "a string" }
  { "parser" "a parser" } }
{ $description
  "A parser generator that returns a parser that matches the given string." }
{ $example "\"begin foo end\" 0 <parse-state> \"begin\" token parse" "result-here" } ;
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
! Copyright (C) 2007 Chris Double.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
!
|
||||
USING: kernel tools.test strings namespaces arrays peg ;
|
||||
IN: temporary
|
||||
|
||||
! After resetting the global counter to 0, three successive calls to
! get-next-id yield 0, 1 and 2.
{ 0 1 2 }
[ 0 next-id set-global get-next-id get-next-id get-next-id ]
unit-test
|
||||
|
||||
! state-tail with n = 0 leaves the remaining input unchanged.
{ "0123456789" }
[ "0123456789" 0 <parse-state> 0 state-tail parse-state-input >string ]
unit-test

! A state created at index 5 only exposes the input from that index on.
{ "56789" }
[ "0123456789" 5 <parse-state> 0 state-tail parse-state-input >string ]
unit-test

! state-tail drops n further elements relative to the current state.
{ "789" }
[ "0123456789" 5 <parse-state> 2 state-tail parse-state-input >string ]
unit-test
|
||||
|
||||
! A token parser fails (returns f) when the input does not start with
! the token.
{ f }
[ "endbegin" 0 <parse-state> "begin" token parse ]
unit-test

! On success the matched text and the ast are both the token, and the
! remaining state holds the rest of the input.
{ "begin" "begin" "end" }
[
  "beginend" 0 <parse-state> "begin" token parse
  { parse-result-matched parse-result-ast parse-result-remaining } get-slots
  parse-state-input >string
]
unit-test
|
||||
|
||||
! A range parser fails on empty input.
{ f }
[ "" 0 <parse-state> CHAR: a CHAR: z range parse ]
unit-test

! A range parser fails when the first character is outside the range.
{ f }
[ "1bcd" 0 <parse-state> CHAR: a CHAR: z range parse ]
unit-test

! The lower bound of the range is accepted; the ast is the character.
{ CHAR: a }
[ "abcd" 0 <parse-state> CHAR: a CHAR: z range parse parse-result-ast ]
unit-test

! The upper bound of the range is accepted as well.
{ CHAR: z }
[ "zbcd" 0 <parse-state> CHAR: a CHAR: z range parse parse-result-ast ]
unit-test
|
||||
|
||||
! A sequence parser fails as soon as one of its parsers fails.
{ f }
[ "bad" 0 <parse-state> "a" token "b" token 2array seq parse ]
unit-test

! The matched text of a sequence is the concatenation of the matches of
! its parsers.
{ "go" }
[
  "good" 0 <parse-state> "g" token "o" token 2array seq parse
  parse-result-matched
]
unit-test
|
|
@ -0,0 +1,92 @@
|
|||
! Copyright (C) 2007 Chris Double.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel sequences strings namespaces math assocs combinators.lib ;
|
||||
IN: peg
|
||||
|
||||
! State threaded through a parse: the unconsumed input (a slice of the
! original sequence) and a cache slot.
TUPLE: parse-state input cache ;

! Build a parse state over the part of input that starts at index.
! The cache slot is left as f.
: <parse-state> ( input index -- state )
  tail-slice f parse-state construct-boa ;
|
||||
|
||||
! Look up pid in the cache of state. When an entry is found, it is
! indexed with the start offset of the state's input slice and that
! element is returned; when there is no entry the result is f.
: get-cached ( pid state -- result )
  [ parse-state-cache at ] keep over [
    parse-state-input slice-from swap nth
  ] [
    2drop f
  ] if ;
|
||||
|
||||
! Return a new parse state whose input is the current input with its
! first n elements dropped. The cache of the original state is carried
! over to the new state.
: state-tail ( state n -- state )
  over parse-state-input swap tail-slice
  swap parse-state-cache
  parse-state construct-boa ;
|
||||
|
||||
! Outcome of a successful parse: the parse state that remains, the
! matched input, and the ast produced by the parser.
TUPLE: parse-result remaining matched ast ;

! Construct a parse-result from its three slot values, given in
! declaration order.
: <parse-result> ( remaining matched ast -- parse-result )
  {
    set-parse-result-remaining
    set-parse-result-matched
    set-parse-result-ast
  } parse-result construct ;
|
||||
|
||||
! Global variable holding the next parser id to hand out.
SYMBOL: next-id

! Return the current value of next-id (0 when it is unset) and store
! that value plus one back into the global.
: get-next-id ( -- number )
  next-id get-global [ 0 ] unless*
  dup 1 + next-id set-global ;
|
||||
|
||||
! Base tuple that concrete parsers delegate to; carries the parser id.
TUPLE: parser id ;

! Give a freshly constructed concrete parser a new parser tuple (with
! the next free id) as its delegate, and return the concrete parser.
: init-parser ( parser -- parser )
  dup get-next-id { set-parser-id } parser construct
  swap set-delegate ;
|
||||
|
||||
! Run parser against state. The methods in this file return a
! parse-result on success and f on failure.
GENERIC: parse ( state parser -- result )
|
||||
|
||||
! Parser that matches a fixed sequence, held in the symbol slot.
TUPLE: token-parser symbol ;

! Succeeds when the remaining input begins with the token. The result
! then has the state advanced past the token as its remaining slot and
! the token itself as both matched and ast. Otherwise the result is f.
M: token-parser parse ( state parser -- result )
  token-parser-symbol over parse-state-input over head? [
    [ length state-tail ] keep dup <parse-result>
  ] [
    2drop f
  ] if ;
|
||||
|
||||
! Create a parser that matches the given string at the start of the
! remaining input.
: token ( string -- parser )
  { set-token-parser-symbol } token-parser construct init-parser ;
|
||||
|
||||
! Parser that matches a single element lying between min and max
! (both inclusive, as tested with between?).
TUPLE: range-parser min max ;

! Fails with f on empty input or when the first element is outside the
! range. On success the remaining state is advanced by one element, the
! ast is the matched character, and matched is a one-character string.
! matched has to be a string so that do-seq-parser can join it to the
! matches of other parsers with append, as it does for token parsers.
M: range-parser parse ( state parser -- result )
  over parse-state-input empty? [
    2drop f
  ] [
    0 pick parse-state-input nth dup rot
    { range-parser-min range-parser-max } get-slots between? [
      [ 1 state-tail ] dip [ 1string ] keep <parse-result>
    ] [
      2drop f
    ] if
  ] if ;
|
||||
|
||||
! Create a parser that matches one element between min and max.
: range ( min max -- parser )
  { set-range-parser-min set-range-parser-max } range-parser construct
  init-parser ;
|
||||
|
||||
! Parser that runs the parsers held in its parsers slot one after the
! other; its parse method fails with f as soon as one of them fails.
TUPLE: seq-parser parsers ;
|
||||
|
||||
! Run parser on the remaining state of the accumulated result. On
! success the accumulated result is updated in place and returned: its
! remaining state is replaced by the new one, the new ast is pushed onto
! its ast vector, and the new match is appended to its matched text.
! On failure the accumulated result is discarded and f is returned.
: do-seq-parser ( result parser -- result )
  over parse-result-remaining swap parse dup [
    2dup parse-result-remaining swap set-parse-result-remaining
    2dup parse-result-ast swap parse-result-ast push
    parse-result-matched over parse-result-matched swap append
    over set-parse-result-matched
  ] [
    2drop f
  ] if ;
|
||||
|
||||
! Apply the parsers in order to the accumulated result. Once the
! accumulated result has become f the remaining parsers are skipped and
! f is returned.
: (seq-parser) ( result parsers -- result )
  [ over [ do-seq-parser ] [ drop ] if ] each ;
|
||||
|
||||
! Start from a result that has consumed nothing (the incoming state, an
! empty matched string, a fresh empty ast vector) and feed it through
! every parser of the sequence.
M: seq-parser parse ( state parser -- result )
  seq-parser-parsers >r
  "" V{ } clone <parse-result>
  r> (seq-parser) ;
|
||||
|
||||
! Create a parser that runs the given parsers one after the other.
: seq ( seq -- parser )
  { set-seq-parser-parsers } seq-parser construct init-parser ;
|
Loading…
Reference in New Issue