First cut at peg style packrat parser

release
Chris Double 2007-11-20 16:36:38 +13:00
parent 0714bb7a86
commit aacf88a72d
3 changed files with 158 additions and 0 deletions

11
extra/peg/peg-docs.factor Normal file
View File

@ -0,0 +1,11 @@
! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license.
USING: help.markup help.syntax peg ;
HELP: token
{ $values
  { "string" "a string" }
  { "parser" "a parser" } }
{ $description
  "A parser generator that returns a parser that matches the given string. The parser is applied with " { $link parse } " to a parse state created by " { $link <parse-state> } ". On success the result's matched text and AST are both the given string; on failure the result is " { $link f } "." }
{ $example
  "USING: kernel peg prettyprint ;"
  "\"begin foo end\" 0 <parse-state> \"begin\" token parse parse-result-matched ."
  "\"begin\"" } ;

View File

@ -0,0 +1,55 @@
! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license.
!
USING: kernel tools.test strings namespaces arrays peg ;
IN: temporary
! get-next-id: after resetting the global counter to 0, three
! successive calls yield 0, 1 and 2.
{ 0 1 2 } [
    0 next-id set-global
    get-next-id get-next-id get-next-id
] unit-test
! state-tail: a tail of 0 leaves the input untouched.
{ "0123456789" } [
    "0123456789" 0 <parse-state>
    0 state-tail
    parse-state-input >string
] unit-test

! A state created at index 5 already starts at "56789".
{ "56789" } [
    "0123456789" 5 <parse-state>
    0 state-tail
    parse-state-input >string
] unit-test

! state-tail is relative to the current position of the state.
{ "789" } [
    "0123456789" 5 <parse-state>
    2 state-tail
    parse-state-input >string
] unit-test
! token: fails (returns f) when the input does not start with the token.
{ f } [
    "endbegin" 0 <parse-state>
    "begin" token parse
] unit-test

! token: on success both matched and ast are the token string and
! the remaining state holds the rest of the input.
{ "begin" "begin" "end" } [
    "beginend" 0 <parse-state>
    "begin" token parse
    { parse-result-matched parse-result-ast parse-result-remaining } get-slots
    parse-state-input >string
] unit-test
! range: empty input never matches.
{ f } [
    "" 0 <parse-state>
    CHAR: a CHAR: z range parse
] unit-test

! range: a first character outside the range fails.
{ f } [
    "1bcd" 0 <parse-state>
    CHAR: a CHAR: z range parse
] unit-test

! range: the lower bound is inclusive.
{ CHAR: a } [
    "abcd" 0 <parse-state>
    CHAR: a CHAR: z range parse
    parse-result-ast
] unit-test

! range: the upper bound is inclusive.
{ CHAR: z } [
    "zbcd" 0 <parse-state>
    CHAR: a CHAR: z range parse
    parse-result-ast
] unit-test
! seq: fails as soon as one of the parsers fails.
{ f } [
    "bad" 0 <parse-state>
    "a" token "b" token 2array seq parse
] unit-test

! seq: the matched text is the concatenation of each parser's match.
{ "go" } [
    "good" 0 <parse-state>
    "g" token "o" token 2array seq parse
    parse-result-matched
] unit-test

92
extra/peg/peg.factor Normal file
View File

@ -0,0 +1,92 @@
! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences strings namespaces math assocs combinators.lib ;
IN: peg
! A position in the text being parsed.
!   input - slice of the original input from the current index onwards
!   cache - left as f by this constructor
TUPLE: parse-state input cache ;

! Create a parse state for 'input' positioned at 'index'.
: <parse-state> ( input index -- state )
    tail-slice f parse-state construct-boa ;
! Look up 'pid' in the cache of 'state'. When an entry exists, return
! its element at the start offset (slice-from) of the state's input
! slice; otherwise return f.
! NOTE(review): presumably 'pid' is a parser id and each cache entry is
! a sequence indexed by input position - nothing visible here
! populates the cache, so confirm against the code that does.
: get-cached ( pid state -- result )
    [ parse-state-cache at ] keep over [
        parse-state-input slice-from swap nth
    ] [
        2drop f
    ] if ;
! Return a new parse state advanced by 'n' elements. The new state
! carries over the cache of the original state and its input is a
! tail slice of the original input.
: state-tail ( state n -- state )
    >r dup parse-state-cache swap parse-state-input r> tail-slice
    { set-parse-state-cache set-parse-state-input } parse-state construct ;
! Outcome of a successful parse.
!   remaining - parse state positioned after the matched input
!   matched   - the input that was matched
!   ast       - the value produced by the parser
TUPLE: parse-result remaining matched ast ;

! Build a parse-result from its three slot values, in slot order.
: <parse-result> ( remaining matched ast -- parse-result )
    parse-result
    construct-boa ;
! Global counter holding the next id to hand out.
SYMBOL: next-id

! Return the current value of the counter (0 when it is unset) and
! store the following number back into the global.
: get-next-id ( -- number )
    next-id get-global [ 0 ] unless*
    dup 1+ next-id set-global ;
! Base tuple that concrete parsers delegate to; 'id' is a number
! obtained from get-next-id.
TUPLE: parser id ;

! Give 'parser' a freshly numbered 'parser' tuple as its delegate and
! return the same object.
: init-parser ( parser -- parser )
    dup get-next-id parser construct-boa swap set-delegate ;
! Apply 'parser' to the parse state 'state'. The methods in this file
! return a parse-result on success and f on failure.
GENERIC: parse ( state parser -- result )
! Parser that matches a fixed sequence ('symbol') at the start of the
! input.
TUPLE: token-parser symbol ;

! Succeeds when the input begins with the symbol. The result's
! remaining state is advanced past the symbol, and both matched and
! ast are the symbol itself. Returns f otherwise.
M: token-parser parse ( state parser -- result )
    token-parser-symbol over parse-state-input over head? [
        [ length state-tail ] keep dup <parse-result>
    ] [
        2drop f
    ] if ;

! Create a parser that matches 'string'.
: token ( string -- parser )
    token-parser construct-boa
    init-parser ;
! Parser that matches a single element lying between 'min' and 'max'
! (both inclusive).
TUPLE: range-parser min max ;

! Fails (f) on empty input or when the first element is outside the
! range. On success the remaining state is advanced by one, 'ast' is
! the matched character and 'matched' is a one-character string.
! 'matched' has to be a string rather than the bare character so that
! it agrees with token-parser and can be concatenated with 'append'
! when this parser is used inside a seq parser.
M: range-parser parse ( state parser -- result )
    over parse-state-input empty? [
        2drop f
    ] [
        ! ( state parser -- state ch ? )
        0 pick parse-state-input nth dup rot
        { range-parser-min range-parser-max } get-slots between? [
            ! ( state ch -- remaining matched ast )
            [ 1 state-tail ] dip dup 1 swap <string> swap <parse-result>
        ] [
            2drop f
        ] if
    ] if ;

! Create a parser matching one element in the inclusive range
! min..max.
: range ( min max -- parser )
    range-parser construct-boa init-parser ;
! Parser that applies each parser in 'parsers' one after another; its
! parse method fails as soon as any of them fails.
TUPLE: seq-parser parsers ;
! Run 'parser' on the remaining state of the accumulated 'result'.
! On success 'result' is updated in place and returned:
!   - its remaining state becomes that of the new result,
!   - the new ast is pushed onto its ast sequence,
!   - the new matched text is appended to its matched text.
! On failure f is returned.
: do-seq-parser ( result parser -- result )
    over parse-result-remaining swap parse [
        ! ( result new )
        2dup parse-result-remaining swap set-parse-result-remaining
        2dup parse-result-ast swap parse-result-ast push
        parse-result-matched over parse-result-matched swap append
        over set-parse-result-matched
    ] [
        drop f
    ] if* ;
! Thread 'result' through each parser of 'parsers' in order using
! do-seq-parser. Stops once there are no parsers left or as soon as
! the result has become f.
: (seq-parser) ( result parsers -- result )
    over not over empty? or [
        drop
    ] [
        unclip swap >r do-seq-parser r> (seq-parser)
    ] if ;
! Start from a result with the given state as remaining, empty
! matched text and a fresh ast vector, then run every parser in
! sequence.
M: seq-parser parse ( state parser -- result )
    seq-parser-parsers >r
    "" V{ } clone <parse-result>
    r> (seq-parser) ;

! Create a parser that matches the parsers in 'seq' one after another.
: seq ( seq -- parser )
    seq-parser construct-boa
    init-parser ;