peg: add 'got' slot to parse-error, so you get a little more info about why the parsing fails
parent
84663ca054
commit
3ed3e10074
|
@ -0,0 +1,7 @@
|
||||||
|
USING: arrays continuations debugger io.streams.string peg tools.test ;
|
||||||
|
IN: peg.debugger.tests
|
||||||
|
|
||||||
|
{ "Peg parsing error at character position 0.\nExpected 'A' or 'B'\nGot 'xxxx'\n" } [
|
||||||
|
[ "xxxx" "A" token "B" token 2array choice parse ] [ ] recover
|
||||||
|
[ error. ] with-string-writer
|
||||||
|
] unit-test
|
|
@ -1,12 +1,13 @@
|
||||||
USING: io kernel accessors math.parser sequences prettyprint
|
USING: formatting io kernel accessors math.parser sequences prettyprint
|
||||||
debugger peg ;
|
debugger peg ;
|
||||||
IN: peg.debugger
|
IN: peg.debugger
|
||||||
|
|
||||||
|
|
||||||
M: parse-error error.
|
M: parse-error error.
|
||||||
"Peg parsing error at character position " write dup position>> number>string write
|
[ position>> ] [ messages>> " or " join ] [ got>> ] tri
|
||||||
"." print "Expected " write messages>> [ " or " write ] [ write ] interleave nl ;
|
"Peg parsing error at character position %d.\nExpected %s\nGot '%s'\n"
|
||||||
|
printf ;
|
||||||
|
|
||||||
M: parse-failed error.
|
M: parse-failed error.
|
||||||
"The " write dup word>> pprint " word could not parse the following input:" print nl
|
"The " write dup word>> pprint " word could not parse the following input:" print nl
|
||||||
input>> . ;
|
input>> . ;
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
! Copyright (C) 2007 Chris Double.
|
! Copyright (C) 2007 Chris Double.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
!
|
!
|
||||||
USING: kernel tools.test strings namespaces make arrays sequences
|
USING: continuations kernel tools.test strings namespaces make arrays
|
||||||
peg peg.private peg.parsers words math accessors ;
|
sequences peg peg.private peg.parsers words math accessors ;
|
||||||
IN: peg.tests
|
IN: peg.tests
|
||||||
|
|
||||||
[ ] [ reset-pegs ] unit-test
|
[ ] [ reset-pegs ] unit-test
|
||||||
|
@ -50,11 +50,11 @@ IN: peg.tests
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
[
|
[
|
||||||
"cbcd" "a" token "b" token 2array choice parse
|
"cbcd" "a" token "b" token 2array choice parse
|
||||||
] must-fail
|
] must-fail
|
||||||
|
|
||||||
[
|
[
|
||||||
"" "a" token "b" token 2array choice parse
|
"" "a" token "b" token 2array choice parse
|
||||||
] must-fail
|
] must-fail
|
||||||
|
|
||||||
{ 0 } [
|
{ 0 } [
|
||||||
|
@ -98,7 +98,7 @@ IN: peg.tests
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
[
|
[
|
||||||
"bb" "a" token ensure CHAR: a CHAR: z range 2array seq parse
|
"bb" "a" token ensure CHAR: a CHAR: z range 2array seq parse
|
||||||
] must-fail
|
] must-fail
|
||||||
|
|
||||||
{ t } [
|
{ t } [
|
||||||
|
@ -158,7 +158,7 @@ IN: peg.tests
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
[
|
[
|
||||||
"a]" "[" token hide "a" token "]" token hide 3array seq parse
|
"a]" "[" token hide "a" token "]" token hide 3array seq parse
|
||||||
] must-fail
|
] must-fail
|
||||||
|
|
||||||
|
|
||||||
|
@ -171,7 +171,7 @@ IN: peg.tests
|
||||||
"1+1" swap parse
|
"1+1" swap parse
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
: expr ( -- parser )
|
: expr ( -- parser )
|
||||||
#! Test direct left recursion. Currently left recursion should cause a
|
#! Test direct left recursion. Currently left recursion should cause a
|
||||||
#! failure of that parser.
|
#! failure of that parser.
|
||||||
[ expr ] delay "+" token "1" token 3seq "1" token 2choice ;
|
[ expr ] delay "+" token "1" token 3seq "1" token 2choice ;
|
||||||
|
@ -188,7 +188,7 @@ IN: peg.tests
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
[
|
[
|
||||||
"A" [ drop t ] satisfy [ 66 >= ] semantic parse
|
"A" [ drop t ] satisfy [ 66 >= ] semantic parse
|
||||||
] must-fail
|
] must-fail
|
||||||
|
|
||||||
{ CHAR: B } [
|
{ CHAR: B } [
|
||||||
|
@ -206,3 +206,13 @@ USE: compiler
|
||||||
[ ] [ enable-optimizer ] unit-test
|
[ ] [ enable-optimizer ] unit-test
|
||||||
|
|
||||||
[ [ ] ] [ "" epsilon [ drop [ [ ] ] call ] action parse ] unit-test
|
[ [ ] ] [ "" epsilon [ drop [ [ ] ] call ] action parse ] unit-test
|
||||||
|
|
||||||
|
{
|
||||||
|
T{ parse-error
|
||||||
|
{ position 0 }
|
||||||
|
{ got "fbcd" }
|
||||||
|
{ messages V{ "'a'" "'b'" } }
|
||||||
|
}
|
||||||
|
} [
|
||||||
|
[ "fbcd" "a" token "b" token 2array choice parse ] [ ] recover
|
||||||
|
] unit-test
|
||||||
|
|
|
@ -9,7 +9,7 @@ FROM: namespaces => set ;
|
||||||
IN: peg
|
IN: peg
|
||||||
|
|
||||||
TUPLE: parse-result remaining ast ;
|
TUPLE: parse-result remaining ast ;
|
||||||
TUPLE: parse-error position messages ;
|
TUPLE: parse-error position got messages ;
|
||||||
TUPLE: parser peg compiled id ;
|
TUPLE: parser peg compiled id ;
|
||||||
|
|
||||||
M: parser equal? { [ [ class-of ] same? ] [ [ id>> ] same? ] } 2&& ;
|
M: parser equal? { [ [ class-of ] same? ] [ [ id>> ] same? ] } 2&& ;
|
||||||
|
@ -20,6 +20,10 @@ C: <parse-error> parse-error
|
||||||
|
|
||||||
SYMBOL: error-stack
|
SYMBOL: error-stack
|
||||||
|
|
||||||
|
: merge-overlapping-errors ( a b -- c )
|
||||||
|
dupd [ messages>> ] bi@ union [ [ position>> ] [ got>> ] bi ] dip
|
||||||
|
<parse-error> ;
|
||||||
|
|
||||||
: (merge-errors) ( a b -- c )
|
: (merge-errors) ( a b -- c )
|
||||||
{
|
{
|
||||||
{ [ over position>> not ] [ nip ] }
|
{ [ over position>> not ] [ nip ] }
|
||||||
|
@ -28,7 +32,7 @@ SYMBOL: error-stack
|
||||||
2dup [ position>> ] compare {
|
2dup [ position>> ] compare {
|
||||||
{ +lt+ [ nip ] }
|
{ +lt+ [ nip ] }
|
||||||
{ +gt+ [ drop ] }
|
{ +gt+ [ drop ] }
|
||||||
{ +eq+ [ messages>> over messages>> union [ position>> ] dip <parse-error> ] }
|
{ +eq+ [ merge-overlapping-errors ] }
|
||||||
} case
|
} case
|
||||||
]
|
]
|
||||||
} cond ;
|
} cond ;
|
||||||
|
@ -40,7 +44,7 @@ SYMBOL: error-stack
|
||||||
drop
|
drop
|
||||||
] if ;
|
] if ;
|
||||||
|
|
||||||
: add-error ( remaining message -- )
|
: add-error ( position got message -- )
|
||||||
<parse-error> error-stack get push ;
|
<parse-error> error-stack get push ;
|
||||||
|
|
||||||
SYMBOL: ignore
|
SYMBOL: ignore
|
||||||
|
@ -81,7 +85,7 @@ SYMBOL: lrstack
|
||||||
: reset-pegs ( -- )
|
: reset-pegs ( -- )
|
||||||
H{ } clone \ peg-cache set-global ;
|
H{ } clone \ peg-cache set-global ;
|
||||||
|
|
||||||
reset-pegs
|
reset-pegs
|
||||||
|
|
||||||
#! An entry in the table of memoized parse results
|
#! An entry in the table of memoized parse results
|
||||||
#! ast = an AST produced from the parse
|
#! ast = an AST produced from the parse
|
||||||
|
@ -90,10 +94,10 @@ reset-pegs
|
||||||
#! pos = the position in the input string of this entry
|
#! pos = the position in the input string of this entry
|
||||||
TUPLE: memo-entry ans pos ;
|
TUPLE: memo-entry ans pos ;
|
||||||
|
|
||||||
TUPLE: left-recursion seed rule-id head next ;
|
TUPLE: left-recursion seed rule-id head next ;
|
||||||
TUPLE: peg-head rule-id involved-set eval-set ;
|
TUPLE: peg-head rule-id involved-set eval-set ;
|
||||||
|
|
||||||
: rule-id ( word -- id )
|
: rule-id ( word -- id )
|
||||||
#! A rule is the parser compiled down to a word. It has
|
#! A rule is the parser compiled down to a word. It has
|
||||||
#! a "peg-id" property containing the id of the original parser.
|
#! a "peg-id" property containing the id of the original parser.
|
||||||
"peg-id" word-prop ;
|
"peg-id" word-prop ;
|
||||||
|
@ -112,7 +116,7 @@ TUPLE: peg-head rule-id involved-set eval-set ;
|
||||||
nip [ ast>> ] [ remaining>> ] bi input-from pos set
|
nip [ ast>> ] [ remaining>> ] bi input-from pos set
|
||||||
] [
|
] [
|
||||||
pos set fail
|
pos set fail
|
||||||
] if* ;
|
] if* ;
|
||||||
|
|
||||||
: eval-rule ( rule -- ast )
|
: eval-rule ( rule -- ast )
|
||||||
#! Evaluate a rule, return an ast resulting from it.
|
#! Evaluate a rule, return an ast resulting from it.
|
||||||
|
@ -132,7 +136,7 @@ TUPLE: peg-head rule-id involved-set eval-set ;
|
||||||
swap >>ans pos get >>pos drop ;
|
swap >>ans pos get >>pos drop ;
|
||||||
|
|
||||||
: stop-growth? ( ast m -- ? )
|
: stop-growth? ( ast m -- ? )
|
||||||
[ failed? pos get ] dip
|
[ failed? pos get ] dip
|
||||||
pos>> <= or ;
|
pos>> <= or ;
|
||||||
|
|
||||||
: setup-growth ( h p -- )
|
: setup-growth ( h p -- )
|
||||||
|
@ -272,7 +276,7 @@ GENERIC: (compile) ( peg -- quot )
|
||||||
#! If not, compile it to a temporary word, cache it,
|
#! If not, compile it to a temporary word, cache it,
|
||||||
#! and return it. Otherwise return the existing one.
|
#! and return it. Otherwise return the existing one.
|
||||||
#! Circular parsers are supported by getting the word
|
#! Circular parsers are supported by getting the word
|
||||||
#! name and storing it in the cache, before compiling,
|
#! name and storing it in the cache, before compiling,
|
||||||
#! so it is picked up when re-entered.
|
#! so it is picked up when re-entered.
|
||||||
dup compiled>> [
|
dup compiled>> [
|
||||||
nip
|
nip
|
||||||
|
@ -333,9 +337,9 @@ TUPLE: token-parser symbol ;
|
||||||
: parse-token ( input string -- result )
|
: parse-token ( input string -- result )
|
||||||
#! Parse the string, returning a parse result
|
#! Parse the string, returning a parse result
|
||||||
[ ?head-slice ] keep swap [
|
[ ?head-slice ] keep swap [
|
||||||
<parse-result> f f add-error
|
<parse-result> f f f add-error
|
||||||
] [
|
] [
|
||||||
[ drop pos get "token '" ] dip append "'" append 1vector add-error f
|
[ seq>> pos get swap ] dip "'" "'" surround 1vector add-error f
|
||||||
] if ;
|
] if ;
|
||||||
|
|
||||||
M: token-parser (compile) ( peg -- quot )
|
M: token-parser (compile) ( peg -- quot )
|
||||||
|
@ -404,7 +408,7 @@ M: seq-parser (compile) ( peg -- quot )
|
||||||
[
|
[
|
||||||
parsers>> unclip compile-parser-quot [ parse-seq-element ] curry ,
|
parsers>> unclip compile-parser-quot [ parse-seq-element ] curry ,
|
||||||
[ compile-parser-quot [ merge-errors ] compose [ parse-seq-element ] curry , ] each
|
[ compile-parser-quot [ merge-errors ] compose [ parse-seq-element ] curry , ] each
|
||||||
] { } make , \ 1&& ,
|
] { } make , \ 1&& ,
|
||||||
] [ ] make ;
|
] [ ] make ;
|
||||||
|
|
||||||
TUPLE: choice-parser parsers ;
|
TUPLE: choice-parser parsers ;
|
||||||
|
@ -421,7 +425,7 @@ TUPLE: repeat0-parser p1 ;
|
||||||
|
|
||||||
: (repeat) ( quot: ( -- result ) result -- result )
|
: (repeat) ( quot: ( -- result ) result -- result )
|
||||||
over call [
|
over call [
|
||||||
[ remaining>> swap remaining<< ] 2keep
|
[ remaining>> swap remaining<< ] 2keep
|
||||||
ast>> swap [ ast>> push ] keep
|
ast>> swap [ ast>> push ] keep
|
||||||
(repeat)
|
(repeat)
|
||||||
] [
|
] [
|
||||||
|
@ -430,7 +434,7 @@ TUPLE: repeat0-parser p1 ;
|
||||||
|
|
||||||
M: repeat0-parser (compile) ( peg -- quot )
|
M: repeat0-parser (compile) ( peg -- quot )
|
||||||
p1>> compile-parser-quot '[
|
p1>> compile-parser-quot '[
|
||||||
input-slice V{ } clone <parse-result> _ swap (repeat)
|
input-slice V{ } clone <parse-result> _ swap (repeat)
|
||||||
] ;
|
] ;
|
||||||
|
|
||||||
TUPLE: repeat1-parser p1 ;
|
TUPLE: repeat1-parser p1 ;
|
||||||
|
@ -443,8 +447,8 @@ TUPLE: repeat1-parser p1 ;
|
||||||
] if* ;
|
] if* ;
|
||||||
|
|
||||||
M: repeat1-parser (compile) ( peg -- quot )
|
M: repeat1-parser (compile) ( peg -- quot )
|
||||||
p1>> compile-parser-quot '[
|
p1>> compile-parser-quot '[
|
||||||
input-slice V{ } clone <parse-result> _ swap (repeat) repeat1-empty-check
|
input-slice V{ } clone <parse-result> _ swap (repeat) repeat1-empty-check
|
||||||
] ;
|
] ;
|
||||||
|
|
||||||
TUPLE: optional-parser p1 ;
|
TUPLE: optional-parser p1 ;
|
||||||
|
@ -500,16 +504,16 @@ TUPLE: sp-parser p1 ;
|
||||||
|
|
||||||
M: sp-parser (compile) ( peg -- quot )
|
M: sp-parser (compile) ( peg -- quot )
|
||||||
p1>> compile-parser-quot '[
|
p1>> compile-parser-quot '[
|
||||||
input-slice [ blank? ] trim-head-slice input-from pos set @
|
input-slice [ blank? ] trim-head-slice input-from pos set @
|
||||||
] ;
|
] ;
|
||||||
|
|
||||||
TUPLE: delay-parser quot ;
|
TUPLE: delay-parser quot ;
|
||||||
|
|
||||||
M: delay-parser (compile) ( peg -- quot )
|
M: delay-parser (compile) ( peg -- quot )
|
||||||
#! For efficiency we memoize the quotation.
|
#! For efficiency we memoize the quotation.
|
||||||
#! This way it is run only once and the
|
#! This way it is run only once and the
|
||||||
#! parser constructed once at run time.
|
#! parser constructed once at run time.
|
||||||
quot>> gensym [ delayed get set-at ] keep 1quotation ;
|
quot>> gensym [ delayed get set-at ] keep 1quotation ;
|
||||||
|
|
||||||
TUPLE: box-parser quot ;
|
TUPLE: box-parser quot ;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue