peg: add 'got' slot to parse-error, so you get a little more info about why the parsing fails

db4
Björn Lindqvist 2014-10-21 16:27:33 +02:00 committed by Doug Coleman
parent 84663ca054
commit 3ed3e10074
4 changed files with 55 additions and 33 deletions

View File

@ -0,0 +1,7 @@
USING: arrays continuations debugger io.streams.string peg tools.test ;
IN: peg.debugger.tests
{ "Peg parsing error at character position 0.\nExpected 'A' or 'B'\nGot 'xxxx'\n" } [
[ "xxxx" "A" token "B" token 2array choice parse ] [ ] recover
[ error. ] with-string-writer
] unit-test

View File

@ -1,12 +1,13 @@
USING: io kernel accessors math.parser sequences prettyprint USING: formatting io kernel accessors math.parser sequences prettyprint
debugger peg ; debugger peg ;
IN: peg.debugger IN: peg.debugger
M: parse-error error. M: parse-error error.
"Peg parsing error at character position " write dup position>> number>string write [ position>> ] [ messages>> " or " join ] [ got>> ] tri
"." print "Expected " write messages>> [ " or " write ] [ write ] interleave nl ; "Peg parsing error at character position %d.\nExpected %s\nGot '%s'\n"
printf ;
M: parse-failed error. M: parse-failed error.
"The " write dup word>> pprint " word could not parse the following input:" print nl "The " write dup word>> pprint " word could not parse the following input:" print nl
input>> . ; input>> . ;

View File

@ -1,8 +1,8 @@
! Copyright (C) 2007 Chris Double. ! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
! !
USING: kernel tools.test strings namespaces make arrays sequences USING: continuations kernel tools.test strings namespaces make arrays
peg peg.private peg.parsers words math accessors ; sequences peg peg.private peg.parsers words math accessors ;
IN: peg.tests IN: peg.tests
[ ] [ reset-pegs ] unit-test [ ] [ reset-pegs ] unit-test
@ -50,11 +50,11 @@ IN: peg.tests
] unit-test ] unit-test
[ [
"cbcd" "a" token "b" token 2array choice parse "cbcd" "a" token "b" token 2array choice parse
] must-fail ] must-fail
[ [
"" "a" token "b" token 2array choice parse "" "a" token "b" token 2array choice parse
] must-fail ] must-fail
{ 0 } [ { 0 } [
@ -98,7 +98,7 @@ IN: peg.tests
] unit-test ] unit-test
[ [
"bb" "a" token ensure CHAR: a CHAR: z range 2array seq parse "bb" "a" token ensure CHAR: a CHAR: z range 2array seq parse
] must-fail ] must-fail
{ t } [ { t } [
@ -158,7 +158,7 @@ IN: peg.tests
] unit-test ] unit-test
[ [
"a]" "[" token hide "a" token "]" token hide 3array seq parse "a]" "[" token hide "a" token "]" token hide 3array seq parse
] must-fail ] must-fail
@ -171,7 +171,7 @@ IN: peg.tests
"1+1" swap parse "1+1" swap parse
] unit-test ] unit-test
: expr ( -- parser ) : expr ( -- parser )
#! Test direct left recursion. Currently left recursion should cause a #! Test direct left recursion. Currently left recursion should cause a
#! failure of that parser. #! failure of that parser.
[ expr ] delay "+" token "1" token 3seq "1" token 2choice ; [ expr ] delay "+" token "1" token 3seq "1" token 2choice ;
@ -188,7 +188,7 @@ IN: peg.tests
] unit-test ] unit-test
[ [
"A" [ drop t ] satisfy [ 66 >= ] semantic parse "A" [ drop t ] satisfy [ 66 >= ] semantic parse
] must-fail ] must-fail
{ CHAR: B } [ { CHAR: B } [
@ -206,3 +206,13 @@ USE: compiler
[ ] [ enable-optimizer ] unit-test [ ] [ enable-optimizer ] unit-test
[ [ ] ] [ "" epsilon [ drop [ [ ] ] call ] action parse ] unit-test [ [ ] ] [ "" epsilon [ drop [ [ ] ] call ] action parse ] unit-test
{
T{ parse-error
{ position 0 }
{ got "fbcd" }
{ messages V{ "'a'" "'b'" } }
}
} [
[ "fbcd" "a" token "b" token 2array choice parse ] [ ] recover
] unit-test

View File

@ -9,7 +9,7 @@ FROM: namespaces => set ;
IN: peg IN: peg
TUPLE: parse-result remaining ast ; TUPLE: parse-result remaining ast ;
TUPLE: parse-error position messages ; TUPLE: parse-error position got messages ;
TUPLE: parser peg compiled id ; TUPLE: parser peg compiled id ;
M: parser equal? { [ [ class-of ] same? ] [ [ id>> ] same? ] } 2&& ; M: parser equal? { [ [ class-of ] same? ] [ [ id>> ] same? ] } 2&& ;
@ -20,6 +20,10 @@ C: <parse-error> parse-error
SYMBOL: error-stack SYMBOL: error-stack
: merge-overlapping-errors ( a b -- c )
dupd [ messages>> ] bi@ union [ [ position>> ] [ got>> ] bi ] dip
<parse-error> ;
: (merge-errors) ( a b -- c ) : (merge-errors) ( a b -- c )
{ {
{ [ over position>> not ] [ nip ] } { [ over position>> not ] [ nip ] }
@ -28,7 +32,7 @@ SYMBOL: error-stack
2dup [ position>> ] compare { 2dup [ position>> ] compare {
{ +lt+ [ nip ] } { +lt+ [ nip ] }
{ +gt+ [ drop ] } { +gt+ [ drop ] }
{ +eq+ [ messages>> over messages>> union [ position>> ] dip <parse-error> ] } { +eq+ [ merge-overlapping-errors ] }
} case } case
] ]
} cond ; } cond ;
@ -40,7 +44,7 @@ SYMBOL: error-stack
drop drop
] if ; ] if ;
: add-error ( remaining message -- ) : add-error ( position got message -- )
<parse-error> error-stack get push ; <parse-error> error-stack get push ;
SYMBOL: ignore SYMBOL: ignore
@ -81,7 +85,7 @@ SYMBOL: lrstack
: reset-pegs ( -- ) : reset-pegs ( -- )
H{ } clone \ peg-cache set-global ; H{ } clone \ peg-cache set-global ;
reset-pegs reset-pegs
#! An entry in the table of memoized parse results #! An entry in the table of memoized parse results
#! ast = an AST produced from the parse #! ast = an AST produced from the parse
@ -90,10 +94,10 @@ reset-pegs
#! pos = the position in the input string of this entry #! pos = the position in the input string of this entry
TUPLE: memo-entry ans pos ; TUPLE: memo-entry ans pos ;
TUPLE: left-recursion seed rule-id head next ; TUPLE: left-recursion seed rule-id head next ;
TUPLE: peg-head rule-id involved-set eval-set ; TUPLE: peg-head rule-id involved-set eval-set ;
: rule-id ( word -- id ) : rule-id ( word -- id )
#! A rule is the parser compiled down to a word. It has #! A rule is the parser compiled down to a word. It has
#! a "peg-id" property containing the id of the original parser. #! a "peg-id" property containing the id of the original parser.
"peg-id" word-prop ; "peg-id" word-prop ;
@ -112,7 +116,7 @@ TUPLE: peg-head rule-id involved-set eval-set ;
nip [ ast>> ] [ remaining>> ] bi input-from pos set nip [ ast>> ] [ remaining>> ] bi input-from pos set
] [ ] [
pos set fail pos set fail
] if* ; ] if* ;
: eval-rule ( rule -- ast ) : eval-rule ( rule -- ast )
#! Evaluate a rule, return an ast resulting from it. #! Evaluate a rule, return an ast resulting from it.
@ -132,7 +136,7 @@ TUPLE: peg-head rule-id involved-set eval-set ;
swap >>ans pos get >>pos drop ; swap >>ans pos get >>pos drop ;
: stop-growth? ( ast m -- ? ) : stop-growth? ( ast m -- ? )
[ failed? pos get ] dip [ failed? pos get ] dip
pos>> <= or ; pos>> <= or ;
: setup-growth ( h p -- ) : setup-growth ( h p -- )
@ -272,7 +276,7 @@ GENERIC: (compile) ( peg -- quot )
#! If not, compile it to a temporary word, cache it, #! If not, compile it to a temporary word, cache it,
#! and return it. Otherwise return the existing one. #! and return it. Otherwise return the existing one.
#! Circular parsers are supported by getting the word #! Circular parsers are supported by getting the word
#! name and storing it in the cache, before compiling, #! name and storing it in the cache, before compiling,
#! so it is picked up when re-entered. #! so it is picked up when re-entered.
dup compiled>> [ dup compiled>> [
nip nip
@ -333,9 +337,9 @@ TUPLE: token-parser symbol ;
: parse-token ( input string -- result ) : parse-token ( input string -- result )
#! Parse the string, returning a parse result #! Parse the string, returning a parse result
[ ?head-slice ] keep swap [ [ ?head-slice ] keep swap [
<parse-result> f f add-error <parse-result> f f f add-error
] [ ] [
[ drop pos get "token '" ] dip append "'" append 1vector add-error f [ seq>> pos get swap ] dip "'" "'" surround 1vector add-error f
] if ; ] if ;
M: token-parser (compile) ( peg -- quot ) M: token-parser (compile) ( peg -- quot )
@ -404,7 +408,7 @@ M: seq-parser (compile) ( peg -- quot )
[ [
parsers>> unclip compile-parser-quot [ parse-seq-element ] curry , parsers>> unclip compile-parser-quot [ parse-seq-element ] curry ,
[ compile-parser-quot [ merge-errors ] compose [ parse-seq-element ] curry , ] each [ compile-parser-quot [ merge-errors ] compose [ parse-seq-element ] curry , ] each
] { } make , \ 1&& , ] { } make , \ 1&& ,
] [ ] make ; ] [ ] make ;
TUPLE: choice-parser parsers ; TUPLE: choice-parser parsers ;
@ -421,7 +425,7 @@ TUPLE: repeat0-parser p1 ;
: (repeat) ( quot: ( -- result ) result -- result ) : (repeat) ( quot: ( -- result ) result -- result )
over call [ over call [
[ remaining>> swap remaining<< ] 2keep [ remaining>> swap remaining<< ] 2keep
ast>> swap [ ast>> push ] keep ast>> swap [ ast>> push ] keep
(repeat) (repeat)
] [ ] [
@ -430,7 +434,7 @@ TUPLE: repeat0-parser p1 ;
M: repeat0-parser (compile) ( peg -- quot ) M: repeat0-parser (compile) ( peg -- quot )
p1>> compile-parser-quot '[ p1>> compile-parser-quot '[
input-slice V{ } clone <parse-result> _ swap (repeat) input-slice V{ } clone <parse-result> _ swap (repeat)
] ; ] ;
TUPLE: repeat1-parser p1 ; TUPLE: repeat1-parser p1 ;
@ -443,8 +447,8 @@ TUPLE: repeat1-parser p1 ;
] if* ; ] if* ;
M: repeat1-parser (compile) ( peg -- quot ) M: repeat1-parser (compile) ( peg -- quot )
p1>> compile-parser-quot '[ p1>> compile-parser-quot '[
input-slice V{ } clone <parse-result> _ swap (repeat) repeat1-empty-check input-slice V{ } clone <parse-result> _ swap (repeat) repeat1-empty-check
] ; ] ;
TUPLE: optional-parser p1 ; TUPLE: optional-parser p1 ;
@ -500,16 +504,16 @@ TUPLE: sp-parser p1 ;
M: sp-parser (compile) ( peg -- quot ) M: sp-parser (compile) ( peg -- quot )
p1>> compile-parser-quot '[ p1>> compile-parser-quot '[
input-slice [ blank? ] trim-head-slice input-from pos set @ input-slice [ blank? ] trim-head-slice input-from pos set @
] ; ] ;
TUPLE: delay-parser quot ; TUPLE: delay-parser quot ;
M: delay-parser (compile) ( peg -- quot ) M: delay-parser (compile) ( peg -- quot )
#! For efficiency we memoize the quotation. #! For efficiency we memoize the quotation.
#! This way it is run only once and the #! This way it is run only once and the
#! parser constructed once at run time. #! parser constructed once at run time.
quot>> gensym [ delayed get set-at ] keep 1quotation ; quot>> gensym [ delayed get set-at ] keep 1quotation ;
TUPLE: box-parser quot ; TUPLE: box-parser quot ;