diff --git a/basis/peg/debugger/debugger-tests.factor b/basis/peg/debugger/debugger-tests.factor new file mode 100644 index 0000000000..7968abfad1 --- /dev/null +++ b/basis/peg/debugger/debugger-tests.factor @@ -0,0 +1,7 @@ +USING: arrays continuations debugger io.streams.string peg tools.test ; +IN: peg.debugger.tests + +{ "Peg parsing error at character position 0.\nExpected 'A' or 'B'\nGot 'xxxx'\n" } [ + [ "xxxx" "A" token "B" token 2array choice parse ] [ ] recover + [ error. ] with-string-writer +] unit-test diff --git a/basis/peg/debugger/debugger.factor b/basis/peg/debugger/debugger.factor index 7e751b5110..32e9c18201 100644 --- a/basis/peg/debugger/debugger.factor +++ b/basis/peg/debugger/debugger.factor @@ -1,12 +1,13 @@ -USING: io kernel accessors math.parser sequences prettyprint +USING: formatting io kernel accessors math.parser sequences prettyprint debugger peg ; IN: peg.debugger + M: parse-error error. - "Peg parsing error at character position " write dup position>> number>string write - "." print "Expected " write messages>> [ " or " write ] [ write ] interleave nl ; + [ position>> ] [ messages>> " or " join ] [ got>> ] tri + "Peg parsing error at character position %d.\nExpected %s\nGot '%s'\n" + printf ; M: parse-failed error. - "The " write dup word>> pprint " word could not parse the following input:" print nl - input>> . ; - + "The " write dup word>> pprint " word could not parse the following input:" print nl + input>> . ; diff --git a/basis/peg/peg-tests.factor b/basis/peg/peg-tests.factor index ebfdebfcb1..2a568c1e9d 100644 --- a/basis/peg/peg-tests.factor +++ b/basis/peg/peg-tests.factor @@ -1,8 +1,8 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test strings namespaces make arrays sequences - peg peg.private peg.parsers words math accessors ; +USING: continuations kernel tools.test strings namespaces make arrays +sequences peg peg.private peg.parsers words math accessors ; IN: peg.tests [ ] [ reset-pegs ] unit-test @@ -50,11 +50,11 @@ IN: peg.tests ] unit-test [ - "cbcd" "a" token "b" token 2array choice parse + "cbcd" "a" token "b" token 2array choice parse ] must-fail [ - "" "a" token "b" token 2array choice parse + "" "a" token "b" token 2array choice parse ] must-fail { 0 } [ @@ -98,7 +98,7 @@ IN: peg.tests ] unit-test [ - "bb" "a" token ensure CHAR: a CHAR: z range 2array seq parse + "bb" "a" token ensure CHAR: a CHAR: z range 2array seq parse ] must-fail { t } [ @@ -158,7 +158,7 @@ IN: peg.tests ] unit-test [ - "a]" "[" token hide "a" token "]" token hide 3array seq parse + "a]" "[" token hide "a" token "]" token hide 3array seq parse ] must-fail @@ -171,7 +171,7 @@ IN: peg.tests "1+1" swap parse ] unit-test -: expr ( -- parser ) +: expr ( -- parser ) #! Test direct left recursion. Currently left recursion should cause a #! failure of that parser. [ expr ] delay "+" token "1" token 3seq "1" token 2choice ; @@ -188,7 +188,7 @@ IN: peg.tests ] unit-test [ - "A" [ drop t ] satisfy [ 66 >= ] semantic parse + "A" [ drop t ] satisfy [ 66 >= ] semantic parse ] must-fail { CHAR: B } [ @@ -206,3 +206,13 @@ USE: compiler [ ] [ enable-optimizer ] unit-test [ [ ] ] [ "" epsilon [ drop [ [ ] ] call ] action parse ] unit-test + +{ + T{ parse-error + { position 0 } + { got "fbcd" } + { messages V{ "'a'" "'b'" } } + } +} [ + [ "fbcd" "a" token "b" token 2array choice parse ] [ ] recover +] unit-test diff --git a/basis/peg/peg.factor b/basis/peg/peg.factor index 6ccd93f3a7..caa2ebb6c0 100644 --- a/basis/peg/peg.factor +++ b/basis/peg/peg.factor @@ -9,7 +9,7 @@ FROM: namespaces => set ; IN: peg TUPLE: parse-result remaining ast ; -TUPLE: parse-error position messages ; +TUPLE: parse-error position got messages ; TUPLE: parser peg compiled id ; M: parser equal? { [ [ class-of ] same? ] [ [ id>> ] same? ] } 2&& ; @@ -20,6 +20,10 @@ C: parse-error SYMBOL: error-stack +: merge-overlapping-errors ( a b -- c ) + dupd [ messages>> ] bi@ union [ [ position>> ] [ got>> ] bi ] dip + ; + : (merge-errors) ( a b -- c ) { { [ over position>> not ] [ nip ] } @@ -28,7 +32,7 @@ SYMBOL: error-stack 2dup [ position>> ] compare { { +lt+ [ nip ] } { +gt+ [ drop ] } - { +eq+ [ messages>> over messages>> union [ position>> ] dip ] } + { +eq+ [ merge-overlapping-errors ] } } case ] } cond ; @@ -40,7 +44,7 @@ SYMBOL: error-stack drop ] if ; -: add-error ( remaining message -- ) +: add-error ( position got message -- ) error-stack get push ; SYMBOL: ignore @@ -81,7 +85,7 @@ SYMBOL: lrstack : reset-pegs ( -- ) H{ } clone \ peg-cache set-global ; -reset-pegs +reset-pegs #! An entry in the table of memoized parse results #! ast = an AST produced from the parse @@ -90,10 +94,10 @@ reset-pegs #! pos = the position in the input string of this entry TUPLE: memo-entry ans pos ; -TUPLE: left-recursion seed rule-id head next ; +TUPLE: left-recursion seed rule-id head next ; TUPLE: peg-head rule-id involved-set eval-set ; -: rule-id ( word -- id ) +: rule-id ( word -- id ) #! A rule is the parser compiled down to a word. It has #! a "peg-id" property containing the id of the original parser. "peg-id" word-prop ; @@ -112,7 +116,7 @@ TUPLE: peg-head rule-id involved-set eval-set ; nip [ ast>> ] [ remaining>> ] bi input-from pos set ] [ pos set fail - ] if* ; + ] if* ; : eval-rule ( rule -- ast ) #! Evaluate a rule, return an ast resulting from it. @@ -132,7 +136,7 @@ TUPLE: peg-head rule-id involved-set eval-set ; swap >>ans pos get >>pos drop ; : stop-growth? ( ast m -- ? ) - [ failed? pos get ] dip + [ failed? pos get ] dip pos>> <= or ; : setup-growth ( h p -- ) @@ -272,7 +276,7 @@ GENERIC: (compile) ( peg -- quot ) #! If not, compile it to a temporary word, cache it, #! and return it. Otherwise return the existing one. #! Circular parsers are supported by getting the word - #! name and storing it in the cache, before compiling, + #! name and storing it in the cache, before compiling, #! so it is picked up when re-entered. dup compiled>> [ nip @@ -333,9 +337,9 @@ TUPLE: token-parser symbol ; : parse-token ( input string -- result ) #! Parse the string, returning a parse result [ ?head-slice ] keep swap [ - f f add-error + f f f add-error ] [ - [ drop pos get "token '" ] dip append "'" append 1vector add-error f + [ seq>> pos get swap ] dip "'" "'" surround 1vector add-error f ] if ; M: token-parser (compile) ( peg -- quot ) @@ -404,7 +408,7 @@ M: seq-parser (compile) ( peg -- quot ) [ parsers>> unclip compile-parser-quot [ parse-seq-element ] curry , [ compile-parser-quot [ merge-errors ] compose [ parse-seq-element ] curry , ] each - ] { } make , \ 1&& , + ] { } make , \ 1&& , ] [ ] make ; TUPLE: choice-parser parsers ; @@ -421,7 +425,7 @@ TUPLE: repeat0-parser p1 ; : (repeat) ( quot: ( -- result ) result -- result ) over call [ - [ remaining>> swap remaining<< ] 2keep + [ remaining>> swap remaining<< ] 2keep ast>> swap [ ast>> push ] keep (repeat) ] [ @@ -430,7 +434,7 @@ TUPLE: repeat0-parser p1 ; M: repeat0-parser (compile) ( peg -- quot ) p1>> compile-parser-quot '[ - input-slice V{ } clone _ swap (repeat) + input-slice V{ } clone _ swap (repeat) ] ; TUPLE: repeat1-parser p1 ; @@ -443,8 +447,8 @@ TUPLE: repeat1-parser p1 ; ] if* ; M: repeat1-parser (compile) ( peg -- quot ) - p1>> compile-parser-quot '[ - input-slice V{ } clone _ swap (repeat) repeat1-empty-check + p1>> compile-parser-quot '[ + input-slice V{ } clone _ swap (repeat) repeat1-empty-check ] ; TUPLE: optional-parser p1 ; @@ -500,16 +504,16 @@ TUPLE: sp-parser p1 ; M: sp-parser (compile) ( peg -- quot ) p1>> compile-parser-quot '[ - input-slice [ blank? ] trim-head-slice input-from pos set @ + input-slice [ blank? ] trim-head-slice input-from pos set @ ] ; TUPLE: delay-parser quot ; M: delay-parser (compile) ( peg -- quot ) #! For efficiency we memoize the quotation. - #! This way it is run only once and the + #! This way it is run only once and the #! parser constructed once at run time. - quot>> gensym [ delayed get set-at ] keep 1quotation ; + quot>> gensym [ delayed get set-at ] keep 1quotation ; TUPLE: box-parser quot ;