peg: add 'got' slot to parse-error, so you get a little more info about why the parsing fails
							parent
							
								
									84663ca054
								
							
						
					
					
						commit
						3ed3e10074
					
				| 
						 | 
				
			
			@ -0,0 +1,7 @@
 | 
			
		|||
USING: arrays continuations debugger io.streams.string peg tools.test ;
 | 
			
		||||
IN: peg.debugger.tests
 | 
			
		||||
 | 
			
		||||
{ "Peg parsing error at character position 0.\nExpected 'A' or 'B'\nGot 'xxxx'\n" } [
 | 
			
		||||
    [ "xxxx" "A" token "B" token 2array choice parse ] [ ] recover
 | 
			
		||||
    [ error. ] with-string-writer
 | 
			
		||||
] unit-test
 | 
			
		||||
| 
						 | 
				
			
			@ -1,12 +1,13 @@
 | 
			
		|||
USING: io kernel accessors math.parser sequences prettyprint
 | 
			
		||||
USING: formatting io kernel accessors math.parser sequences prettyprint
 | 
			
		||||
debugger peg ;
 | 
			
		||||
IN: peg.debugger
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
M: parse-error error.
 | 
			
		||||
  "Peg parsing error at character position " write dup position>> number>string write 
 | 
			
		||||
  "." print "Expected " write messages>> [ " or " write ] [ write ] interleave nl ;
 | 
			
		||||
    [ position>> ] [ messages>> " or " join ] [ got>> ] tri
 | 
			
		||||
    "Peg parsing error at character position %d.\nExpected %s\nGot '%s'\n"
 | 
			
		||||
    printf ;
 | 
			
		||||
 | 
			
		||||
M: parse-failed error.
 | 
			
		||||
  "The " write dup word>> pprint " word could not parse the following input:" print nl
 | 
			
		||||
  input>> . ;
 | 
			
		||||
 | 
			
		||||
    "The " write dup word>> pprint " word could not parse the following input:" print nl
 | 
			
		||||
    input>> . ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,8 +1,8 @@
 | 
			
		|||
! Copyright (C) 2007 Chris Double.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
!
 | 
			
		||||
USING: kernel tools.test strings namespaces make arrays sequences 
 | 
			
		||||
       peg peg.private peg.parsers words math accessors ;
 | 
			
		||||
USING: continuations kernel tools.test strings namespaces make arrays
 | 
			
		||||
sequences peg peg.private peg.parsers words math accessors ;
 | 
			
		||||
IN: peg.tests
 | 
			
		||||
 | 
			
		||||
[ ] [ reset-pegs ] unit-test
 | 
			
		||||
| 
						 | 
				
			
			@ -50,11 +50,11 @@ IN: peg.tests
 | 
			
		|||
] unit-test
 | 
			
		||||
 | 
			
		||||
[
 | 
			
		||||
    "cbcd" "a" token "b" token 2array choice parse 
 | 
			
		||||
    "cbcd" "a" token "b" token 2array choice parse
 | 
			
		||||
] must-fail
 | 
			
		||||
 | 
			
		||||
[
 | 
			
		||||
    "" "a" token "b" token 2array choice parse 
 | 
			
		||||
    "" "a" token "b" token 2array choice parse
 | 
			
		||||
] must-fail
 | 
			
		||||
 | 
			
		||||
{ 0 } [
 | 
			
		||||
| 
						 | 
				
			
			@ -98,7 +98,7 @@ IN: peg.tests
 | 
			
		|||
] unit-test
 | 
			
		||||
 | 
			
		||||
[
 | 
			
		||||
    "bb" "a" token ensure CHAR: a CHAR: z range 2array seq parse 
 | 
			
		||||
    "bb" "a" token ensure CHAR: a CHAR: z range 2array seq parse
 | 
			
		||||
] must-fail
 | 
			
		||||
 | 
			
		||||
{ t } [
 | 
			
		||||
| 
						 | 
				
			
			@ -158,7 +158,7 @@ IN: peg.tests
 | 
			
		|||
] unit-test
 | 
			
		||||
 | 
			
		||||
[
 | 
			
		||||
    "a]" "[" token hide "a" token "]" token hide 3array seq parse 
 | 
			
		||||
    "a]" "[" token hide "a" token "]" token hide 3array seq parse
 | 
			
		||||
] must-fail
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -171,7 +171,7 @@ IN: peg.tests
 | 
			
		|||
    "1+1" swap parse
 | 
			
		||||
] unit-test
 | 
			
		||||
 | 
			
		||||
: expr ( -- parser ) 
 | 
			
		||||
: expr ( -- parser )
 | 
			
		||||
    #! Test direct left recursion. Currently left recursion should cause a
 | 
			
		||||
    #! failure of that parser.
 | 
			
		||||
    [ expr ] delay "+" token "1" token 3seq "1" token 2choice ;
 | 
			
		||||
| 
						 | 
				
			
			@ -188,7 +188,7 @@ IN: peg.tests
 | 
			
		|||
] unit-test
 | 
			
		||||
 | 
			
		||||
[
 | 
			
		||||
    "A" [ drop t ] satisfy [ 66 >= ] semantic parse 
 | 
			
		||||
    "A" [ drop t ] satisfy [ 66 >= ] semantic parse
 | 
			
		||||
] must-fail
 | 
			
		||||
 | 
			
		||||
{ CHAR: B } [
 | 
			
		||||
| 
						 | 
				
			
			@ -206,3 +206,13 @@ USE: compiler
 | 
			
		|||
[ ] [ enable-optimizer ] unit-test
 | 
			
		||||
 | 
			
		||||
[ [ ] ] [ "" epsilon [ drop [ [ ] ] call ] action parse ] unit-test
 | 
			
		||||
 | 
			
		||||
{
 | 
			
		||||
    T{ parse-error
 | 
			
		||||
       { position 0 }
 | 
			
		||||
       { got "fbcd" }
 | 
			
		||||
       { messages V{ "'a'" "'b'" } }
 | 
			
		||||
    }
 | 
			
		||||
} [
 | 
			
		||||
    [ "fbcd" "a" token "b" token 2array choice parse ] [ ] recover
 | 
			
		||||
] unit-test
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -9,7 +9,7 @@ FROM: namespaces => set ;
 | 
			
		|||
IN: peg
 | 
			
		||||
 | 
			
		||||
TUPLE: parse-result remaining ast ;
 | 
			
		||||
TUPLE: parse-error position messages ; 
 | 
			
		||||
TUPLE: parse-error position got messages ;
 | 
			
		||||
TUPLE: parser peg compiled id ;
 | 
			
		||||
 | 
			
		||||
M: parser equal? { [ [ class-of ] same? ] [ [ id>> ] same? ] } 2&& ;
 | 
			
		||||
| 
						 | 
				
			
			@ -20,6 +20,10 @@ C: <parse-error>  parse-error
 | 
			
		|||
 | 
			
		||||
SYMBOL: error-stack
 | 
			
		||||
 | 
			
		||||
: merge-overlapping-errors ( a b -- c )
 | 
			
		||||
    dupd [ messages>> ] bi@ union [ [ position>> ] [ got>> ] bi ] dip
 | 
			
		||||
    <parse-error> ;
 | 
			
		||||
 | 
			
		||||
: (merge-errors) ( a b -- c )
 | 
			
		||||
    {
 | 
			
		||||
        { [ over position>> not ] [ nip ] }
 | 
			
		||||
| 
						 | 
				
			
			@ -28,7 +32,7 @@ SYMBOL: error-stack
 | 
			
		|||
            2dup [ position>> ] compare {
 | 
			
		||||
                { +lt+ [ nip ] }
 | 
			
		||||
                { +gt+ [ drop ] }
 | 
			
		||||
                { +eq+ [ messages>> over messages>> union [ position>> ] dip <parse-error> ] }
 | 
			
		||||
                { +eq+ [ merge-overlapping-errors ] }
 | 
			
		||||
            } case
 | 
			
		||||
        ]
 | 
			
		||||
    } cond ;
 | 
			
		||||
| 
						 | 
				
			
			@ -40,7 +44,7 @@ SYMBOL: error-stack
 | 
			
		|||
        drop
 | 
			
		||||
    ] if ;
 | 
			
		||||
 | 
			
		||||
: add-error ( remaining message -- )
 | 
			
		||||
: add-error ( position got message -- )
 | 
			
		||||
    <parse-error> error-stack get push ;
 | 
			
		||||
 | 
			
		||||
SYMBOL: ignore
 | 
			
		||||
| 
						 | 
				
			
			@ -81,7 +85,7 @@ SYMBOL: lrstack
 | 
			
		|||
: reset-pegs ( -- )
 | 
			
		||||
    H{ } clone \ peg-cache set-global ;
 | 
			
		||||
 | 
			
		||||
reset-pegs 
 | 
			
		||||
reset-pegs
 | 
			
		||||
 | 
			
		||||
#! An entry in the table of memoized parse results
 | 
			
		||||
#! ast = an AST produced from the parse
 | 
			
		||||
| 
						 | 
				
			
			@ -90,10 +94,10 @@ reset-pegs
 | 
			
		|||
#! pos = the position in the input string of this entry
 | 
			
		||||
TUPLE: memo-entry ans pos ;
 | 
			
		||||
 | 
			
		||||
TUPLE: left-recursion seed rule-id head next ; 
 | 
			
		||||
TUPLE: left-recursion seed rule-id head next ;
 | 
			
		||||
TUPLE: peg-head rule-id involved-set eval-set ;
 | 
			
		||||
 | 
			
		||||
: rule-id ( word -- id ) 
 | 
			
		||||
: rule-id ( word -- id )
 | 
			
		||||
    #! A rule is the parser compiled down to a word. It has
 | 
			
		||||
    #! a "peg-id" property containing the id of the original parser.
 | 
			
		||||
    "peg-id" word-prop ;
 | 
			
		||||
| 
						 | 
				
			
			@ -112,7 +116,7 @@ TUPLE: peg-head rule-id involved-set eval-set ;
 | 
			
		|||
        nip [ ast>> ] [ remaining>> ] bi input-from pos set
 | 
			
		||||
    ] [
 | 
			
		||||
        pos set fail
 | 
			
		||||
    ] if* ; 
 | 
			
		||||
    ] if* ;
 | 
			
		||||
 | 
			
		||||
: eval-rule ( rule -- ast )
 | 
			
		||||
    #! Evaluate a rule, return an ast resulting from it.
 | 
			
		||||
| 
						 | 
				
			
			@ -132,7 +136,7 @@ TUPLE: peg-head rule-id involved-set eval-set ;
 | 
			
		|||
    swap >>ans pos get >>pos drop ;
 | 
			
		||||
 | 
			
		||||
: stop-growth? ( ast m -- ? )
 | 
			
		||||
    [ failed? pos get ] dip 
 | 
			
		||||
    [ failed? pos get ] dip
 | 
			
		||||
    pos>> <= or ;
 | 
			
		||||
 | 
			
		||||
: setup-growth ( h p -- )
 | 
			
		||||
| 
						 | 
				
			
			@ -272,7 +276,7 @@ GENERIC: (compile) ( peg -- quot )
 | 
			
		|||
    #! If not, compile it to a temporary word, cache it,
 | 
			
		||||
    #! and return it. Otherwise return the existing one.
 | 
			
		||||
    #! Circular parsers are supported by getting the word
 | 
			
		||||
    #! name and storing it in the cache, before compiling, 
 | 
			
		||||
    #! name and storing it in the cache, before compiling,
 | 
			
		||||
    #! so it is picked up when re-entered.
 | 
			
		||||
    dup compiled>> [
 | 
			
		||||
        nip
 | 
			
		||||
| 
						 | 
				
			
			@ -333,9 +337,9 @@ TUPLE: token-parser symbol ;
 | 
			
		|||
: parse-token ( input string -- result )
 | 
			
		||||
    #! Parse the string, returning a parse result
 | 
			
		||||
    [ ?head-slice ] keep swap [
 | 
			
		||||
        <parse-result> f f add-error
 | 
			
		||||
        <parse-result> f f f add-error
 | 
			
		||||
    ] [
 | 
			
		||||
        [ drop pos get "token '" ] dip append "'" append 1vector add-error f
 | 
			
		||||
        [ seq>> pos get swap ] dip "'" "'" surround 1vector add-error f
 | 
			
		||||
    ] if ;
 | 
			
		||||
 | 
			
		||||
M: token-parser (compile) ( peg -- quot )
 | 
			
		||||
| 
						 | 
				
			
			@ -404,7 +408,7 @@ M: seq-parser (compile) ( peg -- quot )
 | 
			
		|||
        [
 | 
			
		||||
            parsers>> unclip compile-parser-quot [ parse-seq-element ] curry ,
 | 
			
		||||
            [ compile-parser-quot [ merge-errors ] compose [ parse-seq-element ] curry , ] each
 | 
			
		||||
        ] { } make , \ 1&& , 
 | 
			
		||||
        ] { } make , \ 1&& ,
 | 
			
		||||
    ] [ ] make ;
 | 
			
		||||
 | 
			
		||||
TUPLE: choice-parser parsers ;
 | 
			
		||||
| 
						 | 
				
			
			@ -421,7 +425,7 @@ TUPLE: repeat0-parser p1 ;
 | 
			
		|||
 | 
			
		||||
: (repeat) ( quot: ( -- result ) result -- result )
 | 
			
		||||
    over call [
 | 
			
		||||
        [ remaining>> swap remaining<< ] 2keep 
 | 
			
		||||
        [ remaining>> swap remaining<< ] 2keep
 | 
			
		||||
        ast>> swap [ ast>> push ] keep
 | 
			
		||||
        (repeat)
 | 
			
		||||
    ] [
 | 
			
		||||
| 
						 | 
				
			
			@ -430,7 +434,7 @@ TUPLE: repeat0-parser p1 ;
 | 
			
		|||
 | 
			
		||||
M: repeat0-parser (compile) ( peg -- quot )
 | 
			
		||||
    p1>> compile-parser-quot '[
 | 
			
		||||
        input-slice V{ } clone <parse-result> _ swap (repeat) 
 | 
			
		||||
        input-slice V{ } clone <parse-result> _ swap (repeat)
 | 
			
		||||
    ] ;
 | 
			
		||||
 | 
			
		||||
TUPLE: repeat1-parser p1 ;
 | 
			
		||||
| 
						 | 
				
			
			@ -443,8 +447,8 @@ TUPLE: repeat1-parser p1 ;
 | 
			
		|||
    ] if* ;
 | 
			
		||||
 | 
			
		||||
M: repeat1-parser (compile) ( peg -- quot )
 | 
			
		||||
    p1>> compile-parser-quot '[ 
 | 
			
		||||
        input-slice V{ } clone <parse-result> _ swap (repeat) repeat1-empty-check  
 | 
			
		||||
    p1>> compile-parser-quot '[
 | 
			
		||||
        input-slice V{ } clone <parse-result> _ swap (repeat) repeat1-empty-check
 | 
			
		||||
    ] ;
 | 
			
		||||
 | 
			
		||||
TUPLE: optional-parser p1 ;
 | 
			
		||||
| 
						 | 
				
			
			@ -500,16 +504,16 @@ TUPLE: sp-parser p1 ;
 | 
			
		|||
 | 
			
		||||
M: sp-parser (compile) ( peg -- quot )
 | 
			
		||||
    p1>> compile-parser-quot '[
 | 
			
		||||
        input-slice [ blank? ] trim-head-slice input-from pos set @ 
 | 
			
		||||
        input-slice [ blank? ] trim-head-slice input-from pos set @
 | 
			
		||||
    ] ;
 | 
			
		||||
 | 
			
		||||
TUPLE: delay-parser quot ;
 | 
			
		||||
 | 
			
		||||
M: delay-parser (compile) ( peg -- quot )
 | 
			
		||||
    #! For efficiency we memoize the quotation.
 | 
			
		||||
    #! This way it is run only once and the 
 | 
			
		||||
    #! This way it is run only once and the
 | 
			
		||||
    #! parser constructed once at run time.
 | 
			
		||||
    quot>> gensym [ delayed get set-at ] keep 1quotation ; 
 | 
			
		||||
    quot>> gensym [ delayed get set-at ] keep 1quotation ;
 | 
			
		||||
 | 
			
		||||
TUPLE: box-parser quot ;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue