diff --git a/basis/peg/ebnf/ebnf-tests.factor b/basis/peg/ebnf/ebnf-tests.factor index a6d3cf0b21..cc83a55c7e 100644 --- a/basis/peg/ebnf/ebnf-tests.factor +++ b/basis/peg/ebnf/ebnf-tests.factor @@ -3,7 +3,7 @@ ! USING: kernel tools.test peg peg.ebnf words math math.parser sequences accessors peg.parsers parser namespaces arrays - strings eval ; + strings eval unicode.data multiline ; IN: peg.ebnf.tests { T{ ebnf-non-terminal f "abc" } } [ @@ -520,3 +520,13 @@ Tok = Spaces (Number | Special ) { "\\" } [ "\\" [EBNF foo="\\" EBNF] ] unit-test + +[ "USE: peg.ebnf [EBNF EBNF]" eval ] must-fail + +[ <" USE: peg.ebnf [EBNF + lol = a + lol = b + EBNF] "> eval +] [ + error>> [ redefined-rule? ] [ name>> "lol" = ] bi and +] must-fail-with diff --git a/basis/peg/ebnf/ebnf.factor b/basis/peg/ebnf/ebnf.factor index 9f730831e7..b50ba685b8 100644 --- a/basis/peg/ebnf/ebnf.factor +++ b/basis/peg/ebnf/ebnf.factor @@ -5,13 +5,18 @@ sequences quotations vectors namespaces make math assocs continuations peg peg.parsers unicode.categories multiline splitting accessors effects sequences.deep peg.search combinators.short-circuit lexer io.streams.string stack-checker -io combinators parser ; +io combinators parser summary ; IN: peg.ebnf : rule ( name word -- parser ) #! Given an EBNF word produced from EBNF: return the EBNF rule "ebnf-parser" word-prop at ; +ERROR: no-rule rule parser ; + +: lookup-rule ( rule parser -- rule' ) + 2dup rule [ 2nip ] [ no-rule ] if* ; + TUPLE: tokenizer any one many ; : default-tokenizer ( -- tokenizer ) @@ -34,8 +39,13 @@ TUPLE: tokenizer any one many ; : reset-tokenizer ( -- ) default-tokenizer \ tokenizer set-global ; +ERROR: no-tokenizer name ; + +M: no-tokenizer summary + drop "Tokenizer not found" ; + SYNTAX: TOKENIZER: - scan search [ "Tokenizer not found" throw ] unless* + scan dup search [ nip ] [ no-tokenizer ] if* execute( -- tokenizer ) \ tokenizer set-global ; TUPLE: ebnf-non-terminal symbol ; @@ -258,7 +268,7 @@ DEFER: 'choice' "]]" token ensure-not , "]?" token ensure-not , [ drop t ] satisfy , - ] seq* [ first ] action repeat0 [ >string ] action ; + ] seq* repeat0 [ concat >string ] action ; : 'ensure-not' ( -- parser ) #! Parses the '!' syntax to ensure that @@ -367,15 +377,16 @@ M: ebnf-tokenizer (transform) ( ast -- parser ) (transform) dup parser-tokenizer \ tokenizer set-global ] if ; + +ERROR: redefined-rule name ; + +M: redefined-rule summary + name>> "Rule '" "' defined more than once" surround ; M: ebnf-rule (transform) ( ast -- parser ) dup elements>> (transform) [ - swap symbol>> dup get parser? [ - "Rule '" over append "' defined more than once" append throw - ] [ - set - ] if + swap symbol>> dup get parser? [ redefined-rule ] [ set ] if ] keep ; M: ebnf-sequence (transform) ( ast -- parser ) @@ -466,14 +477,18 @@ ERROR: bad-effect quot effect ; { [ dup (( -- b )) effect<= ] [ drop [ drop ] prepose ] } [ bad-effect ] } cond ; + +: ebnf-transform ( ast -- parser quot ) + [ parser>> (transform) ] + [ code>> insert-escapes ] + [ parser>> ] tri build-locals + [ string-lines parse-lines ] call( string -- quot ) ; M: ebnf-action (transform) ( ast -- parser ) - [ parser>> (transform) ] [ code>> insert-escapes ] [ parser>> ] tri build-locals - [ string-lines parse-lines ] call( string -- quot ) check-action-effect action ; + ebnf-transform check-action-effect action ; M: ebnf-semantic (transform) ( ast -- parser ) - [ parser>> (transform) ] [ code>> insert-escapes ] [ parser>> ] tri build-locals - [ string-lines parse-lines ] call( string -- quot ) semantic ; + ebnf-transform semantic ; M: ebnf-var (transform) ( ast -- parser ) parser>> (transform) ; @@ -481,19 +496,20 @@ M: ebnf-var (transform) ( ast -- parser ) M: ebnf-terminal (transform) ( ast -- parser ) symbol>> tokenizer one>> call( symbol -- parser ) ; +ERROR: ebnf-foreign-not-found name ; + +M: ebnf-foreign-not-found summary + name>> "Foreign word '" "' not found" surround ; + M: ebnf-foreign (transform) ( ast -- parser ) - dup word>> search - [ "Foreign word '" swap word>> append "' not found" append throw ] unless* + dup word>> search [ word>> ebnf-foreign-not-found ] unless* swap rule>> [ main ] unless* over rule [ nip ] [ execute( -- parser ) ] if* ; -: parser-not-found ( name -- * ) - [ - "Parser '" % % "' not found." % - ] "" make throw ; +ERROR: parser-not-found name ; M: ebnf-non-terminal (transform) ( ast -- parser ) symbol>> [ @@ -504,16 +520,16 @@ M: ebnf-non-terminal (transform) ( ast -- parser ) 'ebnf' parse transform ; : check-parse-result ( result -- result ) - dup [ - dup remaining>> [ blank? ] trim empty? [ + [ + dup remaining>> [ blank? ] trim [ [ "Unable to fully parse EBNF. Left to parse was: " % remaining>> % ] "" make throw - ] unless + ] unless-empty ] [ "Could not parse EBNF" throw - ] if ; + ] if* ; : parse-ebnf ( string -- hashtable ) 'ebnf' (parse) check-parse-result ast>> transform ; @@ -522,14 +538,18 @@ M: ebnf-non-terminal (transform) ( ast -- parser ) parse-ebnf dup dup parser [ main swap at compile ] with-variable [ compiled-parse ] curry [ with-scope ast>> ] curry ; -SYNTAX: " reset-tokenizer parse-multiline-string parse-ebnf main swap at +SYNTAX: " + reset-tokenizer parse-multiline-string parse-ebnf main swap at parsed reset-tokenizer ; -SYNTAX: [EBNF "EBNF]" reset-tokenizer parse-multiline-string ebnf>quot nip +SYNTAX: [EBNF + "EBNF]" + reset-tokenizer parse-multiline-string ebnf>quot nip parsed \ call parsed reset-tokenizer ; SYNTAX: EBNF: reset-tokenizer CREATE-WORD dup ";EBNF" parse-multiline-string - ebnf>quot swapd (( input -- ast )) define-declared "ebnf-parser" set-word-prop + ebnf>quot swapd + (( input -- ast )) define-declared "ebnf-parser" set-word-prop reset-tokenizer ; -