From b6b5f12732914b36a0939f1f0221eaf29f9867d0 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Fri, 20 Jun 2008 22:29:53 +1200 Subject: [PATCH] Provide ability to plug in tokenizers to ebnf parsers --- extra/peg/ebnf/ebnf.factor | 49 ++++++++++++++++--- .../peg/javascript/tokenizer/tokenizer.factor | 2 +- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 8f36218b61..921ba7be67 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -11,6 +11,31 @@ IN: peg.ebnf #! Given an EBNF word produced from EBNF: return the EBNF rule "ebnf-parser" word-prop at ; +TUPLE: tokenizer any one many ; + +: default-tokenizer ( -- tokenizer ) + T{ tokenizer f + [ [ drop t ] satisfy ] + [ token ] + [ [ = ] curry satisfy ] + } ; + +: parser-tokenizer ( parser -- tokenizer ) + 1quotation [ [ = ] curry satisfy ] dup tokenizer boa ; + +: rule-tokenizer ( name word -- tokenizer ) + rule parser-tokenizer ; + +: tokenizer ( -- word ) + \ tokenizer get [ default-tokenizer ] unless* ; + +: reset-tokenizer ( -- ) + default-tokenizer \ tokenizer set-global ; + +: TOKENIZER: + scan search [ "Tokenizer not found" throw ] unless* + execute \ tokenizer set-global ; parsing + TUPLE: ebnf-non-terminal symbol ; TUPLE: ebnf-terminal symbol ; TUPLE: ebnf-foreign word rule ; @@ -344,7 +369,7 @@ M: ebnf-choice (transform) ( ast -- parser ) options>> [ (transform) ] map choice ; M: ebnf-any-character (transform) ( ast -- parser ) - drop any-char ; + drop [ tokenizer any>> call ] box ; M: ebnf-range (transform) ( ast -- parser ) pattern>> range-pattern ; @@ -435,7 +460,7 @@ M: ebnf-var (transform) ( ast -- parser ) parser>> (transform) ; M: ebnf-terminal (transform) ( ast -- parser ) - symbol>> [ token ] keep [ = ] curry satisfy 2choice ; + symbol>> [ tokenizer one>> call ] curry box ; M: ebnf-foreign (transform) ( ast -- parser ) dup word>> search @@ -476,10 +501,22 @@ M: ebnf-non-terminal (transform) ( ast -- parser ) parse-result-ast transform dup dup parser [ main swap at compile ] with-variable [ compiled-parse ] curry [ with-scope ] curry ; -: [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing +: [EBNF + scan { + { "+" [ scan-word execute "" swap ] } + [ " " append default-tokenizer ] + } case \ tokenizer [ + [ "EBNF]" parse-multiline-string ] [ drop "" ] recover append ebnf>quot nip parsed + ] with-variable ; parsing : EBNF: - CREATE-WORD dup - ";EBNF" parse-multiline-string - ebnf>quot swapd 1 1 define-declared "ebnf-parser" set-word-prop ; parsing + CREATE-WORD scan { + { "+" [ scan-word execute "" swap ] } + [ " " append default-tokenizer ] + } case \ tokenizer [ + dupd [ ";EBNF" parse-multiline-string ] [ drop "" ] recover append + ebnf>quot swapd 1 1 define-declared "ebnf-parser" set-word-prop + ] with-variable ; parsing + + diff --git a/extra/peg/javascript/tokenizer/tokenizer.factor b/extra/peg/javascript/tokenizer/tokenizer.factor index a1cff8a3db..5bf6a639fa 100644 --- a/extra/peg/javascript/tokenizer/tokenizer.factor +++ b/extra/peg/javascript/tokenizer/tokenizer.factor @@ -8,7 +8,7 @@ IN: peg.javascript.tokenizer USE: prettyprint -EBNF: tokenizer +EBNF: javascript-tokenizer Letter = [a-zA-Z] Digit = [0-9] Digits = Digit+