Provide ability to plug in tokenizers to ebnf parsers
parent
7239c4d79f
commit
b6b5f12732
|
@ -11,6 +11,31 @@ IN: peg.ebnf
|
||||||
#! Given an EBNF word produced from EBNF: return the EBNF rule
|
#! Given an EBNF word produced from EBNF: return the EBNF rule
|
||||||
"ebnf-parser" word-prop at ;
|
"ebnf-parser" word-prop at ;
|
||||||
|
|
||||||
|
#! A tokenizer bundles three slots: any, one and many.
#! In this file the any and one slots hold quotations that are called to
#! build parsers (see the ebnf-any-character and ebnf-terminal transforms).
TUPLE: tokenizer any one many ;
|
||||||
|
|
||||||
|
#! Builds the tokenizer that is used when no other one has been installed.
#! The literal supplies three quotations after the leading f; assuming the
#! literal fills slots positionally, they land in any / one / many:
#!   [ [ drop t ] satisfy ]   - satisfy with a predicate that always answers t
#!   [ token ]
#!   [ [ = ] curry satisfy ]
: default-tokenizer ( -- tokenizer )
|
||||||
|
T{ tokenizer f
|
||||||
|
[ [ drop t ] satisfy ]
|
||||||
|
[ token ]
|
||||||
|
[ [ = ] curry satisfy ]
|
||||||
|
} ;
|
||||||
|
|
||||||
|
#! Wraps an existing parser as a tokenizer. The parser is placed in a
#! one-element quotation (1quotation) for the first slot; the remaining two
#! slots both receive the same [ [ = ] curry satisfy ] quotation (hence dup).
: parser-tokenizer ( parser -- tokenizer )
|
||||||
|
1quotation [ [ = ] curry satisfy ] dup tokenizer boa ;
|
||||||
|
|
||||||
|
#! Looks up the named rule of an EBNF word via rule and converts the
#! result into a tokenizer with parser-tokenizer.
: rule-tokenizer ( name word -- tokenizer )
|
||||||
|
rule parser-tokenizer ;
|
||||||
|
|
||||||
|
#! Fetches the value of the tokenizer variable with get; when that value
#! is f, default-tokenizer is called to supply one instead.
#! NOTE(review): the stack effect names the result "word", but the value
#! produced is a tokenizer object. This word also shares its name with the
#! tokenizer tuple class - confirm the clash is intended.
: tokenizer ( -- word )
|
||||||
|
\ tokenizer get [ default-tokenizer ] unless* ;
|
||||||
|
|
||||||
|
#! Stores a freshly built default-tokenizer in the global tokenizer variable.
: reset-tokenizer ( -- )
|
||||||
|
default-tokenizer \ tokenizer set-global ;
|
||||||
|
|
||||||
|
#! Parsing word. Reads the next token and looks it up with search; if the
#! lookup yields f an error is thrown. Otherwise the word found is executed
#! and its result is stored in the global tokenizer variable.
: TOKENIZER:
|
||||||
|
scan search [ "Tokenizer not found" throw ] unless*
|
||||||
|
execute \ tokenizer set-global ; parsing
|
||||||
|
|
||||||
#! Tuple classes that have (transform) methods elsewhere in this file
#! (unchanged context rows, so each declaration appears twice).
#! ebnf-terminal's symbol slot and ebnf-foreign's word slot are read by
#! their respective (transform) methods.
TUPLE: ebnf-non-terminal symbol ;
|
TUPLE: ebnf-non-terminal symbol ;
|
||||||
TUPLE: ebnf-terminal symbol ;
|
TUPLE: ebnf-terminal symbol ;
|
||||||
TUPLE: ebnf-foreign word rule ;
|
TUPLE: ebnf-foreign word rule ;
|
||||||
|
@ -344,7 +369,7 @@ M: ebnf-choice (transform) ( ast -- parser )
|
||||||
options>> [ (transform) ] map choice ;
|
options>> [ (transform) ] map choice ;
|
||||||
|
|
||||||
#! Changed row pair. The earlier body discarded the AST node and returned
#! any-char. The later body discards the node and instead passes box a
#! quotation that calls whatever is stored in the current tokenizer's any slot.
#! NOTE(review): box presumably defers calling the quotation - confirm
#! against the peg vocabulary.
M: ebnf-any-character (transform) ( ast -- parser )
|
M: ebnf-any-character (transform) ( ast -- parser )
|
||||||
drop any-char ;
|
drop [ tokenizer any>> call ] box ;
|
||||||
|
|
||||||
#! Transforms a range node by handing its pattern slot to range-pattern
#! (unchanged context rows, so the definition appears twice).
M: ebnf-range (transform) ( ast -- parser )
|
M: ebnf-range (transform) ( ast -- parser )
|
||||||
pattern>> range-pattern ;
|
pattern>> range-pattern ;
|
||||||
|
@ -435,7 +460,7 @@ M: ebnf-var (transform) ( ast -- parser )
|
||||||
parser>> (transform) ;
|
parser>> (transform) ;
|
||||||
|
|
||||||
#! Changed row pair. The earlier body built a 2choice between a token parser
#! for the symbol and a satisfy parser comparing against the symbol with =.
#! The later body curries the symbol onto a quotation that calls the current
#! tokenizer's one slot, and passes that quotation to box.
M: ebnf-terminal (transform) ( ast -- parser )
|
M: ebnf-terminal (transform) ( ast -- parser )
|
||||||
symbol>> [ token ] keep [ = ] curry satisfy 2choice ;
|
symbol>> [ tokenizer one>> call ] curry box ;
|
||||||
|
|
||||||
M: ebnf-foreign (transform) ( ast -- parser )
|
M: ebnf-foreign (transform) ( ast -- parser )
|
||||||
dup word>> search
|
dup word>> search
|
||||||
|
@ -476,10 +501,22 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
|
||||||
parse-result-ast transform dup dup parser [ main swap at compile ] with-variable
|
parse-result-ast transform dup dup parser [ main swap at compile ] with-variable
|
||||||
[ compiled-parse ] curry [ with-scope ] curry ;
|
[ compiled-parse ] curry [ with-scope ] curry ;
|
||||||
|
|
||||||
#! Parsing word, shown as the earlier one-line form followed by the later
#! multi-row form.
#! Earlier form: read text up to "EBNF]", run ebnf>quot on it and add the
#! resulting quotation to the parse.
#! Later form: scan one token first. If it is "+", the following word is
#! read with scan-word and executed to supply a tokenizer, and the text
#! prefix is the empty string. Otherwise the scanned token with a space
#! appended becomes the prefix and default-tokenizer is used. The tokenizer
#! variable is bound with with-variable while the rest runs. If
#! parse-multiline-string throws, the error is dropped and an empty string
#! is appended to the prefix instead.
: [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing
|
: [EBNF
|
||||||
|
scan {
|
||||||
|
{ "+" [ scan-word execute "" swap ] }
|
||||||
|
[ " " append default-tokenizer ]
|
||||||
|
} case \ tokenizer [
|
||||||
|
[ "EBNF]" parse-multiline-string ] [ drop "" ] recover append ebnf>quot nip parsed
|
||||||
|
] with-variable ; parsing
|
||||||
|
|
||||||
#! Parsing word; earlier and later bodies are interleaved row by row below.
#! Earlier body: CREATE-WORD dup, read text up to ";EBNF", run ebnf>quot,
#! define the word with a 1-in 1-out effect and store the remaining
#! ebnf>quot result under the "ebnf-parser" word property.
#! Later body: after CREATE-WORD, scan one token. If it is "+", the next
#! word is read with scan-word and executed to supply a tokenizer, with an
#! empty text prefix; otherwise the token plus a space is the prefix and
#! default-tokenizer is used. With the tokenizer variable bound via
#! with-variable, the text up to ";EBNF" is appended to the prefix (an empty
#! string is appended if parse-multiline-string throws) and the word is
#! defined exactly as in the earlier body.
: EBNF:
|
: EBNF:
|
||||||
CREATE-WORD dup
|
CREATE-WORD scan {
|
||||||
";EBNF" parse-multiline-string
|
{ "+" [ scan-word execute "" swap ] }
|
||||||
ebnf>quot swapd 1 1 <effect> define-declared "ebnf-parser" set-word-prop ; parsing
|
[ " " append default-tokenizer ]
|
||||||
|
} case \ tokenizer [
|
||||||
|
dupd [ ";EBNF" parse-multiline-string ] [ drop "" ] recover append
|
||||||
|
ebnf>quot swapd 1 1 <effect> define-declared "ebnf-parser" set-word-prop
|
||||||
|
] with-variable ; parsing
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@ IN: peg.javascript.tokenizer
|
||||||
|
|
||||||
USE: prettyprint
|
USE: prettyprint
|
||||||
|
|
||||||
EBNF: tokenizer
|
EBNF: javascript-tokenizer
|
||||||
Letter = [a-zA-Z]
|
Letter = [a-zA-Z]
|
||||||
Digit = [0-9]
|
Digit = [0-9]
|
||||||
Digits = Digit+
|
Digits = Digit+
|
||||||
|
|
Loading…
Reference in New Issue