Fix terminal parser with non-default tokenizers

db4
Chris Double 2008-06-21 01:49:07 +12:00
parent 5433553571
commit 6b83ab9d90
5 changed files with 19 additions and 13 deletions

View File

@@ -511,4 +511,8 @@ Tok = Spaces (Number | Special )
tokenizer=spaces (number | operator) tokenizer=spaces (number | operator)
main= . . . main= . . .
EBNF] call ast>> EBNF] call ast>>
] unit-test
{ "++" } [
"++--" [EBNF tokenizer=("++" | "--") main="++" EBNF] call ast>>
] unit-test ] unit-test

View File

@@ -15,13 +15,14 @@ TUPLE: tokenizer any one many ;
: default-tokenizer ( -- tokenizer ) : default-tokenizer ( -- tokenizer )
T{ tokenizer f T{ tokenizer f
[ [ drop t ] satisfy ] [ any-char ]
[ token ] [ token ]
[ [ = ] curry satisfy ] [ [ = ] curry any-char swap semantic ]
} ; } ;
: parser-tokenizer ( parser -- tokenizer ) : parser-tokenizer ( parser -- tokenizer )
1quotation [ [ = ] curry satisfy ] dup tokenizer boa ; [ 1quotation ] keep
[ swap [ = ] curry semantic ] curry dup tokenizer boa ;
: rule-tokenizer ( name word -- tokenizer ) : rule-tokenizer ( name word -- tokenizer )
rule parser-tokenizer ; rule parser-tokenizer ;

View File

@@ -5,7 +5,7 @@ IN: peg.javascript
: parse-javascript ( string -- ast ) : parse-javascript ( string -- ast )
tokenize-javascript [ tokenize-javascript [
ast>> parse-javascript [ ast>> javascript [
ast>> ast>>
] [ ] [
"Unable to parse JavaScript" throw "Unable to parse JavaScript" throw

View File

@@ -1,11 +1,11 @@
! Copyright (C) 2008 Chris Double. ! Copyright (C) 2008 Chris Double.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
! !
USING: kernel tools.test peg peg.javascript.ast peg.javascript.tokenizer USING: kernel tools.test peg peg.javascript.ast peg.javascript.parser
peg.javascript.parser accessors multiline sequences math ; accessors multiline sequences math ;
IN: peg.javascript.parser.tests IN: peg.javascript.parser.tests
\ parse-javascript must-infer \ javascript must-infer
{ {
T{ T{
@@ -23,14 +23,14 @@ IN: peg.javascript.parser.tests
} }
} }
} [ } [
"123; 'hello'; foo(x);" tokenize-javascript ast>> parse-javascript ast>> "123; 'hello'; foo(x);" javascript ast>>
] unit-test ] unit-test
{ t } [ { t } [
<" <"
var x=5 var x=5
var y=10 var y=10
"> tokenize-javascript ast>> parse-javascript remaining>> length zero? "> javascript remaining>> length zero?
] unit-test ] unit-test
@@ -41,7 +41,7 @@ function foldl(f, initial, seq) {
initial = f(initial, seq[i]); initial = f(initial, seq[i]);
return initial; return initial;
} }
"> tokenize-javascript ast>> parse-javascript remaining>> length zero? "> javascript remaining>> length zero?
] unit-test ] unit-test
{ t } [ { t } [
@@ -52,6 +52,6 @@ ParseState.prototype.from = function(index) {
r.length = this.length - index; r.length = this.length - index;
return r; return r;
} }
"> tokenize-javascript ast>> parse-javascript remaining>> length zero? "> javascript remaining>> length zero?
] unit-test ] unit-test

View File

@@ -1,12 +1,13 @@
! Copyright (C) 2008 Chris Double. ! Copyright (C) 2008 Chris Double.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel accessors peg peg.ebnf peg.javascript.ast ; USING: kernel accessors peg peg.ebnf peg.javascript.ast peg.javascript.tokenizer ;
IN: peg.javascript.parser IN: peg.javascript.parser
#! Grammar for JavaScript. Based on OMeta-JS example from: #! Grammar for JavaScript. Based on OMeta-JS example from:
#! http://jarrett.cs.ucla.edu/ometa-js/#JavaScript_Compiler #! http://jarrett.cs.ucla.edu/ometa-js/#JavaScript_Compiler
EBNF: parse-javascript EBNF: javascript
tokenizer = <foreign tokenize-javascript Tok>
End = !(.) End = !(.)
Space = " " | "\t" | "\n" Space = " " | "\t" | "\n"
Spaces = Space* => [[ ignore ]] Spaces = Space* => [[ ignore ]]