! extra/peg/javascript/tokenizer/tokenizer.factor
! Copyright (C) 2008 Chris Double.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences strings arrays math.parser peg peg.ebnf peg.javascript.ast ;
IN: peg.javascript.tokenizer
! Tokenizer grammar for JavaScript. Based on the OMeta-JS example from:
! http://jarrett.cs.ucla.edu/ometa-js/#JavaScript_Compiler
USE: prettyprint
! tokenize-javascript: PEG grammar that splits JavaScript source text into
! tokens.
!
! Toks (the final rule) is zero or more Tok followed by trailing Spaces.
! Each Tok skips leading Spaces and then matches, in this order:
!   Name    - identifier that is not a Keyword; built with ast-name boa
!   Keyword - reserved word not immediately followed by a NameRest character
!   Number  - Digits, or Digits '.' Digits; built with ast-number boa
!   Str     - """...""", "..." or '...'; built with ast-string boa
!   RegExp  - /body/flags; built with ast-regexp boa
!   Special - operator or punctuation literal
!
! Spaces covers blanks, tabs, CR/LF, "//" comments and "/* */" comments,
! all of which are dropped via ignore.
!
! PEG choice is ordered, so within Special the longer operators are listed
! before their prefixes (e.g. "!==" before "!=" before "!"), and Name is
! tried before Keyword with a !(Keyword) guard.
!
! The newline that ends a "//" comment is optional so that a comment on the
! last line of input, with no trailing newline, is still skipped instead of
! falling through to Special and being tokenized as "/" "/" ...
!
! NOTE(review): EscapeChar only recognises \n, \r and \t. A backslash-escaped
! quote inside a string literal is not treated as an escape, so the quote
! ends the string - confirm whether callers rely on this.
EBNF: tokenize-javascript
Letter            = [a-zA-Z]
Digit             = [0-9]
Digits            = Digit+
SingleLineComment = "//" (!("\n") .)* "\n"? => [[ ignore ]]
MultiLineComment  = "/*" (!("*/") .)* "*/" => [[ ignore ]]
Space             = [ \t\r\n] | SingleLineComment | MultiLineComment
Spaces            = Space* => [[ ignore ]]
NameFirst         = Letter | "$" => [[ CHAR: $ ]] | "_" => [[ CHAR: _ ]]
NameRest          = NameFirst | Digit
iName             = NameFirst NameRest* => [[ first2 swap prefix >string ]]
Keyword           =  ("break"
                    | "case"
                    | "catch"
                    | "continue"
                    | "default"
                    | "delete"
                    | "do"
                    | "else"
                    | "finally"
                    | "for"
                    | "function"
                    | "if"
                    | "in"
                    | "instanceof"
                    | "new"
                    | "return"
                    | "switch"
                    | "this"
                    | "throw"
                    | "try"
                    | "typeof"
                    | "var"
                    | "void"
                    | "while"
                    | "with") !(NameRest)
Name              = !(Keyword) iName => [[ ast-name boa ]]
Number            =   Digits:ws '.' Digits:fs => [[ ws "." fs 3array "" concat-as string>number ast-number boa ]]
                    | Digits => [[ >string string>number ast-number boa ]]
EscapeChar        =   "\\n" => [[ 10 ]]
                    | "\\r" => [[ 13 ]]
                    | "\\t" => [[ 9 ]]
StringChars1      = (EscapeChar | !('"""') .)* => [[ >string ]]
StringChars2      = (EscapeChar | !('"') .)* => [[ >string ]]
StringChars3      = (EscapeChar | !("'") .)* => [[ >string ]]
Str               =   '"""' StringChars1:cs '"""' => [[ cs ast-string boa ]]
                    | '"' StringChars2:cs '"' => [[ cs ast-string boa ]]
                    | "'" StringChars3:cs "'" => [[ cs ast-string boa ]]
RegExpFlags       = NameRest* => [[ >string ]]
NonTerminator     = !([\n\r]) .
BackslashSequence = "\\" NonTerminator => [[ second ]]
RegExpFirstChar   =   !([*\\/]) NonTerminator
                    | BackslashSequence
RegExpChar        =   !([\\/]) NonTerminator
                    | BackslashSequence
RegExpChars       = RegExpChar*
RegExpBody        = RegExpFirstChar RegExpChars => [[ first2 swap prefix >string ]]
RegExp            = "/" RegExpBody:b "/" RegExpFlags:fl => [[ b fl ast-regexp boa ]]
Special           =   "("    | ")"   | "{"   | "}"   | "["   | "]"   | ","   | ";"
                    | "?"    | ":"   | "!==" | "!="  | "===" | "=="  | "="   | ">="
                    | ">>>=" | ">>>" | ">>=" | ">>"  | ">"   | "<="  | "<<=" | "<<"
                    | "<"    | "++"  | "+="  | "+"   | "--"  | "-="  | "-"   | "*="
                    | "*"    | "/="  | "/"   | "%="  | "%"   | "&&=" | "&&"  | "||="
                    | "||"   | "."   | "!"   | "&="  | "&"   | "|="  | "|"   | "^="
                    | "^"
Tok               = Spaces (Name | Keyword | Number | Str | RegExp | Special )
Toks              = Tok* Spaces
;EBNF