peg.javascript.tokenizer: support hex and unicode escapes.

locals-and-roots
John Benediktsson 2016-06-12 06:41:59 -07:00
parent 29a2a25954
commit e7fa92baac
2 changed files with 18 additions and 9 deletions

View File

@ -31,3 +31,7 @@ IN: peg.javascript.tokenizer.tests
! Every single-character backslash escape (\b \f \n \r \t \v \' \" \\)
! inside a double-quoted literal must come back as the one character it names.
{
    V{ T{ ast-string { value "\b\f\n\r\t\v'\"\\" } } }
} [
    "\"\\b\\f\\n\\r\\t\\v\\'\\\"\\\\\""
    tokenize-javascript
] unit-test
! \x61, \u0062 and \u{63} spell the code points 0x61, 0x62 and 0x63
! (a, b, c) as a two-digit hex escape, a four-digit unicode escape and a
! braced unicode escape; the tokenized string value must be "abc".
{
    V{ T{ ast-string { value "abc" } } }
} [
    "\"\\x61\\u0062\\u{63}\""
    tokenize-javascript
] unit-test

View File

@ -48,15 +48,20 @@ Name = !(Keyword) iName => [[ ast-name boa ]]
Number = Digits:ws '.' Digits:fs => [[ ws "." fs 3array "" concat-as string>number ast-number boa ]]
| Digits => [[ >string string>number ast-number boa ]]
EscapeChar = "\\b" => [[ CHAR: \b ]]
| "\\f" => [[ CHAR: \f ]]
| "\\n" => [[ CHAR: \n ]]
| "\\r" => [[ CHAR: \r ]]
| "\\t" => [[ CHAR: \t ]]
| "\\v" => [[ CHAR: \v ]]
| "\\'" => [[ CHAR: ' ]]
| "\\\"" => [[ CHAR: " ]]
| "\\\\" => [[ CHAR: \\ ]]
SingleEscapeChar = "b" => [[ CHAR: \b ]]
| "f" => [[ CHAR: \f ]]
| "n" => [[ CHAR: \n ]]
| "r" => [[ CHAR: \r ]]
| "t" => [[ CHAR: \t ]]
| "v" => [[ CHAR: \v ]]
| "'" => [[ CHAR: ' ]]
| "\"" => [[ CHAR: " ]]
| "\\" => [[ CHAR: \\ ]]
HexDigit = [0-9a-fA-F]
HexEscapeChar = "x" (HexDigit HexDigit):d => [[ d hex> ]]
UnicodeEscapeChar = "u" (HexDigit HexDigit HexDigit HexDigit):d => [[ d hex> ]]
| "u{" HexDigit+:d "}" => [[ d hex> ]]
EscapeChar = "\\" (SingleEscapeChar | HexEscapeChar | UnicodeEscapeChar):c => [[ c ]]
StringChars1 = (EscapeChar | !('"""') .)* => [[ >string ]]
StringChars2 = (EscapeChar | !('"') .)* => [[ >string ]]
StringChars3 = (EscapeChar | !("'") .)* => [[ >string ]]