regexp: try again to fix the issue with backslashes.

The new, simple rule is that inside R/ syntax every forward slash must be
escaped with a backslash, so that regexp literals are easy to tokenize:

R/ [\/]/

but in the constructor <regexp> they shouldn't be:

"[/]" <regexp>

When the regexp is prettyprinted, we make sure to visually escape the
forward slashes in the raw regexp.
windows-drag
John Benediktsson 2019-03-21 13:29:15 -07:00
parent ca9f74e993
commit 26abdf4a23
8 changed files with 33 additions and 23 deletions

View File

@ -7,10 +7,10 @@ unicode multiline ;
IN: globs
: not-path-separator ( -- sep )
os windows? R/ [^\\/\\]/ R/ [^\\/]/ ? ; foldable
os windows? R/ [^\/\\]/ R/ [^\/]/ ? ; foldable
: wild-path-separator ( -- sep )
os windows? R/ [^\\/\\][\\/\\]|[^\\/\\]/ R/ [^\\/][\\/]|[^\\/]/ ? ; foldable
os windows? R/ [^\/\\][\/\\]|[^\/\\]/ R/ [^\/][\/]|[^\/]/ ? ; foldable
EBNF: <glob> [=[

View File

@ -3,7 +3,8 @@
USING: accessors arrays assocs combinators
combinators.short-circuit interval-maps kernel locals
math.parser memoize multiline peg.ebnf regexp.ast regexp.classes
sequences sets splitting strings unicode unicode.data unicode.script ;
sequences sets splitting strings unicode unicode.data
unicode.script ;
IN: regexp.parser
: allowed-char? ( ch -- ? )
@ -70,13 +71,14 @@ MEMO: simple-category-table ( -- table )
: lookup-escape ( char -- ast )
{
{ CHAR: t [ CHAR: \t ] }
{ CHAR: a [ CHAR: \a ] }
{ CHAR: e [ CHAR: \e ] }
{ CHAR: f [ CHAR: \f ] }
{ CHAR: n [ CHAR: \n ] }
{ CHAR: r [ CHAR: \r ] }
{ CHAR: f [ 0xc ] }
{ CHAR: a [ 0x7 ] }
{ CHAR: e [ 0x1b ] }
{ CHAR: \\ [ CHAR: \\ ] }
{ CHAR: t [ CHAR: \t ] }
{ CHAR: v [ CHAR: \v ] }
{ CHAR: 0 [ CHAR: \0 ] }
{ CHAR: w [ c-identifier-class <primitive-class> ] }
{ CHAR: W [ c-identifier-class <primitive-class> <not-class> ] }

View File

@ -7,7 +7,7 @@ IN: regexp.prettyprint
M: regexp pprint*
[
[
[ raw>> "\\/" "\\\\/" replace "R/ " % % "/" % ]
[ raw>> "/" "\\/" replace "R/ " % % "/" % ]
[ options>> options>string % ] bi
] "" make
] keep present-text ;

View File

@ -49,6 +49,9 @@ IN: regexp.tests
{ t } [ "a" ".+" <regexp> matches? ] unit-test
{ t } [ "ab" ".+" <regexp> matches? ] unit-test
{ t } [ "\0" "[\\0]" <regexp> matches? ] unit-test
{ f } [ "0" "[\\0]" <regexp> matches? ] unit-test
{ t } [ " " "[\\s]" <regexp> matches? ] unit-test
{ f } [ "a" "[\\s]" <regexp> matches? ] unit-test
{ f } [ " " "[\\S]" <regexp> matches? ] unit-test
@ -335,6 +338,10 @@ unit-test
{ "XhXXlXlXoX XwXoXrXlXdX" } [ "hello world" R/ e*/ "X" re-replace ] unit-test
{ "-- title --" } [ "== title ==" R/ =/ "-" re-replace ] unit-test
{ "abc" } [ "a/ \\bc" "/.*\\" <regexp> "" re-replace ] unit-test
{ "ac" } [ "a/ \\bc" R/ \/.*\\./ "" re-replace ] unit-test
{ "abc" } [ "a/ \\bc" R/ \/.*\\/ "" re-replace ] unit-test
{ "" } [ "ab" "a(?!b)" <regexp> first-match >string ] unit-test
{ "a" } [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test
{ t } [ "fxxbar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test

View File

@ -200,10 +200,11 @@ PRIVATE>
: take-until ( lexer -- string )
dup skip-blank [
dupd [
[ CHAR: / -rot index-from ] keep
over [ "Unterminated regexp" throw ] unless
2dup [ 1 - ] dip nth CHAR: \\ =
[ [ [ 1 + ] dip ] when ] keep
[ [ "\\/" member? ] find-from ] keep swap [
CHAR: \ = [ [ 2 + ] dip t ] [ f ] if
] [
"Unterminated regexp" throw
] if*
] loop over [ subseq ] dip 1 +
] change-lexer-column ;

View File

@ -65,7 +65,7 @@ IN: validators
v-regexp ;
: v-url ( str -- str )
"URL" R/ (?:ftp|http|https):\\/\\/\S+/ v-regexp ;
"URL" R/ (?:ftp|http|https):\/\/\S+/ v-regexp ;
: v-captcha ( str -- str )
dup empty? [ "must remain blank" throw ] unless ;

View File

@ -283,11 +283,11 @@ CONSTANT: sky H{
CONSTANT: re-timestamp R/ \d{6}Z/
CONSTANT: re-station R/ \w{4}/
CONSTANT: re-temperature R/ [M]?\d{2}\\/([M]?\d{2})?/
CONSTANT: re-temperature R/ [M]?\d{2}\/([M]?\d{2})?/
CONSTANT: re-wind R/ (VRB|\d{3})\d{2,3}(G\d{2,3})?KT/
CONSTANT: re-wind-variable R/ \d{3}V\d{3}/
CONSTANT: re-visibility R/ [MP]?\d+(\\/\d+)?SM/
CONSTANT: re-rvr R/ R\d{2}[RLC]?\\/\d{4}(V\d{4})?FT/
CONSTANT: re-visibility R/ [MP]?\d+(\/\d+)?SM/
CONSTANT: re-rvr R/ R\d{2}[RLC]?\/\d{4}(V\d{4})?FT/
CONSTANT: re-weather R/ [+-]?(VC)?(\w{2}|\w{4})/
CONSTANT: re-sky-condition R/ (\w{2,3}\d{3}(\w+)?|\w{3}|CAVOK)/
CONSTANT: re-altimeter R/ [AQ]\d{4}/
@ -519,23 +519,23 @@ CONSTANT: re-recent-weather R/ ((\w{2})?[BE]\d{2,4}((\w{2})?[BE]\d{2,4})?)+/
{ [ dup R/ 1\d{4}/ matches? ] [ parse-6hr-max-temp ] }
{ [ dup R/ 2\d{4}/ matches? ] [ parse-6hr-min-temp ] }
{ [ dup R/ 4\d{8}/ matches? ] [ parse-24hr-temp ] }
{ [ dup R/ 4\\/\d{3}/ matches? ] [ parse-snow-depth ] }
{ [ dup R/ 4\/\d{3}/ matches? ] [ parse-snow-depth ] }
{ [ dup R/ 5\d{4}/ matches? ] [ parse-1hr-pressure ] }
{ [ dup R/ 6[\d\\/]{4}/ matches? ] [ parse-6hr-precipitation ] }
{ [ dup R/ 6[\d\/]{4}/ matches? ] [ parse-6hr-precipitation ] }
{ [ dup R/ 7\d{4}/ matches? ] [ parse-24hr-precipitation ] }
{ [ dup R/ 8\\/\d{3}/ matches? ] [ parse-cloud-cover ] }
{ [ dup R/ 8\/\d{3}/ matches? ] [ parse-cloud-cover ] }
{ [ dup R/ 931\d{3}/ matches? ] [ parse-6hr-snowfall ] }
{ [ dup R/ 933\d{3}/ matches? ] [ parse-water-equivalent-snow ] }
{ [ dup R/ 98\d{3}/ matches? ] [ parse-duration-of-sunshine ] }
{ [ dup R/ T\d{4,8}/ matches? ] [ parse-1hr-temp ] }
{ [ dup R/ \d{3}\d{2,3}\\/\d{2,4}/ matches? ] [ parse-peak-wind ] }
{ [ dup R/ \d{3}\d{2,3}\/\d{2,4}/ matches? ] [ parse-peak-wind ] }
{ [ dup R/ P\d{4}/ matches? ] [ parse-1hr-precipitation ] }
{ [ dup R/ SLP\d{3}/ matches? ] [ parse-sea-level-pressure ] }
{ [ dup R/ LTG\w+/ matches? ] [ parse-lightning ] }
{ [ dup R/ PROB\d+/ matches? ] [ parse-probability ] }
{ [ dup R/ \d{3}V\d{3}/ matches? ] [ parse-varying ] }
{ [ dup R/ [^-]+(-[^-]+)+/ matches? ] [ parse-from-to ] }
{ [ dup R/ [^\\/]+(\\/[^\\/]+)+/ matches? ] [ ] }
{ [ dup R/ [^\/]+(\/[^\/]+)+/ matches? ] [ ] }
{ [ dup R/ \d+.\d+/ matches? ] [ ] }
{ [ dup re-recent-weather matches? ] [ parse-recent-weather ] }
{ [ dup re-weather matches? ] [ parse-weather ] }

View File

@ -13,7 +13,7 @@ IN: xkcd
: comic-image ( url -- image )
http-get nip
R/ \/\/imgs\.xkcd\.com\\/comics\\/[^\.]+\.(png|jpg)/
R/ \/\/imgs\.xkcd\.com\/comics\/[^\.]+\.(png|jpg)/
first-match >string "http:" prepend load-http-image ;
: comic-image. ( url -- )