regexp: try again to fix the issue with backslashes.

The new, simple rule is that inside R/ syntax every forward slash must be
escaped with a backslash, which makes regexp literals easy to tokenize:

R/ [\/]/

but in the constructor <regexp> they shouldn't be:

"[/]" <regexp>

When the regexp is prettyprinted, we make sure to visually escape the
forward slashes in the raw regexp (each "/" is printed as "\/").
windows-drag
John Benediktsson 2019-03-21 13:29:15 -07:00
parent ca9f74e993
commit 26abdf4a23
8 changed files with 33 additions and 23 deletions

View File

@ -7,10 +7,10 @@ unicode multiline ;
IN: globs IN: globs
: not-path-separator ( -- sep ) : not-path-separator ( -- sep )
os windows? R/ [^\\/\\]/ R/ [^\\/]/ ? ; foldable os windows? R/ [^\/\\]/ R/ [^\/]/ ? ; foldable
: wild-path-separator ( -- sep ) : wild-path-separator ( -- sep )
os windows? R/ [^\\/\\][\\/\\]|[^\\/\\]/ R/ [^\\/][\\/]|[^\\/]/ ? ; foldable os windows? R/ [^\/\\][\/\\]|[^\/\\]/ R/ [^\/][\/]|[^\/]/ ? ; foldable
EBNF: <glob> [=[ EBNF: <glob> [=[

View File

@ -3,7 +3,8 @@
USING: accessors arrays assocs combinators USING: accessors arrays assocs combinators
combinators.short-circuit interval-maps kernel locals combinators.short-circuit interval-maps kernel locals
math.parser memoize multiline peg.ebnf regexp.ast regexp.classes math.parser memoize multiline peg.ebnf regexp.ast regexp.classes
sequences sets splitting strings unicode unicode.data unicode.script ; sequences sets splitting strings unicode unicode.data
unicode.script ;
IN: regexp.parser IN: regexp.parser
: allowed-char? ( ch -- ? ) : allowed-char? ( ch -- ? )
@ -70,13 +71,14 @@ MEMO: simple-category-table ( -- table )
: lookup-escape ( char -- ast ) : lookup-escape ( char -- ast )
{ {
{ CHAR: t [ CHAR: \t ] } { CHAR: a [ CHAR: \a ] }
{ CHAR: e [ CHAR: \e ] }
{ CHAR: f [ CHAR: \f ] }
{ CHAR: n [ CHAR: \n ] } { CHAR: n [ CHAR: \n ] }
{ CHAR: r [ CHAR: \r ] } { CHAR: r [ CHAR: \r ] }
{ CHAR: f [ 0xc ] } { CHAR: t [ CHAR: \t ] }
{ CHAR: a [ 0x7 ] } { CHAR: v [ CHAR: \v ] }
{ CHAR: e [ 0x1b ] } { CHAR: 0 [ CHAR: \0 ] }
{ CHAR: \\ [ CHAR: \\ ] }
{ CHAR: w [ c-identifier-class <primitive-class> ] } { CHAR: w [ c-identifier-class <primitive-class> ] }
{ CHAR: W [ c-identifier-class <primitive-class> <not-class> ] } { CHAR: W [ c-identifier-class <primitive-class> <not-class> ] }

View File

@ -7,7 +7,7 @@ IN: regexp.prettyprint
M: regexp pprint* M: regexp pprint*
[ [
[ [
[ raw>> "\\/" "\\\\/" replace "R/ " % % "/" % ] [ raw>> "/" "\\/" replace "R/ " % % "/" % ]
[ options>> options>string % ] bi [ options>> options>string % ] bi
] "" make ] "" make
] keep present-text ; ] keep present-text ;

View File

@ -49,6 +49,9 @@ IN: regexp.tests
{ t } [ "a" ".+" <regexp> matches? ] unit-test { t } [ "a" ".+" <regexp> matches? ] unit-test
{ t } [ "ab" ".+" <regexp> matches? ] unit-test { t } [ "ab" ".+" <regexp> matches? ] unit-test
{ t } [ "\0" "[\\0]" <regexp> matches? ] unit-test
{ f } [ "0" "[\\0]" <regexp> matches? ] unit-test
{ t } [ " " "[\\s]" <regexp> matches? ] unit-test { t } [ " " "[\\s]" <regexp> matches? ] unit-test
{ f } [ "a" "[\\s]" <regexp> matches? ] unit-test { f } [ "a" "[\\s]" <regexp> matches? ] unit-test
{ f } [ " " "[\\S]" <regexp> matches? ] unit-test { f } [ " " "[\\S]" <regexp> matches? ] unit-test
@ -335,6 +338,10 @@ unit-test
{ "XhXXlXlXoX XwXoXrXlXdX" } [ "hello world" R/ e*/ "X" re-replace ] unit-test { "XhXXlXlXoX XwXoXrXlXdX" } [ "hello world" R/ e*/ "X" re-replace ] unit-test
{ "-- title --" } [ "== title ==" R/ =/ "-" re-replace ] unit-test { "-- title --" } [ "== title ==" R/ =/ "-" re-replace ] unit-test
{ "abc" } [ "a/ \\bc" "/.*\\" <regexp> "" re-replace ] unit-test
{ "ac" } [ "a/ \\bc" R/ \/.*\\./ "" re-replace ] unit-test
{ "abc" } [ "a/ \\bc" R/ \/.*\\/ "" re-replace ] unit-test
{ "" } [ "ab" "a(?!b)" <regexp> first-match >string ] unit-test { "" } [ "ab" "a(?!b)" <regexp> first-match >string ] unit-test
{ "a" } [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test { "a" } [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test
{ t } [ "fxxbar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test { t } [ "fxxbar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test

View File

@ -200,10 +200,11 @@ PRIVATE>
: take-until ( lexer -- string ) : take-until ( lexer -- string )
dup skip-blank [ dup skip-blank [
dupd [ dupd [
[ CHAR: / -rot index-from ] keep [ [ "\\/" member? ] find-from ] keep swap [
over [ "Unterminated regexp" throw ] unless CHAR: \ = [ [ 2 + ] dip t ] [ f ] if
2dup [ 1 - ] dip nth CHAR: \\ = ] [
[ [ [ 1 + ] dip ] when ] keep "Unterminated regexp" throw
] if*
] loop over [ subseq ] dip 1 + ] loop over [ subseq ] dip 1 +
] change-lexer-column ; ] change-lexer-column ;

View File

@ -65,7 +65,7 @@ IN: validators
v-regexp ; v-regexp ;
: v-url ( str -- str ) : v-url ( str -- str )
"URL" R/ (?:ftp|http|https):\\/\\/\S+/ v-regexp ; "URL" R/ (?:ftp|http|https):\/\/\S+/ v-regexp ;
: v-captcha ( str -- str ) : v-captcha ( str -- str )
dup empty? [ "must remain blank" throw ] unless ; dup empty? [ "must remain blank" throw ] unless ;

View File

@ -283,11 +283,11 @@ CONSTANT: sky H{
CONSTANT: re-timestamp R/ \d{6}Z/ CONSTANT: re-timestamp R/ \d{6}Z/
CONSTANT: re-station R/ \w{4}/ CONSTANT: re-station R/ \w{4}/
CONSTANT: re-temperature R/ [M]?\d{2}\\/([M]?\d{2})?/ CONSTANT: re-temperature R/ [M]?\d{2}\/([M]?\d{2})?/
CONSTANT: re-wind R/ (VRB|\d{3})\d{2,3}(G\d{2,3})?KT/ CONSTANT: re-wind R/ (VRB|\d{3})\d{2,3}(G\d{2,3})?KT/
CONSTANT: re-wind-variable R/ \d{3}V\d{3}/ CONSTANT: re-wind-variable R/ \d{3}V\d{3}/
CONSTANT: re-visibility R/ [MP]?\d+(\\/\d+)?SM/ CONSTANT: re-visibility R/ [MP]?\d+(\/\d+)?SM/
CONSTANT: re-rvr R/ R\d{2}[RLC]?\\/\d{4}(V\d{4})?FT/ CONSTANT: re-rvr R/ R\d{2}[RLC]?\/\d{4}(V\d{4})?FT/
CONSTANT: re-weather R/ [+-]?(VC)?(\w{2}|\w{4})/ CONSTANT: re-weather R/ [+-]?(VC)?(\w{2}|\w{4})/
CONSTANT: re-sky-condition R/ (\w{2,3}\d{3}(\w+)?|\w{3}|CAVOK)/ CONSTANT: re-sky-condition R/ (\w{2,3}\d{3}(\w+)?|\w{3}|CAVOK)/
CONSTANT: re-altimeter R/ [AQ]\d{4}/ CONSTANT: re-altimeter R/ [AQ]\d{4}/
@ -519,23 +519,23 @@ CONSTANT: re-recent-weather R/ ((\w{2})?[BE]\d{2,4}((\w{2})?[BE]\d{2,4})?)+/
{ [ dup R/ 1\d{4}/ matches? ] [ parse-6hr-max-temp ] } { [ dup R/ 1\d{4}/ matches? ] [ parse-6hr-max-temp ] }
{ [ dup R/ 2\d{4}/ matches? ] [ parse-6hr-min-temp ] } { [ dup R/ 2\d{4}/ matches? ] [ parse-6hr-min-temp ] }
{ [ dup R/ 4\d{8}/ matches? ] [ parse-24hr-temp ] } { [ dup R/ 4\d{8}/ matches? ] [ parse-24hr-temp ] }
{ [ dup R/ 4\\/\d{3}/ matches? ] [ parse-snow-depth ] } { [ dup R/ 4\/\d{3}/ matches? ] [ parse-snow-depth ] }
{ [ dup R/ 5\d{4}/ matches? ] [ parse-1hr-pressure ] } { [ dup R/ 5\d{4}/ matches? ] [ parse-1hr-pressure ] }
{ [ dup R/ 6[\d\\/]{4}/ matches? ] [ parse-6hr-precipitation ] } { [ dup R/ 6[\d\/]{4}/ matches? ] [ parse-6hr-precipitation ] }
{ [ dup R/ 7\d{4}/ matches? ] [ parse-24hr-precipitation ] } { [ dup R/ 7\d{4}/ matches? ] [ parse-24hr-precipitation ] }
{ [ dup R/ 8\\/\d{3}/ matches? ] [ parse-cloud-cover ] } { [ dup R/ 8\/\d{3}/ matches? ] [ parse-cloud-cover ] }
{ [ dup R/ 931\d{3}/ matches? ] [ parse-6hr-snowfall ] } { [ dup R/ 931\d{3}/ matches? ] [ parse-6hr-snowfall ] }
{ [ dup R/ 933\d{3}/ matches? ] [ parse-water-equivalent-snow ] } { [ dup R/ 933\d{3}/ matches? ] [ parse-water-equivalent-snow ] }
{ [ dup R/ 98\d{3}/ matches? ] [ parse-duration-of-sunshine ] } { [ dup R/ 98\d{3}/ matches? ] [ parse-duration-of-sunshine ] }
{ [ dup R/ T\d{4,8}/ matches? ] [ parse-1hr-temp ] } { [ dup R/ T\d{4,8}/ matches? ] [ parse-1hr-temp ] }
{ [ dup R/ \d{3}\d{2,3}\\/\d{2,4}/ matches? ] [ parse-peak-wind ] } { [ dup R/ \d{3}\d{2,3}\/\d{2,4}/ matches? ] [ parse-peak-wind ] }
{ [ dup R/ P\d{4}/ matches? ] [ parse-1hr-precipitation ] } { [ dup R/ P\d{4}/ matches? ] [ parse-1hr-precipitation ] }
{ [ dup R/ SLP\d{3}/ matches? ] [ parse-sea-level-pressure ] } { [ dup R/ SLP\d{3}/ matches? ] [ parse-sea-level-pressure ] }
{ [ dup R/ LTG\w+/ matches? ] [ parse-lightning ] } { [ dup R/ LTG\w+/ matches? ] [ parse-lightning ] }
{ [ dup R/ PROB\d+/ matches? ] [ parse-probability ] } { [ dup R/ PROB\d+/ matches? ] [ parse-probability ] }
{ [ dup R/ \d{3}V\d{3}/ matches? ] [ parse-varying ] } { [ dup R/ \d{3}V\d{3}/ matches? ] [ parse-varying ] }
{ [ dup R/ [^-]+(-[^-]+)+/ matches? ] [ parse-from-to ] } { [ dup R/ [^-]+(-[^-]+)+/ matches? ] [ parse-from-to ] }
{ [ dup R/ [^\\/]+(\\/[^\\/]+)+/ matches? ] [ ] } { [ dup R/ [^\/]+(\/[^\/]+)+/ matches? ] [ ] }
{ [ dup R/ \d+.\d+/ matches? ] [ ] } { [ dup R/ \d+.\d+/ matches? ] [ ] }
{ [ dup re-recent-weather matches? ] [ parse-recent-weather ] } { [ dup re-recent-weather matches? ] [ parse-recent-weather ] }
{ [ dup re-weather matches? ] [ parse-weather ] } { [ dup re-weather matches? ] [ parse-weather ] }

View File

@ -13,7 +13,7 @@ IN: xkcd
: comic-image ( url -- image ) : comic-image ( url -- image )
http-get nip http-get nip
R/ \/\/imgs\.xkcd\.com\\/comics\\/[^\.]+\.(png|jpg)/ R/ \/\/imgs\.xkcd\.com\/comics\/[^\.]+\.(png|jpg)/
first-match >string "http:" prepend load-http-image ; first-match >string "http:" prepend load-http-image ;
: comic-image. ( url -- ) : comic-image. ( url -- )