regexp: try again to fix the issue with backslashes.
The new simple rule is: inside R/ syntax, all backslashes should be escaped to allow easy regexp literal tokenization: R/ [\/]/ — but in the constructor <regexp> they shouldn't be: "[/]" <regexp>. When the regexp is prettyprinted, we make sure to visually escape the backslashes in the raw regexp.
windows-drag
parent
ca9f74e993
commit
26abdf4a23
|
@ -7,10 +7,10 @@ unicode multiline ;
|
||||||
IN: globs
|
IN: globs
|
||||||
|
|
||||||
: not-path-separator ( -- sep )
|
: not-path-separator ( -- sep )
|
||||||
os windows? R/ [^\\/\\]/ R/ [^\\/]/ ? ; foldable
|
os windows? R/ [^\/\\]/ R/ [^\/]/ ? ; foldable
|
||||||
|
|
||||||
: wild-path-separator ( -- sep )
|
: wild-path-separator ( -- sep )
|
||||||
os windows? R/ [^\\/\\][\\/\\]|[^\\/\\]/ R/ [^\\/][\\/]|[^\\/]/ ? ; foldable
|
os windows? R/ [^\/\\][\/\\]|[^\/\\]/ R/ [^\/][\/]|[^\/]/ ? ; foldable
|
||||||
|
|
||||||
EBNF: <glob> [=[
|
EBNF: <glob> [=[
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,8 @@
|
||||||
USING: accessors arrays assocs combinators
|
USING: accessors arrays assocs combinators
|
||||||
combinators.short-circuit interval-maps kernel locals
|
combinators.short-circuit interval-maps kernel locals
|
||||||
math.parser memoize multiline peg.ebnf regexp.ast regexp.classes
|
math.parser memoize multiline peg.ebnf regexp.ast regexp.classes
|
||||||
sequences sets splitting strings unicode unicode.data unicode.script ;
|
sequences sets splitting strings unicode unicode.data
|
||||||
|
unicode.script ;
|
||||||
IN: regexp.parser
|
IN: regexp.parser
|
||||||
|
|
||||||
: allowed-char? ( ch -- ? )
|
: allowed-char? ( ch -- ? )
|
||||||
|
@ -70,13 +71,14 @@ MEMO: simple-category-table ( -- table )
|
||||||
|
|
||||||
: lookup-escape ( char -- ast )
|
: lookup-escape ( char -- ast )
|
||||||
{
|
{
|
||||||
{ CHAR: t [ CHAR: \t ] }
|
{ CHAR: a [ CHAR: \a ] }
|
||||||
|
{ CHAR: e [ CHAR: \e ] }
|
||||||
|
{ CHAR: f [ CHAR: \f ] }
|
||||||
{ CHAR: n [ CHAR: \n ] }
|
{ CHAR: n [ CHAR: \n ] }
|
||||||
{ CHAR: r [ CHAR: \r ] }
|
{ CHAR: r [ CHAR: \r ] }
|
||||||
{ CHAR: f [ 0xc ] }
|
{ CHAR: t [ CHAR: \t ] }
|
||||||
{ CHAR: a [ 0x7 ] }
|
{ CHAR: v [ CHAR: \v ] }
|
||||||
{ CHAR: e [ 0x1b ] }
|
{ CHAR: 0 [ CHAR: \0 ] }
|
||||||
{ CHAR: \\ [ CHAR: \\ ] }
|
|
||||||
|
|
||||||
{ CHAR: w [ c-identifier-class <primitive-class> ] }
|
{ CHAR: w [ c-identifier-class <primitive-class> ] }
|
||||||
{ CHAR: W [ c-identifier-class <primitive-class> <not-class> ] }
|
{ CHAR: W [ c-identifier-class <primitive-class> <not-class> ] }
|
||||||
|
|
|
@ -7,7 +7,7 @@ IN: regexp.prettyprint
|
||||||
M: regexp pprint*
|
M: regexp pprint*
|
||||||
[
|
[
|
||||||
[
|
[
|
||||||
[ raw>> "\\/" "\\\\/" replace "R/ " % % "/" % ]
|
[ raw>> "/" "\\/" replace "R/ " % % "/" % ]
|
||||||
[ options>> options>string % ] bi
|
[ options>> options>string % ] bi
|
||||||
] "" make
|
] "" make
|
||||||
] keep present-text ;
|
] keep present-text ;
|
||||||
|
|
|
@ -49,6 +49,9 @@ IN: regexp.tests
|
||||||
{ t } [ "a" ".+" <regexp> matches? ] unit-test
|
{ t } [ "a" ".+" <regexp> matches? ] unit-test
|
||||||
{ t } [ "ab" ".+" <regexp> matches? ] unit-test
|
{ t } [ "ab" ".+" <regexp> matches? ] unit-test
|
||||||
|
|
||||||
|
{ t } [ "\0" "[\\0]" <regexp> matches? ] unit-test
|
||||||
|
{ f } [ "0" "[\\0]" <regexp> matches? ] unit-test
|
||||||
|
|
||||||
{ t } [ " " "[\\s]" <regexp> matches? ] unit-test
|
{ t } [ " " "[\\s]" <regexp> matches? ] unit-test
|
||||||
{ f } [ "a" "[\\s]" <regexp> matches? ] unit-test
|
{ f } [ "a" "[\\s]" <regexp> matches? ] unit-test
|
||||||
{ f } [ " " "[\\S]" <regexp> matches? ] unit-test
|
{ f } [ " " "[\\S]" <regexp> matches? ] unit-test
|
||||||
|
@ -335,6 +338,10 @@ unit-test
|
||||||
{ "XhXXlXlXoX XwXoXrXlXdX" } [ "hello world" R/ e*/ "X" re-replace ] unit-test
|
{ "XhXXlXlXoX XwXoXrXlXdX" } [ "hello world" R/ e*/ "X" re-replace ] unit-test
|
||||||
{ "-- title --" } [ "== title ==" R/ =/ "-" re-replace ] unit-test
|
{ "-- title --" } [ "== title ==" R/ =/ "-" re-replace ] unit-test
|
||||||
|
|
||||||
|
{ "abc" } [ "a/ \\bc" "/.*\\" <regexp> "" re-replace ] unit-test
|
||||||
|
{ "ac" } [ "a/ \\bc" R/ \/.*\\./ "" re-replace ] unit-test
|
||||||
|
{ "abc" } [ "a/ \\bc" R/ \/.*\\/ "" re-replace ] unit-test
|
||||||
|
|
||||||
{ "" } [ "ab" "a(?!b)" <regexp> first-match >string ] unit-test
|
{ "" } [ "ab" "a(?!b)" <regexp> first-match >string ] unit-test
|
||||||
{ "a" } [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test
|
{ "a" } [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test
|
||||||
{ t } [ "fxxbar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test
|
{ t } [ "fxxbar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test
|
||||||
|
|
|
@ -200,10 +200,11 @@ PRIVATE>
|
||||||
: take-until ( lexer -- string )
|
: take-until ( lexer -- string )
|
||||||
dup skip-blank [
|
dup skip-blank [
|
||||||
dupd [
|
dupd [
|
||||||
[ CHAR: / -rot index-from ] keep
|
[ [ "\\/" member? ] find-from ] keep swap [
|
||||||
over [ "Unterminated regexp" throw ] unless
|
CHAR: \ = [ [ 2 + ] dip t ] [ f ] if
|
||||||
2dup [ 1 - ] dip nth CHAR: \\ =
|
] [
|
||||||
[ [ [ 1 + ] dip ] when ] keep
|
"Unterminated regexp" throw
|
||||||
|
] if*
|
||||||
] loop over [ subseq ] dip 1 +
|
] loop over [ subseq ] dip 1 +
|
||||||
] change-lexer-column ;
|
] change-lexer-column ;
|
||||||
|
|
||||||
|
|
|
@ -65,7 +65,7 @@ IN: validators
|
||||||
v-regexp ;
|
v-regexp ;
|
||||||
|
|
||||||
: v-url ( str -- str )
|
: v-url ( str -- str )
|
||||||
"URL" R/ (?:ftp|http|https):\\/\\/\S+/ v-regexp ;
|
"URL" R/ (?:ftp|http|https):\/\/\S+/ v-regexp ;
|
||||||
|
|
||||||
: v-captcha ( str -- str )
|
: v-captcha ( str -- str )
|
||||||
dup empty? [ "must remain blank" throw ] unless ;
|
dup empty? [ "must remain blank" throw ] unless ;
|
||||||
|
|
|
@ -283,11 +283,11 @@ CONSTANT: sky H{
|
||||||
|
|
||||||
CONSTANT: re-timestamp R/ \d{6}Z/
|
CONSTANT: re-timestamp R/ \d{6}Z/
|
||||||
CONSTANT: re-station R/ \w{4}/
|
CONSTANT: re-station R/ \w{4}/
|
||||||
CONSTANT: re-temperature R/ [M]?\d{2}\\/([M]?\d{2})?/
|
CONSTANT: re-temperature R/ [M]?\d{2}\/([M]?\d{2})?/
|
||||||
CONSTANT: re-wind R/ (VRB|\d{3})\d{2,3}(G\d{2,3})?KT/
|
CONSTANT: re-wind R/ (VRB|\d{3})\d{2,3}(G\d{2,3})?KT/
|
||||||
CONSTANT: re-wind-variable R/ \d{3}V\d{3}/
|
CONSTANT: re-wind-variable R/ \d{3}V\d{3}/
|
||||||
CONSTANT: re-visibility R/ [MP]?\d+(\\/\d+)?SM/
|
CONSTANT: re-visibility R/ [MP]?\d+(\/\d+)?SM/
|
||||||
CONSTANT: re-rvr R/ R\d{2}[RLC]?\\/\d{4}(V\d{4})?FT/
|
CONSTANT: re-rvr R/ R\d{2}[RLC]?\/\d{4}(V\d{4})?FT/
|
||||||
CONSTANT: re-weather R/ [+-]?(VC)?(\w{2}|\w{4})/
|
CONSTANT: re-weather R/ [+-]?(VC)?(\w{2}|\w{4})/
|
||||||
CONSTANT: re-sky-condition R/ (\w{2,3}\d{3}(\w+)?|\w{3}|CAVOK)/
|
CONSTANT: re-sky-condition R/ (\w{2,3}\d{3}(\w+)?|\w{3}|CAVOK)/
|
||||||
CONSTANT: re-altimeter R/ [AQ]\d{4}/
|
CONSTANT: re-altimeter R/ [AQ]\d{4}/
|
||||||
|
@ -519,23 +519,23 @@ CONSTANT: re-recent-weather R/ ((\w{2})?[BE]\d{2,4}((\w{2})?[BE]\d{2,4})?)+/
|
||||||
{ [ dup R/ 1\d{4}/ matches? ] [ parse-6hr-max-temp ] }
|
{ [ dup R/ 1\d{4}/ matches? ] [ parse-6hr-max-temp ] }
|
||||||
{ [ dup R/ 2\d{4}/ matches? ] [ parse-6hr-min-temp ] }
|
{ [ dup R/ 2\d{4}/ matches? ] [ parse-6hr-min-temp ] }
|
||||||
{ [ dup R/ 4\d{8}/ matches? ] [ parse-24hr-temp ] }
|
{ [ dup R/ 4\d{8}/ matches? ] [ parse-24hr-temp ] }
|
||||||
{ [ dup R/ 4\\/\d{3}/ matches? ] [ parse-snow-depth ] }
|
{ [ dup R/ 4\/\d{3}/ matches? ] [ parse-snow-depth ] }
|
||||||
{ [ dup R/ 5\d{4}/ matches? ] [ parse-1hr-pressure ] }
|
{ [ dup R/ 5\d{4}/ matches? ] [ parse-1hr-pressure ] }
|
||||||
{ [ dup R/ 6[\d\\/]{4}/ matches? ] [ parse-6hr-precipitation ] }
|
{ [ dup R/ 6[\d\/]{4}/ matches? ] [ parse-6hr-precipitation ] }
|
||||||
{ [ dup R/ 7\d{4}/ matches? ] [ parse-24hr-precipitation ] }
|
{ [ dup R/ 7\d{4}/ matches? ] [ parse-24hr-precipitation ] }
|
||||||
{ [ dup R/ 8\\/\d{3}/ matches? ] [ parse-cloud-cover ] }
|
{ [ dup R/ 8\/\d{3}/ matches? ] [ parse-cloud-cover ] }
|
||||||
{ [ dup R/ 931\d{3}/ matches? ] [ parse-6hr-snowfall ] }
|
{ [ dup R/ 931\d{3}/ matches? ] [ parse-6hr-snowfall ] }
|
||||||
{ [ dup R/ 933\d{3}/ matches? ] [ parse-water-equivalent-snow ] }
|
{ [ dup R/ 933\d{3}/ matches? ] [ parse-water-equivalent-snow ] }
|
||||||
{ [ dup R/ 98\d{3}/ matches? ] [ parse-duration-of-sunshine ] }
|
{ [ dup R/ 98\d{3}/ matches? ] [ parse-duration-of-sunshine ] }
|
||||||
{ [ dup R/ T\d{4,8}/ matches? ] [ parse-1hr-temp ] }
|
{ [ dup R/ T\d{4,8}/ matches? ] [ parse-1hr-temp ] }
|
||||||
{ [ dup R/ \d{3}\d{2,3}\\/\d{2,4}/ matches? ] [ parse-peak-wind ] }
|
{ [ dup R/ \d{3}\d{2,3}\/\d{2,4}/ matches? ] [ parse-peak-wind ] }
|
||||||
{ [ dup R/ P\d{4}/ matches? ] [ parse-1hr-precipitation ] }
|
{ [ dup R/ P\d{4}/ matches? ] [ parse-1hr-precipitation ] }
|
||||||
{ [ dup R/ SLP\d{3}/ matches? ] [ parse-sea-level-pressure ] }
|
{ [ dup R/ SLP\d{3}/ matches? ] [ parse-sea-level-pressure ] }
|
||||||
{ [ dup R/ LTG\w+/ matches? ] [ parse-lightning ] }
|
{ [ dup R/ LTG\w+/ matches? ] [ parse-lightning ] }
|
||||||
{ [ dup R/ PROB\d+/ matches? ] [ parse-probability ] }
|
{ [ dup R/ PROB\d+/ matches? ] [ parse-probability ] }
|
||||||
{ [ dup R/ \d{3}V\d{3}/ matches? ] [ parse-varying ] }
|
{ [ dup R/ \d{3}V\d{3}/ matches? ] [ parse-varying ] }
|
||||||
{ [ dup R/ [^-]+(-[^-]+)+/ matches? ] [ parse-from-to ] }
|
{ [ dup R/ [^-]+(-[^-]+)+/ matches? ] [ parse-from-to ] }
|
||||||
{ [ dup R/ [^\\/]+(\\/[^\\/]+)+/ matches? ] [ ] }
|
{ [ dup R/ [^\/]+(\/[^\/]+)+/ matches? ] [ ] }
|
||||||
{ [ dup R/ \d+.\d+/ matches? ] [ ] }
|
{ [ dup R/ \d+.\d+/ matches? ] [ ] }
|
||||||
{ [ dup re-recent-weather matches? ] [ parse-recent-weather ] }
|
{ [ dup re-recent-weather matches? ] [ parse-recent-weather ] }
|
||||||
{ [ dup re-weather matches? ] [ parse-weather ] }
|
{ [ dup re-weather matches? ] [ parse-weather ] }
|
||||||
|
|
|
@ -13,7 +13,7 @@ IN: xkcd
|
||||||
|
|
||||||
: comic-image ( url -- image )
|
: comic-image ( url -- image )
|
||||||
http-get nip
|
http-get nip
|
||||||
R/ \/\/imgs\.xkcd\.com\\/comics\\/[^\.]+\.(png|jpg)/
|
R/ \/\/imgs\.xkcd\.com\/comics\/[^\.]+\.(png|jpg)/
|
||||||
first-match >string "http:" prepend load-http-image ;
|
first-match >string "http:" prepend load-http-image ;
|
||||||
|
|
||||||
: comic-image. ( url -- )
|
: comic-image. ( url -- )
|
||||||
|
|
Loading…
Reference in New Issue