regexp: one R/ syntax to rule them all, simpler this way.

locals-and-roots
John Benediktsson 2016-03-30 21:07:43 -07:00
parent 2ec0a139d1
commit cba0a96c10
8 changed files with 55 additions and 71 deletions

View File

@@ -7,7 +7,7 @@ strings system unicode.case ;
IN: globs
: not-path-separator ( -- sep )
os windows? R! [^/\\]! R! [^/]! ? ; foldable
os windows? R/ [^\\/\\]/ R/ [^\\/]/ ? ; foldable
EBNF: <glob>

View File

@@ -10,7 +10,7 @@ IN: regexp.combinators.tests
{ f f f } [ "food" "ibar" "ba" [ strings matches? ] tri@ ] unit-test
: conj ( -- regexp )
{ R' .*a' R' b.*' } <and> ;
{ R/ .*a/ R/ b.*/ } <and> ;
{ t } [ "bljhasflsda" conj matches? ] unit-test
{ f } [ "bsdfdfs" conj matches? ] unit-test

View File

@@ -1,13 +1,13 @@
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors kernel make prettyprint.backend
prettyprint.custom regexp regexp.parser regexp.private ;
prettyprint.custom regexp regexp.parser splitting ;
IN: regexp.prettyprint
M: regexp pprint*
[
[
[ raw>> dup find-regexp-syntax swap % swap % % ]
[ raw>> "R/ " % % "/" % ]
[ options>> options>string % ] bi
] "" make
] keep present-text ;

View File

@@ -60,7 +60,7 @@ IN: regexp-tests
{ t } [ "/" "\\/" <regexp> matches? ] unit-test
{ t } [ "a" R' a'i matches? ] unit-test
{ t } [ "a" R/ a/i matches? ] unit-test
{ t } [ "" "a|b*|c+|d?" <regexp> matches? ] unit-test
{ t } [ "a" "a|b*|c+|d?" <regexp> matches? ] unit-test
@@ -259,11 +259,11 @@ IN: regexp-tests
! Comment inside a regular expression
{ t } [ "ac" "a(?#boo)c" <regexp> matches? ] unit-test
{ } [ "USING: regexp kernel ; R' -{3}[+]{1,6}(?:!!)?\\s' drop" eval( -- ) ] unit-test
{ } [ "USING: regexp kernel ; R/ -{3}[+]{1,6}(?:!!)?\\s/ drop" eval( -- ) ] unit-test
{ } [ "USING: regexp kernel ; R' (ftp|http|https)://(\\w+:?\\w*@)?(\\S+)(:[0-9]+)?(/|/([\\w#!:.?+=&%@!\\-/]))?' drop" eval( -- ) ] unit-test
{ } [ "USING: regexp kernel ; R/ (ftp|http|https):\\/\\/(\\w+:?\\w*@)?(\\S+)(:[0-9]+)?(\\/\\|\\/([\\w#!:.?+=&%@!\\-\\/]))?/ drop" eval( -- ) ] unit-test
{ } [ "USING: regexp kernel ; R' \\*[^\s*][^*]*\\*' drop" eval( -- ) ] unit-test
{ } [ "USING: regexp kernel ; R/ \\*[^\s*][^*]*\\*/ drop" eval( -- ) ] unit-test
{ "ab" } [ "ab" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test
{ "abc" } [ "abc" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test
@@ -349,7 +349,7 @@ unit-test
{ f } [ "foobxr" "foo(?=bar)" <regexp> first-match ] unit-test
! Bug in parsing word
{ t } [ "a" R' a' matches? ] unit-test
{ t } [ "a" R/ a/ matches? ] unit-test
! Testing negation
{ f } [ "a" R/ (?~a)/ matches? ] unit-test

View File

@@ -197,42 +197,26 @@ PRIVATE>
<PRIVATE
! The following two should do some caching
: find-regexp-syntax ( string -- prefix suffix )
{
{ "R/ " "/" }
{ "R! " "!" }
{ "R# " "#" }
{ "R' " "'" }
{ "R( " ")" }
{ "R@ " "@" }
{ "R| " "|" }
} swap [ subseq? not nip ] curry assoc-find drop ;
: take-until ( end lexer -- string )
: take-until ( lexer -- string )
dup skip-blank [
[ index-from ] 2keep
[ swapd subseq ]
[ 2drop 1 + ] 3bi
dupd [
[ CHAR: / -rot index-from ] keep
over [ "Unterminated regexp" throw ] unless
2dup [ 1 - ] dip nth CHAR: \\ =
[ [ [ 1 + ] dip ] when ] keep
] loop over [ subseq ] dip 1 +
] change-lexer-column ;
: parse-noblank-token ( lexer -- str/f )
dup still-parsing-line? [ (parse-token) ] [ drop f ] if ;
: parsing-regexp ( accum end -- accum )
: parse-regexp ( accum -- accum )
lexer get [ take-until ] [ parse-noblank-token ] bi
<optioned-regexp> compile-next-match suffix! ;
PRIVATE>
SYNTAX: R! CHAR: ! parsing-regexp ;
SYNTAX: R# CHAR: # parsing-regexp ;
SYNTAX: R' CHAR: ' parsing-regexp ;
SYNTAX: R( CHAR: ) parsing-regexp ;
SYNTAX: R/ CHAR: / parsing-regexp ;
SYNTAX: R@ CHAR: @ parsing-regexp ;
SYNTAX: R| CHAR: | parsing-regexp ;
SYNTAX: R/ parse-regexp ;
USE: vocabs.loader

View File

@@ -62,11 +62,11 @@ IN: validators
! From http://www.regular-expressions.info/email.html
320 v-max-length
"e-mail"
R' [A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}'i
R/ [A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}/i
v-regexp ;
: v-url ( str -- str )
"URL" R' (?:ftp|http|https)://\S+' v-regexp ;
"URL" R/ (?:ftp|http|https):\\/\\/\S+/ v-regexp ;
: v-captcha ( str -- str )
dup empty? [ "must remain blank" throw ] unless ;

View File

@@ -277,16 +277,16 @@ CONSTANT: sky H{
unclip [ string>number ] [ CHAR: A = ] bi*
[ 100 /f "%.2f Hg" sprintf ] [ "%s hPa" sprintf ] if ;
CONSTANT: re-timestamp R! \d{6}Z!
CONSTANT: re-station R! \w{4}!
CONSTANT: re-temperature R! [M]?\d{2}/([M]?\d{2})?!
CONSTANT: re-wind R! (VRB|\d{3})\d{2,3}(G\d{2,3})?KT!
CONSTANT: re-wind-variable R! \d{3}V\d{3}!
CONSTANT: re-visibility R! [MP]?\d+(/\d+)?SM!
CONSTANT: re-rvr R! R\d{2}[RLC]?/\d{4}(V\d{4})?FT!
CONSTANT: re-weather R! [+-]?(VC)?(\w{2}|\w{4})!
CONSTANT: re-sky-condition R! (\w{2,3}\d{3}(\w+)?|\w{3}|CAVOK)!
CONSTANT: re-altimeter R! [AQ]\d{4}!
CONSTANT: re-timestamp R/ \d{6}Z/
CONSTANT: re-station R/ \w{4}/
CONSTANT: re-temperature R/ [M]?\d{2}\\/([M]?\d{2})?/
CONSTANT: re-wind R/ (VRB|\d{3})\d{2,3}(G\d{2,3})?KT/
CONSTANT: re-wind-variable R/ \d{3}V\d{3}/
CONSTANT: re-visibility R/ [MP]?\d+(\\/\d+)?SM/
CONSTANT: re-rvr R/ R\d{2}[RLC]?\\/\d{4}(V\d{4})?FT/
CONSTANT: re-weather R/ [+-]?(VC)?(\w{2}|\w{4})/
CONSTANT: re-sky-condition R/ (\w{2,3}\d{3}(\w+)?|\w{3}|CAVOK)/
CONSTANT: re-altimeter R/ [AQ]\d{4}/
: find-one ( seq quot: ( elt -- ? ) -- seq elt/f )
dupd find drop [ tail unclip ] [ f ] if* ; inline
@@ -462,7 +462,7 @@ CONSTANT: high-clouds H{
: parse-lightning ( str -- str' )
"LTG" ?head drop 2 group [ lightning at ] map " " join ;
CONSTANT: re-recent-weather R! ((\w{2})?[BE]\d{2,4}((\w{2})?[BE]\d{2,4})?)+!
CONSTANT: re-recent-weather R/ ((\w{2})?[BE]\d{2,4}((\w{2})?[BE]\d{2,4})?)+/
: parse-began/ended ( str -- str' )
unclip swap
@@ -512,27 +512,27 @@ CONSTANT: re-recent-weather R! ((\w{2})?[BE]\d{2,4}((\w{2})?[BE]\d{2,4})?)+!
: parse-remark ( str -- str' )
{
{ [ dup glossary key? ] [ glossary at ] }
{ [ dup R! 1\d{4}! matches? ] [ parse-6hr-max-temp ] }
{ [ dup R! 2\d{4}! matches? ] [ parse-6hr-min-temp ] }
{ [ dup R! 4\d{8}! matches? ] [ parse-24hr-temp ] }
{ [ dup R! 4/\d{3}! matches? ] [ parse-snow-depth ] }
{ [ dup R! 5\d{4}! matches? ] [ parse-1hr-pressure ] }
{ [ dup R! 6[\d/]{4}! matches? ] [ parse-6hr-precipitation ] }
{ [ dup R! 7\d{4}! matches? ] [ parse-24hr-precipitation ] }
{ [ dup R! 8/\d{3}! matches? ] [ parse-cloud-cover ] }
{ [ dup R! 931\d{3}! matches? ] [ parse-6hr-snowfall ] }
{ [ dup R! 933\d{3}! matches? ] [ parse-water-equivalent-snow ] }
{ [ dup R! 98\d{3}! matches? ] [ parse-duration-of-sunshine ] }
{ [ dup R! T\d{4,8}! matches? ] [ parse-1hr-temp ] }
{ [ dup R! \d{3}\d{2,3}/\d{2,4}! matches? ] [ parse-peak-wind ] }
{ [ dup R! P\d{4}! matches? ] [ parse-1hr-precipitation ] }
{ [ dup R! SLP\d{3}! matches? ] [ parse-sea-level-pressure ] }
{ [ dup R! LTG\w+! matches? ] [ parse-lightning ] }
{ [ dup R! PROB\d+! matches? ] [ parse-probability ] }
{ [ dup R! \d{3}V\d{3}! matches? ] [ parse-varying ] }
{ [ dup R! [^-]+(-[^-]+)+! matches? ] [ parse-from-to ] }
{ [ dup R! [^/]+(/[^/]+)+! matches? ] [ ] }
{ [ dup R! \d+.\d+! matches? ] [ ] }
{ [ dup R/ 1\d{4}/ matches? ] [ parse-6hr-max-temp ] }
{ [ dup R/ 2\d{4}/ matches? ] [ parse-6hr-min-temp ] }
{ [ dup R/ 4\d{8}/ matches? ] [ parse-24hr-temp ] }
{ [ dup R/ 4\\/\d{3}/ matches? ] [ parse-snow-depth ] }
{ [ dup R/ 5\d{4}/ matches? ] [ parse-1hr-pressure ] }
{ [ dup R/ 6[\d\\/]{4}/ matches? ] [ parse-6hr-precipitation ] }
{ [ dup R/ 7\d{4}/ matches? ] [ parse-24hr-precipitation ] }
{ [ dup R/ 8\\/\d{3}/ matches? ] [ parse-cloud-cover ] }
{ [ dup R/ 931\d{3}/ matches? ] [ parse-6hr-snowfall ] }
{ [ dup R/ 933\d{3}/ matches? ] [ parse-water-equivalent-snow ] }
{ [ dup R/ 98\d{3}/ matches? ] [ parse-duration-of-sunshine ] }
{ [ dup R/ T\d{4,8}/ matches? ] [ parse-1hr-temp ] }
{ [ dup R/ \d{3}\d{2,3}\\/\d{2,4}/ matches? ] [ parse-peak-wind ] }
{ [ dup R/ P\d{4}/ matches? ] [ parse-1hr-precipitation ] }
{ [ dup R/ SLP\d{3}/ matches? ] [ parse-sea-level-pressure ] }
{ [ dup R/ LTG\w+/ matches? ] [ parse-lightning ] }
{ [ dup R/ PROB\d+/ matches? ] [ parse-probability ] }
{ [ dup R/ \d{3}V\d{3}/ matches? ] [ parse-varying ] }
{ [ dup R/ [^-]+(-[^-]+)+/ matches? ] [ parse-from-to ] }
{ [ dup R/ [^\\/]+(\\/[^\\/]+)+/ matches? ] [ ] }
{ [ dup R/ \d+.\d+/ matches? ] [ ] }
{ [ dup re-recent-weather matches? ] [ parse-recent-weather ] }
{ [ dup re-weather matches? ] [ parse-weather ] }
{ [ dup re-sky-condition matches? ] [ parse-sky-condition ] }
@@ -596,12 +596,12 @@ M: string metar.
[ parse-altitude ] [ parse-wind ] bi* prepend
"wind shear " prepend ;
CONSTANT: re-from-timestamp R! FM\d{6}!
CONSTANT: re-from-timestamp R/ FM\d{6}/
: parse-from-timestamp ( str -- str' )
"FM" ?head drop parse-timestamp ;
CONSTANT: re-valid-timestamp R! \d{4}\/\d{4}!
CONSTANT: re-valid-timestamp R/ \d{4}\/\d{4}/
: parse-valid-timestamp ( str -- str' )
"/" split1 [ "00" append parse-timestamp ] bi@ " to " glue ;

View File

@@ -13,7 +13,7 @@ IN: xkcd
: comic-image ( url -- image )
http-get nip
R@ http://imgs\.xkcd\.com/comics/[^\.]+\.(png|jpg)@
R/ http:\\/\\/imgs\.xkcd\.com\\/comics\\/[^\.]+\.(png|jpg)/
first-match >string load-http-image ;
: comic-image. ( url -- )