regexp: one R/ syntax to rule them all, simpler this way.
parent
2ec0a139d1
commit
cba0a96c10
|
@ -7,7 +7,7 @@ strings system unicode.case ;
|
|||
IN: globs
|
||||
|
||||
: not-path-separator ( -- sep )
|
||||
os windows? R! [^/\\]! R! [^/]! ? ; foldable
|
||||
os windows? R/ [^\\/\\]/ R/ [^\\/]/ ? ; foldable
|
||||
|
||||
EBNF: <glob>
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ IN: regexp.combinators.tests
|
|||
{ f f f } [ "food" "ibar" "ba" [ strings matches? ] tri@ ] unit-test
|
||||
|
||||
: conj ( -- regexp )
|
||||
{ R' .*a' R' b.*' } <and> ;
|
||||
{ R/ .*a/ R/ b.*/ } <and> ;
|
||||
|
||||
{ t } [ "bljhasflsda" conj matches? ] unit-test
|
||||
{ f } [ "bsdfdfs" conj matches? ] unit-test
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors kernel make prettyprint.backend
|
||||
prettyprint.custom regexp regexp.parser regexp.private ;
|
||||
prettyprint.custom regexp regexp.parser splitting ;
|
||||
IN: regexp.prettyprint
|
||||
|
||||
M: regexp pprint*
|
||||
[
|
||||
[
|
||||
[ raw>> dup find-regexp-syntax swap % swap % % ]
|
||||
[ raw>> "R/ " % % "/" % ]
|
||||
[ options>> options>string % ] bi
|
||||
] "" make
|
||||
] keep present-text ;
|
||||
|
|
|
@ -60,7 +60,7 @@ IN: regexp-tests
|
|||
|
||||
{ t } [ "/" "\\/" <regexp> matches? ] unit-test
|
||||
|
||||
{ t } [ "a" R' a'i matches? ] unit-test
|
||||
{ t } [ "a" R/ a/i matches? ] unit-test
|
||||
|
||||
{ t } [ "" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
{ t } [ "a" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
|
@ -259,11 +259,11 @@ IN: regexp-tests
|
|||
! Comment inside a regular expression
|
||||
{ t } [ "ac" "a(?#boo)c" <regexp> matches? ] unit-test
|
||||
|
||||
{ } [ "USING: regexp kernel ; R' -{3}[+]{1,6}(?:!!)?\\s' drop" eval( -- ) ] unit-test
|
||||
{ } [ "USING: regexp kernel ; R/ -{3}[+]{1,6}(?:!!)?\\s/ drop" eval( -- ) ] unit-test
|
||||
|
||||
{ } [ "USING: regexp kernel ; R' (ftp|http|https)://(\\w+:?\\w*@)?(\\S+)(:[0-9]+)?(/|/([\\w#!:.?+=&%@!\\-/]))?' drop" eval( -- ) ] unit-test
|
||||
{ } [ "USING: regexp kernel ; R/ (ftp|http|https):\\/\\/(\\w+:?\\w*@)?(\\S+)(:[0-9]+)?(\\/\\|\\/([\\w#!:.?+=&%@!\\-\\/]))?/ drop" eval( -- ) ] unit-test
|
||||
|
||||
{ } [ "USING: regexp kernel ; R' \\*[^\s*][^*]*\\*' drop" eval( -- ) ] unit-test
|
||||
{ } [ "USING: regexp kernel ; R/ \\*[^\s*][^*]*\\*/ drop" eval( -- ) ] unit-test
|
||||
|
||||
{ "ab" } [ "ab" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test
|
||||
{ "abc" } [ "abc" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test
|
||||
|
@ -349,7 +349,7 @@ unit-test
|
|||
{ f } [ "foobxr" "foo(?=bar)" <regexp> first-match ] unit-test
|
||||
|
||||
! Bug in parsing word
|
||||
{ t } [ "a" R' a' matches? ] unit-test
|
||||
{ t } [ "a" R/ a/ matches? ] unit-test
|
||||
|
||||
! Testing negation
|
||||
{ f } [ "a" R/ (?~a)/ matches? ] unit-test
|
||||
|
|
|
@ -197,42 +197,26 @@ PRIVATE>
|
|||
|
||||
<PRIVATE
|
||||
|
||||
! The following two should do some caching
|
||||
|
||||
: find-regexp-syntax ( string -- prefix suffix )
|
||||
{
|
||||
{ "R/ " "/" }
|
||||
{ "R! " "!" }
|
||||
{ "R# " "#" }
|
||||
{ "R' " "'" }
|
||||
{ "R( " ")" }
|
||||
{ "R@ " "@" }
|
||||
{ "R| " "|" }
|
||||
} swap [ subseq? not nip ] curry assoc-find drop ;
|
||||
|
||||
: take-until ( end lexer -- string )
|
||||
: take-until ( lexer -- string )
|
||||
dup skip-blank [
|
||||
[ index-from ] 2keep
|
||||
[ swapd subseq ]
|
||||
[ 2drop 1 + ] 3bi
|
||||
dupd [
|
||||
[ CHAR: / -rot index-from ] keep
|
||||
over [ "Unterminated regexp" throw ] unless
|
||||
2dup [ 1 - ] dip nth CHAR: \\ =
|
||||
[ [ [ 1 + ] dip ] when ] keep
|
||||
] loop over [ subseq ] dip 1 +
|
||||
] change-lexer-column ;
|
||||
|
||||
: parse-noblank-token ( lexer -- str/f )
|
||||
dup still-parsing-line? [ (parse-token) ] [ drop f ] if ;
|
||||
|
||||
: parsing-regexp ( accum end -- accum )
|
||||
: parse-regexp ( accum -- accum )
|
||||
lexer get [ take-until ] [ parse-noblank-token ] bi
|
||||
<optioned-regexp> compile-next-match suffix! ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
SYNTAX: R! CHAR: ! parsing-regexp ;
|
||||
SYNTAX: R# CHAR: # parsing-regexp ;
|
||||
SYNTAX: R' CHAR: ' parsing-regexp ;
|
||||
SYNTAX: R( CHAR: ) parsing-regexp ;
|
||||
SYNTAX: R/ CHAR: / parsing-regexp ;
|
||||
SYNTAX: R@ CHAR: @ parsing-regexp ;
|
||||
SYNTAX: R| CHAR: | parsing-regexp ;
|
||||
SYNTAX: R/ parse-regexp ;
|
||||
|
||||
USE: vocabs.loader
|
||||
|
||||
|
|
|
@ -62,11 +62,11 @@ IN: validators
|
|||
! From http://www.regular-expressions.info/email.html
|
||||
320 v-max-length
|
||||
"e-mail"
|
||||
R' [A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}'i
|
||||
R/ [A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}/i
|
||||
v-regexp ;
|
||||
|
||||
: v-url ( str -- str )
|
||||
"URL" R' (?:ftp|http|https)://\S+' v-regexp ;
|
||||
"URL" R/ (?:ftp|http|https):\\/\\/\S+/ v-regexp ;
|
||||
|
||||
: v-captcha ( str -- str )
|
||||
dup empty? [ "must remain blank" throw ] unless ;
|
||||
|
|
|
@ -277,16 +277,16 @@ CONSTANT: sky H{
|
|||
unclip [ string>number ] [ CHAR: A = ] bi*
|
||||
[ 100 /f "%.2f Hg" sprintf ] [ "%s hPa" sprintf ] if ;
|
||||
|
||||
CONSTANT: re-timestamp R! \d{6}Z!
|
||||
CONSTANT: re-station R! \w{4}!
|
||||
CONSTANT: re-temperature R! [M]?\d{2}/([M]?\d{2})?!
|
||||
CONSTANT: re-wind R! (VRB|\d{3})\d{2,3}(G\d{2,3})?KT!
|
||||
CONSTANT: re-wind-variable R! \d{3}V\d{3}!
|
||||
CONSTANT: re-visibility R! [MP]?\d+(/\d+)?SM!
|
||||
CONSTANT: re-rvr R! R\d{2}[RLC]?/\d{4}(V\d{4})?FT!
|
||||
CONSTANT: re-weather R! [+-]?(VC)?(\w{2}|\w{4})!
|
||||
CONSTANT: re-sky-condition R! (\w{2,3}\d{3}(\w+)?|\w{3}|CAVOK)!
|
||||
CONSTANT: re-altimeter R! [AQ]\d{4}!
|
||||
CONSTANT: re-timestamp R/ \d{6}Z/
|
||||
CONSTANT: re-station R/ \w{4}/
|
||||
CONSTANT: re-temperature R/ [M]?\d{2}\\/([M]?\d{2})?/
|
||||
CONSTANT: re-wind R/ (VRB|\d{3})\d{2,3}(G\d{2,3})?KT/
|
||||
CONSTANT: re-wind-variable R/ \d{3}V\d{3}/
|
||||
CONSTANT: re-visibility R/ [MP]?\d+(\\/\d+)?SM/
|
||||
CONSTANT: re-rvr R/ R\d{2}[RLC]?\\/\d{4}(V\d{4})?FT/
|
||||
CONSTANT: re-weather R/ [+-]?(VC)?(\w{2}|\w{4})/
|
||||
CONSTANT: re-sky-condition R/ (\w{2,3}\d{3}(\w+)?|\w{3}|CAVOK)/
|
||||
CONSTANT: re-altimeter R/ [AQ]\d{4}/
|
||||
|
||||
: find-one ( seq quot: ( elt -- ? ) -- seq elt/f )
|
||||
dupd find drop [ tail unclip ] [ f ] if* ; inline
|
||||
|
@ -462,7 +462,7 @@ CONSTANT: high-clouds H{
|
|||
: parse-lightning ( str -- str' )
|
||||
"LTG" ?head drop 2 group [ lightning at ] map " " join ;
|
||||
|
||||
CONSTANT: re-recent-weather R! ((\w{2})?[BE]\d{2,4}((\w{2})?[BE]\d{2,4})?)+!
|
||||
CONSTANT: re-recent-weather R/ ((\w{2})?[BE]\d{2,4}((\w{2})?[BE]\d{2,4})?)+/
|
||||
|
||||
: parse-began/ended ( str -- str' )
|
||||
unclip swap
|
||||
|
@ -512,27 +512,27 @@ CONSTANT: re-recent-weather R! ((\w{2})?[BE]\d{2,4}((\w{2})?[BE]\d{2,4})?)+!
|
|||
: parse-remark ( str -- str' )
|
||||
{
|
||||
{ [ dup glossary key? ] [ glossary at ] }
|
||||
{ [ dup R! 1\d{4}! matches? ] [ parse-6hr-max-temp ] }
|
||||
{ [ dup R! 2\d{4}! matches? ] [ parse-6hr-min-temp ] }
|
||||
{ [ dup R! 4\d{8}! matches? ] [ parse-24hr-temp ] }
|
||||
{ [ dup R! 4/\d{3}! matches? ] [ parse-snow-depth ] }
|
||||
{ [ dup R! 5\d{4}! matches? ] [ parse-1hr-pressure ] }
|
||||
{ [ dup R! 6[\d/]{4}! matches? ] [ parse-6hr-precipitation ] }
|
||||
{ [ dup R! 7\d{4}! matches? ] [ parse-24hr-precipitation ] }
|
||||
{ [ dup R! 8/\d{3}! matches? ] [ parse-cloud-cover ] }
|
||||
{ [ dup R! 931\d{3}! matches? ] [ parse-6hr-snowfall ] }
|
||||
{ [ dup R! 933\d{3}! matches? ] [ parse-water-equivalent-snow ] }
|
||||
{ [ dup R! 98\d{3}! matches? ] [ parse-duration-of-sunshine ] }
|
||||
{ [ dup R! T\d{4,8}! matches? ] [ parse-1hr-temp ] }
|
||||
{ [ dup R! \d{3}\d{2,3}/\d{2,4}! matches? ] [ parse-peak-wind ] }
|
||||
{ [ dup R! P\d{4}! matches? ] [ parse-1hr-precipitation ] }
|
||||
{ [ dup R! SLP\d{3}! matches? ] [ parse-sea-level-pressure ] }
|
||||
{ [ dup R! LTG\w+! matches? ] [ parse-lightning ] }
|
||||
{ [ dup R! PROB\d+! matches? ] [ parse-probability ] }
|
||||
{ [ dup R! \d{3}V\d{3}! matches? ] [ parse-varying ] }
|
||||
{ [ dup R! [^-]+(-[^-]+)+! matches? ] [ parse-from-to ] }
|
||||
{ [ dup R! [^/]+(/[^/]+)+! matches? ] [ ] }
|
||||
{ [ dup R! \d+.\d+! matches? ] [ ] }
|
||||
{ [ dup R/ 1\d{4}/ matches? ] [ parse-6hr-max-temp ] }
|
||||
{ [ dup R/ 2\d{4}/ matches? ] [ parse-6hr-min-temp ] }
|
||||
{ [ dup R/ 4\d{8}/ matches? ] [ parse-24hr-temp ] }
|
||||
{ [ dup R/ 4\\/\d{3}/ matches? ] [ parse-snow-depth ] }
|
||||
{ [ dup R/ 5\d{4}/ matches? ] [ parse-1hr-pressure ] }
|
||||
{ [ dup R/ 6[\d\\/]{4}/ matches? ] [ parse-6hr-precipitation ] }
|
||||
{ [ dup R/ 7\d{4}/ matches? ] [ parse-24hr-precipitation ] }
|
||||
{ [ dup R/ 8\\/\d{3}/ matches? ] [ parse-cloud-cover ] }
|
||||
{ [ dup R/ 931\d{3}/ matches? ] [ parse-6hr-snowfall ] }
|
||||
{ [ dup R/ 933\d{3}/ matches? ] [ parse-water-equivalent-snow ] }
|
||||
{ [ dup R/ 98\d{3}/ matches? ] [ parse-duration-of-sunshine ] }
|
||||
{ [ dup R/ T\d{4,8}/ matches? ] [ parse-1hr-temp ] }
|
||||
{ [ dup R/ \d{3}\d{2,3}\\/\d{2,4}/ matches? ] [ parse-peak-wind ] }
|
||||
{ [ dup R/ P\d{4}/ matches? ] [ parse-1hr-precipitation ] }
|
||||
{ [ dup R/ SLP\d{3}/ matches? ] [ parse-sea-level-pressure ] }
|
||||
{ [ dup R/ LTG\w+/ matches? ] [ parse-lightning ] }
|
||||
{ [ dup R/ PROB\d+/ matches? ] [ parse-probability ] }
|
||||
{ [ dup R/ \d{3}V\d{3}/ matches? ] [ parse-varying ] }
|
||||
{ [ dup R/ [^-]+(-[^-]+)+/ matches? ] [ parse-from-to ] }
|
||||
{ [ dup R/ [^\\/]+(\\/[^\\/]+)+/ matches? ] [ ] }
|
||||
{ [ dup R/ \d+.\d+/ matches? ] [ ] }
|
||||
{ [ dup re-recent-weather matches? ] [ parse-recent-weather ] }
|
||||
{ [ dup re-weather matches? ] [ parse-weather ] }
|
||||
{ [ dup re-sky-condition matches? ] [ parse-sky-condition ] }
|
||||
|
@ -596,12 +596,12 @@ M: string metar.
|
|||
[ parse-altitude ] [ parse-wind ] bi* prepend
|
||||
"wind shear " prepend ;
|
||||
|
||||
CONSTANT: re-from-timestamp R! FM\d{6}!
|
||||
CONSTANT: re-from-timestamp R/ FM\d{6}/
|
||||
|
||||
: parse-from-timestamp ( str -- str' )
|
||||
"FM" ?head drop parse-timestamp ;
|
||||
|
||||
CONSTANT: re-valid-timestamp R! \d{4}\/\d{4}!
|
||||
CONSTANT: re-valid-timestamp R/ \d{4}\/\d{4}/
|
||||
|
||||
: parse-valid-timestamp ( str -- str' )
|
||||
"/" split1 [ "00" append parse-timestamp ] bi@ " to " glue ;
|
||||
|
|
|
@ -13,7 +13,7 @@ IN: xkcd
|
|||
|
||||
: comic-image ( url -- image )
|
||||
http-get nip
|
||||
R@ http://imgs\.xkcd\.com/comics/[^\.]+\.(png|jpg)@
|
||||
R/ http:\\/\\/imgs\.xkcd\.com\\/comics\\/[^\.]+\.(png|jpg)/
|
||||
first-match >string load-http-image ;
|
||||
|
||||
: comic-image. ( url -- )
|
||||
|
|
Loading…
Reference in New Issue