regexp: make it use R{{ R[[ R(( for now.
parent
0e71afabe0
commit
3199c23f17
|
@ -7,19 +7,19 @@ in: benchmark.regex-dna
|
||||||
! Based on http://shootout.alioth.debian.org/gp4/benchmark.php?test=regexdna&lang=ruby&id=1
|
! Based on http://shootout.alioth.debian.org/gp4/benchmark.php?test=regexdna&lang=ruby&id=1
|
||||||
|
|
||||||
: strip-line-breaks ( string -- string' )
|
: strip-line-breaks ( string -- string' )
|
||||||
R/ >.*\n|\n/ "" re-replace ;
|
R[[ >.*\n|\n]] "" re-replace ;
|
||||||
|
|
||||||
: count-patterns ( string -- )
|
: count-patterns ( string -- )
|
||||||
{
|
{
|
||||||
R/ agggtaaa|tttaccct/i
|
R[[ agggtaaa|tttaccct]]i
|
||||||
R/ [cgt]gggtaaa|tttaccc[acg]/i
|
R(( [cgt]gggtaaa|tttaccc[acg]))i
|
||||||
R/ a[act]ggtaaa|tttacc[agt]t/i
|
R[[ a[act]ggtaaa|tttacc[agt]t]]i
|
||||||
R/ ag[act]gtaaa|tttac[agt]ct/i
|
R[[ ag[act]gtaaa|tttac[agt]ct]]i
|
||||||
R/ agg[act]taaa|ttta[agt]cct/i
|
R[[ agg[act]taaa|ttta[agt]cct]]i
|
||||||
R/ aggg[acg]aaa|ttt[cgt]ccct/i
|
R[[ aggg[acg]aaa|ttt[cgt]ccct]]i
|
||||||
R/ agggt[cgt]aa|tt[acg]accct/i
|
R[[ agggt[cgt]aa|tt[acg]accct]]i
|
||||||
R/ agggta[cgt]a|t[acg]taccct/i
|
R[[ agggta[cgt]a|t[acg]taccct]]i
|
||||||
R/ agggtaa[cgt]|[acg]ttaccct/i
|
R[[ agggtaa[cgt]|[acg]ttaccct]]i
|
||||||
} [
|
} [
|
||||||
[ raw>> write bl ]
|
[ raw>> write bl ]
|
||||||
[ count-matches number>string print ]
|
[ count-matches number>string print ]
|
||||||
|
|
|
@ -9,8 +9,8 @@ in: benchmark.regexp
|
||||||
20,000 iota [ number>string ] map
|
20,000 iota [ number>string ] map
|
||||||
200 iota [ 1 + char: a <string> ] map
|
200 iota [ 1 + char: a <string> ] map
|
||||||
'[
|
'[
|
||||||
_ R/ \d+/ [ matches? ] curry all? t assert=
|
_ R[[ \d+]] [ matches? ] curry all? t assert=
|
||||||
_ R/ [a]+/ [ matches? ] curry all? t assert=
|
_ R[[ [a]+]] [ matches? ] curry all? t assert=
|
||||||
] times ;
|
] times ;
|
||||||
|
|
||||||
main: regexp-benchmark
|
main: regexp-benchmark
|
||||||
|
|
|
@ -134,7 +134,7 @@ CONSTANT: galois-slides
|
||||||
"Implemented with library code"
|
"Implemented with library code"
|
||||||
{ $code "use: regexp" }
|
{ $code "use: regexp" }
|
||||||
{ $code "\"ababbc\" \"[ab]+c\" <regexp> matches? ." }
|
{ $code "\"ababbc\" \"[ab]+c\" <regexp> matches? ." }
|
||||||
{ $code "\"ababbc\" R/ [ab]+c/ matches? ." }
|
{ $code "\"ababbc\" R[[ [ab]+c]] matches? ." }
|
||||||
}
|
}
|
||||||
{ $slide "Example: memoization"
|
{ $slide "Example: memoization"
|
||||||
{ "Memoization with " { $link postpone\ MEMO: } }
|
{ "Memoization with " { $link postpone\ MEMO: } }
|
||||||
|
|
|
@ -136,4 +136,4 @@ os unix? [ [ 10 ] [ PCRE_CONFIG_NEWLINE pcre-config ] unit-test ] when
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
! Test that the regexp syntax works.
|
! Test that the regexp syntax works.
|
||||||
{ t } [ "1234abcd" regexp:R/ ^\d+\w+$/ matches? ] unit-test
|
{ t } [ "1234abcd" regexp:R[[ ^\d+\w+$]] matches? ] unit-test
|
||||||
|
|
|
@ -869,7 +869,7 @@ CONSTANT: emoji H{
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
: emojify ( str -- str' )
|
: emojify ( str -- str' )
|
||||||
R/ :([^:])+:/ [ >string emoji at ] re-replace-with ;
|
R[[ :([^:])+:]] [ >string emoji at ] re-replace-with ;
|
||||||
|
|
||||||
: emojify-main ( -- )
|
: emojify-main ( -- )
|
||||||
command-line get [
|
command-line get [
|
||||||
|
|
|
@ -7,10 +7,10 @@ unicode ;
|
||||||
in: globs
|
in: globs
|
||||||
|
|
||||||
: not-path-separator ( -- sep )
|
: not-path-separator ( -- sep )
|
||||||
os windows? R/ [^\\/\\]/ R/ [^\\/]/ ? ; foldable
|
os windows? R{{ [^\\/\\]}} R{{ [^\\/]}} ? ; foldable
|
||||||
|
|
||||||
: wild-path-separator ( -- sep )
|
: wild-path-separator ( -- sep )
|
||||||
os windows? R/ [^\\/\\][\\/\\]|[^\\/\\]/ R/ [^\\/][\\/]|[^\\/]/ ? ; foldable
|
os windows? R{{ [^\\/\\][\\/\\]|[^\\/\\]}} R{{ [^\\/][\\/]|[^\\/]}} ? ; foldable
|
||||||
|
|
||||||
: <glob> ( string -- obj ) EBNF{{
|
: <glob> ( string -- obj ) EBNF{{
|
||||||
|
|
||||||
|
|
|
@ -2350,7 +2350,7 @@ CONSTANT: html5 H{
|
||||||
"#" ?head [ numeric-charref ] [ named-charref ] if ;
|
"#" ?head [ numeric-charref ] [ named-charref ] if ;
|
||||||
|
|
||||||
CONSTANT: re-charref
|
CONSTANT: re-charref
|
||||||
R/ &(#[0-9]+|#[xX][0-9a-fA-F]+|[^\t\n\f <&#;]{1,32});?/ ;
|
R[[ &(#[0-9]+|#[xX][0-9a-fA-F]+|[^\t\n\f <&#;]{1,32});?]] ;
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
|
|
|
@ -26,14 +26,14 @@ ERROR: bad-location str ;
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
: string>longitude ( str -- lon/f )
|
: string>longitude ( str -- lon/f )
|
||||||
dup R/ \d+-\d+(-\d+(\.\d+)?)?[WE]/ matches? [
|
dup R{{ \d+-\d+(-\d+(\.\d+)?)?[WE]}} matches? [
|
||||||
unclip-last
|
unclip-last
|
||||||
[ parse-location ]
|
[ parse-location ]
|
||||||
[ char: W = [ neg ] when ] bi*
|
[ char: W = [ neg ] when ] bi*
|
||||||
] [ drop f ] if ;
|
] [ drop f ] if ;
|
||||||
|
|
||||||
: string>latitude ( str -- lat/f )
|
: string>latitude ( str -- lat/f )
|
||||||
dup R/ \d+-\d+(-\d+(\.\d+)?)?[NS]/ matches? [
|
dup R{{ \d+-\d+(-\d+(\.\d+)?)?[NS]}} matches? [
|
||||||
unclip-last
|
unclip-last
|
||||||
[ parse-location ]
|
[ parse-location ]
|
||||||
[ char: S = [ neg ] when ] bi*
|
[ char: S = [ neg ] when ] bi*
|
||||||
|
@ -277,16 +277,16 @@ CONSTANT: sky H{
|
||||||
unclip [ string>number ] [ char: A = ] bi*
|
unclip [ string>number ] [ char: A = ] bi*
|
||||||
[ 100 /f "%.2f Hg" sprintf ] [ "%s hPa" sprintf ] if ;
|
[ 100 /f "%.2f Hg" sprintf ] [ "%s hPa" sprintf ] if ;
|
||||||
|
|
||||||
CONSTANT: re-timestamp R/ \d{6}Z/ ;
|
CONSTANT: re-timestamp R{{ \d{6}Z}} ;
|
||||||
CONSTANT: re-station R/ \w{4}/ ;
|
CONSTANT: re-station R(( \w{4})) ;
|
||||||
CONSTANT: re-temperature R/ [M]?\d{2}\\/([M]?\d{2})?/ ;
|
CONSTANT: re-temperature R{{ [M]?\d{2}\\/([M]?\d{2})?}} ;
|
||||||
CONSTANT: re-wind R/ (VRB|\d{3})\d{2,3}(G\d{2,3})?KT/ ;
|
CONSTANT: re-wind R{{ (VRB|\d{3})\d{2,3}(G\d{2,3})?KT}} ;
|
||||||
CONSTANT: re-wind-variable R/ \d{3}V\d{3}/ ;
|
CONSTANT: re-wind-variable R[[ \d{3}V\d{3}]] ;
|
||||||
CONSTANT: re-visibility R/ [MP]?\d+(\\/\d+)?SM/ ;
|
CONSTANT: re-visibility R{{ [MP]?\d+(\\/\d+)?SM}} ;
|
||||||
CONSTANT: re-rvr R/ R\d{2}[RLC]?\\/\d{4}(V\d{4})?FT/ ;
|
CONSTANT: re-rvr R{{ R\d{2}[RLC]?\\/\d{4}(V\d{4})?FT}} ;
|
||||||
CONSTANT: re-weather R/ [+-]?(VC)?(\w{2}|\w{4})/ ;
|
CONSTANT: re-weather R{{ [+-]?(VC)?(\w{2}|\w{4})}} ;
|
||||||
CONSTANT: re-sky-condition R/ (\w{2,3}\d{3}(\w+)?|\w{3}|CAVOK)/ ;
|
CONSTANT: re-sky-condition R{{ (\w{2,3}\d{3}(\w+)?|\w{3}|CAVOK)}} ;
|
||||||
CONSTANT: re-altimeter R/ [AQ]\d{4}/ ;
|
CONSTANT: re-altimeter R(( [AQ]\d{4})) ;
|
||||||
|
|
||||||
: find-one ( seq quot: ( elt -- ? ) -- seq elt/f )
|
: find-one ( seq quot: ( elt -- ? ) -- seq elt/f )
|
||||||
dupd find drop [ tail unclip ] [ f ] if* ; inline
|
dupd find drop [ tail unclip ] [ f ] if* ; inline
|
||||||
|
@ -462,7 +462,7 @@ CONSTANT: high-clouds H{
|
||||||
: parse-lightning ( str -- str' )
|
: parse-lightning ( str -- str' )
|
||||||
"LTG" ?head drop 2 group [ lightning at ] map " " join ;
|
"LTG" ?head drop 2 group [ lightning at ] map " " join ;
|
||||||
|
|
||||||
CONSTANT: re-recent-weather R/ ((\w{2})?[BE]\d{2,4}((\w{2})?[BE]\d{2,4})?)+/ ;
|
CONSTANT: re-recent-weather R{{ ((\w{2})?[BE]\d{2,4}((\w{2})?[BE]\d{2,4})?)+}} ;
|
||||||
|
|
||||||
: parse-began/ended ( str -- str' )
|
: parse-began/ended ( str -- str' )
|
||||||
unclip swap
|
unclip swap
|
||||||
|
@ -512,27 +512,27 @@ CONSTANT: re-recent-weather R/ ((\w{2})?[BE]\d{2,4}((\w{2})?[BE]\d{2,4})?)+/ ;
|
||||||
: parse-remark ( str -- str' )
|
: parse-remark ( str -- str' )
|
||||||
{
|
{
|
||||||
{ [ dup glossary key? ] [ glossary at ] }
|
{ [ dup glossary key? ] [ glossary at ] }
|
||||||
{ [ dup R/ 1\d{4}/ matches? ] [ parse-6hr-max-temp ] }
|
{ [ dup R(( 1\d{4})) matches? ] [ parse-6hr-max-temp ] }
|
||||||
{ [ dup R/ 2\d{4}/ matches? ] [ parse-6hr-min-temp ] }
|
{ [ dup R(( 2\d{4})) matches? ] [ parse-6hr-min-temp ] }
|
||||||
{ [ dup R/ 4\d{8}/ matches? ] [ parse-24hr-temp ] }
|
{ [ dup R(( 4\d{8})) matches? ] [ parse-24hr-temp ] }
|
||||||
{ [ dup R/ 4\\/\d{3}/ matches? ] [ parse-snow-depth ] }
|
{ [ dup R(( 4\\/\d{3})) matches? ] [ parse-snow-depth ] }
|
||||||
{ [ dup R/ 5\d{4}/ matches? ] [ parse-1hr-pressure ] }
|
{ [ dup R(( 5\d{4})) matches? ] [ parse-1hr-pressure ] }
|
||||||
{ [ dup R/ 6[\d\\/]{4}/ matches? ] [ parse-6hr-precipitation ] }
|
{ [ dup R(( 6[\d\\/]{4})) matches? ] [ parse-6hr-precipitation ] }
|
||||||
{ [ dup R/ 7\d{4}/ matches? ] [ parse-24hr-precipitation ] }
|
{ [ dup R(( 7\d{4})) matches? ] [ parse-24hr-precipitation ] }
|
||||||
{ [ dup R/ 8\\/\d{3}/ matches? ] [ parse-cloud-cover ] }
|
{ [ dup R(( 8\\/\d{3})) matches? ] [ parse-cloud-cover ] }
|
||||||
{ [ dup R/ 931\d{3}/ matches? ] [ parse-6hr-snowfall ] }
|
{ [ dup R(( 931\d{3})) matches? ] [ parse-6hr-snowfall ] }
|
||||||
{ [ dup R/ 933\d{3}/ matches? ] [ parse-water-equivalent-snow ] }
|
{ [ dup R(( 933\d{3})) matches? ] [ parse-water-equivalent-snow ] }
|
||||||
{ [ dup R/ 98\d{3}/ matches? ] [ parse-duration-of-sunshine ] }
|
{ [ dup R(( 98\d{3})) matches? ] [ parse-duration-of-sunshine ] }
|
||||||
{ [ dup R/ T\d{4,8}/ matches? ] [ parse-1hr-temp ] }
|
{ [ dup R(( T\d{4,8})) matches? ] [ parse-1hr-temp ] }
|
||||||
{ [ dup R/ \d{3}\d{2,3}\\/\d{2,4}/ matches? ] [ parse-peak-wind ] }
|
{ [ dup R(( \d{3}\d{2,3}\\/\d{2,4})) matches? ] [ parse-peak-wind ] }
|
||||||
{ [ dup R/ P\d{4}/ matches? ] [ parse-1hr-precipitation ] }
|
{ [ dup R(( P\d{4})) matches? ] [ parse-1hr-precipitation ] }
|
||||||
{ [ dup R/ SLP\d{3}/ matches? ] [ parse-sea-level-pressure ] }
|
{ [ dup R(( SLP\d{3})) matches? ] [ parse-sea-level-pressure ] }
|
||||||
{ [ dup R/ LTG\w+/ matches? ] [ parse-lightning ] }
|
{ [ dup R(( LTG\w+)) matches? ] [ parse-lightning ] }
|
||||||
{ [ dup R/ PROB\d+/ matches? ] [ parse-probability ] }
|
{ [ dup R(( PROB\d+)) matches? ] [ parse-probability ] }
|
||||||
{ [ dup R/ \d{3}V\d{3}/ matches? ] [ parse-varying ] }
|
{ [ dup R(( \d{3}V\d{3})) matches? ] [ parse-varying ] }
|
||||||
{ [ dup R/ [^-]+(-[^-]+)+/ matches? ] [ parse-from-to ] }
|
{ [ dup R(( [^-]+(-[^-]+)+)) matches? ] [ parse-from-to ] }
|
||||||
{ [ dup R/ [^\\/]+(\\/[^\\/]+)+/ matches? ] [ ] }
|
{ [ dup R(( [^\\/]+(\\/[^\\/]+)+)) matches? ] [ ] }
|
||||||
{ [ dup R/ \d+.\d+/ matches? ] [ ] }
|
{ [ dup R(( \d+.\d+)) matches? ] [ ] }
|
||||||
{ [ dup re-recent-weather matches? ] [ parse-recent-weather ] }
|
{ [ dup re-recent-weather matches? ] [ parse-recent-weather ] }
|
||||||
{ [ dup re-weather matches? ] [ parse-weather ] }
|
{ [ dup re-weather matches? ] [ parse-weather ] }
|
||||||
{ [ dup re-sky-condition matches? ] [ parse-sky-condition ] }
|
{ [ dup re-sky-condition matches? ] [ parse-sky-condition ] }
|
||||||
|
@ -596,12 +596,12 @@ M: string metar.
|
||||||
[ parse-altitude ] [ parse-wind ] bi* prepend
|
[ parse-altitude ] [ parse-wind ] bi* prepend
|
||||||
"wind shear " prepend ;
|
"wind shear " prepend ;
|
||||||
|
|
||||||
CONSTANT: re-from-timestamp R/ FM\d{6}/ ;
|
CONSTANT: re-from-timestamp R(( FM\d{6})) ;
|
||||||
|
|
||||||
: parse-from-timestamp ( str -- str' )
|
: parse-from-timestamp ( str -- str' )
|
||||||
"FM" ?head drop parse-timestamp ;
|
"FM" ?head drop parse-timestamp ;
|
||||||
|
|
||||||
CONSTANT: re-valid-timestamp R/ \d{4}\/\d{4}/ ;
|
CONSTANT: re-valid-timestamp R(( \d{4}\/\d{4})) ;
|
||||||
|
|
||||||
: parse-valid-timestamp ( str -- str' )
|
: parse-valid-timestamp ( str -- str' )
|
||||||
"/" split1 [ "00" append parse-timestamp ] bi@ " to " glue ;
|
"/" split1 [ "00" append parse-timestamp ] bi@ " to " glue ;
|
||||||
|
|
|
@ -10,7 +10,7 @@ in: regexp.combinators.tests
|
||||||
{ f f f } [ "food" "ibar" "ba" [ strings matches? ] tri@ ] unit-test
|
{ f f f } [ "food" "ibar" "ba" [ strings matches? ] tri@ ] unit-test
|
||||||
|
|
||||||
: conj ( -- regexp )
|
: conj ( -- regexp )
|
||||||
{ R/ .*a/ R/ b.*/ } <and> ;
|
{ R(( .*a)) R(( b.*)) } <and> ;
|
||||||
|
|
||||||
{ t } [ "bljhasflsda" conj matches? ] unit-test
|
{ t } [ "bljhasflsda" conj matches? ] unit-test
|
||||||
{ f } [ "bsdfdfs" conj matches? ] unit-test
|
{ f } [ "bsdfdfs" conj matches? ] unit-test
|
||||||
|
|
|
@ -13,7 +13,7 @@ in: regexp.combinators
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
CONSTANT: <nothing> R/ (?~.*)/s ;
|
CONSTANT: <nothing> R[[ (?~.*)]]s ;
|
||||||
|
|
||||||
: <literal> ( string -- regexp )
|
: <literal> ( string -- regexp )
|
||||||
[ "\\Q" "\\E" surround ] [ <concatenation> ] bi make-regexp ; foldable
|
[ "\\Q" "\\E" surround ] [ <concatenation> ] bi make-regexp ; foldable
|
||||||
|
|
|
@ -7,7 +7,7 @@ in: regexp.prettyprint
|
||||||
M: regexp pprint*
|
M: regexp pprint*
|
||||||
[
|
[
|
||||||
[
|
[
|
||||||
[ raw>> "\\/" "\\\\/" replace "R/ " % % "/" % ]
|
[ raw>> "R[[ " % % "]]" % ]
|
||||||
[ options>> options>string % ] bi
|
[ options>> options>string % ] bi
|
||||||
] "" make
|
] "" make
|
||||||
] keep present-text ;
|
] keep present-text ;
|
||||||
|
|
|
@ -27,20 +27,20 @@ ARTICLE: "regexp" "Regular expressions"
|
||||||
|
|
||||||
ARTICLE: "regexp-intro" "A quick introduction to regular expressions"
|
ARTICLE: "regexp-intro" "A quick introduction to regular expressions"
|
||||||
"Regular expressions are a terse way to do certain simple string processing tasks. For example, to replace all instances of " { $snippet "foo" } " in one string with " { $snippet "bar" } ", the following can be used:"
|
"Regular expressions are a terse way to do certain simple string processing tasks. For example, to replace all instances of " { $snippet "foo" } " in one string with " { $snippet "bar" } ", the following can be used:"
|
||||||
{ $code "R/ foo/ \"bar\" re-replace" }
|
{ $code "R(( foo)) \"bar\" re-replace" }
|
||||||
"That could be done with sequence operations, but consider doing this replacement for an arbitrary number of o's, at least two:"
|
"That could be done with sequence operations, but consider doing this replacement for an arbitrary number of o's, at least two:"
|
||||||
{ $code "R/ foo+/ \"bar\" re-replace" }
|
{ $code "R(( foo+)) \"bar\" re-replace" }
|
||||||
"The " { $snippet "+" } " operator matches one or more occurrences of the previous expression; in this case " { $snippet "o" } ". Another useful feature is alternation. Say we want to do this replacement with fooooo or boooo. Then we could use the code"
|
"The " { $snippet "+" } " operator matches one or more occurrences of the previous expression; in this case " { $snippet "o" } ". Another useful feature is alternation. Say we want to do this replacement with fooooo or boooo. Then we could use the code"
|
||||||
{ $code "R/ (f|b)oo+/ \"bar\" re-replace" }
|
{ $code "R(( (f|b)oo+)) \"bar\" re-replace" }
|
||||||
"To search a file for all lines that match a given regular expression, you could use code like this:"
|
"To search a file for all lines that match a given regular expression, you could use code like this:"
|
||||||
{ $code "\"file.txt\" ascii file-lines [ R/ (f|b)oo+/ re-contains? ] filter" }
|
{ $code "\"file.txt\" ascii file-lines [ R(( (f|b)oo+)) re-contains? ] filter" }
|
||||||
"To test if a string in its entirety matches a regular expression, the following can be used:"
|
"To test if a string in its entirety matches a regular expression, the following can be used:"
|
||||||
{ $example "use: regexp \"fooo\" R/ (b|f)oo+/ matches? ." "t" }
|
{ $example "use: regexp \"fooo\" R(( (b|f)oo+)) matches? ." "t" }
|
||||||
"Regular expressions can't be used for all parsing tasks. For example, they are not powerful enough to match balancing parentheses." ;
|
"Regular expressions can't be used for all parsing tasks. For example, they are not powerful enough to match balancing parentheses." ;
|
||||||
|
|
||||||
ARTICLE: "regexp-construction" "Constructing regular expressions"
|
ARTICLE: "regexp-construction" "Constructing regular expressions"
|
||||||
"Most of the time, regular expressions are literals and the parsing word should be used, to construct them at parse time. This ensures that they are only compiled once, and gives parse time syntax checking."
|
"Most of the time, regular expressions are literals and the parsing word should be used, to construct them at parse time. This ensures that they are only compiled once, and gives parse time syntax checking."
|
||||||
{ $subsections postpone\ R/ }
|
{ $subsections \ R[[ }
|
||||||
"Sometimes, regular expressions need to be constructed at run time instead; for example, in a text editor, the user might input a regular expression to search for in a document."
|
"Sometimes, regular expressions need to be constructed at run time instead; for example, in a text editor, the user might input a regular expression to search for in a document."
|
||||||
{ $subsections <regexp> <optioned-regexp> }
|
{ $subsections <regexp> <optioned-regexp> }
|
||||||
"Another approach is to use " { $vocab-link "regexp.combinators" } "." ;
|
"Another approach is to use " { $vocab-link "regexp.combinators" } "." ;
|
||||||
|
@ -48,9 +48,9 @@ ARTICLE: "regexp-construction" "Constructing regular expressions"
|
||||||
ARTICLE: "regexp-syntax" "Regular expression syntax"
|
ARTICLE: "regexp-syntax" "Regular expression syntax"
|
||||||
"Regexp syntax is largely compatible with Perl, Java and extended POSIX regexps, but not completely. Below, the syntax is documented."
|
"Regexp syntax is largely compatible with Perl, Java and extended POSIX regexps, but not completely. Below, the syntax is documented."
|
||||||
{ $heading "Characters" }
|
{ $heading "Characters" }
|
||||||
"At its core, regular expressions consist of character literals. For example, " { $snippet "R/ f/" } " is a regular expression matching just the string 'f'. In addition, the normal escape codes are provided, like " { $snippet "\\t" } " for the tab character and " { $snippet "\\uxxxxxx" } " for an arbitrary Unicode code point, by its hex value. In addition, any character can be preceded by a backslash to escape it, unless this has special meaning. For example, to match a literal opening parenthesis, use " { $snippet "\\(" } "."
|
"At its core, regular expressions consist of character literals. For example, " { $snippet "R(( f))" } " is a regular expression matching just the string 'f'. In addition, the normal escape codes are provided, like " { $snippet "\\t" } " for the tab character and " { $snippet "\\uxxxxxx" } " for an arbitrary Unicode code point, by its hex value. In addition, any character can be preceded by a backslash to escape it, unless this has special meaning. For example, to match a literal opening parenthesis, use " { $snippet "\\(" } "."
|
||||||
{ $heading "Concatenation, alternation and grouping" }
|
{ $heading "Concatenation, alternation and grouping" }
|
||||||
"Regular expressions can be built out of multiple characters by concatenation. For example, " { $snippet "R/ ab/" } " matches a followed by b. The " { $snippet "|" } " (alternation) operator can construct a regexp which matches one of two alternatives. Parentheses can be used for grouping. So " { $snippet "R/ f(oo|ar)/" } " would match either 'foo' or 'far'."
|
"Regular expressions can be built out of multiple characters by concatenation. For example, " { $snippet "R(( ab))" } " matches a followed by b. The " { $snippet "|" } " (alternation) operator can construct a regexp which matches one of two alternatives. Parentheses can be used for grouping. So " { $snippet "R[[ f(oo|ar)]]" } " would match either 'foo' or 'far'."
|
||||||
{ $heading "Character classes" }
|
{ $heading "Character classes" }
|
||||||
"Square brackets define a convenient way to refer to a set of characters. For example, " { $snippet "[ab]" } " refers to either a or b. And " { $snippet "[a-z]" } " refers to all of the characters between a and z, in code point order. You can use these together, as in " { $snippet "[ac-fz]" } " which matches all of the characters between c and f, in addition to a and z. Character classes can be negated using a caret, as in " { $snippet "[^a]" } " which matches all characters which are not a."
|
"Square brackets define a convenient way to refer to a set of characters. For example, " { $snippet "[ab]" } " refers to either a or b. And " { $snippet "[a-z]" } " refers to all of the characters between a and z, in code point order. You can use these together, as in " { $snippet "[ac-fz]" } " which matches all of the characters between c and f, in addition to a and z. Character classes can be negated using a caret, as in " { $snippet "[^a]" } " which matches all characters which are not a."
|
||||||
{ $heading "Predefined character classes" }
|
{ $heading "Predefined character classes" }
|
||||||
|
@ -110,7 +110,7 @@ ARTICLE: "regexp-syntax" "Regular expression syntax"
|
||||||
{ { $snippet "(?<=a)" } "Asserts that the current position is immediately preceded by a" }
|
{ { $snippet "(?<=a)" } "Asserts that the current position is immediately preceded by a" }
|
||||||
{ { $snippet "(?<!a)" } "Asserts that the current position is not immediately preceded by a" } }
|
{ { $snippet "(?<!a)" } "Asserts that the current position is not immediately preceded by a" } }
|
||||||
{ $heading "Quotation" }
|
{ $heading "Quotation" }
|
||||||
"To make it convenient to have a long string which uses regexp operators, a special syntax is provided. If a substring begins with " { $snippet "\\Q" } " then everything until " { $snippet "\\E" } " is quoted (escaped). For example, " { $snippet "R/ \\Qfoo\\bar|baz()\\E/" } " matches exactly the string " { $snippet "\"foo\\bar|baz()\"" } "."
|
"To make it convenient to have a long string which uses regexp operators, a special syntax is provided. If a substring begins with " { $snippet "\\Q" } " then everything until " { $snippet "\\E" } " is quoted (escaped). For example, " { $snippet "R[[ \\Qfoo\\bar|baz()\\E]]" } " matches exactly the string " { $snippet "\"foo\\bar|baz()\"" } "."
|
||||||
{ $heading "Unsupported features" }
|
{ $heading "Unsupported features" }
|
||||||
{ $subheading "Group capture" }
|
{ $subheading "Group capture" }
|
||||||
{ $subheading "Reluctant and possessive quantifiers" }
|
{ $subheading "Reluctant and possessive quantifiers" }
|
||||||
|
@ -140,23 +140,23 @@ $nl
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
HELP: case-insensitive
|
HELP: case-insensitive
|
||||||
{ $syntax "R/ .../i" }
|
{ $syntax "R[[ ...]]i" }
|
||||||
{ $description "On regexps, the " { $snippet "i" } " option makes the match case-insensitive. Currently, this is handled incorrectly with respect to Unicode, as characters like ß do not expand into SS in upper case. This should be fixed in a future version." } ;
|
{ $description "On regexps, the " { $snippet "i" } " option makes the match case-insensitive. Currently, this is handled incorrectly with respect to Unicode, as characters like ß do not expand into SS in upper case. This should be fixed in a future version." } ;
|
||||||
|
|
||||||
HELP: unix-lines
|
HELP: unix-lines
|
||||||
{ $syntax "R/ .../d" }
|
{ $syntax "R[[ ...]]d" }
|
||||||
{ $description "With this mode, only newlines (" { $snippet "\\n" } ") are recognized for line breaking. This affects " { $snippet "$" } " and " { $snippet "^" } " when in multiline mode." } ;
|
{ $description "With this mode, only newlines (" { $snippet "\\n" } ") are recognized for line breaking. This affects " { $snippet "$" } " and " { $snippet "^" } " when in multiline mode." } ;
|
||||||
|
|
||||||
HELP: multiline
|
HELP: multiline
|
||||||
{ $syntax "R/ .../m" }
|
{ $syntax "R[[ ...]]m" }
|
||||||
{ $description "This mode makes the zero-width constraints " { $snippet "$" } " and " { $snippet "^" } " match the beginning or end of a line. Otherwise, they only match the beginning or end of the input text. This can be used together with " { $link dotall } "." } ;
|
{ $description "This mode makes the zero-width constraints " { $snippet "$" } " and " { $snippet "^" } " match the beginning or end of a line. Otherwise, they only match the beginning or end of the input text. This can be used together with " { $link dotall } "." } ;
|
||||||
|
|
||||||
HELP: dotall
|
HELP: dotall
|
||||||
{ $syntax "R/ .../s" }
|
{ $syntax "R[[ ...]]s" }
|
||||||
{ $description "This mode, traditionally called single line mode, makes " { $snippet "." } " match everything, including line breaks. By default, it does not match line breaking characters. This can be used together with " { $link multiline } "." } ;
|
{ $description "This mode, traditionally called single line mode, makes " { $snippet "." } " match everything, including line breaks. By default, it does not match line breaking characters. This can be used together with " { $link multiline } "." } ;
|
||||||
|
|
||||||
HELP: reversed-regexp
|
HELP: reversed-regexp
|
||||||
{ $syntax "R/ .../r" }
|
{ $syntax "R[[ ...]]r" }
|
||||||
{ $description "When running a regexp compiled with this mode, matches will start from the end of the input string, going towards the beginning." } ;
|
{ $description "When running a regexp compiled with this mode, matches will start from the end of the input string, going towards the beginning." } ;
|
||||||
|
|
||||||
ARTICLE: "regexp-theory" "The theory of regular expressions"
|
ARTICLE: "regexp-theory" "The theory of regular expressions"
|
||||||
|
@ -202,8 +202,8 @@ HELP: <optioned-regexp>
|
||||||
{ $values { "string" string } { "options" "a string of " { $link "regexp-options" } } { "regexp" regexp } }
|
{ $values { "string" string } { "options" "a string of " { $link "regexp-options" } } { "regexp" regexp } }
|
||||||
{ $description "Given a string in regular expression syntax, and a string of options, creates a regular expression object. When it is first used for matching, a DFA is compiled, and this DFA is stored for reuse so it is only compiled once." } ;
|
{ $description "Given a string in regular expression syntax, and a string of options, creates a regular expression object. When it is first used for matching, a DFA is compiled, and this DFA is stored for reuse so it is only compiled once." } ;
|
||||||
|
|
||||||
HELP: R/
|
HELP: \ R[[
|
||||||
{ $syntax "R/ foo.*|[a-zA-Z]bar/options" }
|
{ $syntax "R[[ foo.*|[a-zA-Z]bar]]options" }
|
||||||
{ $description "Literal syntax for a regular expression. When this syntax is used, the DFA is compiled at compile-time, rather than on first use. The syntax for the " { $snippet "options" } " string is documented in " { $link "regexp-options" } "." } ;
|
{ $description "Literal syntax for a regular expression. When this syntax is used, the DFA is compiled at compile-time, rather than on first use. The syntax for the " { $snippet "options" } " string is documented in " { $link "regexp-options" } "." } ;
|
||||||
|
|
||||||
HELP: regexp
|
HELP: regexp
|
||||||
|
@ -231,7 +231,7 @@ HELP: re-replace
|
||||||
{ $examples
|
{ $examples
|
||||||
{ $example
|
{ $example
|
||||||
"USING: prettyprint regexp ;"
|
"USING: prettyprint regexp ;"
|
||||||
"\"python is pythonic\" R/ python/ \"factor\" re-replace ."
|
"\"python is pythonic\" R[[ python]] \"factor\" re-replace ."
|
||||||
"\"factor is factoric\"" }
|
"\"factor is factoric\"" }
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
|
@ -241,7 +241,7 @@ HELP: re-replace-with
|
||||||
{ $examples
|
{ $examples
|
||||||
{ $example
|
{ $example
|
||||||
"USING: ascii prettyprint regexp ;"
|
"USING: ascii prettyprint regexp ;"
|
||||||
"\"abcdefghi\" R/ [aeiou]/ [ >upper ] re-replace-with ."
|
"\"abcdefghi\" R(( [aeiou])) [ >upper ] re-replace-with ."
|
||||||
"\"AbcdEfghI\"" }
|
"\"AbcdEfghI\"" }
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,7 @@ in: regexp-tests
|
||||||
! Off by default.
|
! Off by default.
|
||||||
{ f } [ "\n" "." <regexp> matches? ] unit-test
|
{ f } [ "\n" "." <regexp> matches? ] unit-test
|
||||||
{ t } [ "\n" "(?s:.)" <regexp> matches? ] unit-test
|
{ t } [ "\n" "(?s:.)" <regexp> matches? ] unit-test
|
||||||
{ t } [ "\n" R/ ./s matches? ] unit-test
|
{ t } [ "\n" R[[ .]]s matches? ] unit-test
|
||||||
{ f } [ "\n\n" "(?s:.)." <regexp> matches? ] unit-test
|
{ f } [ "\n\n" "(?s:.)." <regexp> matches? ] unit-test
|
||||||
|
|
||||||
{ f } [ "" ".+" <regexp> matches? ] unit-test
|
{ f } [ "" ".+" <regexp> matches? ] unit-test
|
||||||
|
@ -60,7 +60,7 @@ in: regexp-tests
|
||||||
|
|
||||||
{ t } [ "/" "\\/" <regexp> matches? ] unit-test
|
{ t } [ "/" "\\/" <regexp> matches? ] unit-test
|
||||||
|
|
||||||
{ t } [ "a" R/ a/i matches? ] unit-test
|
{ t } [ "a" R[[ a]]i matches? ] unit-test
|
||||||
|
|
||||||
{ t } [ "" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
{ t } [ "" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||||
{ t } [ "a" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
{ t } [ "a" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||||
|
@ -208,39 +208,39 @@ in: regexp-tests
|
||||||
{ "aaa" } [ "aaacb" "a*" <regexp> first-match >string ] unit-test
|
{ "aaa" } [ "aaacb" "a*" <regexp> first-match >string ] unit-test
|
||||||
{ "aa" } [ "aaacb" "aa?" <regexp> first-match >string ] unit-test
|
{ "aa" } [ "aaacb" "aa?" <regexp> first-match >string ] unit-test
|
||||||
|
|
||||||
{ t } [ "aaa" R/ AAA/i matches? ] unit-test
|
{ t } [ "aaa" R[[ AAA]]i matches? ] unit-test
|
||||||
{ f } [ "aax" R/ AAA/i matches? ] unit-test
|
{ f } [ "aax" R[[ AAA]]i matches? ] unit-test
|
||||||
{ t } [ "aaa" R/ A*/i matches? ] unit-test
|
{ t } [ "aaa" R[[ A*]]i matches? ] unit-test
|
||||||
{ f } [ "aaba" R/ A*/i matches? ] unit-test
|
{ f } [ "aaba" R[[ A*]]i matches? ] unit-test
|
||||||
{ t } [ "b" R/ [AB]/i matches? ] unit-test
|
{ t } [ "b" R{{ [AB]}}i matches? ] unit-test
|
||||||
{ f } [ "c" R/ [AB]/i matches? ] unit-test
|
{ f } [ "c" R{{ [AB]}}i matches? ] unit-test
|
||||||
{ t } [ "c" R/ [A-Z]/i matches? ] unit-test
|
{ t } [ "c" R{{ [A-Z]}}i matches? ] unit-test
|
||||||
{ f } [ "3" R/ [A-Z]/i matches? ] unit-test
|
{ f } [ "3" R{{ [A-Z]}}i matches? ] unit-test
|
||||||
|
|
||||||
{ t } [ "a" "(?i:a)" <regexp> matches? ] unit-test
|
{ t } [ "a" "(?i:a)" <regexp> matches? ] unit-test
|
||||||
{ t } [ "a" "(?i:a)" <regexp> matches? ] unit-test
|
{ t } [ "a" "(?i:a)" <regexp> matches? ] unit-test
|
||||||
{ t } [ "A" "(?i:a)" <regexp> matches? ] unit-test
|
{ t } [ "A" "(?i:a)" <regexp> matches? ] unit-test
|
||||||
{ t } [ "A" "(?i:a)" <regexp> matches? ] unit-test
|
{ t } [ "A" "(?i:a)" <regexp> matches? ] unit-test
|
||||||
|
|
||||||
{ t } [ "a" R/ (?-i:a)/i matches? ] unit-test
|
{ t } [ "a" R{{ (?-i:a)}}i matches? ] unit-test
|
||||||
{ t } [ "a" R/ (?-i:a)/i matches? ] unit-test
|
{ t } [ "a" R{{ (?-i:a)}}i matches? ] unit-test
|
||||||
{ f } [ "A" R/ (?-i:a)/i matches? ] unit-test
|
{ f } [ "A" R{{ (?-i:a)}}i matches? ] unit-test
|
||||||
{ f } [ "A" R/ (?-i:a)/i matches? ] unit-test
|
{ f } [ "A" R{{ (?-i:a)}}i matches? ] unit-test
|
||||||
|
|
||||||
{ f } [ "A" "[a-z]" <regexp> matches? ] unit-test
|
{ f } [ "A" "[a-z]" <regexp> matches? ] unit-test
|
||||||
{ t } [ "A" R/ [a-z]/i matches? ] unit-test
|
{ t } [ "A" R{{ [a-z]}}i matches? ] unit-test
|
||||||
|
|
||||||
{ f } [ "A" "\\p{Lower}" <regexp> matches? ] unit-test
|
{ f } [ "A" "\\p{Lower}" <regexp> matches? ] unit-test
|
||||||
{ t } [ "A" R/ \p{Lower}/i matches? ] unit-test
|
{ t } [ "A" R[[ \p{Lower}]]i matches? ] unit-test
|
||||||
|
|
||||||
{ t } [ "abc" R/ abc/r matches? ] unit-test
|
{ t } [ "abc" R{{ abc}}r matches? ] unit-test
|
||||||
{ t } [ "abc" R/ a[bB][cC]/r matches? ] unit-test
|
{ t } [ "abc" R{{ a[bB][cC]}}r matches? ] unit-test
|
||||||
|
|
||||||
{ t } [ 3 "xabc" R/ abc/r match-index-from >boolean ] unit-test
|
{ t } [ 3 "xabc" R{{ abc}}r match-index-from >boolean ] unit-test
|
||||||
{ t } [ 3 "xabc" R/ a[bB][cC]/r match-index-from >boolean ] unit-test
|
{ t } [ 3 "xabc" R{{ a[bB][cC]}}r match-index-from >boolean ] unit-test
|
||||||
|
|
||||||
{ 2 } [ 0 "llamallol" R/ ll/ match-index-from ] unit-test
|
{ 2 } [ 0 "llamallol" R{{ ll}} match-index-from ] unit-test
|
||||||
{ 5 } [ 8 "lolmallol" R/ lol/r match-index-from ] unit-test
|
{ 5 } [ 8 "lolmallol" R{{ lol}}r match-index-from ] unit-test
|
||||||
|
|
||||||
{ t } [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
{ t } [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||||
{ f } [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
{ f } [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||||
|
@ -274,66 +274,66 @@ in: regexp-tests
|
||||||
{ "b" } [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> first-match >string ] unit-test
|
{ "b" } [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> first-match >string ] unit-test
|
||||||
|
|
||||||
{ T{ slice { from 5 } { to 10 } { seq "hellohello" } } }
|
{ T{ slice { from 5 } { to 10 } { seq "hellohello" } } }
|
||||||
[ "hellohello" R/ hello/r first-match ]
|
[ "hellohello" R{{ hello}}r first-match ]
|
||||||
unit-test
|
unit-test
|
||||||
|
|
||||||
{ { "1" "2" "3" "4" } }
|
{ { "1" "2" "3" "4" } }
|
||||||
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
|
[ "1ABC2DEF3GHI4" R{{ [A-Z]+}} re-split [ >string ] map ] unit-test
|
||||||
|
|
||||||
{ { "1" "2" "3" "4" "" } }
|
{ { "1" "2" "3" "4" "" } }
|
||||||
[ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
|
[ "1ABC2DEF3GHI4JK" R{{ [A-Z]+}} re-split [ >string ] map ] unit-test
|
||||||
|
|
||||||
{ { "" } } [ "" R/ =/ re-split [ >string ] map ] unit-test
|
{ { "" } } [ "" R{{ =}} re-split [ >string ] map ] unit-test
|
||||||
|
|
||||||
{ { "a" "" } } [ "a=" R/ =/ re-split [ >string ] map ] unit-test
|
{ { "a" "" } } [ "a=" R{{ =}} re-split [ >string ] map ] unit-test
|
||||||
|
|
||||||
{ { "he" "o" } } [ "hello" R/ l+/ re-split [ >string ] map ] unit-test
|
{ { "he" "o" } } [ "hello" R{{ l+}} re-split [ >string ] map ] unit-test
|
||||||
|
|
||||||
{ { "h" "llo" } } [ "hello" R/ e+/ re-split [ >string ] map ] unit-test
|
{ { "h" "llo" } } [ "hello" R{{ e+}} re-split [ >string ] map ] unit-test
|
||||||
|
|
||||||
{ { "" "h" "" "l" "l" "o" "" } } [ "hello" R/ e*/ re-split [ >string ] map ] unit-test
|
{ { "" "h" "" "l" "l" "o" "" } } [ "hello" R{{ e*}} re-split [ >string ] map ] unit-test
|
||||||
|
|
||||||
{ { { 0 5 "hellohello" } { 5 10 "hellohello" } } }
|
{ { { 0 5 "hellohello" } { 5 10 "hellohello" } } }
|
||||||
[ "hellohello" R/ hello/ [ 3array ] map-matches ]
|
[ "hellohello" R{{ hello}} [ 3array ] map-matches ]
|
||||||
unit-test
|
unit-test
|
||||||
|
|
||||||
{ { { 5 10 "hellohello" } { 0 5 "hellohello" } } }
|
{ { { 5 10 "hellohello" } { 0 5 "hellohello" } } }
|
||||||
[ "hellohello" R/ hello/r [ 3array ] map-matches ]
|
[ "hellohello" R{{ hello}}r [ 3array ] map-matches ]
|
||||||
unit-test
|
unit-test
|
||||||
|
|
||||||
{ { "ABC" "DEF" "GHI" } }
|
{ { "ABC" "DEF" "GHI" } }
|
||||||
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ all-matching-subseqs ] unit-test
|
[ "1ABC2DEF3GHI4" R{{ [A-Z]+}} all-matching-subseqs ] unit-test
|
||||||
|
|
||||||
{ { "ee" "e" } } [ "heellohello" R/ e+/ all-matching-subseqs ] unit-test
|
{ { "ee" "e" } } [ "heellohello" R{{ e+}} all-matching-subseqs ] unit-test
|
||||||
{ { "e" "ee" } } [ "heellohello" R/ e+/r all-matching-subseqs ] unit-test
|
{ { "e" "ee" } } [ "heellohello" R{{ e+}}r all-matching-subseqs ] unit-test
|
||||||
|
|
||||||
{ 3 } [ "1ABC2DEF3GHI4" R/ [A-Z]+/ count-matches ] unit-test
|
{ 3 } [ "1ABC2DEF3GHI4" R{{ [A-Z]+}} count-matches ] unit-test
|
||||||
|
|
||||||
{ 3 } [ "1ABC2DEF3GHI4" R/ [A-Z]+/r count-matches ] unit-test
|
{ 3 } [ "1ABC2DEF3GHI4" R{{ [A-Z]+}}r count-matches ] unit-test
|
||||||
|
|
||||||
{ 1 } [ "" R/ / count-matches ] unit-test
|
{ 1 } [ "" R{{ }} count-matches ] unit-test
|
||||||
|
|
||||||
{ 1 } [ "" R/ /r count-matches ] unit-test
|
{ 1 } [ "" R{{ }}r count-matches ] unit-test
|
||||||
|
|
||||||
{ 0 } [ "123" R/ [A-Z]+/ count-matches ] unit-test
|
{ 0 } [ "123" R{{ [A-Z]+}} count-matches ] unit-test
|
||||||
|
|
||||||
{ 0 } [ "123" R/ [A-Z]+/r count-matches ] unit-test
|
{ 0 } [ "123" R{{ [A-Z]+}}r count-matches ] unit-test
|
||||||
|
|
||||||
{ 6 } [ "hello" R/ e*/ count-matches ] unit-test
|
{ 6 } [ "hello" R{{ e*}} count-matches ] unit-test
|
||||||
|
|
||||||
{ 6 } [ "hello" R/ e*/r count-matches ] unit-test
|
{ 6 } [ "hello" R{{ e*}}r count-matches ] unit-test
|
||||||
|
|
||||||
{ 11 } [ "hello world" R/ l*/ count-matches ] unit-test
|
{ 11 } [ "hello world" R{{ l*}} count-matches ] unit-test
|
||||||
|
|
||||||
{ 11 } [ "hello world" R/ l*/r count-matches ] unit-test
|
{ 11 } [ "hello world" R{{ l*}}r count-matches ] unit-test
|
||||||
|
|
||||||
{ 1 } [ "hello" R/ e+/ count-matches ] unit-test
|
{ 1 } [ "hello" R{{ e+}} count-matches ] unit-test
|
||||||
|
|
||||||
{ 2 } [ "hello world" R/ l+/r count-matches ] unit-test
|
{ 2 } [ "hello world" R{{ l+}}r count-matches ] unit-test
|
||||||
|
|
||||||
{ "1.2.3.4." } [ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ "." re-replace ] unit-test
|
{ "1.2.3.4." } [ "1ABC2DEF3GHI4JK" R{{ [A-Z]+}} "." re-replace ] unit-test
|
||||||
{ "XhXXlXlXoX XwXoXrXlXdX" } [ "hello world" R/ e*/ "X" re-replace ] unit-test
|
{ "XhXXlXlXoX XwXoXrXlXdX" } [ "hello world" R{{ e*}} "X" re-replace ] unit-test
|
||||||
{ "-- title --" } [ "== title ==" R/ =/ "-" re-replace ] unit-test
|
{ "-- title --" } [ "== title ==" R{{ =}} "-" re-replace ] unit-test
|
||||||
|
|
||||||
{ "" } [ "ab" "a(?!b)" <regexp> first-match >string ] unit-test
|
{ "" } [ "ab" "a(?!b)" <regexp> first-match >string ] unit-test
|
||||||
{ "a" } [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test
|
{ "a" } [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test
|
||||||
|
@ -349,124 +349,124 @@ unit-test
|
||||||
{ f } [ "foobxr" "foo(?=bar)" <regexp> first-match ] unit-test
|
{ f } [ "foobxr" "foo(?=bar)" <regexp> first-match ] unit-test
|
||||||
|
|
||||||
! Bug in parsing word
|
! Bug in parsing word
|
||||||
{ t } [ "a" R/ a/ matches? ] unit-test
|
{ t } [ "a" R{{ a}} matches? ] unit-test
|
||||||
|
|
||||||
! Testing negation
|
! Testing negation
|
||||||
{ f } [ "a" R/ (?~a)/ matches? ] unit-test
|
{ f } [ "a" R{{ (?~a)}} matches? ] unit-test
|
||||||
{ t } [ "aa" R/ (?~a)/ matches? ] unit-test
|
{ t } [ "aa" R{{ (?~a)}} matches? ] unit-test
|
||||||
{ t } [ "bb" R/ (?~a)/ matches? ] unit-test
|
{ t } [ "bb" R{{ (?~a)}} matches? ] unit-test
|
||||||
{ t } [ "" R/ (?~a)/ matches? ] unit-test
|
{ t } [ "" R{{ (?~a)}} matches? ] unit-test
|
||||||
|
|
||||||
{ f } [ "a" R/ (?~a+|b)/ matches? ] unit-test
|
{ f } [ "a" R{{ (?~a+|b)}} matches? ] unit-test
|
||||||
{ f } [ "aa" R/ (?~a+|b)/ matches? ] unit-test
|
{ f } [ "aa" R{{ (?~a+|b)}} matches? ] unit-test
|
||||||
{ t } [ "bb" R/ (?~a+|b)/ matches? ] unit-test
|
{ t } [ "bb" R{{ (?~a+|b)}} matches? ] unit-test
|
||||||
{ f } [ "b" R/ (?~a+|b)/ matches? ] unit-test
|
{ f } [ "b" R{{ (?~a+|b)}} matches? ] unit-test
|
||||||
{ t } [ "" R/ (?~a+|b)/ matches? ] unit-test
|
{ t } [ "" R{{ (?~a+|b)}} matches? ] unit-test
|
||||||
|
|
||||||
! Intersecting classes
|
! Intersecting classes
|
||||||
{ t } [ "ab" R/ ac|\p{Lower}b/ matches? ] unit-test
|
{ t } [ "ab" R{{ ac|\p{Lower}b}} matches? ] unit-test
|
||||||
{ t } [ "ab" R/ ac|[a-z]b/ matches? ] unit-test
|
{ t } [ "ab" R{{ ac|[a-z]b}} matches? ] unit-test
|
||||||
{ t } [ "ac" R/ ac|\p{Lower}b/ matches? ] unit-test
|
{ t } [ "ac" R{{ ac|\p{Lower}b}} matches? ] unit-test
|
||||||
{ t } [ "ac" R/ ac|[a-z]b/ matches? ] unit-test
|
{ t } [ "ac" R{{ ac|[a-z]b}} matches? ] unit-test
|
||||||
{ t } [ "ac" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
|
{ t } [ "ac" R{{ [a-zA-Z]c|\p{Lower}b}} matches? ] unit-test
|
||||||
{ t } [ "ab" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
|
{ t } [ "ab" R{{ [a-zA-Z]c|\p{Lower}b}} matches? ] unit-test
|
||||||
{ t } [ "πb" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
|
{ t } [ "πb" R{{ [a-zA-Z]c|\p{Lower}b}} matches? ] unit-test
|
||||||
{ f } [ "πc" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
|
{ f } [ "πc" R{{ [a-zA-Z]c|\p{Lower}b}} matches? ] unit-test
|
||||||
{ f } [ "Ab" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
|
{ f } [ "Ab" R{{ [a-zA-Z]c|\p{Lower}b}} matches? ] unit-test
|
||||||
|
|
||||||
{ t } [ "aaaa" R/ .*a./ matches? ] unit-test
|
{ t } [ "aaaa" R{{ .*a.}} matches? ] unit-test
|
||||||
|
|
||||||
{ f } [ "ab" R/ (?~ac|\p{Lower}b)/ matches? ] unit-test
|
{ f } [ "ab" R{{ (?~ac|\p{Lower}b)}} matches? ] unit-test
|
||||||
{ f } [ "ab" R/ (?~ac|[a-z]b)/ matches? ] unit-test
|
{ f } [ "ab" R{{ (?~ac|[a-z]b)}} matches? ] unit-test
|
||||||
{ f } [ "ac" R/ (?~ac|\p{Lower}b)/ matches? ] unit-test
|
{ f } [ "ac" R{{ (?~ac|\p{Lower}b)}} matches? ] unit-test
|
||||||
{ f } [ "ac" R/ (?~ac|[a-z]b)/ matches? ] unit-test
|
{ f } [ "ac" R{{ (?~ac|[a-z]b)}} matches? ] unit-test
|
||||||
{ f } [ "ac" R/ (?~[a-zA-Z]c|\p{Lower}b)/ matches? ] unit-test
|
{ f } [ "ac" R{{ (?~[a-zA-Z]c|\p{Lower}b)}} matches? ] unit-test
|
||||||
{ f } [ "ab" R/ (?~[a-zA-Z]c|\p{Lower}b)/ matches? ] unit-test
|
{ f } [ "ab" R{{ (?~[a-zA-Z]c|\p{Lower}b)}} matches? ] unit-test
|
||||||
{ f } [ "πb" R/ (?~[a-zA-Z]c|\p{Lower}b)/ matches? ] unit-test
|
{ f } [ "πb" R{{ (?~[a-zA-Z]c|\p{Lower}b)}} matches? ] unit-test
|
||||||
{ t } [ "πc" R/ (?~[a-zA-Z]c|\p{Lower}b)/ matches? ] unit-test
|
{ t } [ "πc" R{{ (?~[a-zA-Z]c|\p{Lower}b)}} matches? ] unit-test
|
||||||
{ t } [ "Ab" R/ (?~[a-zA-Z]c|\p{Lower}b)/ matches? ] unit-test
|
{ t } [ "Ab" R{{ (?~[a-zA-Z]c|\p{Lower}b)}} matches? ] unit-test
|
||||||
|
|
||||||
! DFA is compiled when needed, or when literal
|
! DFA is compiled when needed, or when literal
|
||||||
{ regexp-initial-word } [ "foo" <regexp> dfa>> ] unit-test
|
{ regexp-initial-word } [ "foo" <regexp> dfa>> ] unit-test
|
||||||
{ f } [ R/ foo/ dfa>> \ regexp-initial-word = ] unit-test
|
{ f } [ R{{ foo}} dfa>> \ regexp-initial-word = ] unit-test
|
||||||
|
|
||||||
{ t } [ "a" R/ ^a/ matches? ] unit-test
|
{ t } [ "a" R{{ ^a}} matches? ] unit-test
|
||||||
{ f } [ "\na" R/ ^a/ matches? ] unit-test
|
{ f } [ "\na" R{{ ^a}} matches? ] unit-test
|
||||||
{ f } [ "\r\na" R/ ^a/ matches? ] unit-test
|
{ f } [ "\r\na" R{{ ^a}} matches? ] unit-test
|
||||||
{ f } [ "\ra" R/ ^a/ matches? ] unit-test
|
{ f } [ "\ra" R{{ ^a}} matches? ] unit-test
|
||||||
|
|
||||||
{ 1 } [ "a" R/ ^a/ count-matches ] unit-test
|
{ 1 } [ "a" R{{ ^a}} count-matches ] unit-test
|
||||||
{ 0 } [ "\na" R/ ^a/ count-matches ] unit-test
|
{ 0 } [ "\na" R{{ ^a}} count-matches ] unit-test
|
||||||
{ 0 } [ "\r\na" R/ ^a/ count-matches ] unit-test
|
{ 0 } [ "\r\na" R{{ ^a}} count-matches ] unit-test
|
||||||
{ 0 } [ "\ra" R/ ^a/ count-matches ] unit-test
|
{ 0 } [ "\ra" R{{ ^a}} count-matches ] unit-test
|
||||||
|
|
||||||
{ t } [ "a" R/ a$/ matches? ] unit-test
|
{ t } [ "a" R{{ a$}} matches? ] unit-test
|
||||||
{ f } [ "a\n" R/ a$/ matches? ] unit-test
|
{ f } [ "a\n" R{{ a$}} matches? ] unit-test
|
||||||
{ f } [ "a\r" R/ a$/ matches? ] unit-test
|
{ f } [ "a\r" R{{ a$}} matches? ] unit-test
|
||||||
{ f } [ "a\r\n" R/ a$/ matches? ] unit-test
|
{ f } [ "a\r\n" R{{ a$}} matches? ] unit-test
|
||||||
|
|
||||||
{ 1 } [ "a" R/ a$/ count-matches ] unit-test
|
{ 1 } [ "a" R{{ a$}} count-matches ] unit-test
|
||||||
{ 0 } [ "a\n" R/ a$/ count-matches ] unit-test
|
{ 0 } [ "a\n" R{{ a$}} count-matches ] unit-test
|
||||||
{ 0 } [ "a\r" R/ a$/ count-matches ] unit-test
|
{ 0 } [ "a\r" R{{ a$}} count-matches ] unit-test
|
||||||
{ 0 } [ "a\r\n" R/ a$/ count-matches ] unit-test
|
{ 0 } [ "a\r\n" R{{ a$}} count-matches ] unit-test
|
||||||
|
|
||||||
{ t } [ "a" R/ a$|b$/ matches? ] unit-test
|
{ t } [ "a" R{{ a$|b$}} matches? ] unit-test
|
||||||
{ t } [ "b" R/ a$|b$/ matches? ] unit-test
|
{ t } [ "b" R{{ a$|b$}} matches? ] unit-test
|
||||||
{ f } [ "ab" R/ a$|b$/ matches? ] unit-test
|
{ f } [ "ab" R{{ a$|b$}} matches? ] unit-test
|
||||||
{ t } [ "ba" R/ ba$|b$/ matches? ] unit-test
|
{ t } [ "ba" R{{ ba$|b$}} matches? ] unit-test
|
||||||
|
|
||||||
{ t } [ "a" R/ \Aa/ matches? ] unit-test
|
{ t } [ "a" R{{ \Aa}} matches? ] unit-test
|
||||||
{ f } [ "\na" R/ \Aaa/ matches? ] unit-test
|
{ f } [ "\na" R{{ \Aaa}} matches? ] unit-test
|
||||||
{ f } [ "\r\na" R/ \Aa/ matches? ] unit-test
|
{ f } [ "\r\na" R{{ \Aa}} matches? ] unit-test
|
||||||
{ f } [ "\ra" R/ \Aa/ matches? ] unit-test
|
{ f } [ "\ra" R{{ \Aa}} matches? ] unit-test
|
||||||
|
|
||||||
{ t } [ "a" R/ \Aa/m matches? ] unit-test
|
{ t } [ "a" R{{ \Aa}}m matches? ] unit-test
|
||||||
{ f } [ "\na" R/ \Aaa/m matches? ] unit-test
|
{ f } [ "\na" R{{ \Aaa}}m matches? ] unit-test
|
||||||
{ f } [ "\r\na" R/ \Aa/m matches? ] unit-test
|
{ f } [ "\r\na" R{{ \Aa}}m matches? ] unit-test
|
||||||
{ f } [ "\ra" R/ \Aa/m matches? ] unit-test
|
{ f } [ "\ra" R{{ \Aa}}m matches? ] unit-test
|
||||||
{ 0 } [ "\ra" R/ \Aa/m count-matches ] unit-test
|
{ 0 } [ "\ra" R{{ \Aa}}m count-matches ] unit-test
|
||||||
|
|
||||||
{ f } [ "\r\n\n\n\nam" R/ ^am/m matches? ] unit-test
|
{ f } [ "\r\n\n\n\nam" R{{ ^am}}m matches? ] unit-test
|
||||||
{ 1 } [ "\r\n\n\n\nam" R/ ^am/m count-matches ] unit-test
|
{ 1 } [ "\r\n\n\n\nam" R{{ ^am}}m count-matches ] unit-test
|
||||||
|
|
||||||
{ t } [ "a" R/ \Aa\z/m matches? ] unit-test
|
{ t } [ "a" R{{ \Aa\z}}m matches? ] unit-test
|
||||||
{ f } [ "a\n" R/ \Aa\z/m matches? ] unit-test
|
{ f } [ "a\n" R{{ \Aa\z}}m matches? ] unit-test
|
||||||
|
|
||||||
{ f } [ "a\r\n" R/ \Aa\Z/m matches? ] unit-test
|
{ f } [ "a\r\n" R{{ \Aa\Z}}m matches? ] unit-test
|
||||||
{ f } [ "a\n" R/ \Aa\Z/m matches? ] unit-test
|
{ f } [ "a\n" R{{ \Aa\Z}}m matches? ] unit-test
|
||||||
{ 1 } [ "a\r\n" R/ \Aa\Z/m count-matches ] unit-test
|
{ 1 } [ "a\r\n" R{{ \Aa\Z}}m count-matches ] unit-test
|
||||||
{ 1 } [ "a\n" R/ \Aa\Z/m count-matches ] unit-test
|
{ 1 } [ "a\n" R{{ \Aa\Z}}m count-matches ] unit-test
|
||||||
|
|
||||||
{ t } [ "a" R/ \Aa\Z/m matches? ] unit-test
|
{ t } [ "a" R{{ \Aa\Z}}m matches? ] unit-test
|
||||||
{ f } [ "\na" R/ \Aaa\Z/m matches? ] unit-test
|
{ f } [ "\na" R{{ \Aaa\Z}}m matches? ] unit-test
|
||||||
{ f } [ "\r\na" R/ \Aa\Z/m matches? ] unit-test
|
{ f } [ "\r\na" R{{ \Aa\Z}}m matches? ] unit-test
|
||||||
{ f } [ "\ra" R/ \Aa\Z/m matches? ] unit-test
|
{ f } [ "\ra" R{{ \Aa\Z}}m matches? ] unit-test
|
||||||
|
|
||||||
{ 1 } [ "a" R/ \Aa\Z/m count-matches ] unit-test
|
{ 1 } [ "a" R{{ \Aa\Z}}m count-matches ] unit-test
|
||||||
{ 0 } [ "\na" R/ \Aaa\Z/m count-matches ] unit-test
|
{ 0 } [ "\na" R{{ \Aaa\Z}}m count-matches ] unit-test
|
||||||
{ 0 } [ "\r\na" R/ \Aa\Z/m count-matches ] unit-test
|
{ 0 } [ "\r\na" R{{ \Aa\Z}}m count-matches ] unit-test
|
||||||
{ 0 } [ "\ra" R/ \Aa\Z/m count-matches ] unit-test
|
{ 0 } [ "\ra" R{{ \Aa\Z}}m count-matches ] unit-test
|
||||||
|
|
||||||
{ t } [ "a" R/ ^a/m matches? ] unit-test
|
{ t } [ "a" R{{ ^a}}m matches? ] unit-test
|
||||||
{ f } [ "\na" R/ ^a/m matches? ] unit-test
|
{ f } [ "\na" R{{ ^a}}m matches? ] unit-test
|
||||||
{ 1 } [ "\na" R/ ^a/m count-matches ] unit-test
|
{ 1 } [ "\na" R{{ ^a}}m count-matches ] unit-test
|
||||||
{ 1 } [ "\r\na" R/ ^a/m count-matches ] unit-test
|
{ 1 } [ "\r\na" R{{ ^a}}m count-matches ] unit-test
|
||||||
{ 1 } [ "\ra" R/ ^a/m count-matches ] unit-test
|
{ 1 } [ "\ra" R{{ ^a}}m count-matches ] unit-test
|
||||||
|
|
||||||
{ t } [ "a" R/ a$/m matches? ] unit-test
|
{ t } [ "a" R{{ a$}}m matches? ] unit-test
|
||||||
{ f } [ "a\n" R/ a$/m matches? ] unit-test
|
{ f } [ "a\n" R{{ a$}}m matches? ] unit-test
|
||||||
{ 1 } [ "a\n" R/ a$/m count-matches ] unit-test
|
{ 1 } [ "a\n" R{{ a$}}m count-matches ] unit-test
|
||||||
{ 1 } [ "a\r" R/ a$/m count-matches ] unit-test
|
{ 1 } [ "a\r" R{{ a$}}m count-matches ] unit-test
|
||||||
{ 1 } [ "a\r\n" R/ a$/m count-matches ] unit-test
|
{ 1 } [ "a\r\n" R{{ a$}}m count-matches ] unit-test
|
||||||
|
|
||||||
{ f } [ "foobxr" "foo\\z" <regexp> first-match ] unit-test
|
{ f } [ "foobxr" "foo\\z" <regexp> first-match ] unit-test
|
||||||
{ 3 } [ "foo" "foo\\z" <regexp> first-match length ] unit-test
|
{ 3 } [ "foo" "foo\\z" <regexp> first-match length ] unit-test
|
||||||
|
|
||||||
{ t } [ "a foo b" R/ foo/ re-contains? ] unit-test
|
{ t } [ "a foo b" R{{ foo}} re-contains? ] unit-test
|
||||||
{ f } [ "a bar b" R/ foo/ re-contains? ] unit-test
|
{ f } [ "a bar b" R{{ foo}} re-contains? ] unit-test
|
||||||
{ t } [ "foo" R/ foo/ re-contains? ] unit-test
|
{ t } [ "foo" R{{ foo}} re-contains? ] unit-test
|
||||||
|
|
||||||
{ { "foo" "fxx" "fab" } } [ "fab fxx foo" R/ f../r all-matching-subseqs ] unit-test
|
{ { "foo" "fxx" "fab" } } [ "fab fxx foo" R{{ f..}}r all-matching-subseqs ] unit-test
|
||||||
|
|
||||||
{ t } [ "foo" "\\bfoo\\b" <regexp> re-contains? ] unit-test
|
{ t } [ "foo" "\\bfoo\\b" <regexp> re-contains? ] unit-test
|
||||||
{ t } [ "afoob" "\\Bfoo\\B" <regexp> re-contains? ] unit-test
|
{ t } [ "afoob" "\\Bfoo\\B" <regexp> re-contains? ] unit-test
|
||||||
|
@ -506,71 +506,71 @@ unit-test
|
||||||
|
|
||||||
{ 3 } [ "caba" "(?<=b)a" <regexp> first-match from>> ] unit-test
|
{ 3 } [ "caba" "(?<=b)a" <regexp> first-match from>> ] unit-test
|
||||||
|
|
||||||
{ t } [ "\ra" R/ .^a/ms matches? ] unit-test
|
{ t } [ "\ra" R{{ .^a}}ms matches? ] unit-test
|
||||||
{ f } [ "\ra" R/ .^a/mds matches? ] unit-test
|
{ f } [ "\ra" R{{ .^a}}mds matches? ] unit-test
|
||||||
{ t } [ "\na" R/ .^a/ms matches? ] unit-test
|
{ t } [ "\na" R{{ .^a}}ms matches? ] unit-test
|
||||||
{ t } [ "\na" R/ .^a/mds matches? ] unit-test
|
{ t } [ "\na" R{{ .^a}}mds matches? ] unit-test
|
||||||
|
|
||||||
{ t } [ "a\r" R/ a$./ms matches? ] unit-test
|
{ t } [ "a\r" R{{ a$.}}ms matches? ] unit-test
|
||||||
{ f } [ "a\r" R/ a$./mds matches? ] unit-test
|
{ f } [ "a\r" R{{ a$.}}mds matches? ] unit-test
|
||||||
{ t } [ "a\n" R/ a$./ms matches? ] unit-test
|
{ t } [ "a\n" R{{ a$.}}ms matches? ] unit-test
|
||||||
{ t } [ "a\n" R/ a$./mds matches? ] unit-test
|
{ t } [ "a\n" R{{ a$.}}mds matches? ] unit-test
|
||||||
|
|
||||||
! Unicode categories
|
! Unicode categories
|
||||||
{ t } [ "a" R/ \p{L}/ matches? ] unit-test
|
{ t } [ "a" R(( \p{L})) matches? ] unit-test
|
||||||
{ t } [ "A" R/ \p{L}/ matches? ] unit-test
|
{ t } [ "A" R(( \p{L})) matches? ] unit-test
|
||||||
{ f } [ " " R/ \p{L}/ matches? ] unit-test
|
{ f } [ " " R(( \p{L})) matches? ] unit-test
|
||||||
{ f } [ "a" R/ \P{L}/ matches? ] unit-test
|
{ f } [ "a" R(( \P{L})) matches? ] unit-test
|
||||||
{ f } [ "A" R/ \P{L}/ matches? ] unit-test
|
{ f } [ "A" R(( \P{L})) matches? ] unit-test
|
||||||
{ t } [ " " R/ \P{L}/ matches? ] unit-test
|
{ t } [ " " R(( \P{L})) matches? ] unit-test
|
||||||
|
|
||||||
{ t } [ "a" R/ \p{Ll}/ matches? ] unit-test
|
{ t } [ "a" R(( \p{Ll})) matches? ] unit-test
|
||||||
{ f } [ "A" R/ \p{Ll}/ matches? ] unit-test
|
{ f } [ "A" R(( \p{Ll})) matches? ] unit-test
|
||||||
{ f } [ " " R/ \p{Ll}/ matches? ] unit-test
|
{ f } [ " " R(( \p{Ll})) matches? ] unit-test
|
||||||
{ f } [ "a" R/ \P{Ll}/ matches? ] unit-test
|
{ f } [ "a" R(( \P{Ll})) matches? ] unit-test
|
||||||
{ t } [ "A" R/ \P{Ll}/ matches? ] unit-test
|
{ t } [ "A" R(( \P{Ll})) matches? ] unit-test
|
||||||
{ t } [ " " R/ \P{Ll}/ matches? ] unit-test
|
{ t } [ " " R(( \P{Ll})) matches? ] unit-test
|
||||||
|
|
||||||
{ t } [ "a" R/ \p{script=Latin}/ matches? ] unit-test
|
{ t } [ "a" R(( \p{script=Latin})) matches? ] unit-test
|
||||||
{ f } [ " " R/ \p{script=Latin}/ matches? ] unit-test
|
{ f } [ " " R(( \p{script=Latin})) matches? ] unit-test
|
||||||
{ f } [ "a" R/ \P{script=Latin}/ matches? ] unit-test
|
{ f } [ "a" R(( \P{script=Latin})) matches? ] unit-test
|
||||||
{ t } [ " " R/ \P{script=Latin}/ matches? ] unit-test
|
{ t } [ " " R(( \P{script=Latin})) matches? ] unit-test
|
||||||
|
|
||||||
! These should be case-insensitive
|
! These should be case-insensitive
|
||||||
{ f } [ " " R/ \p{l}/ matches? ] unit-test
|
{ f } [ " " R(( \p{l})) matches? ] unit-test
|
||||||
{ f } [ "a" R/ \P{l}/ matches? ] unit-test
|
{ f } [ "a" R(( \P{l})) matches? ] unit-test
|
||||||
{ f } [ "a" R/ \P{ll}/ matches? ] unit-test
|
{ f } [ "a" R(( \P{ll})) matches? ] unit-test
|
||||||
{ t } [ " " R/ \P{LL}/ matches? ] unit-test
|
{ t } [ " " R(( \P{LL})) matches? ] unit-test
|
||||||
{ f } [ "a" R/ \P{sCriPt = latin}/ matches? ] unit-test
|
{ f } [ "a" R(( \P{sCriPt = latin})) matches? ] unit-test
|
||||||
{ t } [ " " R/ \P{SCRIPT = laTIn}/ matches? ] unit-test
|
{ t } [ " " R(( \P{SCRIPT = laTIn})) matches? ] unit-test
|
||||||
|
|
||||||
! Logical operators
|
! Logical operators
|
||||||
{ t } [ "a" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
|
{ t } [ "a" R(( [\p{script=latin}\p{lower}])) matches? ] unit-test
|
||||||
{ t } [ "π" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
|
{ t } [ "π" R(( [\p{script=latin}\p{lower}])) matches? ] unit-test
|
||||||
{ t } [ "A" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
|
{ t } [ "A" R(( [\p{script=latin}\p{lower}])) matches? ] unit-test
|
||||||
{ f } [ "3" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
|
{ f } [ "3" R(( [\p{script=latin}\p{lower}])) matches? ] unit-test
|
||||||
|
|
||||||
{ t } [ "a" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
|
{ t } [ "a" R(( [\p{script=latin}||\p{lower}])) matches? ] unit-test
|
||||||
{ t } [ "π" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
|
{ t } [ "π" R(( [\p{script=latin}||\p{lower}])) matches? ] unit-test
|
||||||
{ t } [ "A" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
|
{ t } [ "A" R(( [\p{script=latin}||\p{lower}])) matches? ] unit-test
|
||||||
{ f } [ "3" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
|
{ f } [ "3" R(( [\p{script=latin}||\p{lower}])) matches? ] unit-test
|
||||||
|
|
||||||
{ t } [ "a" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
|
{ t } [ "a" R(( [\p{script=latin}&&\p{lower}])) matches? ] unit-test
|
||||||
{ f } [ "π" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
|
{ f } [ "π" R(( [\p{script=latin}&&\p{lower}])) matches? ] unit-test
|
||||||
{ f } [ "A" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
|
{ f } [ "A" R(( [\p{script=latin}&&\p{lower}])) matches? ] unit-test
|
||||||
{ f } [ "3" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
|
{ f } [ "3" R(( [\p{script=latin}&&\p{lower}])) matches? ] unit-test
|
||||||
|
|
||||||
{ f } [ "a" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
|
{ f } [ "a" R(( [\p{script=latin}~~\p{lower}])) matches? ] unit-test
|
||||||
{ t } [ "π" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
|
{ t } [ "π" R(( [\p{script=latin}~~\p{lower}])) matches? ] unit-test
|
||||||
{ t } [ "A" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
|
{ t } [ "A" R(( [\p{script=latin}~~\p{lower}])) matches? ] unit-test
|
||||||
{ f } [ "3" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
|
{ f } [ "3" R(( [\p{script=latin}~~\p{lower}])) matches? ] unit-test
|
||||||
|
|
||||||
{ f } [ "a" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
|
{ f } [ "a" R(( [\p{script=latin}--\p{lower}])) matches? ] unit-test
|
||||||
{ f } [ "π" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
|
{ f } [ "π" R(( [\p{script=latin}--\p{lower}])) matches? ] unit-test
|
||||||
{ t } [ "A" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
|
{ t } [ "A" R(( [\p{script=latin}--\p{lower}])) matches? ] unit-test
|
||||||
{ f } [ "3" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
|
{ f } [ "3" R(( [\p{script=latin}--\p{lower}])) matches? ] unit-test
|
||||||
|
|
||||||
{ t } [ " " R/ \P{alpha}/ matches? ] unit-test
|
{ t } [ " " R(( \P{alpha})) matches? ] unit-test
|
||||||
{ f } [ "" R/ \P{alpha}/ matches? ] unit-test
|
{ f } [ "" R(( \P{alpha})) matches? ] unit-test
|
||||||
{ f } [ "a " R/ \P{alpha}/ matches? ] unit-test
|
{ f } [ "a " R(( \P{alpha})) matches? ] unit-test
|
||||||
{ f } [ "a" R/ \P{alpha}/ matches? ] unit-test
|
{ f } [ "a" R(( \P{alpha})) matches? ] unit-test
|
||||||
|
|
|
@ -3,7 +3,8 @@
|
||||||
USING: accessors combinators kernel kernel.private math sequences
|
USING: accessors combinators kernel kernel.private math sequences
|
||||||
sequences.private strings sets assocs make lexer namespaces parser
|
sequences.private strings sets assocs make lexer namespaces parser
|
||||||
arrays fry locals regexp.parser splitting sorting regexp.ast
|
arrays fry locals regexp.parser splitting sorting regexp.ast
|
||||||
regexp.negation regexp.compiler compiler.units words math.ranges ;
|
regexp.negation regexp.compiler compiler.units words math.ranges
|
||||||
|
multiline ;
|
||||||
in: regexp
|
in: regexp
|
||||||
|
|
||||||
TUPLE: regexp
|
TUPLE: regexp
|
||||||
|
@ -216,7 +217,11 @@ PRIVATE>
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
SYNTAX: R/ parse-regexp ;
|
SYNTAX: \ R/ parse-regexp ;
|
||||||
|
<PRIVATE

! Shared body of the delimiter-based regexp literals below.
! end-delimiter is the closing token of the literal ("]]", "))", ...).
! Reads the raw pattern text up to end-delimiter, then the option
! letters that may directly follow it (e.g. the "i" in R[[ abc]]i),
! builds the regexp, compiles it, and appends it to the parse
! accumulator.
: parse-delimited-regexp ( accum end-delimiter -- accum )
    parse-multiline-string
    lexer get parse-noblank-token
    <optioned-regexp> compile-next-match suffix! ;

PRIVATE>

! Regexp literals with alternative delimiters. Choose the variant whose
! closing delimiter does not occur inside the pattern itself.
SYNTAX: \ R[[ "]]" parse-delimited-regexp ;
SYNTAX: \ R[=[ "]=]" parse-delimited-regexp ;
SYNTAX: \ R(( "))" parse-delimited-regexp ;
SYNTAX: \ R{{ "}}" parse-delimited-regexp ;
|
||||||
|
|
||||||
use: vocabs.loader
|
use: vocabs.loader
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,6 @@ unicode ;
|
||||||
in: sorting.title
|
in: sorting.title
|
||||||
|
|
||||||
<< "title" [
|
<< "title" [
|
||||||
>lower dup R/ ^(the|a|an|el|la|los|las|il) / first-match
|
>lower dup R[[ ^(the|a|an|el|la|los|las|il) ]] first-match
|
||||||
[ to>> tail-slice ] when*
|
[ to>> tail-slice ] when*
|
||||||
] define-sorting >>
|
] define-sorting >>
|
||||||
|
|
|
@ -14,7 +14,7 @@ in: text-analysis
|
||||||
[ [ blank? ] trim ] map harvest ;
|
[ [ blank? ] trim ] map harvest ;
|
||||||
|
|
||||||
: split-paragraphs ( str -- seq )
|
: split-paragraphs ( str -- seq )
|
||||||
R/ \r?\n\r?\n/ re-split trimmed ;
|
R[[ \r?\n\r?\n]] re-split trimmed ;
|
||||||
|
|
||||||
<<
|
<<
|
||||||
CONSTANT: ABBREVIATIONS {
|
CONSTANT: ABBREVIATIONS {
|
||||||
|
@ -39,7 +39,7 @@ CONSTANT: ABBREVIATIONS {
|
||||||
: split-sentences ( str -- seq )
|
: split-sentences ( str -- seq )
|
||||||
|
|
||||||
! Mark end of sentences with EOS marker
|
! Mark end of sentences with EOS marker
|
||||||
R/ ((?:[\.?!]|[\r\n]+)(?:\"|\'|\)|\]|\})?)(\s+)/
|
R[[ ((?:[\.?!]|[\r\n]+)(?:\"|\'|\)|\]|\})?)(\s+)]]
|
||||||
[ [ ".?!\r\n\"')]}" member? not ] cut-when "\x01" glue ]
|
[ [ ".?!\r\n\"')]}" member? not ] cut-when "\x01" glue ]
|
||||||
re-replace-with
|
re-replace-with
|
||||||
|
|
||||||
|
@ -63,46 +63,46 @@ CONSTANT: ABBREVIATIONS {
|
||||||
"\x01" split trimmed ;
|
"\x01" split trimmed ;
|
||||||
|
|
||||||
CONSTANT: sub-syllable {
|
CONSTANT: sub-syllable {
|
||||||
R/ [^aeiou]e$/ ! give, love, bone, done, ride ...
|
R[[ [^aeiou]e$]] ! give, love, bone, done, ride ...
|
||||||
R/ [aeiou](?:([cfghklmnprsvwz])\1?|ck|sh|[rt]ch)e[ds]$/
|
R[[ [aeiou](?:([cfghklmnprsvwz])\1?|ck|sh|[rt]ch)e[ds]$]]
|
||||||
! (passive) past participles and 3rd person sing present verbs:
|
! (passive) past participles and 3rd person sing present verbs:
|
||||||
! bared, liked, called, tricked, bashed, matched
|
! bared, liked, called, tricked, bashed, matched
|
||||||
|
|
||||||
R/ .e(?:ly|less(?:ly)?|ness?|ful(?:ly)?|ments?)$/
|
R[[ .e(?:ly|less(?:ly)?|ness?|ful(?:ly)?|ments?)$]]
|
||||||
! nominal, adjectival and adverbial derivatives from -e$ roots:
|
! nominal, adjectival and adverbial derivatives from -e$ roots:
|
||||||
! absolutely, nicely, likeness, basement, hopeless
|
! absolutely, nicely, likeness, basement, hopeless
|
||||||
! hopeful, tastefully, wasteful
|
! hopeful, tastefully, wasteful
|
||||||
|
|
||||||
R/ ion/ ! action, diction, fiction
|
R{{ ion}} ! action, diction, fiction
|
||||||
R/ [ct]ia[nl]/ ! special(ly), initial, physician, christian
|
R{{ [ct]ia[nl]}} ! special(ly), initial, physician, christian
|
||||||
R/ [^cx]iou/ ! illustrious, NOT spacious, gracious, anxious, noxious
|
R{{ [^cx]iou}} ! illustrious, NOT spacious, gracious, anxious, noxious
|
||||||
R/ sia$/ ! amnesia, polynesia
|
R{{ sia$}} ! amnesia, polynesia
|
||||||
R/ .gue$/ ! dialogue, intrigue, colleague
|
R{{ .gue$}} ! dialogue, intrigue, colleague
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
CONSTANT: add-syllable {
|
CONSTANT: add-syllable {
|
||||||
R/ i[aiou]/ ! alias, science, phobia
|
R{{ i[aiou]}} ! alias, science, phobia
|
||||||
R/ [dls]ien/ ! salient, gradient, transient
|
R{{ [dls]ien}} ! salient, gradient, transient
|
||||||
R/ [aeiouym]ble$/ ! -Vble, plus -mble
|
R{{ [aeiouym]ble$}} ! -Vble, plus -mble
|
||||||
R/ [aeiou]{3}/ ! agreeable
|
R(( [aeiou]{3})) ! agreeable
|
||||||
R/ ^mc/ ! mcwhatever
|
R{{ ^mc}} ! mcwhatever
|
||||||
R/ ism$/ ! sexism, racism
|
R{{ ism$}} ! sexism, racism
|
||||||
R/ (?:([^aeiouy])\1|ck|mp|ng)le$/ ! bubble, cattle, cackle, sample, angle
|
R{{ (?:([^aeiouy])\1|ck|mp|ng)le$}} ! bubble, cattle, cackle, sample, angle
|
||||||
R/ dnt$/ ! couldn/t
|
R{{ dnt$}} ! couldn't
|
||||||
R/ [aeiou]y[aeiou]/ ! annoying, layer
|
R{{ [aeiou]y[aeiou]}} ! annoying, layer
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
: syllables ( str -- n )
|
: syllables ( str -- n )
|
||||||
dup length 1 = [ drop 1 ] [
|
dup length 1 = [ drop 1 ] [
|
||||||
>lower char: . swap remove
|
>lower char: . swap remove
|
||||||
[ R/ [aeiouy]+/ count-matches ]
|
[ R{{ [aeiouy]+}} count-matches ]
|
||||||
[ sub-syllable [ matches? ] with count - ]
|
[ sub-syllable [ matches? ] with count - ]
|
||||||
[ add-syllable [ matches? ] with count + ] tri
|
[ add-syllable [ matches? ] with count + ] tri
|
||||||
1 max
|
1 max
|
||||||
] if ;
|
] if ;
|
||||||
|
|
||||||
: split-words ( str -- words )
|
: split-words ( str -- words )
|
||||||
R/ \b([a-z][a-z\-']*)\b/i all-matching-subseqs ;
|
R{{ \b([a-z][a-z\-']*)\b}}i all-matching-subseqs ;
|
||||||
|
|
||||||
TUPLE: text-analysis #paragraphs #sentences #chars #words
|
TUPLE: text-analysis #paragraphs #sentences #chars #words
|
||||||
#syllables #complex-words #unique-words #difficult-words ;
|
#syllables #complex-words #unique-words #difficult-words ;
|
||||||
|
|
|
@ -61,11 +61,11 @@ in: validators
|
||||||
! From http://www.regular-expressions.info/email.html
|
! From http://www.regular-expressions.info/email.html
|
||||||
320 v-max-length
|
320 v-max-length
|
||||||
"e-mail"
|
"e-mail"
|
||||||
R/ [A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}/i
|
R(( [A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}))i
|
||||||
v-regexp ;
|
v-regexp ;
|
||||||
|
|
||||||
! Validates that str looks like an ftp, http or https URL: one of those
! schemes, then "://", then at least one non-whitespace character.
! The check itself is delegated to v-regexp with the label "URL".
: v-url ( str -- str )
    ! "/" is not the terminator of an R(( ... )) literal, so it is written
    ! unescaped. The earlier "\\/" spelling is an escaped backslash
    ! followed by "/", which would demand a literal backslash in the URL.
    "URL" R(( (?:ftp|http|https)://\S+)) v-regexp ;
|
||||||
|
|
||||||
: v-captcha ( str -- str )
|
: v-captcha ( str -- str )
|
||||||
dup empty? [ "must remain blank" throw ] unless ;
|
dup empty? [ "must remain blank" throw ] unless ;
|
||||||
|
|
|
@ -13,7 +13,7 @@ in: xkcd
|
||||||
|
|
||||||
! Downloads the page at url, finds the first imgs.xkcd.com comic image
! link (png or jpg) in the downloaded text, and loads that image over
! HTTP.
: comic-image ( url -- image )
    ! nip keeps only the top value returned by http-get
    http-get nip
    ! "/" needs no escaping inside R[[ ... ]]. The earlier "\\/" spelling
    ! is an escaped backslash followed by "/", which never occurs in the
    ! image URLs this pattern is meant to find.
    R[[ http://imgs\.xkcd\.com/comics/[^\.]+\.(png|jpg)]]
    first-match >string load-http-image ;
|
||||||
|
|
||||||
: comic-image. ( url -- )
|
: comic-image. ( url -- )
|
||||||
|
|
|
@ -15,16 +15,16 @@ CONSTANT: YAML_VALUE_TAG "tag:yaml.org,2002:value" ;
|
||||||
! http://www.yaml.org/spec/1.2/spec.html
|
! http://www.yaml.org/spec/1.2/spec.html
|
||||||
! 10.3. Core Schema
|
! 10.3. Core Schema
|
||||||
|
|
||||||
CONSTANT: re-null R/ null|Null|NULL|~/ ;
|
CONSTANT: re-null R[[ null|Null|NULL|~]] ;
|
||||||
CONSTANT: re-empty R/ / ;
|
CONSTANT: re-empty R[[ ]] ;
|
||||||
CONSTANT: re-bool R/ true|True|TRUE|false|False|FALSE/ ;
|
CONSTANT: re-bool R[[ true|True|TRUE|false|False|FALSE]] ;
|
||||||
CONSTANT: re-int10 R/ [-+]?[0-9]+/ ;
|
CONSTANT: re-int10 R[[ [-+]?[0-9]+]] ;
|
||||||
CONSTANT: re-int8 R/ 0o[0-7]+/ ;
|
CONSTANT: re-int8 R[[ 0o[0-7]+]] ;
|
||||||
CONSTANT: re-int16 R/ 0x[0-9a-fA-F]+/ ;
|
CONSTANT: re-int16 R[[ 0x[0-9a-fA-F]+]] ;
|
||||||
CONSTANT: re-number R/ [-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?/ ;
|
CONSTANT: re-number R[[ [-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?]] ;
|
||||||
CONSTANT: re-infinity R/ [-+]?\.(inf|Inf|INF)/ ;
|
CONSTANT: re-infinity R[[ [-+]?\.(inf|Inf|INF)]] ;
|
||||||
CONSTANT: re-nan R/ \.(nan|NaN|NAN)/ ;
|
CONSTANT: re-nan R[[ \.(nan|NaN|NAN)]] ;
|
||||||
CONSTANT: re-timestamp R/ [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]|[0-9][0-9][0-9][0-9]-[0-9][0-9]?-[0-9][0-9]?([Tt]|[ \t]+)[0-9][0-9]?:[0-9][0-9]:[0-9][0-9](\.[0-9]*)?([ \t]*(Z|[-+][0-9][0-9]?(:[0-9][0-9])?))?/ ;
|
CONSTANT: re-timestamp R[[ [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]|[0-9][0-9][0-9][0-9]-[0-9][0-9]?-[0-9][0-9]?([Tt]|[ \t]+)[0-9][0-9]?:[0-9][0-9]:[0-9][0-9](\.[0-9]*)?([ \t]*(Z|[-+][0-9][0-9]?(:[0-9][0-9])?))?]] ;
|
||||||
|
|
||||||
: resolve-normal-plain-scalar ( str -- tag )
|
: resolve-normal-plain-scalar ( str -- tag )
|
||||||
{
|
{
|
||||||
|
@ -41,8 +41,8 @@ CONSTANT: re-timestamp R/ [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]|[0-9][0-9][
|
||||||
[ drop YAML_STR_TAG ]
|
[ drop YAML_STR_TAG ]
|
||||||
} cond-case ;
|
} cond-case ;
|
||||||
|
|
||||||
CONSTANT: re-merge R/ <</ ;
|
CONSTANT: re-merge R[[ <<]] ;
|
||||||
CONSTANT: re-value R/ =/ ;
|
CONSTANT: re-value R[[ =]] ;
|
||||||
: (resolve-mapping-key-plain-scalar) ( str -- tag )
|
: (resolve-mapping-key-plain-scalar) ( str -- tag )
|
||||||
{
|
{
|
||||||
{ [ re-merge matches? ] [ YAML_MERGE_TAG ] }
|
{ [ re-merge matches? ] [ YAML_MERGE_TAG ] }
|
||||||
|
@ -87,7 +87,7 @@ CONSTANT: YAML_OMAP_TAG "tag:yaml.org,2002:omap" ;
|
||||||
CONSTANT: YAML_PAIRS_TAG "tag:yaml.org,2002:pairs" ;
|
CONSTANT: YAML_PAIRS_TAG "tag:yaml.org,2002:pairs" ;
|
||||||
CONSTANT: YAML_SET_TAG "tag:yaml.org,2002:set" ;
|
CONSTANT: YAML_SET_TAG "tag:yaml.org,2002:set" ;
|
||||||
|
|
||||||
: construct-bool ( str -- ? ) R/ true|True|TRUE/ matches? ;
|
: construct-bool ( str -- ? ) R[[ true|True|TRUE]] matches? ;
|
||||||
|
|
||||||
: construct-int ( str -- n ) string>number ;
|
: construct-int ( str -- n ) string>number ;
|
||||||
|
|
||||||
|
@ -107,14 +107,14 @@ CONSTANT: YAML_SET_TAG "tag:yaml.org,2002:set" ;
|
||||||
! - months, days and hours on 1 digit
|
! - months, days and hours on 1 digit
|
||||||
! preprocess to fix this mess...
|
! preprocess to fix this mess...
|
||||||
: yaml>rfc3339 ( str -- str' )
|
: yaml>rfc3339 ( str -- str' )
|
||||||
R/ -[0-9][^0-9]/ [ [ char: 0 1 ] dip insert-nth ] re-replace-with
|
R{{ -[0-9][^0-9]}} [ [ char: 0 1 ] dip insert-nth ] re-replace-with
|
||||||
R/ -[0-9][^0-9]/ [ [ char: 0 1 ] dip insert-nth ] re-replace-with
|
R{{ -[0-9][^0-9]}} [ [ char: 0 1 ] dip insert-nth ] re-replace-with
|
||||||
R/ [^0-9][0-9]:/ [ [ char: 0 1 ] dip insert-nth ] re-replace-with
|
R{{ [^0-9][0-9]:}} [ [ char: 0 1 ] dip insert-nth ] re-replace-with
|
||||||
R/ [ \t]+/ " " re-replace
|
R{{ [ \t]+}} " " re-replace
|
||||||
char: \: over index cut char: space swap remove append ;
|
char: \: over index cut char: space swap remove append ;
|
||||||
|
|
||||||
: construct-timestamp ( obj -- obj' )
|
: construct-timestamp ( obj -- obj' )
|
||||||
dup R/ [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]/ matches?
|
dup R{{ [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]}} matches?
|
||||||
[ ymd>timestamp ] [ yaml>rfc3339 rfc3339>timestamp ] if ;
|
[ ymd>timestamp ] [ yaml>rfc3339 rfc3339>timestamp ] if ;
|
||||||
|
|
||||||
TUPLE: yaml-merge ;
|
TUPLE: yaml-merge ;
|
||||||
|
|
|
@ -482,8 +482,8 @@ CONSTANT: nested-merge-obj H{
|
||||||
${ nested-merge-obj } [ $ nested-merge-str yaml> ] unit-test
|
${ nested-merge-obj } [ $ nested-merge-str yaml> ] unit-test
|
||||||
${ nested-merge-obj } [ $ nested-merge-obj >yaml yaml> ] unit-test
|
${ nested-merge-obj } [ $ nested-merge-obj >yaml yaml> ] unit-test
|
||||||
|
|
||||||
CONSTANT: recursive-merge-str "--- &A ;
|
CONSTANT: recursive-merge-str "--- &A
|
||||||
<<: *A"
|
<<: *A" ;
|
||||||
CONSTANT: recursive-merge-obj H{ } ;
|
CONSTANT: recursive-merge-obj H{ } ;
|
||||||
|
|
||||||
${ recursive-merge-obj } [ $ recursive-merge-str yaml> ] unit-test
|
${ recursive-merge-obj } [ $ recursive-merge-str yaml> ] unit-test
|
||||||
|
|
Loading…
Reference in New Issue