regexp: make it use R{{ R[[ R(( for now.
parent
0e71afabe0
commit
3199c23f17
|
@ -7,19 +7,19 @@ in: benchmark.regex-dna
|
|||
! Based on http://shootout.alioth.debian.org/gp4/benchmark.php?test=regexdna&lang=ruby&id=1
|
||||
|
||||
: strip-line-breaks ( string -- string' )
|
||||
R/ >.*\n|\n/ "" re-replace ;
|
||||
R[[ >.*\n|\n]] "" re-replace ;
|
||||
|
||||
: count-patterns ( string -- )
|
||||
{
|
||||
R/ agggtaaa|tttaccct/i
|
||||
R/ [cgt]gggtaaa|tttaccc[acg]/i
|
||||
R/ a[act]ggtaaa|tttacc[agt]t/i
|
||||
R/ ag[act]gtaaa|tttac[agt]ct/i
|
||||
R/ agg[act]taaa|ttta[agt]cct/i
|
||||
R/ aggg[acg]aaa|ttt[cgt]ccct/i
|
||||
R/ agggt[cgt]aa|tt[acg]accct/i
|
||||
R/ agggta[cgt]a|t[acg]taccct/i
|
||||
R/ agggtaa[cgt]|[acg]ttaccct/i
|
||||
R[[ agggtaaa|tttaccct]]i
|
||||
R(( [cgt]gggtaaa|tttaccc[acg]))i
|
||||
R[[ a[act]ggtaaa|tttacc[agt]t]]i
|
||||
R[[ ag[act]gtaaa|tttac[agt]ct]]i
|
||||
R[[ agg[act]taaa|ttta[agt]cct]]i
|
||||
R[[ aggg[acg]aaa|ttt[cgt]ccct]]i
|
||||
R[[ agggt[cgt]aa|tt[acg]accct]]i
|
||||
R[[ agggta[cgt]a|t[acg]taccct]]i
|
||||
R[[ agggtaa[cgt]|[acg]ttaccct]]i
|
||||
} [
|
||||
[ raw>> write bl ]
|
||||
[ count-matches number>string print ]
|
||||
|
|
|
@ -9,8 +9,8 @@ in: benchmark.regexp
|
|||
20,000 iota [ number>string ] map
|
||||
200 iota [ 1 + char: a <string> ] map
|
||||
'[
|
||||
_ R/ \d+/ [ matches? ] curry all? t assert=
|
||||
_ R/ [a]+/ [ matches? ] curry all? t assert=
|
||||
_ R[[ \d+]] [ matches? ] curry all? t assert=
|
||||
_ R[[ [a]+]] [ matches? ] curry all? t assert=
|
||||
] times ;
|
||||
|
||||
main: regexp-benchmark
|
||||
|
|
|
@ -134,7 +134,7 @@ CONSTANT: galois-slides
|
|||
"Implemented with library code"
|
||||
{ $code "use: regexp" }
|
||||
{ $code "\"ababbc\" \"[ab]+c\" <regexp> matches? ." }
|
||||
{ $code "\"ababbc\" R/ [ab]+c/ matches? ." }
|
||||
{ $code "\"ababbc\" R[[ [ab]+c]] matches? ." }
|
||||
}
|
||||
{ $slide "Example: memoization"
|
||||
{ "Memoization with " { $link postpone\ MEMO: } }
|
||||
|
|
|
@ -136,4 +136,4 @@ os unix? [ [ 10 ] [ PCRE_CONFIG_NEWLINE pcre-config ] unit-test ] when
|
|||
] unit-test
|
||||
|
||||
! Test that the regexp syntax works.
|
||||
{ t } [ "1234abcd" regexp:R/ ^\d+\w+$/ matches? ] unit-test
|
||||
{ t } [ "1234abcd" regexp:R[[ ^\d+\w+$]] matches? ] unit-test
|
||||
|
|
|
@ -869,7 +869,7 @@ CONSTANT: emoji H{
|
|||
} ;
|
||||
|
||||
: emojify ( str -- str' )
|
||||
R/ :([^:])+:/ [ >string emoji at ] re-replace-with ;
|
||||
R[[ :([^:])+:]] [ >string emoji at ] re-replace-with ;
|
||||
|
||||
: emojify-main ( -- )
|
||||
command-line get [
|
||||
|
|
|
@ -7,10 +7,10 @@ unicode ;
|
|||
in: globs
|
||||
|
||||
: not-path-separator ( -- sep )
|
||||
os windows? R/ [^\\/\\]/ R/ [^\\/]/ ? ; foldable
|
||||
os windows? R{{ [^\\/\\]}} R{{ [^\\/]}} ? ; foldable
|
||||
|
||||
: wild-path-separator ( -- sep )
|
||||
os windows? R/ [^\\/\\][\\/\\]|[^\\/\\]/ R/ [^\\/][\\/]|[^\\/]/ ? ; foldable
|
||||
os windows? R{{ [^\\/\\][\\/\\]|[^\\/\\]}} R{{ [^\\/][\\/]|[^\\/]}} ? ; foldable
|
||||
|
||||
: <glob> ( string -- obj ) EBNF{{
|
||||
|
||||
|
|
|
@ -2350,7 +2350,7 @@ CONSTANT: html5 H{
|
|||
"#" ?head [ numeric-charref ] [ named-charref ] if ;
|
||||
|
||||
CONSTANT: re-charref
|
||||
R/ &(#[0-9]+|#[xX][0-9a-fA-F]+|[^\t\n\f <&#;]{1,32});?/ ;
|
||||
R[[ &(#[0-9]+|#[xX][0-9a-fA-F]+|[^\t\n\f <&#;]{1,32});?]] ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
|
|
|
@ -26,14 +26,14 @@ ERROR: bad-location str ;
|
|||
} case ;
|
||||
|
||||
: string>longitude ( str -- lon/f )
|
||||
dup R/ \d+-\d+(-\d+(\.\d+)?)?[WE]/ matches? [
|
||||
dup R{{ \d+-\d+(-\d+(\.\d+)?)?[WE]}} matches? [
|
||||
unclip-last
|
||||
[ parse-location ]
|
||||
[ char: W = [ neg ] when ] bi*
|
||||
] [ drop f ] if ;
|
||||
|
||||
: string>latitude ( str -- lat/f )
|
||||
dup R/ \d+-\d+(-\d+(\.\d+)?)?[NS]/ matches? [
|
||||
dup R{{ \d+-\d+(-\d+(\.\d+)?)?[NS]}} matches? [
|
||||
unclip-last
|
||||
[ parse-location ]
|
||||
[ char: S = [ neg ] when ] bi*
|
||||
|
@ -277,16 +277,16 @@ CONSTANT: sky H{
|
|||
unclip [ string>number ] [ char: A = ] bi*
|
||||
[ 100 /f "%.2f Hg" sprintf ] [ "%s hPa" sprintf ] if ;
|
||||
|
||||
CONSTANT: re-timestamp R/ \d{6}Z/ ;
|
||||
CONSTANT: re-station R/ \w{4}/ ;
|
||||
CONSTANT: re-temperature R/ [M]?\d{2}\\/([M]?\d{2})?/ ;
|
||||
CONSTANT: re-wind R/ (VRB|\d{3})\d{2,3}(G\d{2,3})?KT/ ;
|
||||
CONSTANT: re-wind-variable R/ \d{3}V\d{3}/ ;
|
||||
CONSTANT: re-visibility R/ [MP]?\d+(\\/\d+)?SM/ ;
|
||||
CONSTANT: re-rvr R/ R\d{2}[RLC]?\\/\d{4}(V\d{4})?FT/ ;
|
||||
CONSTANT: re-weather R/ [+-]?(VC)?(\w{2}|\w{4})/ ;
|
||||
CONSTANT: re-sky-condition R/ (\w{2,3}\d{3}(\w+)?|\w{3}|CAVOK)/ ;
|
||||
CONSTANT: re-altimeter R/ [AQ]\d{4}/ ;
|
||||
CONSTANT: re-timestamp R{{ \d{6}Z}} ;
|
||||
CONSTANT: re-station R(( \w{4})) ;
|
||||
CONSTANT: re-temperature R{{ [M]?\d{2}\\/([M]?\d{2})?}} ;
|
||||
CONSTANT: re-wind R{{ (VRB|\d{3})\d{2,3}(G\d{2,3})?KT}} ;
|
||||
CONSTANT: re-wind-variable R[[ \d{3}V\d{3}]] ;
|
||||
CONSTANT: re-visibility R{{ [MP]?\d+(\\/\d+)?SM}} ;
|
||||
CONSTANT: re-rvr R{{ R\d{2}[RLC]?\\/\d{4}(V\d{4})?FT}} ;
|
||||
CONSTANT: re-weather R{{ [+-]?(VC)?(\w{2}|\w{4})}} ;
|
||||
CONSTANT: re-sky-condition R{{ (\w{2,3}\d{3}(\w+)?|\w{3}|CAVOK)}} ;
|
||||
CONSTANT: re-altimeter R(( [AQ]\d{4})) ;
|
||||
|
||||
: find-one ( seq quot: ( elt -- ? ) -- seq elt/f )
|
||||
dupd find drop [ tail unclip ] [ f ] if* ; inline
|
||||
|
@ -462,7 +462,7 @@ CONSTANT: high-clouds H{
|
|||
: parse-lightning ( str -- str' )
|
||||
"LTG" ?head drop 2 group [ lightning at ] map " " join ;
|
||||
|
||||
CONSTANT: re-recent-weather R/ ((\w{2})?[BE]\d{2,4}((\w{2})?[BE]\d{2,4})?)+/ ;
|
||||
CONSTANT: re-recent-weather R{{ ((\w{2})?[BE]\d{2,4}((\w{2})?[BE]\d{2,4})?)+}} ;
|
||||
|
||||
: parse-began/ended ( str -- str' )
|
||||
unclip swap
|
||||
|
@ -512,27 +512,27 @@ CONSTANT: re-recent-weather R/ ((\w{2})?[BE]\d{2,4}((\w{2})?[BE]\d{2,4})?)+/ ;
|
|||
: parse-remark ( str -- str' )
|
||||
{
|
||||
{ [ dup glossary key? ] [ glossary at ] }
|
||||
{ [ dup R/ 1\d{4}/ matches? ] [ parse-6hr-max-temp ] }
|
||||
{ [ dup R/ 2\d{4}/ matches? ] [ parse-6hr-min-temp ] }
|
||||
{ [ dup R/ 4\d{8}/ matches? ] [ parse-24hr-temp ] }
|
||||
{ [ dup R/ 4\\/\d{3}/ matches? ] [ parse-snow-depth ] }
|
||||
{ [ dup R/ 5\d{4}/ matches? ] [ parse-1hr-pressure ] }
|
||||
{ [ dup R/ 6[\d\\/]{4}/ matches? ] [ parse-6hr-precipitation ] }
|
||||
{ [ dup R/ 7\d{4}/ matches? ] [ parse-24hr-precipitation ] }
|
||||
{ [ dup R/ 8\\/\d{3}/ matches? ] [ parse-cloud-cover ] }
|
||||
{ [ dup R/ 931\d{3}/ matches? ] [ parse-6hr-snowfall ] }
|
||||
{ [ dup R/ 933\d{3}/ matches? ] [ parse-water-equivalent-snow ] }
|
||||
{ [ dup R/ 98\d{3}/ matches? ] [ parse-duration-of-sunshine ] }
|
||||
{ [ dup R/ T\d{4,8}/ matches? ] [ parse-1hr-temp ] }
|
||||
{ [ dup R/ \d{3}\d{2,3}\\/\d{2,4}/ matches? ] [ parse-peak-wind ] }
|
||||
{ [ dup R/ P\d{4}/ matches? ] [ parse-1hr-precipitation ] }
|
||||
{ [ dup R/ SLP\d{3}/ matches? ] [ parse-sea-level-pressure ] }
|
||||
{ [ dup R/ LTG\w+/ matches? ] [ parse-lightning ] }
|
||||
{ [ dup R/ PROB\d+/ matches? ] [ parse-probability ] }
|
||||
{ [ dup R/ \d{3}V\d{3}/ matches? ] [ parse-varying ] }
|
||||
{ [ dup R/ [^-]+(-[^-]+)+/ matches? ] [ parse-from-to ] }
|
||||
{ [ dup R/ [^\\/]+(\\/[^\\/]+)+/ matches? ] [ ] }
|
||||
{ [ dup R/ \d+.\d+/ matches? ] [ ] }
|
||||
{ [ dup R(( 1\d{4})) matches? ] [ parse-6hr-max-temp ] }
|
||||
{ [ dup R(( 2\d{4})) matches? ] [ parse-6hr-min-temp ] }
|
||||
{ [ dup R(( 4\d{8})) matches? ] [ parse-24hr-temp ] }
|
||||
{ [ dup R(( 4\\/\d{3})) matches? ] [ parse-snow-depth ] }
|
||||
{ [ dup R(( 5\d{4})) matches? ] [ parse-1hr-pressure ] }
|
||||
{ [ dup R(( 6[\d\\/]{4})) matches? ] [ parse-6hr-precipitation ] }
|
||||
{ [ dup R(( 7\d{4})) matches? ] [ parse-24hr-precipitation ] }
|
||||
{ [ dup R(( 8\\/\d{3})) matches? ] [ parse-cloud-cover ] }
|
||||
{ [ dup R(( 931\d{3})) matches? ] [ parse-6hr-snowfall ] }
|
||||
{ [ dup R(( 933\d{3})) matches? ] [ parse-water-equivalent-snow ] }
|
||||
{ [ dup R(( 98\d{3})) matches? ] [ parse-duration-of-sunshine ] }
|
||||
{ [ dup R(( T\d{4,8})) matches? ] [ parse-1hr-temp ] }
|
||||
{ [ dup R(( \d{3}\d{2,3}\\/\d{2,4})) matches? ] [ parse-peak-wind ] }
|
||||
{ [ dup R(( P\d{4})) matches? ] [ parse-1hr-precipitation ] }
|
||||
{ [ dup R(( SLP\d{3})) matches? ] [ parse-sea-level-pressure ] }
|
||||
{ [ dup R(( LTG\w+)) matches? ] [ parse-lightning ] }
|
||||
{ [ dup R(( PROB\d+)) matches? ] [ parse-probability ] }
|
||||
{ [ dup R(( \d{3}V\d{3})) matches? ] [ parse-varying ] }
|
||||
{ [ dup R(( [^-]+(-[^-]+)+)) matches? ] [ parse-from-to ] }
|
||||
{ [ dup R(( [^\\/]+(\\/[^\\/]+)+)) matches? ] [ ] }
|
||||
{ [ dup R(( \d+.\d+)) matches? ] [ ] }
|
||||
{ [ dup re-recent-weather matches? ] [ parse-recent-weather ] }
|
||||
{ [ dup re-weather matches? ] [ parse-weather ] }
|
||||
{ [ dup re-sky-condition matches? ] [ parse-sky-condition ] }
|
||||
|
@ -596,12 +596,12 @@ M: string metar.
|
|||
[ parse-altitude ] [ parse-wind ] bi* prepend
|
||||
"wind shear " prepend ;
|
||||
|
||||
CONSTANT: re-from-timestamp R/ FM\d{6}/ ;
|
||||
CONSTANT: re-from-timestamp R(( FM\d{6})) ;
|
||||
|
||||
: parse-from-timestamp ( str -- str' )
|
||||
"FM" ?head drop parse-timestamp ;
|
||||
|
||||
CONSTANT: re-valid-timestamp R/ \d{4}\/\d{4}/ ;
|
||||
CONSTANT: re-valid-timestamp R(( \d{4}\/\d{4})) ;
|
||||
|
||||
: parse-valid-timestamp ( str -- str' )
|
||||
"/" split1 [ "00" append parse-timestamp ] bi@ " to " glue ;
|
||||
|
|
|
@ -10,7 +10,7 @@ in: regexp.combinators.tests
|
|||
{ f f f } [ "food" "ibar" "ba" [ strings matches? ] tri@ ] unit-test
|
||||
|
||||
: conj ( -- regexp )
|
||||
{ R/ .*a/ R/ b.*/ } <and> ;
|
||||
{ R(( .*a)) R(( b.*)) } <and> ;
|
||||
|
||||
{ t } [ "bljhasflsda" conj matches? ] unit-test
|
||||
{ f } [ "bsdfdfs" conj matches? ] unit-test
|
||||
|
|
|
@ -13,7 +13,7 @@ in: regexp.combinators
|
|||
|
||||
PRIVATE>
|
||||
|
||||
CONSTANT: <nothing> R/ (?~.*)/s ;
|
||||
CONSTANT: <nothing> R[[ (?~.*)]]s ;
|
||||
|
||||
: <literal> ( string -- regexp )
|
||||
[ "\\Q" "\\E" surround ] [ <concatenation> ] bi make-regexp ; foldable
|
||||
|
|
|
@ -7,7 +7,7 @@ in: regexp.prettyprint
|
|||
M: regexp pprint*
|
||||
[
|
||||
[
|
||||
[ raw>> "\\/" "\\\\/" replace "R/ " % % "/" % ]
|
||||
[ raw>> "R[[ " % % "]]" % ]
|
||||
[ options>> options>string % ] bi
|
||||
] "" make
|
||||
] keep present-text ;
|
||||
|
|
|
@ -27,20 +27,20 @@ ARTICLE: "regexp" "Regular expressions"
|
|||
|
||||
ARTICLE: "regexp-intro" "A quick introduction to regular expressions"
|
||||
"Regular expressions are a terse way to do certain simple string processing tasks. For example, to replace all instances of " { $snippet "foo" } " in one string with " { $snippet "bar" } ", the following can be used:"
|
||||
{ $code "R/ foo/ \"bar\" re-replace" }
|
||||
{ $code "R(( foo)) \"bar\" re-replace" }
|
||||
"That could be done with sequence operations, but consider doing this replacement for an arbitrary number of o's, at least two:"
|
||||
{ $code "R/ foo+/ \"bar\" re-replace" }
|
||||
{ $code "R(( foo+)) \"bar\" re-replace" }
|
||||
"The " { $snippet "+" } " operator matches one or more occurrences of the previous expression; in this case " { $snippet "o" } ". Another useful feature is alternation. Say we want to do this replacement with fooooo or boooo. Then we could use the code"
|
||||
{ $code "R/ (f|b)oo+/ \"bar\" re-replace" }
|
||||
{ $code "R(( (f|b)oo+)) \"bar\" re-replace" }
|
||||
"To search a file for all lines that match a given regular expression, you could use code like this:"
|
||||
{ $code "\"file.txt\" ascii file-lines [ R/ (f|b)oo+/ re-contains? ] filter" }
|
||||
{ $code "\"file.txt\" ascii file-lines [ R(( (f|b)oo+)) re-contains? ] filter" }
|
||||
"To test if a string in its entirety matches a regular expression, the following can be used:"
|
||||
{ $example "use: regexp \"fooo\" R/ (b|f)oo+/ matches? ." "t" }
|
||||
{ $example "use: regexp \"fooo\" R(( (b|f)oo+)) matches? ." "t" }
|
||||
"Regular expressions can't be used for all parsing tasks. For example, they are not powerful enough to match balancing parentheses." ;
|
||||
|
||||
ARTICLE: "regexp-construction" "Constructing regular expressions"
|
||||
"Most of the time, regular expressions are literals and the parsing word should be used, to construct them at parse time. This ensures that they are only compiled once, and gives parse time syntax checking."
|
||||
{ $subsections postpone\ R/ }
|
||||
{ $subsections \ R[[ }
|
||||
"Sometimes, regular expressions need to be constructed at run time instead; for example, in a text editor, the user might input a regular expression to search for in a document."
|
||||
{ $subsections <regexp> <optioned-regexp> }
|
||||
"Another approach is to use " { $vocab-link "regexp.combinators" } "." ;
|
||||
|
@ -48,9 +48,9 @@ ARTICLE: "regexp-construction" "Constructing regular expressions"
|
|||
ARTICLE: "regexp-syntax" "Regular expression syntax"
|
||||
"Regexp syntax is largely compatible with Perl, Java and extended POSIX regexps, but not completely. Below, the syntax is documented."
|
||||
{ $heading "Characters" }
|
||||
"At its core, regular expressions consist of character literals. For example, " { $snippet "R/ f/" } " is a regular expression matching just the string 'f'. In addition, the normal escape codes are provided, like " { $snippet "\\t" } " for the tab character and " { $snippet "\\uxxxxxx" } " for an arbitrary Unicode code point, by its hex value. In addition, any character can be preceded by a backslash to escape it, unless this has special meaning. For example, to match a literal opening parenthesis, use " { $snippet "\\(" } "."
|
||||
"At its core, regular expressions consist of character literals. For example, " { $snippet "R(( f))" } " is a regular expression matching just the string 'f'. In addition, the normal escape codes are provided, like " { $snippet "\\t" } " for the tab character and " { $snippet "\\uxxxxxx" } " for an arbitrary Unicode code point, by its hex value. In addition, any character can be preceded by a backslash to escape it, unless this has special meaning. For example, to match a literal opening parenthesis, use " { $snippet "\\(" } "."
|
||||
{ $heading "Concatenation, alternation and grouping" }
|
||||
"Regular expressions can be built out of multiple characters by concatenation. For example, " { $snippet "R/ ab/" } " matches a followed by b. The " { $snippet "|" } " (alternation) operator can construct a regexp which matches one of two alternatives. Parentheses can be used for grouping. So " { $snippet "R/ f(oo|ar)/" } " would match either 'foo' or 'far'."
|
||||
"Regular expressions can be built out of multiple characters by concatenation. For example, " { $snippet "R(( ab))" } " matches a followed by b. The " { $snippet "|" } " (alternation) operator can construct a regexp which matches one of two alternatives. Parentheses can be used for grouping. So " { $snippet "R[[ f(oo|ar)]]" } " would match either 'foo' or 'far'."
|
||||
{ $heading "Character classes" }
|
||||
"Square brackets define a convenient way to refer to a set of characters. For example, " { $snippet "[ab]" } " refers to either a or b. And " { $snippet "[a-z]" } " refers to all of the characters between a and z, in code point order. You can use these together, as in " { $snippet "[ac-fz]" } " which matches all of the characters between c and f, in addition to a and z. Character classes can be negated using a caret, as in " { $snippet "[^a]" } " which matches all characters which are not a."
|
||||
{ $heading "Predefined character classes" }
|
||||
|
@ -110,7 +110,7 @@ ARTICLE: "regexp-syntax" "Regular expression syntax"
|
|||
{ { $snippet "(?<=a)" } "Asserts that the current position is immediately preceded by a" }
|
||||
{ { $snippet "(?<!a)" } "Asserts that the current position is not immediately preceded by a" } }
|
||||
{ $heading "Quotation" }
|
||||
"To make it convenient to have a long string which uses regexp operators, a special syntax is provided. If a substring begins with " { $snippet "\\Q" } " then everything until " { $snippet "\\E" } " is quoted (escaped). For example, " { $snippet "R/ \\Qfoo\\bar|baz()\\E/" } " matches exactly the string " { $snippet "\"foo\\bar|baz()\"" } "."
|
||||
"To make it convenient to have a long string which uses regexp operators, a special syntax is provided. If a substring begins with " { $snippet "\\Q" } " then everything until " { $snippet "\\E" } " is quoted (escaped). For example, " { $snippet "R[[ \\Qfoo\\bar|baz()\\E]]" } " matches exactly the string " { $snippet "\"foo\\bar|baz()\"" } "."
|
||||
{ $heading "Unsupported features" }
|
||||
{ $subheading "Group capture" }
|
||||
{ $subheading "Reluctant and possessive quantifiers" }
|
||||
|
@ -140,23 +140,23 @@ $nl
|
|||
} ;
|
||||
|
||||
HELP: case-insensitive
|
||||
{ $syntax "R/ .../i" }
|
||||
{ $syntax "R[[ ...]]i" }
|
||||
{ $description "On regexps, the " { $snippet "i" } " option makes the match case-insensitive. Currently, this is handled incorrectly with respect to Unicode, as characters like ß do not expand into SS in upper case. This should be fixed in a future version." } ;
|
||||
|
||||
HELP: unix-lines
|
||||
{ $syntax "R/ .../d" }
|
||||
{ $syntax "R[[ ...]]d" }
|
||||
{ $description "With this mode, only newlines (" { $snippet "\\n" } ") are recognized for line breaking. This affects " { $snippet "$" } " and " { $snippet "^" } " when in multiline mode." } ;
|
||||
|
||||
HELP: multiline
|
||||
{ $syntax "R/ .../m" }
|
||||
{ $syntax "R[[ ...]]m" }
|
||||
{ $description "This mode makes the zero-width constraints " { $snippet "$" } " and " { $snippet "^" } " match the beginning or end of a line. Otherwise, they only match the beginning or end of the input text. This can be used together with " { $link dotall } "." } ;
|
||||
|
||||
HELP: dotall
|
||||
{ $syntax "R/ .../s" }
|
||||
{ $syntax "R[[ ...]]s" }
|
||||
{ $description "This mode, traditionally called single line mode, makes " { $snippet "." } " match everything, including line breaks. By default, it does not match line breaking characters. This can be used together with " { $link multiline } "." } ;
|
||||
|
||||
HELP: reversed-regexp
|
||||
{ $syntax "R/ .../r" }
|
||||
{ $syntax "R[[ ...]]r" }
|
||||
{ $description "When running a regexp compiled with this mode, matches will start from the end of the input string, going towards the beginning." } ;
|
||||
|
||||
ARTICLE: "regexp-theory" "The theory of regular expressions"
|
||||
|
@ -202,8 +202,8 @@ HELP: <optioned-regexp>
|
|||
{ $values { "string" string } { "options" "a string of " { $link "regexp-options" } } { "regexp" regexp } }
|
||||
{ $description "Given a string in regular expression syntax, and a string of options, creates a regular expression object. When it is first used for matching, a DFA is compiled, and this DFA is stored for reuse so it is only compiled once." } ;
|
||||
|
||||
HELP: R/
|
||||
{ $syntax "R/ foo.*|[a-zA-Z]bar/options" }
|
||||
HELP: \ R[[
|
||||
{ $syntax "R[[ foo.*|[a-zA-Z]bar]]options" }
|
||||
{ $description "Literal syntax for a regular expression. When this syntax is used, the DFA is compiled at compile-time, rather than on first use. The syntax for the " { $snippet "options" } " string is documented in " { $link "regexp-options" } "." } ;
|
||||
|
||||
HELP: regexp
|
||||
|
@ -231,7 +231,7 @@ HELP: re-replace
|
|||
{ $examples
|
||||
{ $example
|
||||
"USING: prettyprint regexp ;"
|
||||
"\"python is pythonic\" R/ python/ \"factor\" re-replace ."
|
||||
"\"python is pythonic\" R[[ python]] \"factor\" re-replace ."
|
||||
"\"factor is factoric\"" }
|
||||
} ;
|
||||
|
||||
|
@ -241,7 +241,7 @@ HELP: re-replace-with
|
|||
{ $examples
|
||||
{ $example
|
||||
"USING: ascii prettyprint regexp ;"
|
||||
"\"abcdefghi\" R/ [aeiou]/ [ >upper ] re-replace-with ."
|
||||
"\"abcdefghi\" R(( [aeiou])) [ >upper ] re-replace-with ."
|
||||
"\"AbcdEfghI\"" }
|
||||
} ;
|
||||
|
||||
|
|
|
@ -42,7 +42,7 @@ in: regexp-tests
|
|||
! Off by default.
|
||||
{ f } [ "\n" "." <regexp> matches? ] unit-test
|
||||
{ t } [ "\n" "(?s:.)" <regexp> matches? ] unit-test
|
||||
{ t } [ "\n" R/ ./s matches? ] unit-test
|
||||
{ t } [ "\n" R[[ .]]s matches? ] unit-test
|
||||
{ f } [ "\n\n" "(?s:.)." <regexp> matches? ] unit-test
|
||||
|
||||
{ f } [ "" ".+" <regexp> matches? ] unit-test
|
||||
|
@ -60,7 +60,7 @@ in: regexp-tests
|
|||
|
||||
{ t } [ "/" "\\/" <regexp> matches? ] unit-test
|
||||
|
||||
{ t } [ "a" R/ a/i matches? ] unit-test
|
||||
{ t } [ "a" R[[ a]]i matches? ] unit-test
|
||||
|
||||
{ t } [ "" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
{ t } [ "a" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
|
@ -208,39 +208,39 @@ in: regexp-tests
|
|||
{ "aaa" } [ "aaacb" "a*" <regexp> first-match >string ] unit-test
|
||||
{ "aa" } [ "aaacb" "aa?" <regexp> first-match >string ] unit-test
|
||||
|
||||
{ t } [ "aaa" R/ AAA/i matches? ] unit-test
|
||||
{ f } [ "aax" R/ AAA/i matches? ] unit-test
|
||||
{ t } [ "aaa" R/ A*/i matches? ] unit-test
|
||||
{ f } [ "aaba" R/ A*/i matches? ] unit-test
|
||||
{ t } [ "b" R/ [AB]/i matches? ] unit-test
|
||||
{ f } [ "c" R/ [AB]/i matches? ] unit-test
|
||||
{ t } [ "c" R/ [A-Z]/i matches? ] unit-test
|
||||
{ f } [ "3" R/ [A-Z]/i matches? ] unit-test
|
||||
{ t } [ "aaa" R[[ AAA]]i matches? ] unit-test
|
||||
{ f } [ "aax" R[[ AAA]]i matches? ] unit-test
|
||||
{ t } [ "aaa" R[[ A*]]i matches? ] unit-test
|
||||
{ f } [ "aaba" R[[ A*]]i matches? ] unit-test
|
||||
{ t } [ "b" R{{ [AB]}}i matches? ] unit-test
|
||||
{ f } [ "c" R{{ [AB]}}i matches? ] unit-test
|
||||
{ t } [ "c" R{{ [A-Z]}}i matches? ] unit-test
|
||||
{ f } [ "3" R{{ [A-Z]}}i matches? ] unit-test
|
||||
|
||||
{ t } [ "a" "(?i:a)" <regexp> matches? ] unit-test
|
||||
{ t } [ "a" "(?i:a)" <regexp> matches? ] unit-test
|
||||
{ t } [ "A" "(?i:a)" <regexp> matches? ] unit-test
|
||||
{ t } [ "A" "(?i:a)" <regexp> matches? ] unit-test
|
||||
|
||||
{ t } [ "a" R/ (?-i:a)/i matches? ] unit-test
|
||||
{ t } [ "a" R/ (?-i:a)/i matches? ] unit-test
|
||||
{ f } [ "A" R/ (?-i:a)/i matches? ] unit-test
|
||||
{ f } [ "A" R/ (?-i:a)/i matches? ] unit-test
|
||||
{ t } [ "a" R{{ (?-i:a)}}i matches? ] unit-test
|
||||
{ t } [ "a" R{{ (?-i:a)}}i matches? ] unit-test
|
||||
{ f } [ "A" R{{ (?-i:a)}}i matches? ] unit-test
|
||||
{ f } [ "A" R{{ (?-i:a)}}i matches? ] unit-test
|
||||
|
||||
{ f } [ "A" "[a-z]" <regexp> matches? ] unit-test
|
||||
{ t } [ "A" R/ [a-z]/i matches? ] unit-test
|
||||
{ t } [ "A" R{{ [a-z]}}i matches? ] unit-test
|
||||
|
||||
{ f } [ "A" "\\p{Lower}" <regexp> matches? ] unit-test
|
||||
{ t } [ "A" R/ \p{Lower}/i matches? ] unit-test
|
||||
{ t } [ "A" R[[ \p{Lower}]]i matches? ] unit-test
|
||||
|
||||
{ t } [ "abc" R/ abc/r matches? ] unit-test
|
||||
{ t } [ "abc" R/ a[bB][cC]/r matches? ] unit-test
|
||||
{ t } [ "abc" R{{ abc}}r matches? ] unit-test
|
||||
{ t } [ "abc" R{{ a[bB][cC]}}r matches? ] unit-test
|
||||
|
||||
{ t } [ 3 "xabc" R/ abc/r match-index-from >boolean ] unit-test
|
||||
{ t } [ 3 "xabc" R/ a[bB][cC]/r match-index-from >boolean ] unit-test
|
||||
{ t } [ 3 "xabc" R{{ abc}}r match-index-from >boolean ] unit-test
|
||||
{ t } [ 3 "xabc" R{{ a[bB][cC]}}r match-index-from >boolean ] unit-test
|
||||
|
||||
{ 2 } [ 0 "llamallol" R/ ll/ match-index-from ] unit-test
|
||||
{ 5 } [ 8 "lolmallol" R/ lol/r match-index-from ] unit-test
|
||||
{ 2 } [ 0 "llamallol" R{{ ll}} match-index-from ] unit-test
|
||||
{ 5 } [ 8 "lolmallol" R{{ lol}}r match-index-from ] unit-test
|
||||
|
||||
{ t } [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||
{ f } [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||
|
@ -274,66 +274,66 @@ in: regexp-tests
|
|||
{ "b" } [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> first-match >string ] unit-test
|
||||
|
||||
{ T{ slice { from 5 } { to 10 } { seq "hellohello" } } }
|
||||
[ "hellohello" R/ hello/r first-match ]
|
||||
[ "hellohello" R{{ hello}}r first-match ]
|
||||
unit-test
|
||||
|
||||
{ { "1" "2" "3" "4" } }
|
||||
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
|
||||
[ "1ABC2DEF3GHI4" R{{ [A-Z]+}} re-split [ >string ] map ] unit-test
|
||||
|
||||
{ { "1" "2" "3" "4" "" } }
|
||||
[ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
|
||||
[ "1ABC2DEF3GHI4JK" R{{ [A-Z]+}} re-split [ >string ] map ] unit-test
|
||||
|
||||
{ { "" } } [ "" R/ =/ re-split [ >string ] map ] unit-test
|
||||
{ { "" } } [ "" R{{ =}} re-split [ >string ] map ] unit-test
|
||||
|
||||
{ { "a" "" } } [ "a=" R/ =/ re-split [ >string ] map ] unit-test
|
||||
{ { "a" "" } } [ "a=" R{{ =}} re-split [ >string ] map ] unit-test
|
||||
|
||||
{ { "he" "o" } } [ "hello" R/ l+/ re-split [ >string ] map ] unit-test
|
||||
{ { "he" "o" } } [ "hello" R{{ l+}} re-split [ >string ] map ] unit-test
|
||||
|
||||
{ { "h" "llo" } } [ "hello" R/ e+/ re-split [ >string ] map ] unit-test
|
||||
{ { "h" "llo" } } [ "hello" R{{ e+}} re-split [ >string ] map ] unit-test
|
||||
|
||||
{ { "" "h" "" "l" "l" "o" "" } } [ "hello" R/ e*/ re-split [ >string ] map ] unit-test
|
||||
{ { "" "h" "" "l" "l" "o" "" } } [ "hello" R{{ e*}} re-split [ >string ] map ] unit-test
|
||||
|
||||
{ { { 0 5 "hellohello" } { 5 10 "hellohello" } } }
|
||||
[ "hellohello" R/ hello/ [ 3array ] map-matches ]
|
||||
[ "hellohello" R{{ hello}} [ 3array ] map-matches ]
|
||||
unit-test
|
||||
|
||||
{ { { 5 10 "hellohello" } { 0 5 "hellohello" } } }
|
||||
[ "hellohello" R/ hello/r [ 3array ] map-matches ]
|
||||
[ "hellohello" R{{ hello}}r [ 3array ] map-matches ]
|
||||
unit-test
|
||||
|
||||
{ { "ABC" "DEF" "GHI" } }
|
||||
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ all-matching-subseqs ] unit-test
|
||||
[ "1ABC2DEF3GHI4" R{{ [A-Z]+}} all-matching-subseqs ] unit-test
|
||||
|
||||
{ { "ee" "e" } } [ "heellohello" R/ e+/ all-matching-subseqs ] unit-test
|
||||
{ { "e" "ee" } } [ "heellohello" R/ e+/r all-matching-subseqs ] unit-test
|
||||
{ { "ee" "e" } } [ "heellohello" R{{ e+}} all-matching-subseqs ] unit-test
|
||||
{ { "e" "ee" } } [ "heellohello" R{{ e+}}r all-matching-subseqs ] unit-test
|
||||
|
||||
{ 3 } [ "1ABC2DEF3GHI4" R/ [A-Z]+/ count-matches ] unit-test
|
||||
{ 3 } [ "1ABC2DEF3GHI4" R{{ [A-Z]+}} count-matches ] unit-test
|
||||
|
||||
{ 3 } [ "1ABC2DEF3GHI4" R/ [A-Z]+/r count-matches ] unit-test
|
||||
{ 3 } [ "1ABC2DEF3GHI4" R{{ [A-Z]+}}r count-matches ] unit-test
|
||||
|
||||
{ 1 } [ "" R/ / count-matches ] unit-test
|
||||
{ 1 } [ "" R{{ }} count-matches ] unit-test
|
||||
|
||||
{ 1 } [ "" R/ /r count-matches ] unit-test
|
||||
{ 1 } [ "" R{{ }}r count-matches ] unit-test
|
||||
|
||||
{ 0 } [ "123" R/ [A-Z]+/ count-matches ] unit-test
|
||||
{ 0 } [ "123" R{{ [A-Z]+}} count-matches ] unit-test
|
||||
|
||||
{ 0 } [ "123" R/ [A-Z]+/r count-matches ] unit-test
|
||||
{ 0 } [ "123" R{{ [A-Z]+}}r count-matches ] unit-test
|
||||
|
||||
{ 6 } [ "hello" R/ e*/ count-matches ] unit-test
|
||||
{ 6 } [ "hello" R{{ e*}} count-matches ] unit-test
|
||||
|
||||
{ 6 } [ "hello" R/ e*/r count-matches ] unit-test
|
||||
{ 6 } [ "hello" R{{ e*}}r count-matches ] unit-test
|
||||
|
||||
{ 11 } [ "hello world" R/ l*/ count-matches ] unit-test
|
||||
{ 11 } [ "hello world" R{{ l*}} count-matches ] unit-test
|
||||
|
||||
{ 11 } [ "hello world" R/ l*/r count-matches ] unit-test
|
||||
{ 11 } [ "hello world" R{{ l*}}r count-matches ] unit-test
|
||||
|
||||
{ 1 } [ "hello" R/ e+/ count-matches ] unit-test
|
||||
{ 1 } [ "hello" R{{ e+}} count-matches ] unit-test
|
||||
|
||||
{ 2 } [ "hello world" R/ l+/r count-matches ] unit-test
|
||||
{ 2 } [ "hello world" R{{ l+}}r count-matches ] unit-test
|
||||
|
||||
{ "1.2.3.4." } [ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ "." re-replace ] unit-test
|
||||
{ "XhXXlXlXoX XwXoXrXlXdX" } [ "hello world" R/ e*/ "X" re-replace ] unit-test
|
||||
{ "-- title --" } [ "== title ==" R/ =/ "-" re-replace ] unit-test
|
||||
{ "1.2.3.4." } [ "1ABC2DEF3GHI4JK" R{{ [A-Z]+}} "." re-replace ] unit-test
|
||||
{ "XhXXlXlXoX XwXoXrXlXdX" } [ "hello world" R{{ e*}} "X" re-replace ] unit-test
|
||||
{ "-- title --" } [ "== title ==" R{{ =}} "-" re-replace ] unit-test
|
||||
|
||||
{ "" } [ "ab" "a(?!b)" <regexp> first-match >string ] unit-test
|
||||
{ "a" } [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test
|
||||
|
@ -349,124 +349,124 @@ unit-test
|
|||
{ f } [ "foobxr" "foo(?=bar)" <regexp> first-match ] unit-test
|
||||
|
||||
! Bug in parsing word
|
||||
{ t } [ "a" R/ a/ matches? ] unit-test
|
||||
{ t } [ "a" R{{ a}} matches? ] unit-test
|
||||
|
||||
! Testing negation
|
||||
{ f } [ "a" R/ (?~a)/ matches? ] unit-test
|
||||
{ t } [ "aa" R/ (?~a)/ matches? ] unit-test
|
||||
{ t } [ "bb" R/ (?~a)/ matches? ] unit-test
|
||||
{ t } [ "" R/ (?~a)/ matches? ] unit-test
|
||||
{ f } [ "a" R{{ (?~a)}} matches? ] unit-test
|
||||
{ t } [ "aa" R{{ (?~a)}} matches? ] unit-test
|
||||
{ t } [ "bb" R{{ (?~a)}} matches? ] unit-test
|
||||
{ t } [ "" R{{ (?~a)}} matches? ] unit-test
|
||||
|
||||
{ f } [ "a" R/ (?~a+|b)/ matches? ] unit-test
|
||||
{ f } [ "aa" R/ (?~a+|b)/ matches? ] unit-test
|
||||
{ t } [ "bb" R/ (?~a+|b)/ matches? ] unit-test
|
||||
{ f } [ "b" R/ (?~a+|b)/ matches? ] unit-test
|
||||
{ t } [ "" R/ (?~a+|b)/ matches? ] unit-test
|
||||
{ f } [ "a" R{{ (?~a+|b)}} matches? ] unit-test
|
||||
{ f } [ "aa" R{{ (?~a+|b)}} matches? ] unit-test
|
||||
{ t } [ "bb" R{{ (?~a+|b)}} matches? ] unit-test
|
||||
{ f } [ "b" R{{ (?~a+|b)}} matches? ] unit-test
|
||||
{ t } [ "" R{{ (?~a+|b)}} matches? ] unit-test
|
||||
|
||||
! Intersecting classes
|
||||
{ t } [ "ab" R/ ac|\p{Lower}b/ matches? ] unit-test
|
||||
{ t } [ "ab" R/ ac|[a-z]b/ matches? ] unit-test
|
||||
{ t } [ "ac" R/ ac|\p{Lower}b/ matches? ] unit-test
|
||||
{ t } [ "ac" R/ ac|[a-z]b/ matches? ] unit-test
|
||||
{ t } [ "ac" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
|
||||
{ t } [ "ab" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
|
||||
{ t } [ "πb" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
|
||||
{ f } [ "πc" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
|
||||
{ f } [ "Ab" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
|
||||
{ t } [ "ab" R{{ ac|\p{Lower}b}} matches? ] unit-test
|
||||
{ t } [ "ab" R{{ ac|[a-z]b}} matches? ] unit-test
|
||||
{ t } [ "ac" R{{ ac|\p{Lower}b}} matches? ] unit-test
|
||||
{ t } [ "ac" R{{ ac|[a-z]b}} matches? ] unit-test
|
||||
{ t } [ "ac" R{{ [a-zA-Z]c|\p{Lower}b}} matches? ] unit-test
|
||||
{ t } [ "ab" R{{ [a-zA-Z]c|\p{Lower}b}} matches? ] unit-test
|
||||
{ t } [ "πb" R{{ [a-zA-Z]c|\p{Lower}b}} matches? ] unit-test
|
||||
{ f } [ "πc" R{{ [a-zA-Z]c|\p{Lower}b}} matches? ] unit-test
|
||||
{ f } [ "Ab" R{{ [a-zA-Z]c|\p{Lower}b}} matches? ] unit-test
|
||||
|
||||
{ t } [ "aaaa" R/ .*a./ matches? ] unit-test
|
||||
{ t } [ "aaaa" R{{ .*a.}} matches? ] unit-test
|
||||
|
||||
{ f } [ "ab" R/ (?~ac|\p{Lower}b)/ matches? ] unit-test
|
||||
{ f } [ "ab" R/ (?~ac|[a-z]b)/ matches? ] unit-test
|
||||
{ f } [ "ac" R/ (?~ac|\p{Lower}b)/ matches? ] unit-test
|
||||
{ f } [ "ac" R/ (?~ac|[a-z]b)/ matches? ] unit-test
|
||||
{ f } [ "ac" R/ (?~[a-zA-Z]c|\p{Lower}b)/ matches? ] unit-test
|
||||
{ f } [ "ab" R/ (?~[a-zA-Z]c|\p{Lower}b)/ matches? ] unit-test
|
||||
{ f } [ "πb" R/ (?~[a-zA-Z]c|\p{Lower}b)/ matches? ] unit-test
|
||||
{ t } [ "πc" R/ (?~[a-zA-Z]c|\p{Lower}b)/ matches? ] unit-test
|
||||
{ t } [ "Ab" R/ (?~[a-zA-Z]c|\p{Lower}b)/ matches? ] unit-test
|
||||
{ f } [ "ab" R{{ (?~ac|\p{Lower}b)}} matches? ] unit-test
|
||||
{ f } [ "ab" R{{ (?~ac|[a-z]b)}} matches? ] unit-test
|
||||
{ f } [ "ac" R{{ (?~ac|\p{Lower}b)}} matches? ] unit-test
|
||||
{ f } [ "ac" R{{ (?~ac|[a-z]b)}} matches? ] unit-test
|
||||
{ f } [ "ac" R{{ (?~[a-zA-Z]c|\p{Lower}b)}} matches? ] unit-test
|
||||
{ f } [ "ab" R{{ (?~[a-zA-Z]c|\p{Lower}b)}} matches? ] unit-test
|
||||
{ f } [ "πb" R{{ (?~[a-zA-Z]c|\p{Lower}b)}} matches? ] unit-test
|
||||
{ t } [ "πc" R{{ (?~[a-zA-Z]c|\p{Lower}b)}} matches? ] unit-test
|
||||
{ t } [ "Ab" R{{ (?~[a-zA-Z]c|\p{Lower}b)}} matches? ] unit-test
|
||||
|
||||
! DFA is compiled when needed, or when literal
|
||||
{ regexp-initial-word } [ "foo" <regexp> dfa>> ] unit-test
|
||||
{ f } [ R/ foo/ dfa>> \ regexp-initial-word = ] unit-test
|
||||
{ f } [ R{{ foo}} dfa>> \ regexp-initial-word = ] unit-test
|
||||
|
||||
{ t } [ "a" R/ ^a/ matches? ] unit-test
|
||||
{ f } [ "\na" R/ ^a/ matches? ] unit-test
|
||||
{ f } [ "\r\na" R/ ^a/ matches? ] unit-test
|
||||
{ f } [ "\ra" R/ ^a/ matches? ] unit-test
|
||||
{ t } [ "a" R{{ ^a}} matches? ] unit-test
|
||||
{ f } [ "\na" R{{ ^a}} matches? ] unit-test
|
||||
{ f } [ "\r\na" R{{ ^a}} matches? ] unit-test
|
||||
{ f } [ "\ra" R{{ ^a}} matches? ] unit-test
|
||||
|
||||
{ 1 } [ "a" R/ ^a/ count-matches ] unit-test
|
||||
{ 0 } [ "\na" R/ ^a/ count-matches ] unit-test
|
||||
{ 0 } [ "\r\na" R/ ^a/ count-matches ] unit-test
|
||||
{ 0 } [ "\ra" R/ ^a/ count-matches ] unit-test
|
||||
{ 1 } [ "a" R{{ ^a}} count-matches ] unit-test
|
||||
{ 0 } [ "\na" R{{ ^a}} count-matches ] unit-test
|
||||
{ 0 } [ "\r\na" R{{ ^a}} count-matches ] unit-test
|
||||
{ 0 } [ "\ra" R{{ ^a}} count-matches ] unit-test
|
||||
|
||||
{ t } [ "a" R/ a$/ matches? ] unit-test
|
||||
{ f } [ "a\n" R/ a$/ matches? ] unit-test
|
||||
{ f } [ "a\r" R/ a$/ matches? ] unit-test
|
||||
{ f } [ "a\r\n" R/ a$/ matches? ] unit-test
|
||||
{ t } [ "a" R{{ a$}} matches? ] unit-test
|
||||
{ f } [ "a\n" R{{ a$}} matches? ] unit-test
|
||||
{ f } [ "a\r" R{{ a$}} matches? ] unit-test
|
||||
{ f } [ "a\r\n" R{{ a$}} matches? ] unit-test
|
||||
|
||||
{ 1 } [ "a" R/ a$/ count-matches ] unit-test
|
||||
{ 0 } [ "a\n" R/ a$/ count-matches ] unit-test
|
||||
{ 0 } [ "a\r" R/ a$/ count-matches ] unit-test
|
||||
{ 0 } [ "a\r\n" R/ a$/ count-matches ] unit-test
|
||||
{ 1 } [ "a" R{{ a$}} count-matches ] unit-test
|
||||
{ 0 } [ "a\n" R{{ a$}} count-matches ] unit-test
|
||||
{ 0 } [ "a\r" R{{ a$}} count-matches ] unit-test
|
||||
{ 0 } [ "a\r\n" R{{ a$}} count-matches ] unit-test
|
||||
|
||||
{ t } [ "a" R/ a$|b$/ matches? ] unit-test
|
||||
{ t } [ "b" R/ a$|b$/ matches? ] unit-test
|
||||
{ f } [ "ab" R/ a$|b$/ matches? ] unit-test
|
||||
{ t } [ "ba" R/ ba$|b$/ matches? ] unit-test
|
||||
{ t } [ "a" R{{ a$|b$}} matches? ] unit-test
|
||||
{ t } [ "b" R{{ a$|b$}} matches? ] unit-test
|
||||
{ f } [ "ab" R{{ a$|b$}} matches? ] unit-test
|
||||
{ t } [ "ba" R{{ ba$|b$}} matches? ] unit-test
|
||||
|
||||
{ t } [ "a" R/ \Aa/ matches? ] unit-test
|
||||
{ f } [ "\na" R/ \Aaa/ matches? ] unit-test
|
||||
{ f } [ "\r\na" R/ \Aa/ matches? ] unit-test
|
||||
{ f } [ "\ra" R/ \Aa/ matches? ] unit-test
|
||||
{ t } [ "a" R{{ \Aa}} matches? ] unit-test
|
||||
{ f } [ "\na" R{{ \Aaa}} matches? ] unit-test
|
||||
{ f } [ "\r\na" R{{ \Aa}} matches? ] unit-test
|
||||
{ f } [ "\ra" R{{ \Aa}} matches? ] unit-test
|
||||
|
||||
{ t } [ "a" R/ \Aa/m matches? ] unit-test
|
||||
{ f } [ "\na" R/ \Aaa/m matches? ] unit-test
|
||||
{ f } [ "\r\na" R/ \Aa/m matches? ] unit-test
|
||||
{ f } [ "\ra" R/ \Aa/m matches? ] unit-test
|
||||
{ 0 } [ "\ra" R/ \Aa/m count-matches ] unit-test
|
||||
{ t } [ "a" R{{ \Aa}}m matches? ] unit-test
|
||||
{ f } [ "\na" R{{ \Aaa}}m matches? ] unit-test
|
||||
{ f } [ "\r\na" R{{ \Aa}}m matches? ] unit-test
|
||||
{ f } [ "\ra" R{{ \Aa}}m matches? ] unit-test
|
||||
{ 0 } [ "\ra" R{{ \Aa}}m count-matches ] unit-test
|
||||
|
||||
{ f } [ "\r\n\n\n\nam" R/ ^am/m matches? ] unit-test
|
||||
{ 1 } [ "\r\n\n\n\nam" R/ ^am/m count-matches ] unit-test
|
||||
{ f } [ "\r\n\n\n\nam" R{{ ^am}}m matches? ] unit-test
|
||||
{ 1 } [ "\r\n\n\n\nam" R{{ ^am}}m count-matches ] unit-test
|
||||
|
||||
{ t } [ "a" R/ \Aa\z/m matches? ] unit-test
|
||||
{ f } [ "a\n" R/ \Aa\z/m matches? ] unit-test
|
||||
{ t } [ "a" R{{ \Aa\z}}m matches? ] unit-test
|
||||
{ f } [ "a\n" R{{ \Aa\z}}m matches? ] unit-test
|
||||
|
||||
{ f } [ "a\r\n" R/ \Aa\Z/m matches? ] unit-test
|
||||
{ f } [ "a\n" R/ \Aa\Z/m matches? ] unit-test
|
||||
{ 1 } [ "a\r\n" R/ \Aa\Z/m count-matches ] unit-test
|
||||
{ 1 } [ "a\n" R/ \Aa\Z/m count-matches ] unit-test
|
||||
{ f } [ "a\r\n" R{{ \Aa\Z}}m matches? ] unit-test
|
||||
{ f } [ "a\n" R{{ \Aa\Z}}m matches? ] unit-test
|
||||
{ 1 } [ "a\r\n" R{{ \Aa\Z}}m count-matches ] unit-test
|
||||
{ 1 } [ "a\n" R{{ \Aa\Z}}m count-matches ] unit-test
|
||||
|
||||
{ t } [ "a" R/ \Aa\Z/m matches? ] unit-test
|
||||
{ f } [ "\na" R/ \Aaa\Z/m matches? ] unit-test
|
||||
{ f } [ "\r\na" R/ \Aa\Z/m matches? ] unit-test
|
||||
{ f } [ "\ra" R/ \Aa\Z/m matches? ] unit-test
|
||||
{ t } [ "a" R{{ \Aa\Z}}m matches? ] unit-test
|
||||
{ f } [ "\na" R{{ \Aaa\Z}}m matches? ] unit-test
|
||||
{ f } [ "\r\na" R{{ \Aa\Z}}m matches? ] unit-test
|
||||
{ f } [ "\ra" R{{ \Aa\Z}}m matches? ] unit-test
|
||||
|
||||
{ 1 } [ "a" R/ \Aa\Z/m count-matches ] unit-test
|
||||
{ 0 } [ "\na" R/ \Aaa\Z/m count-matches ] unit-test
|
||||
{ 0 } [ "\r\na" R/ \Aa\Z/m count-matches ] unit-test
|
||||
{ 0 } [ "\ra" R/ \Aa\Z/m count-matches ] unit-test
|
||||
{ 1 } [ "a" R{{ \Aa\Z}}m count-matches ] unit-test
|
||||
{ 0 } [ "\na" R{{ \Aaa\Z}}m count-matches ] unit-test
|
||||
{ 0 } [ "\r\na" R{{ \Aa\Z}}m count-matches ] unit-test
|
||||
{ 0 } [ "\ra" R{{ \Aa\Z}}m count-matches ] unit-test
|
||||
|
||||
{ t } [ "a" R/ ^a/m matches? ] unit-test
|
||||
{ f } [ "\na" R/ ^a/m matches? ] unit-test
|
||||
{ 1 } [ "\na" R/ ^a/m count-matches ] unit-test
|
||||
{ 1 } [ "\r\na" R/ ^a/m count-matches ] unit-test
|
||||
{ 1 } [ "\ra" R/ ^a/m count-matches ] unit-test
|
||||
{ t } [ "a" R{{ ^a}}m matches? ] unit-test
|
||||
{ f } [ "\na" R{{ ^a}}m matches? ] unit-test
|
||||
{ 1 } [ "\na" R{{ ^a}}m count-matches ] unit-test
|
||||
{ 1 } [ "\r\na" R{{ ^a}}m count-matches ] unit-test
|
||||
{ 1 } [ "\ra" R{{ ^a}}m count-matches ] unit-test
|
||||
|
||||
{ t } [ "a" R/ a$/m matches? ] unit-test
|
||||
{ f } [ "a\n" R/ a$/m matches? ] unit-test
|
||||
{ 1 } [ "a\n" R/ a$/m count-matches ] unit-test
|
||||
{ 1 } [ "a\r" R/ a$/m count-matches ] unit-test
|
||||
{ 1 } [ "a\r\n" R/ a$/m count-matches ] unit-test
|
||||
{ t } [ "a" R{{ a$}}m matches? ] unit-test
|
||||
{ f } [ "a\n" R{{ a$}}m matches? ] unit-test
|
||||
{ 1 } [ "a\n" R{{ a$}}m count-matches ] unit-test
|
||||
{ 1 } [ "a\r" R{{ a$}}m count-matches ] unit-test
|
||||
{ 1 } [ "a\r\n" R{{ a$}}m count-matches ] unit-test
|
||||
|
||||
{ f } [ "foobxr" "foo\\z" <regexp> first-match ] unit-test
|
||||
{ 3 } [ "foo" "foo\\z" <regexp> first-match length ] unit-test
|
||||
|
||||
{ t } [ "a foo b" R/ foo/ re-contains? ] unit-test
|
||||
{ f } [ "a bar b" R/ foo/ re-contains? ] unit-test
|
||||
{ t } [ "foo" R/ foo/ re-contains? ] unit-test
|
||||
{ t } [ "a foo b" R{{ foo}} re-contains? ] unit-test
|
||||
{ f } [ "a bar b" R{{ foo}} re-contains? ] unit-test
|
||||
{ t } [ "foo" R{{ foo}} re-contains? ] unit-test
|
||||
|
||||
{ { "foo" "fxx" "fab" } } [ "fab fxx foo" R/ f../r all-matching-subseqs ] unit-test
|
||||
{ { "foo" "fxx" "fab" } } [ "fab fxx foo" R{{ f..}}r all-matching-subseqs ] unit-test
|
||||
|
||||
{ t } [ "foo" "\\bfoo\\b" <regexp> re-contains? ] unit-test
|
||||
{ t } [ "afoob" "\\Bfoo\\B" <regexp> re-contains? ] unit-test
|
||||
|
@ -506,71 +506,71 @@ unit-test
|
|||
|
||||
{ 3 } [ "caba" "(?<=b)a" <regexp> first-match from>> ] unit-test
|
||||
|
||||
{ t } [ "\ra" R/ .^a/ms matches? ] unit-test
|
||||
{ f } [ "\ra" R/ .^a/mds matches? ] unit-test
|
||||
{ t } [ "\na" R/ .^a/ms matches? ] unit-test
|
||||
{ t } [ "\na" R/ .^a/mds matches? ] unit-test
|
||||
{ t } [ "\ra" R{{ .^a}}ms matches? ] unit-test
|
||||
{ f } [ "\ra" R{{ .^a}}mds matches? ] unit-test
|
||||
{ t } [ "\na" R{{ .^a}}ms matches? ] unit-test
|
||||
{ t } [ "\na" R{{ .^a}}mds matches? ] unit-test
|
||||
|
||||
{ t } [ "a\r" R/ a$./ms matches? ] unit-test
|
||||
{ f } [ "a\r" R/ a$./mds matches? ] unit-test
|
||||
{ t } [ "a\n" R/ a$./ms matches? ] unit-test
|
||||
{ t } [ "a\n" R/ a$./mds matches? ] unit-test
|
||||
{ t } [ "a\r" R{{ a$.}}ms matches? ] unit-test
|
||||
{ f } [ "a\r" R{{ a$.}}mds matches? ] unit-test
|
||||
{ t } [ "a\n" R{{ a$.}}ms matches? ] unit-test
|
||||
{ t } [ "a\n" R{{ a$.}}mds matches? ] unit-test
|
||||
|
||||
! Unicode categories
|
||||
{ t } [ "a" R/ \p{L}/ matches? ] unit-test
|
||||
{ t } [ "A" R/ \p{L}/ matches? ] unit-test
|
||||
{ f } [ " " R/ \p{L}/ matches? ] unit-test
|
||||
{ f } [ "a" R/ \P{L}/ matches? ] unit-test
|
||||
{ f } [ "A" R/ \P{L}/ matches? ] unit-test
|
||||
{ t } [ " " R/ \P{L}/ matches? ] unit-test
|
||||
{ t } [ "a" R(( \p{L})) matches? ] unit-test
|
||||
{ t } [ "A" R(( \p{L})) matches? ] unit-test
|
||||
{ f } [ " " R(( \p{L})) matches? ] unit-test
|
||||
{ f } [ "a" R(( \P{L})) matches? ] unit-test
|
||||
{ f } [ "A" R(( \P{L})) matches? ] unit-test
|
||||
{ t } [ " " R(( \P{L})) matches? ] unit-test
|
||||
|
||||
{ t } [ "a" R/ \p{Ll}/ matches? ] unit-test
|
||||
{ f } [ "A" R/ \p{Ll}/ matches? ] unit-test
|
||||
{ f } [ " " R/ \p{Ll}/ matches? ] unit-test
|
||||
{ f } [ "a" R/ \P{Ll}/ matches? ] unit-test
|
||||
{ t } [ "A" R/ \P{Ll}/ matches? ] unit-test
|
||||
{ t } [ " " R/ \P{Ll}/ matches? ] unit-test
|
||||
{ t } [ "a" R(( \p{Ll})) matches? ] unit-test
|
||||
{ f } [ "A" R(( \p{Ll})) matches? ] unit-test
|
||||
{ f } [ " " R(( \p{Ll})) matches? ] unit-test
|
||||
{ f } [ "a" R(( \P{Ll})) matches? ] unit-test
|
||||
{ t } [ "A" R(( \P{Ll})) matches? ] unit-test
|
||||
{ t } [ " " R(( \P{Ll})) matches? ] unit-test
|
||||
|
||||
{ t } [ "a" R/ \p{script=Latin}/ matches? ] unit-test
|
||||
{ f } [ " " R/ \p{script=Latin}/ matches? ] unit-test
|
||||
{ f } [ "a" R/ \P{script=Latin}/ matches? ] unit-test
|
||||
{ t } [ " " R/ \P{script=Latin}/ matches? ] unit-test
|
||||
{ t } [ "a" R(( \p{script=Latin})) matches? ] unit-test
|
||||
{ f } [ " " R(( \p{script=Latin})) matches? ] unit-test
|
||||
{ f } [ "a" R(( \P{script=Latin})) matches? ] unit-test
|
||||
{ t } [ " " R(( \P{script=Latin})) matches? ] unit-test
|
||||
|
||||
! These should be case-insensitive
|
||||
{ f } [ " " R/ \p{l}/ matches? ] unit-test
|
||||
{ f } [ "a" R/ \P{l}/ matches? ] unit-test
|
||||
{ f } [ "a" R/ \P{ll}/ matches? ] unit-test
|
||||
{ t } [ " " R/ \P{LL}/ matches? ] unit-test
|
||||
{ f } [ "a" R/ \P{sCriPt = latin}/ matches? ] unit-test
|
||||
{ t } [ " " R/ \P{SCRIPT = laTIn}/ matches? ] unit-test
|
||||
{ f } [ " " R(( \p{l})) matches? ] unit-test
|
||||
{ f } [ "a" R(( \P{l})) matches? ] unit-test
|
||||
{ f } [ "a" R(( \P{ll})) matches? ] unit-test
|
||||
{ t } [ " " R(( \P{LL})) matches? ] unit-test
|
||||
{ f } [ "a" R(( \P{sCriPt = latin})) matches? ] unit-test
|
||||
{ t } [ " " R(( \P{SCRIPT = laTIn})) matches? ] unit-test
|
||||
|
||||
! Logical operators
|
||||
{ t } [ "a" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
|
||||
{ t } [ "π" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
|
||||
{ t } [ "A" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
|
||||
{ f } [ "3" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
|
||||
{ t } [ "a" R(( [\p{script=latin}\p{lower}])) matches? ] unit-test
|
||||
{ t } [ "π" R(( [\p{script=latin}\p{lower}])) matches? ] unit-test
|
||||
{ t } [ "A" R(( [\p{script=latin}\p{lower}])) matches? ] unit-test
|
||||
{ f } [ "3" R(( [\p{script=latin}\p{lower}])) matches? ] unit-test
|
||||
|
||||
{ t } [ "a" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
|
||||
{ t } [ "π" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
|
||||
{ t } [ "A" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
|
||||
{ f } [ "3" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
|
||||
{ t } [ "a" R(( [\p{script=latin}||\p{lower}])) matches? ] unit-test
|
||||
{ t } [ "π" R(( [\p{script=latin}||\p{lower}])) matches? ] unit-test
|
||||
{ t } [ "A" R(( [\p{script=latin}||\p{lower}])) matches? ] unit-test
|
||||
{ f } [ "3" R(( [\p{script=latin}||\p{lower}])) matches? ] unit-test
|
||||
|
||||
{ t } [ "a" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
|
||||
{ f } [ "π" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
|
||||
{ f } [ "A" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
|
||||
{ f } [ "3" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
|
||||
{ t } [ "a" R(( [\p{script=latin}&&\p{lower}])) matches? ] unit-test
|
||||
{ f } [ "π" R(( [\p{script=latin}&&\p{lower}])) matches? ] unit-test
|
||||
{ f } [ "A" R(( [\p{script=latin}&&\p{lower}])) matches? ] unit-test
|
||||
{ f } [ "3" R(( [\p{script=latin}&&\p{lower}])) matches? ] unit-test
|
||||
|
||||
{ f } [ "a" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
|
||||
{ t } [ "π" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
|
||||
{ t } [ "A" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
|
||||
{ f } [ "3" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
|
||||
{ f } [ "a" R(( [\p{script=latin}~~\p{lower}])) matches? ] unit-test
|
||||
{ t } [ "π" R(( [\p{script=latin}~~\p{lower}])) matches? ] unit-test
|
||||
{ t } [ "A" R(( [\p{script=latin}~~\p{lower}])) matches? ] unit-test
|
||||
{ f } [ "3" R(( [\p{script=latin}~~\p{lower}])) matches? ] unit-test
|
||||
|
||||
{ f } [ "a" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
|
||||
{ f } [ "π" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
|
||||
{ t } [ "A" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
|
||||
{ f } [ "3" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
|
||||
{ f } [ "a" R(( [\p{script=latin}--\p{lower}])) matches? ] unit-test
|
||||
{ f } [ "π" R(( [\p{script=latin}--\p{lower}])) matches? ] unit-test
|
||||
{ t } [ "A" R(( [\p{script=latin}--\p{lower}])) matches? ] unit-test
|
||||
{ f } [ "3" R(( [\p{script=latin}--\p{lower}])) matches? ] unit-test
|
||||
|
||||
{ t } [ " " R/ \P{alpha}/ matches? ] unit-test
|
||||
{ f } [ "" R/ \P{alpha}/ matches? ] unit-test
|
||||
{ f } [ "a " R/ \P{alpha}/ matches? ] unit-test
|
||||
{ f } [ "a" R/ \P{alpha}/ matches? ] unit-test
|
||||
{ t } [ " " R(( \P{alpha})) matches? ] unit-test
|
||||
{ f } [ "" R(( \P{alpha})) matches? ] unit-test
|
||||
{ f } [ "a " R(( \P{alpha})) matches? ] unit-test
|
||||
{ f } [ "a" R(( \P{alpha})) matches? ] unit-test
|
||||
|
|
|
@ -3,7 +3,8 @@
|
|||
USING: accessors combinators kernel kernel.private math sequences
|
||||
sequences.private strings sets assocs make lexer namespaces parser
|
||||
arrays fry locals regexp.parser splitting sorting regexp.ast
|
||||
regexp.negation regexp.compiler compiler.units words math.ranges ;
|
||||
regexp.negation regexp.compiler compiler.units words math.ranges
|
||||
multiline ;
|
||||
in: regexp
|
||||
|
||||
TUPLE: regexp
|
||||
|
@ -216,7 +217,11 @@ PRIVATE>
|
|||
|
||||
PRIVATE>
|
||||
|
||||
SYNTAX: R/ parse-regexp ;
|
||||
SYNTAX: \ R/ parse-regexp ;
|
||||
SYNTAX: \ R[[ "]]" parse-multiline-string lexer get parse-noblank-token <optioned-regexp> compile-next-match suffix! ;
|
||||
SYNTAX: \ R[=[ "]=]" parse-multiline-string lexer get parse-noblank-token <optioned-regexp> compile-next-match suffix! ;
|
||||
SYNTAX: \ R(( "))" parse-multiline-string lexer get parse-noblank-token <optioned-regexp> compile-next-match suffix! ;
|
||||
SYNTAX: \ R{{ "}}" parse-multiline-string lexer get parse-noblank-token <optioned-regexp> compile-next-match suffix! ;
|
||||
|
||||
use: vocabs.loader
|
||||
|
||||
|
|
|
@ -5,6 +5,6 @@ unicode ;
|
|||
in: sorting.title
|
||||
|
||||
<< "title" [
|
||||
>lower dup R/ ^(the|a|an|el|la|los|las|il) / first-match
|
||||
>lower dup R[[ ^(the|a|an|el|la|los|las|il) ]] first-match
|
||||
[ to>> tail-slice ] when*
|
||||
] define-sorting >>
|
||||
|
|
|
@ -14,7 +14,7 @@ in: text-analysis
|
|||
[ [ blank? ] trim ] map harvest ;
|
||||
|
||||
: split-paragraphs ( str -- seq )
|
||||
R/ \r?\n\r?\n/ re-split trimmed ;
|
||||
R[[ \r?\n\r?\n]] re-split trimmed ;
|
||||
|
||||
<<
|
||||
CONSTANT: ABBREVIATIONS {
|
||||
|
@ -39,7 +39,7 @@ CONSTANT: ABBREVIATIONS {
|
|||
: split-sentences ( str -- seq )
|
||||
|
||||
! Mark end of sentences with EOS marker
|
||||
R/ ((?:[\.?!]|[\r\n]+)(?:\"|\'|\)|\]|\})?)(\s+)/
|
||||
R[[ ((?:[\.?!]|[\r\n]+)(?:\"|\'|\)|\]|\})?)(\s+)]]
|
||||
[ [ ".?!\r\n\"')]}" member? not ] cut-when "\x01" glue ]
|
||||
re-replace-with
|
||||
|
||||
|
@ -63,46 +63,46 @@ CONSTANT: ABBREVIATIONS {
|
|||
"\x01" split trimmed ;
|
||||
|
||||
CONSTANT: sub-syllable {
|
||||
R/ [^aeiou]e$/ ! give, love, bone, done, ride ...
|
||||
R/ [aeiou](?:([cfghklmnprsvwz])\1?|ck|sh|[rt]ch)e[ds]$/
|
||||
R[[ [^aeiou]e$]] ! give, love, bone, done, ride ...
|
||||
R[[ [aeiou](?:([cfghklmnprsvwz])\1?|ck|sh|[rt]ch)e[ds]$]]
|
||||
! (passive) past participles and 3rd person sing present verbs:
|
||||
! bared, liked, called, tricked, bashed, matched
|
||||
|
||||
R/ .e(?:ly|less(?:ly)?|ness?|ful(?:ly)?|ments?)$/
|
||||
R[[ .e(?:ly|less(?:ly)?|ness?|ful(?:ly)?|ments?)$]]
|
||||
! nominal, adjectival and adverbial derivatives from -e$ roots:
|
||||
! absolutely, nicely, likeness, basement, hopeless
|
||||
! hopeful, tastefully, wasteful
|
||||
|
||||
R/ ion/ ! action, diction, fiction
|
||||
R/ [ct]ia[nl]/ ! special(ly), initial, physician, christian
|
||||
R/ [^cx]iou/ ! illustrious, NOT spacious, gracious, anxious, noxious
|
||||
R/ sia$/ ! amnesia, polynesia
|
||||
R/ .gue$/ ! dialogue, intrigue, colleague
|
||||
R{{ ion}} ! action, diction, fiction
|
||||
R{{ [ct]ia[nl]}} ! special(ly), initial, physician, christian
|
||||
R{{ [^cx]iou}} ! illustrious, NOT spacious, gracious, anxious, noxious
|
||||
R{{ sia$}} ! amnesia, polynesia
|
||||
R{{ .gue$}} ! dialogue, intrigue, colleague
|
||||
} ;
|
||||
|
||||
CONSTANT: add-syllable {
|
||||
R/ i[aiou]/ ! alias, science, phobia
|
||||
R/ [dls]ien/ ! salient, gradient, transient
|
||||
R/ [aeiouym]ble$/ ! -Vble, plus -mble
|
||||
R/ [aeiou]{3}/ ! agreeable
|
||||
R/ ^mc/ ! mcwhatever
|
||||
R/ ism$/ ! sexism, racism
|
||||
R/ (?:([^aeiouy])\1|ck|mp|ng)le$/ ! bubble, cattle, cackle, sample, angle
|
||||
R/ dnt$/ ! couldn/t
|
||||
R/ [aeiou]y[aeiou]/ ! annoying, layer
|
||||
R{{ i[aiou]}} ! alias, science, phobia
|
||||
R{{ [dls]ien}} ! salient, gradient, transient
|
||||
R{{ [aeiouym]ble$}} ! -Vble, plus -mble
|
||||
R(( [aeiou]{3})) ! agreeable
|
||||
R{{ ^mc}} ! mcwhatever
|
||||
R{{ ism$}} ! sexism, racism
|
||||
R{{ (?:([^aeiouy])\1|ck|mp|ng)le$}} ! bubble, cattle, cackle, sample, angle
|
||||
R{{ dnt$}} ! couldn/t
|
||||
R{{ [aeiou]y[aeiou]}} ! annoying, layer
|
||||
} ;
|
||||
|
||||
: syllables ( str -- n )
|
||||
dup length 1 = [ drop 1 ] [
|
||||
>lower char: . swap remove
|
||||
[ R/ [aeiouy]+/ count-matches ]
|
||||
[ R{{ [aeiouy]+}} count-matches ]
|
||||
[ sub-syllable [ matches? ] with count - ]
|
||||
[ add-syllable [ matches? ] with count + ] tri
|
||||
1 max
|
||||
] if ;
|
||||
|
||||
: split-words ( str -- words )
|
||||
R/ \b([a-z][a-z\-']*)\b/i all-matching-subseqs ;
|
||||
R{{ \b([a-z][a-z\-']*)\b}}i all-matching-subseqs ;
|
||||
|
||||
TUPLE: text-analysis #paragraphs #sentences #chars #words
|
||||
#syllables #complex-words #unique-words #difficult-words ;
|
||||
|
|
|
@ -61,11 +61,11 @@ in: validators
|
|||
! From http://www.regular-expressions.info/email.html
|
||||
320 v-max-length
|
||||
"e-mail"
|
||||
R/ [A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}/i
|
||||
R(( [A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}))i
|
||||
v-regexp ;
|
||||
|
||||
: v-url ( str -- str )
|
||||
"URL" R/ (?:ftp|http|https):\\/\\/\S+/ v-regexp ;
|
||||
"URL" R(( (?:ftp|http|https):\\/\\/\S+)) v-regexp ;
|
||||
|
||||
: v-captcha ( str -- str )
|
||||
dup empty? [ "must remain blank" throw ] unless ;
|
||||
|
|
|
@ -13,7 +13,7 @@ in: xkcd
|
|||
|
||||
: comic-image ( url -- image )
|
||||
http-get nip
|
||||
R/ http:\\/\\/imgs\.xkcd\.com\\/comics\\/[^\.]+\.(png|jpg)/
|
||||
R[[ http:\\/\\/imgs\.xkcd\.com\\/comics\\/[^\.]+\.(png|jpg)]]
|
||||
first-match >string load-http-image ;
|
||||
|
||||
: comic-image. ( url -- )
|
||||
|
|
|
@ -15,16 +15,16 @@ CONSTANT: YAML_VALUE_TAG "tag:yaml.org,2002:value" ;
|
|||
! http://www.yaml.org/spec/1.2/spec.html
|
||||
! 10.3. Core Schema
|
||||
|
||||
CONSTANT: re-null R/ null|Null|NULL|~/ ;
|
||||
CONSTANT: re-empty R/ / ;
|
||||
CONSTANT: re-bool R/ true|True|TRUE|false|False|FALSE/ ;
|
||||
CONSTANT: re-int10 R/ [-+]?[0-9]+/ ;
|
||||
CONSTANT: re-int8 R/ 0o[0-7]+/ ;
|
||||
CONSTANT: re-int16 R/ 0x[0-9a-fA-F]+/ ;
|
||||
CONSTANT: re-number R/ [-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?/ ;
|
||||
CONSTANT: re-infinity R/ [-+]?\.(inf|Inf|INF)/ ;
|
||||
CONSTANT: re-nan R/ \.(nan|NaN|NAN)/ ;
|
||||
CONSTANT: re-timestamp R/ [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]|[0-9][0-9][0-9][0-9]-[0-9][0-9]?-[0-9][0-9]?([Tt]|[ \t]+)[0-9][0-9]?:[0-9][0-9]:[0-9][0-9](\.[0-9]*)?([ \t]*(Z|[-+][0-9][0-9]?(:[0-9][0-9])?))?/ ;
|
||||
CONSTANT: re-null R[[ null|Null|NULL|~]] ;
|
||||
CONSTANT: re-empty R[[ ]] ;
|
||||
CONSTANT: re-bool R[[ true|True|TRUE|false|False|FALSE]] ;
|
||||
CONSTANT: re-int10 R[[ [-+]?[0-9]+]] ;
|
||||
CONSTANT: re-int8 R[[ 0o[0-7]+]] ;
|
||||
CONSTANT: re-int16 R[[ 0x[0-9a-fA-F]+]] ;
|
||||
CONSTANT: re-number R[[ [-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?]] ;
|
||||
CONSTANT: re-infinity R[[ [-+]?\.(inf|Inf|INF)]] ;
|
||||
CONSTANT: re-nan R[[ \.(nan|NaN|NAN)]] ;
|
||||
CONSTANT: re-timestamp R[[ [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]|[0-9][0-9][0-9][0-9]-[0-9][0-9]?-[0-9][0-9]?([Tt]|[ \t]+)[0-9][0-9]?:[0-9][0-9]:[0-9][0-9](\.[0-9]*)?([ \t]*(Z|[-+][0-9][0-9]?(:[0-9][0-9])?))?]] ;
|
||||
|
||||
: resolve-normal-plain-scalar ( str -- tag )
|
||||
{
|
||||
|
@ -41,8 +41,8 @@ CONSTANT: re-timestamp R/ [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]|[0-9][0-9][
|
|||
[ drop YAML_STR_TAG ]
|
||||
} cond-case ;
|
||||
|
||||
CONSTANT: re-merge R/ <</ ;
|
||||
CONSTANT: re-value R/ =/ ;
|
||||
CONSTANT: re-merge R[[ <<]] ;
|
||||
CONSTANT: re-value R[[ =]] ;
|
||||
: (resolve-mapping-key-plain-scalar) ( str -- tag )
|
||||
{
|
||||
{ [ re-merge matches? ] [ YAML_MERGE_TAG ] }
|
||||
|
@ -87,7 +87,7 @@ CONSTANT: YAML_OMAP_TAG "tag:yaml.org,2002:omap" ;
|
|||
CONSTANT: YAML_PAIRS_TAG "tag:yaml.org,2002:pairs" ;
|
||||
CONSTANT: YAML_SET_TAG "tag:yaml.org,2002:set" ;
|
||||
|
||||
: construct-bool ( str -- ? ) R/ true|True|TRUE/ matches? ;
|
||||
: construct-bool ( str -- ? ) R[[ true|True|TRUE]] matches? ;
|
||||
|
||||
: construct-int ( str -- n ) string>number ;
|
||||
|
||||
|
@ -107,14 +107,14 @@ CONSTANT: YAML_SET_TAG "tag:yaml.org,2002:set" ;
|
|||
! - months, days and hours on 1 digit
|
||||
! preprocess to fix this mess...
|
||||
: yaml>rfc3339 ( str -- str' )
|
||||
R/ -[0-9][^0-9]/ [ [ char: 0 1 ] dip insert-nth ] re-replace-with
|
||||
R/ -[0-9][^0-9]/ [ [ char: 0 1 ] dip insert-nth ] re-replace-with
|
||||
R/ [^0-9][0-9]:/ [ [ char: 0 1 ] dip insert-nth ] re-replace-with
|
||||
R/ [ \t]+/ " " re-replace
|
||||
R{{ -[0-9][^0-9]}} [ [ char: 0 1 ] dip insert-nth ] re-replace-with
|
||||
R{{ -[0-9][^0-9]}} [ [ char: 0 1 ] dip insert-nth ] re-replace-with
|
||||
R{{ [^0-9][0-9]:}} [ [ char: 0 1 ] dip insert-nth ] re-replace-with
|
||||
R{{ [ \t]+}} " " re-replace
|
||||
char: \: over index cut char: space swap remove append ;
|
||||
|
||||
: construct-timestamp ( obj -- obj' )
|
||||
dup R/ [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]/ matches?
|
||||
dup R{{ [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]}} matches?
|
||||
[ ymd>timestamp ] [ yaml>rfc3339 rfc3339>timestamp ] if ;
|
||||
|
||||
TUPLE: yaml-merge ;
|
||||
|
|
|
@ -482,8 +482,8 @@ CONSTANT: nested-merge-obj H{
|
|||
${ nested-merge-obj } [ $ nested-merge-str yaml> ] unit-test
|
||||
${ nested-merge-obj } [ $ nested-merge-obj >yaml yaml> ] unit-test
|
||||
|
||||
CONSTANT: recursive-merge-str "--- &A ;
|
||||
<<: *A"
|
||||
CONSTANT: recursive-merge-str "--- &A
|
||||
<<: *A" ;
|
||||
CONSTANT: recursive-merge-obj H{ } ;
|
||||
|
||||
${ recursive-merge-obj } [ $ recursive-merge-str yaml> ] unit-test
|
||||
|
|
Loading…
Reference in New Issue