! Unit tests for the regexp vocabulary.
!
! Layout conventions used below:
!   * One test per line. A "!" comment runs to the end of its line, so
!     tests must not share a line with a preceding comment.
!   * String patterns are compiled with <regexp> before being passed to
!     matches?, match-head or first-match; R/ .../ and R' ...' literals are
!     already compiled and are passed directly.
!   * Regions wrapped in /* ... */ (from the multiline vocabulary) and
!     lines starting with "!" are disabled tests kept for reference.
USING: regexp tools.test kernel sequences regexp.parser
regexp.traversal eval strings multiline ;
IN: regexp-tests

! The constructor and the matcher must both have inferable stack effects.
\ <regexp> must-infer
\ matches? must-infer

! Zero or more: a*
[ f ] [ "b" "a*" <regexp> matches? ] unit-test
[ t ] [ "" "a*" <regexp> matches? ] unit-test
[ t ] [ "a" "a*" <regexp> matches? ] unit-test
[ t ] [ "aaaaaaa" "a*" <regexp> matches? ] unit-test
[ f ] [ "ab" "a*" <regexp> matches? ] unit-test

! Literal sequences and alternation
[ t ] [ "abc" "abc" <regexp> matches? ] unit-test
[ t ] [ "a" "a|b|c" <regexp> matches? ] unit-test
[ t ] [ "b" "a|b|c" <regexp> matches? ] unit-test
[ t ] [ "c" "a|b|c" <regexp> matches? ] unit-test
[ f ] [ "c" "d|e|f" <regexp> matches? ] unit-test

! Alternation with empty branches
[ t ] [ "b" "|b" <regexp> matches? ] unit-test
[ t ] [ "b" "b|" <regexp> matches? ] unit-test
[ t ] [ "" "b|" <regexp> matches? ] unit-test
[ t ] [ "" "b|" <regexp> matches? ] unit-test
[ f ] [ "" "|" <regexp> matches? ] unit-test
[ f ] [ "" "|||||||" <regexp> matches? ] unit-test

! matches? must consume the whole input, not just a prefix
[ f ] [ "aa" "a|b|c" <regexp> matches? ] unit-test
[ f ] [ "bb" "a|b|c" <regexp> matches? ] unit-test
[ f ] [ "cc" "a|b|c" <regexp> matches? ] unit-test
[ f ] [ "cc" "d|e|f" <regexp> matches? ] unit-test

! One or more: a+
[ f ] [ "" "a+" <regexp> matches? ] unit-test
[ t ] [ "a" "a+" <regexp> matches? ] unit-test
[ t ] [ "aa" "a+" <regexp> matches? ] unit-test

! Optional: a?
[ t ] [ "" "a?" <regexp> matches? ] unit-test
[ t ] [ "a" "a?" <regexp> matches? ] unit-test
[ f ] [ "aa" "a?" <regexp> matches? ] unit-test

! Any character: .
[ f ] [ "" "." <regexp> matches? ] unit-test
[ t ] [ "a" "." <regexp> matches? ] unit-test
[ t ] [ "." "." <regexp> matches? ] unit-test

! Dotall mode -- when on, . matches newlines.
! Off by default.
[ f ] [ "\n" "." <regexp> matches? ] unit-test
[ t ] [ "\n" "(?s)." <regexp> matches? ] unit-test
[ t ] [ "\n" R/ ./s matches? ] unit-test
[ f ] [ "\n\n" "(?s).(?-s)." <regexp> matches? ] unit-test

[ f ] [ "" ".+" <regexp> matches? ] unit-test
[ t ] [ "a" ".+" <regexp> matches? ] unit-test
[ t ] [ "ab" ".+" <regexp> matches? ] unit-test

! Shorthand classes inside brackets: \s \S \w \W
[ t ] [ " " "[\\s]" <regexp> matches? ] unit-test
[ f ] [ "a" "[\\s]" <regexp> matches? ] unit-test
[ f ] [ " " "[\\S]" <regexp> matches? ] unit-test
[ t ] [ "a" "[\\S]" <regexp> matches? ] unit-test
[ f ] [ " " "[\\w]" <regexp> matches? ] unit-test
[ t ] [ "a" "[\\w]" <regexp> matches? ] unit-test
[ t ] [ " " "[\\W]" <regexp> matches? ] unit-test
[ f ] [ "a" "[\\W]" <regexp> matches? ] unit-test

! Escaped slash
[ t ] [ "/" "\\/" <regexp> matches? ] unit-test

! R' literal with an option flag
[ t ] [ "a" R' a'i matches? ] unit-test

! Alternation mixing different quantifiers
[ t ] [ "" "a|b*|c+|d?" <regexp> matches? ] unit-test
[ t ] [ "a" "a|b*|c+|d?" <regexp> matches? ] unit-test
[ t ] [ "c" "a|b*|c+|d?" <regexp> matches? ] unit-test
[ t ] [ "cc" "a|b*|c+|d?" <regexp> matches? ] unit-test
[ f ] [ "ccd" "a|b*|c+|d?" <regexp> matches? ] unit-test
[ t ] [ "d" "a|b*|c+|d?" <regexp> matches? ] unit-test

[ t ] [ "foo" "foo|bar" <regexp> matches? ] unit-test
[ t ] [ "bar" "foo|bar" <regexp> matches? ] unit-test
[ f ] [ "foobar" "foo|bar" <regexp> matches? ] unit-test

! Disabled: parenthesized groups
/*
! FIXME
[ f ] [ "" "(a)" <regexp> matches? ] unit-test
[ t ] [ "a" "(a)" <regexp> matches? ] unit-test
[ f ] [ "aa" "(a)" <regexp> matches? ] unit-test
[ t ] [ "aa" "(a*)" <regexp> matches? ] unit-test

[ f ] [ "aababaaabbac" "(a|b)+" <regexp> matches? ] unit-test
[ t ] [ "ababaaabba" "(a|b)+" <regexp> matches? ] unit-test
*/

! Counted repetition: {n} {n,} {,m} {n,m}
[ f ] [ "" "a{1}" <regexp> matches? ] unit-test
[ t ] [ "a" "a{1}" <regexp> matches? ] unit-test
[ f ] [ "aa" "a{1}" <regexp> matches? ] unit-test

[ f ] [ "a" "a{2,}" <regexp> matches? ] unit-test
[ t ] [ "aaa" "a{2,}" <regexp> matches? ] unit-test
[ t ] [ "aaaa" "a{2,}" <regexp> matches? ] unit-test
[ t ] [ "aaaaa" "a{2,}" <regexp> matches? ] unit-test

[ t ] [ "" "a{,2}" <regexp> matches? ] unit-test
[ t ] [ "a" "a{,2}" <regexp> matches? ] unit-test
[ t ] [ "aa" "a{,2}" <regexp> matches? ] unit-test
[ f ] [ "aaa" "a{,2}" <regexp> matches? ] unit-test
[ f ] [ "aaaa" "a{,2}" <regexp> matches? ] unit-test
[ f ] [ "aaaaa" "a{,2}" <regexp> matches? ] unit-test

[ f ] [ "" "a{1,3}" <regexp> matches? ] unit-test
[ t ] [ "a" "a{1,3}" <regexp> matches? ] unit-test
[ t ] [ "aa" "a{1,3}" <regexp> matches? ] unit-test
[ t ] [ "aaa" "a{1,3}" <regexp> matches? ] unit-test
[ f ] [ "aaaa" "a{1,3}" <regexp> matches? ] unit-test

! Character classes
[ f ] [ "" "[a]" <regexp> matches? ] unit-test
[ t ] [ "a" "[a]" <regexp> matches? ] unit-test
[ t ] [ "a" "[abc]" <regexp> matches? ] unit-test
[ f ] [ "b" "[a]" <regexp> matches? ] unit-test
[ f ] [ "d" "[abc]" <regexp> matches? ] unit-test
[ t ] [ "ab" "[abc]{1,2}" <regexp> matches? ] unit-test
[ f ] [ "abc" "[abc]{1,2}" <regexp> matches? ] unit-test

! Negated character classes
[ f ] [ "" "[^a]" <regexp> matches? ] unit-test
[ f ] [ "a" "[^a]" <regexp> matches? ] unit-test
[ f ] [ "a" "[^abc]" <regexp> matches? ] unit-test
[ t ] [ "b" "[^a]" <regexp> matches? ] unit-test
[ t ] [ "d" "[^abc]" <regexp> matches? ] unit-test
[ f ] [ "ab" "[^abc]{1,2}" <regexp> matches? ] unit-test
[ f ] [ "abc" "[^abc]{1,2}" <regexp> matches? ] unit-test

! "]", "^" and "[" as class members
[ t ] [ "]" "[]]" <regexp> matches? ] unit-test
[ f ] [ "]" "[^]]" <regexp> matches? ] unit-test
[ t ] [ "a" "[^]]" <regexp> matches? ] unit-test

! "[^]" is expected to be rejected
[ "^" "[^]" <regexp> matches? ] must-fail
[ t ] [ "^" "[]^]" <regexp> matches? ] unit-test
[ t ] [ "]" "[]^]" <regexp> matches? ] unit-test

[ t ] [ "[" "[[]" <regexp> matches? ] unit-test
[ f ] [ "^" "[^^]" <regexp> matches? ] unit-test
[ t ] [ "a" "[^^]" <regexp> matches? ] unit-test

! "-" as a literal class member versus a range operator
[ t ] [ "-" "[-]" <regexp> matches? ] unit-test
[ f ] [ "a" "[-]" <regexp> matches? ] unit-test
[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test

[ t ] [ "-" "[-a]" <regexp> matches? ] unit-test
[ t ] [ "a" "[-a]" <regexp> matches? ] unit-test
[ t ] [ "-" "[a-]" <regexp> matches? ] unit-test
[ t ] [ "a" "[a-]" <regexp> matches? ] unit-test
[ f ] [ "b" "[a-]" <regexp> matches? ] unit-test
[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test

[ f ] [ "-" "[a-c]" <regexp> matches? ] unit-test
[ t ] [ "-" "[^a-c]" <regexp> matches? ] unit-test
[ t ] [ "b" "[a-c]" <regexp> matches? ] unit-test
[ f ] [ "b" "[^a-c]" <regexp> matches? ] unit-test

[ t ] [ "-" "[a-c-]" <regexp> matches? ] unit-test
[ f ] [ "-" "[^a-c-]" <regexp> matches? ] unit-test

! Escaped backslash inside a class
[ t ] [ "\\" "[\\\\]" <regexp> matches? ] unit-test
[ f ] [ "a" "[\\\\]" <regexp> matches? ] unit-test
[ f ] [ "\\" "[^\\\\]" <regexp> matches? ] unit-test
[ t ] [ "a" "[^\\\\]" <regexp> matches? ] unit-test

! \d inside a class
[ t ] [ "0" "[\\d]" <regexp> matches? ] unit-test
[ f ] [ "a" "[\\d]" <regexp> matches? ] unit-test
[ f ] [ "0" "[^\\d]" <regexp> matches? ] unit-test
[ t ] [ "a" "[^\\d]" <regexp> matches? ] unit-test

! Disabled: alternations of counted classes
/*
! FIXME
[ t ] [ "a" "[a-z]{1,}|[A-Z]{2,4}|b*|c|(f|g)*" <regexp> matches? ] unit-test
[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}|b*|c|(f|g)*" <regexp> matches? ] unit-test
[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}" <regexp> matches? ] unit-test
*/

[ t ] [ "1000" "\\d{4,6}" <regexp> matches? ] unit-test
[ t ] [ "1000" "[0-9]{4,6}" <regexp> matches? ] unit-test

! Named classes: \p{...}
[ t ] [ "abc" "\\p{Lower}{3}" <regexp> matches? ] unit-test
[ f ] [ "ABC" "\\p{Lower}{3}" <regexp> matches? ] unit-test
[ t ] [ "ABC" "\\p{Upper}{3}" <regexp> matches? ] unit-test
[ f ] [ "abc" "\\p{Upper}{3}" <regexp> matches? ] unit-test
[ f ] [ "abc" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
[ t ] [ "ABC" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test

! Quoted literal text: \Q ... \E
[ f ] [ "" "\\Q\\E" <regexp> matches? ] unit-test
[ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
[ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
[ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
[ t ] [ "s" "\\Qs\\E" <regexp> matches? ] unit-test

! Octal, hex and unicode character escapes
[ t ] [ "S" "\\0123" <regexp> matches? ] unit-test
[ t ] [ "SXY" "\\0123XY" <regexp> matches? ] unit-test
[ t ] [ "x" "\\x78" <regexp> matches? ] unit-test
[ f ] [ "y" "\\x78" <regexp> matches? ] unit-test
[ t ] [ "x" "\\u000078" <regexp> matches? ] unit-test
[ f ] [ "y" "\\u000078" <regexp> matches? ] unit-test

! Quantified atom followed by a literal
[ t ] [ "ab" "a+b" <regexp> matches? ] unit-test
[ f ] [ "b" "a+b" <regexp> matches? ] unit-test
[ t ] [ "aab" "a+b" <regexp> matches? ] unit-test
[ f ] [ "abb" "a+b" <regexp> matches? ] unit-test

[ t ] [ "abbbb" "ab*" <regexp> matches? ] unit-test
[ t ] [ "a" "ab*" <regexp> matches? ] unit-test
[ f ] [ "abab" "ab*" <regexp> matches? ] unit-test

! Escaped dot
[ f ] [ "x" "\\." <regexp> matches? ] unit-test
[ t ] [ "." "\\." <regexp> matches? ] unit-test

[ t ] [ "aaaab" "a+ab" <regexp> matches? ] unit-test
[ f ] [ "aaaxb" "a+ab" <regexp> matches? ] unit-test
[ t ] [ "aaacb" "a+cb" <regexp> matches? ] unit-test

! match-head results for a pattern matched at the start of the input
[ 3 ] [ "aaacb" "a*" <regexp> match-head ] unit-test
[ 2 ] [ "aaacb" "aa?" <regexp> match-head ] unit-test

! Case-insensitive option on R/ literals
[ t ] [ "aaa" R/ AAA/i matches? ] unit-test
[ f ] [ "aax" R/ AAA/i matches? ] unit-test
[ t ] [ "aaa" R/ A*/i matches? ] unit-test
[ f ] [ "aaba" R/ A*/i matches? ] unit-test
[ t ] [ "b" R/ [AB]/i matches? ] unit-test
[ f ] [ "c" R/ [AB]/i matches? ] unit-test
[ t ] [ "c" R/ [A-Z]/i matches? ] unit-test
[ f ] [ "3" R/ [A-Z]/i matches? ] unit-test

! Inline (?i) turns case-insensitivity on
[ t ] [ "a" "(?i)a" <regexp> matches? ] unit-test
[ t ] [ "a" "(?i)a" <regexp> matches? ] unit-test
[ t ] [ "A" "(?i)a" <regexp> matches? ] unit-test
[ t ] [ "A" "(?i)a" <regexp> matches? ] unit-test

! Inline (?-i) turns it back off even when the literal carries /i
[ t ] [ "a" R/ (?-i)a/i matches? ] unit-test
[ t ] [ "a" R/ (?-i)a/i matches? ] unit-test
[ f ] [ "A" R/ (?-i)a/i matches? ] unit-test
[ f ] [ "A" R/ (?-i)a/i matches? ] unit-test

[ f ] [ "A" "[a-z]" <regexp> matches? ] unit-test
[ t ] [ "A" R/ [a-z]/i matches? ] unit-test

[ f ] [ "A" "\\p{Lower}" <regexp> matches? ] unit-test
[ t ] [ "A" R/ \p{Lower}/i matches? ] unit-test

! The /r option on R/ literals
[ t ] [ "abc" R/ abc/r matches? ] unit-test
[ t ] [ "abc" R/ a[bB][cC]/r matches? ] unit-test
! [ t ] [ "adcbe" R/ a(?r)(bcd)(?-r)e/ matches? ] unit-test ! FIXME

[ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
[ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
[ t ] [ ".o" "\\.[a-z]" <regexp> matches? ] unit-test

[ t ] [ "abc*" "[^\\*]*\\*" <regexp> matches? ] unit-test
[ t ] [ "bca" "[^a]*a" <regexp> matches? ] unit-test

! Disabled: large patterns, (?#...) comments, R' literals via eval, groups
/*
! FIXME
[ ] [ "(0[lL]?|[1-9]\\d{0,9}(\\d{0,9}[lL])?|0[xX]\\p{XDigit}{1,8}(\\p{XDigit}{0,8}[lL])?|0[0-7]{1,11}([0-7]{0,11}[lL])?|([0-9]+\\.[0-9]*|\\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?|[0-9]+([eE][+-]?[0-9]+[fFdD]?|([eE][+-]?[0-9]+)?[fFdD]))" <regexp> drop ] unit-test

[ ] [ "(\\$[\\p{XDigit}]|[\\p{Digit}])" <regexp> drop ] unit-test

! Comment inside a regular expression
[ t ] [ "ac" "a(?#boo)c" <regexp> matches? ] unit-test

[ ] [ "USING: regexp kernel ; R' -{3}[+]{1,6}(?:!!)?\\s' drop" eval ] unit-test

[ ] [ "USING: regexp kernel ; R' (ftp|http|https)://(\\w+:?\\w*@)?(\\S+)(:[0-9]+)?(/|/([\\w#!:.?+=&%@!\\-/]))?' drop" eval ] unit-test

[ ] [ "USING: regexp kernel ; R' \\*[^\s*][^*]*\\*' drop" eval ] unit-test

[ "ab" ] [ "ab" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test
[ "abc" ] [ "abc" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test

[ "ab" ] [ "ab" "(ab|a)(bc)?" <regexp> first-match >string ] unit-test
[ "abc" ] [ "abc" "(ab|a)(bc)?" <regexp> first-match >string ] unit-test

[ "b" ] [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> first-match >string ] unit-test
*/

! [ t ] [ "a:b" ".+:?" <regexp> matches? ] unit-test

! [ 1 ] [ "hello" ".+?" <regexp> match length ] unit-test

! Splitting, collecting, counting and replacing matches
[ { "1" "2" "3" "4" } ]
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ re-split [ >string ] map ] unit-test

[ { "1" "2" "3" "4" } ]
[ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ re-split [ >string ] map ] unit-test

[ { "ABC" "DEF" "GHI" } ]
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ all-matches [ >string ] map ] unit-test

[ 3 ]
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ count-matches ] unit-test

[ 0 ]
[ "123" R/ [A-Z]+/ count-matches ] unit-test

[ "1.2.3.4" ]
[ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ "." re-replace ] unit-test

! Disabled: lookahead and lookbehind
/*
! FIXME
[ f ] [ "ab" "a(?!b)" <regexp> first-match ] unit-test
[ "a" ] [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test
! [ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
[ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> first-match >string ] unit-test
[ "a" ] [ "ba" "a(?<=b)(?<=b)" <regexp> first-match >string ] unit-test
[ "a" ] [ "cab" "a(?=b)(?<=c)" <regexp> first-match >string ] unit-test

[ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-head ] unit-test
[ f ] [ "foobxr" "foo(?=bar)" <regexp> match-head ] unit-test
*/

! Bug in parsing word
[ t ] [ "a" R' a' matches? ] unit-test

! Convert to lowercase until E
[ f ] [ "AA" R/ \LAA\E/ matches? ] unit-test
[ t ] [ "aa" R/ \LAA\E/ matches? ] unit-test

! Convert to uppercase until E
[ t ] [ "AA" R/ \Uaa\E/ matches? ] unit-test
[ f ] [ "aa" R/ \Uaa\E/ matches? ] unit-test

! [ "{Lower}" <regexp> ] [ invalid-range? ] must-fail-with

! Disabled: ^ and $ anchors
! [ t ] [ "a" R/ ^a/ matches? ] unit-test
! [ f ] [ "\na" R/ ^a/ matches? ] unit-test
! [ f ] [ "\r\na" R/ ^a/ matches? ] unit-test
! [ f ] [ "\ra" R/ ^a/ matches? ] unit-test

! [ t ] [ "a" R/ a$/ matches? ] unit-test
! [ f ] [ "a\n" R/ a$/ matches? ] unit-test
! [ f ] [ "a\r" R/ a$/ matches? ] unit-test
! [ f ] [ "a\r\n" R/ a$/ matches? ] unit-test

! [ t ] [ "a" R/ a$|b$/ matches? ] unit-test
! [ t ] [ "b" R/ a$|b$/ matches? ] unit-test
! [ t ] [ "ab" R/ a$|b$/ matches? ] unit-test
! [ t ] [ "ba" R/ ba$|b$/ matches? ] unit-test

! Disabled: \A, \z and \Z anchors, with and without /m
! [ t ] [ "a" R/ \Aa/ matches? ] unit-test
! [ f ] [ "\na" R/ \Aaa/ matches? ] unit-test
! [ f ] [ "\r\na" R/ \Aa/ matches? ] unit-test
! [ f ] [ "\ra" R/ \Aa/ matches? ] unit-test

! [ t ] [ "a" R/ \Aa/m matches? ] unit-test
! [ f ] [ "\na" R/ \Aaa/m matches? ] unit-test
! [ f ] [ "\r\na" R/ \Aa/m matches? ] unit-test
! [ f ] [ "\ra" R/ \Aa/m matches? ] unit-test

! [ t ] [ "\r\n\n\n\nam" R/ ^am/m matches? ] unit-test

! [ t ] [ "a" R/ \Aa\z/m matches? ] unit-test
! [ f ] [ "a\n" R/ \Aa\z/m matches? ] unit-test

! [ t ] [ "a\r\n" R/ \Aa\Z/m matches? ] unit-test
! [ t ] [ "a\n" R/ \Aa\Z/m matches? ] unit-test

! [ t ] [ "a" R/ \Aa\Z/m matches? ] unit-test
! [ f ] [ "\na" R/ \Aaa\Z/m matches? ] unit-test
! [ f ] [ "\r\na" R/ \Aa\Z/m matches? ] unit-test
! [ f ] [ "\ra" R/ \Aa\Z/m matches? ] unit-test

! [ t ] [ "a" R/ ^a/m matches? ] unit-test
! [ t ] [ "\na" R/ ^a/m matches? ] unit-test
! [ t ] [ "\r\na" R/ ^a/m matches? ] unit-test
! [ t ] [ "\ra" R/ ^a/m matches? ] unit-test

! [ t ] [ "a" "a$" R/ a$/m matches? ] unit-test
! [ t ] [ "a\n" "a$" R/ a$/m matches? ] unit-test
! [ t ] [ "a\r" "a$" R/ a$/m matches? ] unit-test
! [ t ] [ "a\r\n" "a$" R/ a$/m matches? ] unit-test

! [ f ] [ "foobxr" "foo\\z" <regexp> match-head ] unit-test
! [ 3 ] [ "foo" "foo\\z" <regexp> match-head ] unit-test

! Disabled: word boundaries \b and \B
! [ t ] [ "foo" "\\bfoo\\b" <regexp> matches? ] unit-test
! [ t ] [ "afoob" "\\Bfoo\\B" <regexp> matches? ] unit-test
! [ t ] [ "afoob" "\\bfoo\\b" <regexp> matches? ] unit-test
! [ f ] [ "foo" "\\Bfoo\\B" <regexp> matches? ] unit-test

! [ 3 ] [ "foo bar" "foo\\b" <regexp> match-head ] unit-test
! [ f ] [ "fooxbar" "foo\\b" <regexp> matches? ] unit-test
! [ t ] [ "foo" "foo\\b" <regexp> matches? ] unit-test
! [ t ] [ "foo bar" "foo\\b bar" <regexp> matches? ] unit-test
! [ f ] [ "fooxbar" "foo\\bxbar" <regexp> matches? ] unit-test
! [ f ] [ "foo" "foo\\bbar" <regexp> matches? ] unit-test

! [ f ] [ "foo bar" "foo\\B" <regexp> matches? ] unit-test
! [ 3 ] [ "fooxbar" "foo\\B" <regexp> match-head ] unit-test
! [ t ] [ "foo" "foo\\B" <regexp> matches? ] unit-test
! [ f ] [ "foo bar" "foo\\B bar" <regexp> matches? ] unit-test
! [ t ] [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
! [ f ] [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test

! Disabled: +?, ?? and ++ quantifier forms
! [ 1 ] [ "aaacb" "a+?" <regexp> match-head ] unit-test
! [ 1 ] [ "aaacb" "aa??" <regexp> match-head ] unit-test
! [ f ] [ "aaaab" "a++ab" <regexp> matches? ] unit-test
! [ t ] [ "aaacb" "a++cb" <regexp> matches? ] unit-test
! [ 3 ] [ "aacb" "aa?c" <regexp> match-head ] unit-test
! [ 3 ] [ "aacb" "aa??c" <regexp> match-head ] unit-test

! Scratch expressions for lookaround and grouping (no expected values)
! "ab" "a(?=b*)" <regexp> match
! "abbbbbc" "a(?=b*c)" <regexp> match
! "ab" "a(?=b*)" <regexp> match

! "baz" "(az)(?<=b)" <regexp> first-match
! "cbaz" "a(?<=b*)" <regexp> first-match
! "baz" "a(?<=b)" <regexp> first-match

! "baz" "a(?<!b)" <regexp> first-match
! "caz" "a(?<!b)" <regexp> first-match

! "abcdefg" "a(?=bcdefg)bcd" <regexp> first-match
! "abcdefg" "a(?#bcdefg)bcd" <regexp> first-match
! "abcdefg" "a(?:bcdefg)" <regexp> first-match

! "caba" "a(?<=b)" <regexp> first-match

! capture group 1: "aaaa" 2: ""
! "aaaa" "(a*)(a*)" <regexp> match*
! "aaaa" "(a*)(a+)" <regexp> match*