2013-11-29 09:52:30 -05:00
|
|
|
USING: accessors arrays assocs continuations http.client kernel
|
2016-08-19 11:32:45 -04:00
|
|
|
literals math math.parser math.ranges pcre pcre.ffi pcre.private
|
|
|
|
random sequences system tools.test ;
|
2013-11-04 07:26:37 -05:00
|
|
|
QUALIFIED: regexp
|
2016-08-19 11:32:45 -04:00
|
|
|
QUALIFIED: splitting
|
2013-09-21 20:52:39 -04:00
|
|
|
IN: pcre.tests
|
|
|
|
|
2015-07-03 12:39:59 -04:00
|
|
|
! Splitting on the separators ", " and "." leaves just the three words.
{ { "Bords" "words" "word" } } [
    "Bords, words, word." { ", " ", " "." } split-subseqs
] unit-test
|
|
|
|
|
2015-07-03 12:39:59 -04:00
|
|
|
! The nametable of a compiled pattern pairs each named group with its
! group number.
{ { { 3 "day" } { 2 "month" } { 1 "year" } } } [
    "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})"
    <compiled-pcre> nametable>>
] unit-test
|
|
|
|
|
2013-09-21 20:52:39 -04:00
|
|
|
! Date pattern (yyyy-mm-dd) with the named groups year, month and day.
CONSTANT: iso-date "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})"
|
|
|
|
|
2013-10-25 11:47:58 -04:00
|
|
|
! On Windows the erroffset appears to be set to 0 despite there being
|
|
|
|
! nothing wrong with the regexp.
|
2015-07-03 12:39:59 -04:00
|
|
|
! The last two outputs of (pcre) for a valid pattern must be either
! { f -1 } or { f 0 }.
{ t } [
    "foo" (pcre) 3array rest { { f -1 } { f 0 } } member?
] unit-test
|
2013-09-21 20:52:39 -04:00
|
|
|
|
2015-07-03 12:39:59 -04:00
|
|
|
! pcre_get_stringnumber maps each group name of iso-date to its
! capture number.
{ { 1 2 3 } } [
    iso-date <pcre>
    { "year" "month" "day" } [ pcre_get_stringnumber ] with map
] unit-test
|
|
|
|
|
2016-08-19 15:09:00 -04:00
|
|
|
! Patterns built by <compiled-pcre> have the PCRE_UTF8 option set.
{ t } [
    "foo" <compiled-pcre> PCRE_UTF8 has-option?
] unit-test
|
|
|
|
|
|
|
|
! This option is not present on old PCRE versions.
|
|
|
|
! PCRE_UCP is expected to be set exactly when the library version is
! greater than 8.10.
{ t } [
    "foo" <compiled-pcre> version 8.10 >
    [ PCRE_UCP has-option? ] [ PCRE_UCP has-option? not ] if
] unit-test
|
2013-09-21 20:52:39 -04:00
|
|
|
|
2013-11-20 19:54:56 -05:00
|
|
|
! On Unix the configured newline value is expected to be 10 (linefeed).
os unix? [ { 10 } [ PCRE_CONFIG_NEWLINE pcre-config ] unit-test ] when
|
2013-09-21 20:52:39 -04:00
|
|
|
|
|
|
|
! In this day and age, not supporting utf-8 is broken.
|
2015-07-03 12:39:59 -04:00
|
|
|
! The library must report UTF-8 support (config value 1).
{ 1 } [ PCRE_CONFIG_UTF8 pcre-config ] unit-test
|
2013-09-21 20:52:39 -04:00
|
|
|
|
2015-07-03 12:39:59 -04:00
|
|
|
! The library must report Unicode property support (config value 1).
{ 1 } [ PCRE_CONFIG_UNICODE_PROPERTIES pcre-config ] unit-test
|
2013-09-21 20:52:39 -04:00
|
|
|
|
2013-11-29 09:52:30 -05:00
|
|
|
! Ok if these options throw if the pcre library is too old to support
|
|
|
|
! these configuration parameters.
|
2015-07-03 12:39:59 -04:00
|
|
|
! Accept either a result of 0, or an error whose what slot is the
! queried parameter.
{ t } [
    [ PCRE_CONFIG_UTF16 pcre-config ] [ what>> ] recover
    { 0 $ PCRE_CONFIG_UTF16 } member?
] unit-test
|
2015-07-03 12:39:59 -04:00
|
|
|
! Accept either a result of 0, or an error whose what slot is the
! queried parameter.
{ t } [
    [ PCRE_CONFIG_UTF32 pcre-config ] [ what>> ] recover
    { 0 $ PCRE_CONFIG_UTF32 } member?
] unit-test
|
2013-11-09 23:55:19 -05:00
|
|
|
|
2015-07-03 12:39:59 -04:00
|
|
|
! pcre-fullinfo is expected to throw for item 33; the error's what slot
! then holds 33.
{ 33 } [
    [ "foo" <pcre> f 33 pcre-fullinfo ] [ what>> ] recover
] unit-test
|
|
|
|
|
2013-09-21 20:52:39 -04:00
|
|
|
! Tests for findall
|
2015-07-03 12:39:59 -04:00
|
|
|
! A match lists the whole matched text under f, followed by one pair
! per named group.
{
    { { f "1999-01-12" } { "year" "1999" } { "month" "01" } { "day" "12" } }
} [
    "1999-01-12" iso-date <compiled-pcre> findall first
] unit-test
|
|
|
|
|
2015-07-03 12:39:59 -04:00
|
|
|
! Three dates in the input give three matches.
{ 3 } [
    "2003-10-09 1999-09-01 1514-10-20" iso-date <compiled-pcre> findall length
] unit-test
|
|
|
|
|
2015-07-03 12:39:59 -04:00
|
|
|
! "f" is outside [a-e], so five of the six letters match.
{ 5 } [ "abcdef" "[a-e]" findall length ] unit-test
|
2013-09-21 20:52:39 -04:00
|
|
|
|
2015-07-03 12:39:59 -04:00
|
|
|
! Each of the three alternatives matches once.
{ 3 } [ "foo bar baz" "foo|bar|baz" findall length ] unit-test
|
2013-09-21 20:52:39 -04:00
|
|
|
|
2015-07-03 12:39:59 -04:00
|
|
|
! Non-ASCII characters in a character class: ö, ä and å each match once.
{ 3 } [ "örjan är åtta" "[åäö]" findall length ] unit-test
|
2013-09-21 20:52:39 -04:00
|
|
|
|
2015-07-03 12:39:59 -04:00
|
|
|
! The Unicode property \p{Lu} matches each of the three uppercase letters.
{ 3 } [ "ÅÄÖ" "\\p{Lu}" findall length ] unit-test
|
2013-09-21 20:52:39 -04:00
|
|
|
|
2015-07-03 12:39:59 -04:00
|
|
|
! The lookahead (?=bar) is not part of the match, so the matched text
! has length 3.
{ 3 } [ "foobar" "foo(?=bar)" findall first first second length ] unit-test
|
2013-09-21 20:52:39 -04:00
|
|
|
|
2015-07-03 12:39:59 -04:00
|
|
|
! Each run of non-word characters is returned as its own match.
{ { { { f ", " } } { { f ", " } } { { f "." } } } } [
    "Words, words, word." "\\W+" findall
] unit-test
|
|
|
|
|
2015-07-03 12:39:59 -04:00
|
|
|
! Pull the matched text out of every match.
{ { ", " ", " "." } } [
    "Words, words, word." "\\W+" findall [ first second ] map
] unit-test
|
|
|
|
|
2013-09-21 20:52:39 -04:00
|
|
|
! Builds a new string of 10000 characters, each picked at random from
! the range a-z. The range is created once and curried onto the
! generator quotation instead of being rebuilt for every character.
: long-string ( -- x )
    10000 CHAR: a CHAR: z [a,b] [ random ] curry "" replicate-as ;
|
|
|
|
|
|
|
|
! Performance
|
2015-07-03 12:39:59 -04:00
|
|
|
! Scans a 10000-character random string and expects no match.
! NOTE(review): long-string is random lowercase text, so it could contain
! "foobar" by chance (very unlikely), which would make this test fail.
{ 0 } [ long-string ".{0,15}foobar.{0,10}" findall length ] unit-test
|
2013-09-21 20:52:39 -04:00
|
|
|
|
2013-09-22 15:48:16 -04:00
|
|
|
! Empty matches, corner case behaviour is copied from pcredemo.c
|
2015-07-03 12:39:59 -04:00
|
|
|
! ".*" yields the whole string first and then one empty match.
{ { { { f "foo" } } { { f "" } } } }
[ "foo" ".*" findall ] unit-test
|
|
|
|
|
2015-07-03 12:39:59 -04:00
|
|
|
! "B*" never consumes a character of "foo"; three empty matches result.
{ { { { f "" } } { { f "" } } { { f "" } } } }
[ "foo" "B*" findall ] unit-test
|
|
|
|
|
2013-09-23 08:51:36 -04:00
|
|
|
! Empty matches in strings with multi-byte characters are tricky.
|
2015-07-03 12:39:59 -04:00
|
|
|
! Four non-ASCII characters and a pattern that only matches empty:
! four empty matches are expected.
{ { { { f "" } } { { f "" } } { { f "" } } { { f "" } } } }
[ "öööö" "x*" findall ] unit-test
|
2013-09-23 08:51:36 -04:00
|
|
|
|
2013-09-21 20:52:39 -04:00
|
|
|
! Tests for matches?
|
2015-07-03 12:39:59 -04:00
|
|
|
! A pattern containing a non-ASCII character matches the identical string.
{ t } [ "örjan" "örjan" matches? ] unit-test
|
2013-09-21 20:52:39 -04:00
|
|
|
|
2015-07-03 12:39:59 -04:00
|
|
|
! The Unicode property \p{Ll} has to accept the non-ASCII ö as well as a, b, c.
{ t } [ "abcö" "\\p{Ll}{4}" matches? ] unit-test
|
2013-09-22 15:48:16 -04:00
|
|
|
|
2016-08-19 11:32:45 -04:00
|
|
|
! This used to work in 8.36, but might have changed in later versions.
|
|
|
|
! See: https://bugs.exim.org/show_bug.cgi?id=1875
|
2016-08-19 14:50:58 -04:00
|
|
|
! Only run when the library version is at most 8.36: the inline flags
! (?s) and (?i) are expected to show up as the PCRE_DOTALL and
! PCRE_CASELESS options of the compiled pattern.
version 8.36 <= [
    { t t } [
        "(?s)." <compiled-pcre> PCRE_DOTALL has-option?
        "(?i)x" <compiled-pcre> PCRE_CASELESS has-option?
    ] unit-test
] when
|
2013-09-22 15:48:16 -04:00
|
|
|
|
2015-07-03 12:39:59 -04:00
|
|
|
! Without (?s), "." does not match a newline.
{ f } [ "\n" "." matches? ] unit-test
|
|
|
|
! With (?s), "." matches a newline.
{ t } [ "\n" "(?s)." matches? ] unit-test
|
2013-09-22 15:48:16 -04:00
|
|
|
|
2016-08-19 11:32:45 -04:00
|
|
|
! "^.*$" matches the two-line string only when (?s) is given.
{ f t } [
    "hello\nthere" "^.*$" <compiled-pcre> matches?
    "hello\nthere" "(?s)^.*$" <compiled-pcre> matches?
] unit-test
|
|
|
|
|
|
|
|
! Modes off by default
|
|
|
|
! A plain pattern has neither PCRE_CASELESS nor PCRE_DOTALL set.
{ f f } [
    ! Caseless mode
    "x" <compiled-pcre> PCRE_CASELESS has-option?
    ! Dotall mode
    "." <compiled-pcre> PCRE_DOTALL has-option?
] unit-test
|
|
|
|
|
|
|
|
! Backreferences
|
2015-07-03 12:39:59 -04:00
|
|
|
! \1 must repeat whatever the first group matched, so mixing "sens"
! with "respons" fails.
{ { t f } } [
    { "response and responsibility" "sense and responsibility" }
    [ "(sens|respons)e and \\1ibility" matches? ] map
] unit-test
|
|
|
|
|
2015-07-03 12:39:59 -04:00
|
|
|
! The group is matched caselessly via (?i), but the backreference has
! to repeat the captured text in the same case.
{ { t t f } } [
    { "rah rah" "RAH RAH" "RAH rah" } [ "((?i)rah)\\s+\\1" matches? ] map
] unit-test
|
|
|
|
|
|
|
|
! Splitting
|
2015-07-03 12:39:59 -04:00
|
|
|
! Both patterns split the sentence into the same three words.
{ { { "Words" "words" "word" } { "Words" "words" "word" } } } [
    "Words, words, word." { "\\W+" "[,. ]" } [ split ] with map
] unit-test
|
|
|
|
|
|
|
|
! Bigger tests
|
2015-07-03 12:39:59 -04:00
|
|
|
! Fetches a live page and extracts the "link" group from every href.
! NOTE(review): this needs network access, and sequence? holds for any
! result of map, so the test mainly checks that nothing throws.
{ t } [
    "http://factorcode.org/" http-get nip
    "href=\"(?P<link>[^\"]+)\"" findall [ "link" of ] map sequence?
] unit-test
|
2013-11-04 07:26:37 -05:00
|
|
|
|
|
|
|
! Test that the regexp syntax works.
|
2015-07-21 22:33:54 -04:00
|
|
|
! The pattern is written with the qualified regexp:R/ literal syntax.
{ t } [ "1234abcd" regexp:R/ ^\d+\w+$/ matches? ] unit-test
|