handle dotall mode -- . matches newlines when it's on. dotall mode is off by default. rearrange unit tests a bit

db4
Doug Coleman 2008-11-22 21:04:09 -06:00
parent 75c32bde5a
commit ba680fae6f
3 changed files with 43 additions and 42 deletions

View File

@ -14,6 +14,9 @@ M: character-class-range class-member? ( obj class -- ? )
M: any-char class-member? ( obj class -- ? )
2drop t ;
M: any-char-no-nl class-member? ( obj class -- ? )
drop CHAR: \n = not ;
M: letter-class class-member? ( obj class -- ? )
drop letter? ;

View File

@ -43,6 +43,7 @@ INSTANCE: comment-group parentheses-group
TUPLE: character-class-range from to ; INSTANCE: character-class-range node
SINGLETON: epsilon INSTANCE: epsilon node
SINGLETON: any-char INSTANCE: any-char node
SINGLETON: any-char-no-nl INSTANCE: any-char-no-nl node
SINGLETON: front-anchor INSTANCE: front-anchor node
SINGLETON: back-anchor INSTANCE: back-anchor node
@ -172,7 +173,7 @@ DEFER: (parse-regexp)
[ drop1 (parse-special-group) ]
[ capture-group f nested-parse-regexp ] if ;
: handle-dot ( -- ) any-char push-stack ;
: handle-dot ( -- ) get-dotall any-char any-char-no-nl ? push-stack ;
: handle-pipe ( -- ) pipe push-stack ;
: (handle-star) ( obj -- kleene-star )
peek1 {

View File

@ -40,7 +40,11 @@ IN: regexp-tests
[ f ] [ "" "." <regexp> matches? ] unit-test
[ t ] [ "a" "." <regexp> matches? ] unit-test
[ t ] [ "." "." <regexp> matches? ] unit-test
! [ f ] [ "\n" "." <regexp> matches? ] unit-test
! Dotall mode -- when on, . matches newlines.
! Off by default.
[ f ] [ "\n" "." <regexp> matches? ] unit-test
[ t ] [ "\n" "(?s)." <regexp> matches? ] unit-test
[ f ] [ "" ".+" <regexp> matches? ] unit-test
[ t ] [ "a" ".+" <regexp> matches? ] unit-test
@ -170,7 +174,6 @@ IN: regexp-tests
[ f ] [ "ABC" "\\p{Lower}{3}" <regexp> matches? ] unit-test
[ t ] [ "ABC" "\\p{Upper}{3}" <regexp> matches? ] unit-test
[ f ] [ "abc" "\\p{Upper}{3}" <regexp> matches? ] unit-test
!
[ f ] [ "abc" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
[ t ] [ "ABC" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
@ -252,7 +255,40 @@ IN: regexp-tests
! Comment
[ t ] [ "ac" "a(?#boo)c" <regexp> matches? ] unit-test
[ ] [ "USING: regexp kernel ; R' -{3}[+]{1,6}(?:!!)?\\s' drop" eval ] unit-test
[ ] [ "USING: regexp kernel ; R' (ftp|http|https)://(\\w+:?\\w*@)?(\\S+)(:[0-9]+)?(/|/([\\w#!:.?+=&%@!\\-/]))?' drop" eval ] unit-test
[ ] [ "USING: regexp kernel ; R' \\*[^\s*][^*]*\\*' drop" eval ] unit-test
[ "ab" ] [ "ab" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test
[ "abc" ] [ "abc" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test
[ "ab" ] [ "ab" "(ab|a)(bc)?" <regexp> first-match >string ] unit-test
[ "abc" ] [ "abc" "(ab|a)(bc)?" <regexp> first-match >string ] unit-test
[ "b" ] [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> first-match >string ] unit-test
[ t ] [ "a:b" ".+:?" <regexp> matches? ] unit-test
[ 1 ] [ "hello" ".+?" <regexp> match length ] unit-test
[ { "1" "2" "3" "4" } ]
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
[ { "1" "2" "3" "4" } ]
[ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
[ { "ABC" "DEF" "GHI" } ]
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ all-matches [ >string ] map ] unit-test
[ "1.2.3.4" ]
[ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ "." re-replace ] unit-test
[ f ] [ "ab" "a(?!b)" <regexp> first-match ] unit-test
[ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> first-match >string ] unit-test
[ "a" ] [ "ba" "a(?<=b)(?<=b)" <regexp> first-match >string ] unit-test
[ "a" ] [ "cab" "a(?=b)(?<=c)" <regexp> first-match >string ] unit-test
! [ "{Lower}" <regexp> ] [ invalid-range? ] must-fail-with
@ -286,21 +322,10 @@ IN: regexp-tests
! [ t ] [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
! [ f ] [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test
[ ] [ "USING: regexp kernel ; R' -{3}[+]{1,6}(?:!!)?\\s' drop" eval ] unit-test
[ ] [ "USING: regexp kernel ; R' (ftp|http|https)://(\\w+:?\\w*@)?(\\S+)(:[0-9]+)?(/|/([\\w#!:.?+=&%@!\\-/]))?' drop" eval ] unit-test
[ ] [ "USING: regexp kernel ; R' \\*[^\s*][^*]*\\*' drop" eval ] unit-test
! Bug in parsing word
! [ t ] [ "a" R' a' matches? ] unit-test
! ((A)(B(C)))
! 1. ((A)(B(C)))
! 2. (A)
! 3. (B(C))
! 4. (C)
! clear "a(?=b*)" <regexp> "ab" over match
! clear "a(?=b*c)" <regexp> "abbbbbc" over match
! clear "a(?=b*)" <regexp> "ab" over match
@ -327,38 +352,10 @@ IN: regexp-tests
! "a(?:bcdefg)" <regexp> "abcdefg" over first-match
[ "a" ] [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test
[ f ] [ "ab" "a(?!b)" <regexp> first-match ] unit-test
! "a(?<=b)" <regexp> "caba" over first-match
[ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> first-match >string ] unit-test
[ "a" ] [ "ba" "a(?<=b)(?<=b)" <regexp> first-match >string ] unit-test
[ "a" ] [ "cab" "a(?=b)(?<=c)" <regexp> first-match >string ] unit-test
! capture group 1: "aaaa" 2: ""
! "aaaa" "(a*)(a*)" <regexp> match*
! "aaaa" "(a*)(a+)" <regexp> match*
[ "ab" ] [ "ab" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test
[ "abc" ] [ "abc" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test
[ "ab" ] [ "ab" "(ab|a)(bc)?" <regexp> first-match >string ] unit-test
[ "abc" ] [ "abc" "(ab|a)(bc)?" <regexp> first-match >string ] unit-test
[ "b" ] [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> first-match >string ] unit-test
[ t ] [ "a:b" ".+:?" <regexp> matches? ] unit-test
[ 1 ] [ "hello" ".+?" <regexp> match length ] unit-test
[ { "1" "2" "3" "4" } ]
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
[ { "1" "2" "3" "4" } ]
[ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
[ { "ABC" "DEF" "GHI" } ]
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ all-matches [ >string ] map ] unit-test
[ "1.2.3.4" ]
[ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ "." re-replace ] unit-test