Fixing help-lint for regexp; adding first-match and re-contains?

2009-03-10 19:34:49 -05:00 · 2009-03-10 19:34:49 -05:00 · e2fda2e227
parent 8836b2a73b
commit e2fda2e227
3 changed files with 46 additions and 32 deletions
--- a/basis/regexp/regexp-docs.factor
+++ b/basis/regexp/regexp-docs.factor
@@ -39,13 +39,14 @@ ARTICLE: { "regexp" "theory" } "The theory of regular expressions"
 "The Factor regular expression engine was built with the design decision to support negation and intersection at the expense of backreferences. This lets us have a guaranteed linear-time matching algorithm. Systems like Ragel and Lex also use this algorithm, but in the Factor regular expression engine, all other features of regexps are still present." ;
 ARTICLE: { "regexp" "operations" } "Matching operations with regular expressions"
 { $subsection all-matches }
 { $subsection matches? }
 { $subsection re-contains? }
 { $subsection first-match }
 { $subsection all-matches }
 { $subsection re-split1 }
 { $subsection re-split }
 { $subsection re-replace }
-{ $subsection count-matches }
+{ $subsection count-matches } ;
 { $subsection re-replace } ;
 HELP: <regexp>
 { $values { "string" string } { "regexp" regexp } }
@@ -63,25 +64,33 @@ HELP: regexp
 { $class-description "The class of regular expressions. To construct these, see " { $link { "regexp" "construction" } } "." } ;
 HELP: matches?
-{ $values { "string" string } { "matcher" regexp } { "?" "a boolean" } }
+{ $values { "string" string } { "regexp" regexp } { "?" "a boolean" } }
 { $description "Tests if the string as a whole matches the given regular expression." } ;
 HELP: re-split1
-{ $values { "string" string } { "matcher" regexp } { "before" string } { "after/f" string } }
+{ $values { "string" string } { "regexp" regexp } { "before" string } { "after/f" string } }
 { $description "Searches the string for a substring which matches the pattern. If found, the input string is split on the leftmost and longest occurrence of the match, and the two halves are given as output. If no match is found, then the input string and " { $link f } " are output." } ;
 HELP: all-matches
-{ $values { "string" string } { "matcher" regexp } { "seq" "a sequence of slices of the input" } }
+{ $values { "string" string } { "regexp" regexp } { "seq" "a sequence of slices of the input" } }
 { $description "Finds a sequence of disjoint substrings which each match the pattern. It chooses this by finding the leftmost longest match, and then the leftmost longest match which starts after the end of the previous match, and so on." } ;
 HELP: count-matches
-{ $values { "string" string } { "matcher" regexp } { "n" integer } }
+{ $values { "string" string } { "regexp" regexp } { "n" integer } }
 { $description "Counts how many disjoint matches the regexp has in the string, as made unambiguous by " { $link all-matches } "." } ;
 HELP: re-split
-{ $values { "string" string } { "matcher" regexp } { "seq" "a sequence of slices of the input" } }
+{ $values { "string" string } { "regexp" regexp } { "seq" "a sequence of slices of the input" } }
 { $description "Splits the input string into chunks separated by the regular expression. Each chunk contains no match of the regexp. The chunks are chosen by the strategy of " { $link all-matches } "." } ;
 HELP: re-replace
-{ $values { "string" string } { "matcher" regexp } { "replacement" string } { "result" string } }
+{ $values { "string" string } { "regexp" regexp } { "replacement" string } { "result" string } }
 { $description "Replaces substrings which match the input regexp with the given replacement text. The boundaries of the substring are chosen by the strategy used by " { $link all-matches } "." } ;
 HELP: first-match
 { $values { "string" string } { "regexp" regexp } { "slice/f" "the match, if one exists" } }
 { $description "Finds the first match of the regular expression in the string, and returns it as a slice. If there is no match, then " { $link f } " is returned." } ;
 HELP: re-contains?
 { $values { "string" string } { "regexp" regexp } { "?" "a boolean" } }
 { $description "Determines whether the string has a substring which matches the regular expression given." } ;
--- a/basis/regexp/regexp-tests.factor
+++ b/basis/regexp/regexp-tests.factor
@@ -211,8 +211,8 @@ IN: regexp-tests
 [ f ] [ "aaaxb" "a+ab" <regexp> matches? ] unit-test
 [ t ] [ "aaacb" "a+cb" <regexp> matches? ] unit-test
-[ "aaa" ] [ "aaacb" "a*" <regexp> match-head >string ] unit-test
+[ "aaa" ] [ "aaacb" "a*" <regexp> first-match >string ] unit-test
-[ "aa" ] [ "aaacb" "aa?" <regexp> match-head >string ] unit-test
+[ "aa" ] [ "aaacb" "aa?" <regexp> first-match >string ] unit-test
 [ t ] [ "aaa" R/ AAA/i matches? ] unit-test
 [ f ] [ "aax" R/ AAA/i matches? ] unit-test
@@ -268,13 +268,13 @@ IN: regexp-tests
 [ ] [ "USING: regexp kernel ; R' \\*[^\s*][^*]*\\*' drop" eval ] unit-test
-[ "ab" ] [ "ab" "(a|ab)(bc)?" <regexp> match-head >string ] unit-test
+[ "ab" ] [ "ab" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test
-[ "abc" ] [ "abc" "(a|ab)(bc)?" <regexp> match-head >string ] unit-test
+[ "abc" ] [ "abc" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test
-[ "ab" ] [ "ab" "(ab|a)(bc)?" <regexp> match-head >string ] unit-test
+[ "ab" ] [ "ab" "(ab|a)(bc)?" <regexp> first-match >string ] unit-test
-[ "abc" ] [ "abc" "(ab|a)(bc)?" <regexp> match-head >string ] unit-test
+[ "abc" ] [ "abc" "(ab|a)(bc)?" <regexp> first-match >string ] unit-test
-[ "b" ] [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> match-head >string ] unit-test
+[ "b" ] [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> first-match >string ] unit-test
 [ { "1" "2" "3" "4" } ]
 [ "1ABC2DEF3GHI4" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
@@ -300,18 +300,18 @@ IN: regexp-tests
 [ "-- title --" ] [ "== title ==" R/ =/ "-" re-replace ] unit-test
-[ "" ] [ "ab" "a(?!b)" <regexp> match-head >string ] unit-test
+[ "" ] [ "ab" "a(?!b)" <regexp> first-match >string ] unit-test
-[ "a" ] [ "ac" "a(?!b)" <regexp> match-head >string ] unit-test
+[ "a" ] [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test
 [ t ] [ "fxxbar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test
 [ t ] [ "foobar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test
 [ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
 [ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
-[ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> match-head >string ] unit-test
+[ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> first-match >string ] unit-test
-[ "a" ] [ "ba" "(?<=b)(?<=b)a" <regexp> match-head >string ] unit-test
+[ "a" ] [ "ba" "(?<=b)(?<=b)a" <regexp> first-match >string ] unit-test
-[ "a" ] [ "cab" "(?<=c)a(?=b)" <regexp> match-head >string ] unit-test
+[ "a" ] [ "cab" "(?<=c)a(?=b)" <regexp> first-match >string ] unit-test
-[ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-head length ] unit-test
+[ 3 ] [ "foobar" "foo(?=bar)" <regexp> first-match length ] unit-test
-[ f ] [ "foobxr" "foo(?=bar)" <regexp> match-head ] unit-test
+[ f ] [ "foobxr" "foo(?=bar)" <regexp> first-match ] unit-test
 ! Bug in parsing word
 [ t ] [ "a" R' a' matches? ] unit-test
@@ -424,8 +424,12 @@ IN: regexp-tests
 [ 1 ] [ "a\r" R/ a$/m count-matches ] unit-test
 [ 1 ] [ "a\r\n" R/ a$/m count-matches ] unit-test
-[ f ] [ "foobxr" "foo\\z" <regexp> match-head ] unit-test
+[ f ] [ "foobxr" "foo\\z" <regexp> first-match ] unit-test
-[ 3 ] [ "foo" "foo\\z" <regexp> match-head length ] unit-test
+[ 3 ] [ "foo" "foo\\z" <regexp> first-match length ] unit-test
 [ t ] [ "a foo b" R/ foo/ re-contains? ] unit-test
 [ f ] [ "a bar b" R/ foo/ re-contains? ] unit-test
 [ t ] [ "foo" R/ foo/ re-contains? ] unit-test
 ! [ t ] [ "foo" "\\bfoo\\b" <regexp> matches? ] unit-test
 ! [ t ] [ "afoob" "\\Bfoo\\B" <regexp> matches? ] unit-test
--- a/basis/regexp/regexp.factor
+++ b/basis/regexp/regexp.factor
@@ -89,16 +89,17 @@ PRIVATE>
    slices [ from>> ] map string length suffix
    [ string <slice> ] 2map ;
 : match-head ( str regexp -- slice/f )
    [
        [ 0 ] [ check-string ] [ dup dfa>> '[ _ _ execute ] ] tri*
        match-from
    ] call( str regexp -- slice/f ) ;
 PRIVATE>
 : first-match ( string regexp -- slice/f )
    [ 0 ] [ check-string ] [ ] tri*
    do-next-match nip ;
 : re-contains? ( string regexp -- ? )
    first-match >boolean ;
 : re-split1 ( string regexp -- before after/f )
-    dupd match-head [ 1array split-slices first2 ] [ f ] if* ;
+    dupd first-match [ 1array split-slices first2 ] [ f ] if* ;
 : re-split ( string regexp -- seq )
    dupd all-matches split-slices ;