Fixing help-lint for regexp; adding first-match and re-contains?

db4
Daniel Ehrenberg 2009-03-10 19:34:49 -05:00
parent 8836b2a73b
commit e2fda2e227
3 changed files with 46 additions and 32 deletions

View File

@ -39,13 +39,14 @@ ARTICLE: { "regexp" "theory" } "The theory of regular expressions"
"The Factor regular expression engine was built with the design decision to support negation and intersection at the expense of backreferences. This lets us have a guaranteed linear-time matching algorithm. Systems like Ragel and Lex also use this algorithm, but in the Factor regular expression engine, all other features of regexps are still present." ; "The Factor regular expression engine was built with the design decision to support negation and intersection at the expense of backreferences. This lets us have a guaranteed linear-time matching algorithm. Systems like Ragel and Lex also use this algorithm, but in the Factor regular expression engine, all other features of regexps are still present." ;
ARTICLE: { "regexp" "operations" } "Matching operations with regular expressions" ARTICLE: { "regexp" "operations" } "Matching operations with regular expressions"
{ $subsection all-matches }
{ $subsection matches? } { $subsection matches? }
{ $subsection re-contains? }
{ $subsection first-match }
{ $subsection all-matches }
{ $subsection re-split1 } { $subsection re-split1 }
{ $subsection re-split } { $subsection re-split }
{ $subsection re-replace } { $subsection re-replace }
{ $subsection count-matches } { $subsection count-matches } ;
{ $subsection re-replace } ;
HELP: <regexp> HELP: <regexp>
{ $values { "string" string } { "regexp" regexp } } { $values { "string" string } { "regexp" regexp } }
@ -63,25 +64,33 @@ HELP: regexp
{ $class-description "The class of regular expressions. To construct these, see " { $link { "regexp" "construction" } } "." } ; { $class-description "The class of regular expressions. To construct these, see " { $link { "regexp" "construction" } } "." } ;
HELP: matches? HELP: matches?
{ $values { "string" string } { "matcher" regexp } { "?" "a boolean" } } { $values { "string" string } { "regexp" regexp } { "?" "a boolean" } }
{ $description "Tests if the string as a whole matches the given regular expression." } ; { $description "Tests if the string as a whole matches the given regular expression." } ;
HELP: re-split1 HELP: re-split1
{ $values { "string" string } { "matcher" regexp } { "before" string } { "after/f" string } } { $values { "string" string } { "regexp" regexp } { "before" string } { "after/f" string } }
{ $description "Searches the string for a substring which matches the pattern. If found, the input string is split on the leftmost and longest occurrence of the match, and the two halves are given as output. If no match is found, then the input string and " { $link f } " are output." } ; { $description "Searches the string for a substring which matches the pattern. If found, the input string is split on the leftmost and longest occurrence of the match, and the two halves are given as output. If no match is found, then the input string and " { $link f } " are output." } ;
HELP: all-matches HELP: all-matches
{ $values { "string" string } { "matcher" regexp } { "seq" "a sequence of slices of the input" } } { $values { "string" string } { "regexp" regexp } { "seq" "a sequence of slices of the input" } }
{ $description "Finds a sequence of disjoint substrings which each match the pattern. It chooses this by finding the leftmost longest match, and then the leftmost longest match which starts after the end of the previous match, and so on." } ; { $description "Finds a sequence of disjoint substrings which each match the pattern. It chooses this by finding the leftmost longest match, and then the leftmost longest match which starts after the end of the previous match, and so on." } ;
HELP: count-matches HELP: count-matches
{ $values { "string" string } { "matcher" regexp } { "n" integer } } { $values { "string" string } { "regexp" regexp } { "n" integer } }
{ $description "Counts how many disjoint matches the regexp has in the string, as made unambiguous by " { $link all-matches } "." } ; { $description "Counts how many disjoint matches the regexp has in the string, as made unambiguous by " { $link all-matches } "." } ;
HELP: re-split HELP: re-split
{ $values { "string" string } { "matcher" regexp } { "seq" "a sequence of slices of the input" } } { $values { "string" string } { "regexp" regexp } { "seq" "a sequence of slices of the input" } }
{ $description "Splits the input string into chunks separated by the regular expression. Each chunk contains no match of the regexp. The chunks are chosen by the strategy of " { $link all-matches } "." } ; { $description "Splits the input string into chunks separated by the regular expression. Each chunk contains no match of the regexp. The chunks are chosen by the strategy of " { $link all-matches } "." } ;
HELP: re-replace HELP: re-replace
{ $values { "string" string } { "matcher" regexp } { "replacement" string } { "result" string } } { $values { "string" string } { "regexp" regexp } { "replacement" string } { "result" string } }
{ $description "Replaces substrings which match the input regexp with the given replacement text. The boundaries of the substring are chosen by the strategy used by " { $link all-matches } "." } ; { $description "Replaces substrings which match the input regexp with the given replacement text. The boundaries of the substring are chosen by the strategy used by " { $link all-matches } "." } ;
HELP: first-match
{ $values { "string" string } { "regexp" regexp } { "slice/f" "the match, if one exists" } }
{ $description "Finds the first match of the regular expression in the string, and returns it as a slice. If there is no match, then " { $link f } " is returned." } ;
HELP: re-contains?
{ $values { "string" string } { "regexp" regexp } { "?" "a boolean" } }
{ $description "Determines whether the string has a substring which matches the regular expression given." } ;

View File

@ -211,8 +211,8 @@ IN: regexp-tests
[ f ] [ "aaaxb" "a+ab" <regexp> matches? ] unit-test [ f ] [ "aaaxb" "a+ab" <regexp> matches? ] unit-test
[ t ] [ "aaacb" "a+cb" <regexp> matches? ] unit-test [ t ] [ "aaacb" "a+cb" <regexp> matches? ] unit-test
[ "aaa" ] [ "aaacb" "a*" <regexp> match-head >string ] unit-test [ "aaa" ] [ "aaacb" "a*" <regexp> first-match >string ] unit-test
[ "aa" ] [ "aaacb" "aa?" <regexp> match-head >string ] unit-test [ "aa" ] [ "aaacb" "aa?" <regexp> first-match >string ] unit-test
[ t ] [ "aaa" R/ AAA/i matches? ] unit-test [ t ] [ "aaa" R/ AAA/i matches? ] unit-test
[ f ] [ "aax" R/ AAA/i matches? ] unit-test [ f ] [ "aax" R/ AAA/i matches? ] unit-test
@ -268,13 +268,13 @@ IN: regexp-tests
[ ] [ "USING: regexp kernel ; R' \\*[^\s*][^*]*\\*' drop" eval ] unit-test [ ] [ "USING: regexp kernel ; R' \\*[^\s*][^*]*\\*' drop" eval ] unit-test
[ "ab" ] [ "ab" "(a|ab)(bc)?" <regexp> match-head >string ] unit-test [ "ab" ] [ "ab" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test
[ "abc" ] [ "abc" "(a|ab)(bc)?" <regexp> match-head >string ] unit-test [ "abc" ] [ "abc" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test
[ "ab" ] [ "ab" "(ab|a)(bc)?" <regexp> match-head >string ] unit-test [ "ab" ] [ "ab" "(ab|a)(bc)?" <regexp> first-match >string ] unit-test
[ "abc" ] [ "abc" "(ab|a)(bc)?" <regexp> match-head >string ] unit-test [ "abc" ] [ "abc" "(ab|a)(bc)?" <regexp> first-match >string ] unit-test
[ "b" ] [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> match-head >string ] unit-test [ "b" ] [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> first-match >string ] unit-test
[ { "1" "2" "3" "4" } ] [ { "1" "2" "3" "4" } ]
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ re-split [ >string ] map ] unit-test [ "1ABC2DEF3GHI4" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
@ -300,18 +300,18 @@ IN: regexp-tests
[ "-- title --" ] [ "== title ==" R/ =/ "-" re-replace ] unit-test [ "-- title --" ] [ "== title ==" R/ =/ "-" re-replace ] unit-test
[ "" ] [ "ab" "a(?!b)" <regexp> match-head >string ] unit-test [ "" ] [ "ab" "a(?!b)" <regexp> first-match >string ] unit-test
[ "a" ] [ "ac" "a(?!b)" <regexp> match-head >string ] unit-test [ "a" ] [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test
[ t ] [ "fxxbar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test [ t ] [ "fxxbar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test
[ t ] [ "foobar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test [ t ] [ "foobar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test
[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test [ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test [ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
[ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> match-head >string ] unit-test [ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> first-match >string ] unit-test
[ "a" ] [ "ba" "(?<=b)(?<=b)a" <regexp> match-head >string ] unit-test [ "a" ] [ "ba" "(?<=b)(?<=b)a" <regexp> first-match >string ] unit-test
[ "a" ] [ "cab" "(?<=c)a(?=b)" <regexp> match-head >string ] unit-test [ "a" ] [ "cab" "(?<=c)a(?=b)" <regexp> first-match >string ] unit-test
[ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-head length ] unit-test [ 3 ] [ "foobar" "foo(?=bar)" <regexp> first-match length ] unit-test
[ f ] [ "foobxr" "foo(?=bar)" <regexp> match-head ] unit-test [ f ] [ "foobxr" "foo(?=bar)" <regexp> first-match ] unit-test
! Bug in parsing word ! Bug in parsing word
[ t ] [ "a" R' a' matches? ] unit-test [ t ] [ "a" R' a' matches? ] unit-test
@ -424,8 +424,12 @@ IN: regexp-tests
[ 1 ] [ "a\r" R/ a$/m count-matches ] unit-test [ 1 ] [ "a\r" R/ a$/m count-matches ] unit-test
[ 1 ] [ "a\r\n" R/ a$/m count-matches ] unit-test [ 1 ] [ "a\r\n" R/ a$/m count-matches ] unit-test
[ f ] [ "foobxr" "foo\\z" <regexp> match-head ] unit-test [ f ] [ "foobxr" "foo\\z" <regexp> first-match ] unit-test
[ 3 ] [ "foo" "foo\\z" <regexp> match-head length ] unit-test [ 3 ] [ "foo" "foo\\z" <regexp> first-match length ] unit-test
[ t ] [ "a foo b" R/ foo/ re-contains? ] unit-test
[ f ] [ "a bar b" R/ foo/ re-contains? ] unit-test
[ t ] [ "foo" R/ foo/ re-contains? ] unit-test
! [ t ] [ "foo" "\\bfoo\\b" <regexp> matches? ] unit-test ! [ t ] [ "foo" "\\bfoo\\b" <regexp> matches? ] unit-test
! [ t ] [ "afoob" "\\Bfoo\\B" <regexp> matches? ] unit-test ! [ t ] [ "afoob" "\\Bfoo\\B" <regexp> matches? ] unit-test

View File

@ -89,16 +89,17 @@ PRIVATE>
slices [ from>> ] map string length suffix slices [ from>> ] map string length suffix
[ string <slice> ] 2map ; [ string <slice> ] 2map ;
! Runs regexp against str and outputs a slice or f, per the declared stack effect.
! NOTE(review): the literal 0 looks like the starting index for the match — confirm against match-from.
: match-head ( str regexp -- slice/f )
[
! tri* slips the literal 0 beneath the two inputs, applies check-string to the
! string, and turns the regexp into a quotation that curries the regexp together
! with the value of its dfa>> slot before calling execute.
[ 0 ] [ check-string ] [ dup dfa>> '[ _ _ execute ] ] tri*
! match-from is defined outside this view; it consumes the three values built above.
match-from
! call( runs the quotation under the declared ( str regexp -- slice/f ) effect.
] call( str regexp -- slice/f ) ;
PRIVATE> PRIVATE>
! Looks for a match of regexp in string and outputs it as a slice, or f when
! there is none (per the declared stack effect).
: first-match ( string regexp -- slice/f )
! tri* slips the literal 0 beneath the two inputs, applies check-string to the
! string, and leaves the regexp unchanged, giving: 0 string regexp.
! NOTE(review): 0 is presumably the index where the search starts — confirm against do-next-match.
[ 0 ] [ check-string ] [ ] tri*
! do-next-match is defined outside this view; nip drops the value it leaves
! beneath the result (presumably the end position of the match — TODO confirm).
do-next-match nip ;
! Tests whether first-match finds anything for regexp in string.
! The slice/f result of first-match is converted to a boolean with >boolean.
: re-contains? ( string regexp -- ? )
first-match >boolean ;
: re-split1 ( string regexp -- before after/f ) : re-split1 ( string regexp -- before after/f )
dupd match-head [ 1array split-slices first2 ] [ f ] if* ; dupd first-match [ 1array split-slices first2 ] [ f ] if* ;
: re-split ( string regexp -- seq ) : re-split ( string regexp -- seq )
dupd all-matches split-slices ; dupd all-matches split-slices ;