add more unit tests, add match-head

db4
erg 2008-05-21 18:36:01 -05:00
parent 39950b9884
commit c1d961748b
2 changed files with 141 additions and 1 deletions

View File

@ -131,6 +131,116 @@ IN: regexp4-tests
[ f ] [ "\\" "[^\\\\]" <regexp> matches? ] unit-test
[ t ] [ "a" "[^\\\\]" <regexp> matches? ] unit-test
[ t ] [ "0" "[\\d]" <regexp> matches? ] unit-test
[ f ] [ "a" "[\\d]" <regexp> matches? ] unit-test
[ f ] [ "0" "[^\\d]" <regexp> matches? ] unit-test
[ t ] [ "a" "[^\\d]" <regexp> matches? ] unit-test
[ t ] [ "a" "[a-z]{1,}|[A-Z]{2,4}|b*|c|(f|g)*" <regexp> matches? ] unit-test
[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}|b*|c|(f|g)*" <regexp> matches? ] unit-test
[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}" <regexp> matches? ] unit-test
[ t ] [ "1000" "\\d{4,6}" <regexp> matches? ] unit-test
[ t ] [ "1000" "[0-9]{4,6}" <regexp> matches? ] unit-test
[ t ] [ "abc" "\\p{Lower}{3}" <regexp> matches? ] unit-test
[ f ] [ "ABC" "\\p{Lower}{3}" <regexp> matches? ] unit-test
[ t ] [ "ABC" "\\p{Upper}{3}" <regexp> matches? ] unit-test
[ f ] [ "abc" "\\p{Upper}{3}" <regexp> matches? ] unit-test
[ f ] [ "abc" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
[ t ] [ "ABC" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
[ t ] [ "" "\\Q\\E" <regexp> matches? ] unit-test
[ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
[ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
[ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
[ t ] [ "S" "\\0123" <regexp> matches? ] unit-test
[ t ] [ "SXY" "\\0123XY" <regexp> matches? ] unit-test
[ t ] [ "x" "\\x78" <regexp> matches? ] unit-test
[ f ] [ "y" "\\x78" <regexp> matches? ] unit-test
[ t ] [ "x" "\\u000078" <regexp> matches? ] unit-test
[ f ] [ "y" "\\u000078" <regexp> matches? ] unit-test
[ t ] [ "ab" "a+b" <regexp> matches? ] unit-test
[ f ] [ "b" "a+b" <regexp> matches? ] unit-test
[ t ] [ "aab" "a+b" <regexp> matches? ] unit-test
[ f ] [ "abb" "a+b" <regexp> matches? ] unit-test
[ t ] [ "abbbb" "ab*" <regexp> matches? ] unit-test
[ t ] [ "a" "ab*" <regexp> matches? ] unit-test
[ f ] [ "abab" "ab*" <regexp> matches? ] unit-test
[ f ] [ "x" "\\." <regexp> matches? ] unit-test
[ t ] [ "." "\\." <regexp> matches? ] unit-test
[ t ] [ "aaaab" "a+ab" <regexp> matches? ] unit-test
[ f ] [ "aaaxb" "a+ab" <regexp> matches? ] unit-test
[ t ] [ "aaacb" "a+cb" <regexp> matches? ] unit-test
[ f ] [ "aaaab" "a++ab" <regexp> matches? ] unit-test
[ t ] [ "aaacb" "a++cb" <regexp> matches? ] unit-test
[ 3 ] [ "aaacb" "a*" <regexp> match-head ] unit-test
[ 1 ] [ "aaacb" "a+?" <regexp> match-head ] unit-test
[ 2 ] [ "aaacb" "aa?" <regexp> match-head ] unit-test
[ 1 ] [ "aaacb" "aa??" <regexp> match-head ] unit-test
[ 3 ] [ "aacb" "aa?c" <regexp> match-head ] unit-test
[ 3 ] [ "aacb" "aa??c" <regexp> match-head ] unit-test
! [ t ] [ "aaa" "AAA" t <regexp> matches? ] unit-test
! [ f ] [ "aax" "AAA" t <regexp> matches? ] unit-test
! [ t ] [ "aaa" "A*" t <regexp> matches? ] unit-test
! [ f ] [ "aaba" "A*" t <regexp> matches? ] unit-test
! [ t ] [ "b" "[AB]" t <regexp> matches? ] unit-test
! [ f ] [ "c" "[AB]" t <regexp> matches? ] unit-test
! [ t ] [ "c" "[A-Z]" t <regexp> matches? ] unit-test
! [ f ] [ "3" "[A-Z]" t <regexp> matches? ] unit-test
[ ] [
"(0[lL]?|[1-9]\\d{0,9}(\\d{0,9}[lL])?|0[xX]\\p{XDigit}{1,8}(\\p{XDigit}{0,8}[lL])?|0[0-7]{1,11}([0-7]{0,11}[lL])?|([0-9]+\\.[0-9]*|\\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?|[0-9]+([eE][+-]?[0-9]+[fFdD]?|([eE][+-]?[0-9]+)?[fFdD]))"
<regexp> drop
] unit-test
[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
! [ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-head ] unit-test
! [ f ] [ "foobxr" "foo(?=bar)" <regexp> match-head ] unit-test
! [ f ] [ "foobxr" "foo\\z" <regexp> match-head ] unit-test
! [ 3 ] [ "foo" "foo\\z" <regexp> match-head ] unit-test
! [ 3 ] [ "foo bar" "foo\\b" <regexp> match-head ] unit-test
! [ f ] [ "fooxbar" "foo\\b" <regexp> matches? ] unit-test
! [ t ] [ "foo" "foo\\b" <regexp> matches? ] unit-test
! [ t ] [ "foo bar" "foo\\b bar" <regexp> matches? ] unit-test
! [ f ] [ "fooxbar" "foo\\bxbar" <regexp> matches? ] unit-test
! [ f ] [ "foo" "foo\\bbar" <regexp> matches? ] unit-test
! [ f ] [ "foo bar" "foo\\B" <regexp> matches? ] unit-test
! [ 3 ] [ "fooxbar" "foo\\B" <regexp> match-head ] unit-test
! [ t ] [ "foo" "foo\\B" <regexp> matches? ] unit-test
! [ f ] [ "foo bar" "foo\\B bar" <regexp> matches? ] unit-test
! [ t ] [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
! [ f ] [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test
! [ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
! [ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
! [ t ] [ ".o" "\\.[a-z]" <regexp> matches? ] unit-test
! Bug in parsing word
[ t ] [
"a"
R' a'
matches?
] unit-test
! ((A)(B(C)))
! 1. ((A)(B(C)))

View File

@ -4,7 +4,7 @@ USING: accessors arrays assocs combinators kernel math
sequences namespaces locals combinators.lib state-tables
math.parser state-parser sets dlists unicode.categories
math.order quotations shuffle math.ranges splitting
symbols fry ;
symbols fry parser ;
IN: regexp4
SYMBOLS: eps start-state final-state beginning-of-text
@ -544,6 +544,33 @@ ERROR: unsupported-token token ;
<vector-table> >>nfa
dup [ parse-raw-regexp ] [ subset-construction ] bi ;
! Literal syntax for regexps
: parse-options ( string -- ? )
#! Lame
{
{ "" [ f ] }
{ "i" [ t ] }
} case ;
: parse-regexp ( accum end -- accum )
lexer get dup skip-blank
[ [ index-from dup 1+ swap ] 2keep swapd subseq swap ] change-lexer-column
! lexer get dup still-parsing-line?
! [ (parse-token) parse-options ] [ drop f ] if
<regexp> parsed ;
: R! CHAR: ! parse-regexp ; parsing
: R" CHAR: " parse-regexp ; parsing
: R# CHAR: # parse-regexp ; parsing
: R' CHAR: ' parse-regexp ; parsing
: R( CHAR: ) parse-regexp ; parsing
: R/ CHAR: / parse-regexp ; parsing
: R@ CHAR: @ parse-regexp ; parsing
: R[ CHAR: ] parse-regexp ; parsing
: R` CHAR: ` parse-regexp ; parsing
: R{ CHAR: } parse-regexp ; parsing
: R| CHAR: | parse-regexp ; parsing
TUPLE: dfa-traverser
dfa
last-state current-state
@ -611,6 +638,9 @@ TUPLE: dfa-traverser
: matches? ( string regexp -- ? )
dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
: match-head ( string regexp -- end )
match length>> ;
! character classes
! TUPLE: range-class from to ;
! TUPLE: or-class left right ;