add more unit tests, add match-head
parent
39950b9884
commit
c1d961748b
|
@ -131,6 +131,116 @@ IN: regexp4-tests
|
|||
[ f ] [ "\\" "[^\\\\]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[^\\\\]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "0" "[\\d]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "[\\d]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "0" "[^\\d]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[^\\d]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "a" "[a-z]{1,}|[A-Z]{2,4}|b*|c|(f|g)*" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}|b*|c|(f|g)*" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "1000" "\\d{4,6}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "1000" "[0-9]{4,6}" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "abc" "\\p{Lower}{3}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "ABC" "\\p{Lower}{3}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "ABC" "\\p{Upper}{3}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abc" "\\p{Upper}{3}" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "abc" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "ABC" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "" "\\Q\\E" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
|
||||
[ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "S" "\\0123" <regexp> matches? ] unit-test
|
||||
[ t ] [ "SXY" "\\0123XY" <regexp> matches? ] unit-test
|
||||
[ t ] [ "x" "\\x78" <regexp> matches? ] unit-test
|
||||
[ f ] [ "y" "\\x78" <regexp> matches? ] unit-test
|
||||
[ t ] [ "x" "\\u000078" <regexp> matches? ] unit-test
|
||||
[ f ] [ "y" "\\u000078" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "ab" "a+b" <regexp> matches? ] unit-test
|
||||
[ f ] [ "b" "a+b" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aab" "a+b" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abb" "a+b" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "abbbb" "ab*" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "ab*" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abab" "ab*" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "x" "\\." <regexp> matches? ] unit-test
|
||||
[ t ] [ "." "\\." <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "aaaab" "a+ab" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aaaxb" "a+ab" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaacb" "a+cb" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aaaab" "a++ab" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaacb" "a++cb" <regexp> matches? ] unit-test
|
||||
|
||||
[ 3 ] [ "aaacb" "a*" <regexp> match-head ] unit-test
|
||||
[ 1 ] [ "aaacb" "a+?" <regexp> match-head ] unit-test
|
||||
[ 2 ] [ "aaacb" "aa?" <regexp> match-head ] unit-test
|
||||
[ 1 ] [ "aaacb" "aa??" <regexp> match-head ] unit-test
|
||||
[ 3 ] [ "aacb" "aa?c" <regexp> match-head ] unit-test
|
||||
[ 3 ] [ "aacb" "aa??c" <regexp> match-head ] unit-test
|
||||
|
||||
! [ t ] [ "aaa" "AAA" t <regexp> matches? ] unit-test
|
||||
! [ f ] [ "aax" "AAA" t <regexp> matches? ] unit-test
|
||||
! [ t ] [ "aaa" "A*" t <regexp> matches? ] unit-test
|
||||
! [ f ] [ "aaba" "A*" t <regexp> matches? ] unit-test
|
||||
! [ t ] [ "b" "[AB]" t <regexp> matches? ] unit-test
|
||||
! [ f ] [ "c" "[AB]" t <regexp> matches? ] unit-test
|
||||
! [ t ] [ "c" "[A-Z]" t <regexp> matches? ] unit-test
|
||||
! [ f ] [ "3" "[A-Z]" t <regexp> matches? ] unit-test
|
||||
|
||||
[ ] [
|
||||
"(0[lL]?|[1-9]\\d{0,9}(\\d{0,9}[lL])?|0[xX]\\p{XDigit}{1,8}(\\p{XDigit}{0,8}[lL])?|0[0-7]{1,11}([0-7]{0,11}[lL])?|([0-9]+\\.[0-9]*|\\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?|[0-9]+([eE][+-]?[0-9]+[fFdD]?|([eE][+-]?[0-9]+)?[fFdD]))"
|
||||
<regexp> drop
|
||||
] unit-test
|
||||
|
||||
[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||
|
||||
! [ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-head ] unit-test
|
||||
! [ f ] [ "foobxr" "foo(?=bar)" <regexp> match-head ] unit-test
|
||||
|
||||
! [ f ] [ "foobxr" "foo\\z" <regexp> match-head ] unit-test
|
||||
! [ 3 ] [ "foo" "foo\\z" <regexp> match-head ] unit-test
|
||||
|
||||
! [ 3 ] [ "foo bar" "foo\\b" <regexp> match-head ] unit-test
|
||||
! [ f ] [ "fooxbar" "foo\\b" <regexp> matches? ] unit-test
|
||||
! [ t ] [ "foo" "foo\\b" <regexp> matches? ] unit-test
|
||||
! [ t ] [ "foo bar" "foo\\b bar" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "fooxbar" "foo\\bxbar" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "foo" "foo\\bbar" <regexp> matches? ] unit-test
|
||||
|
||||
! [ f ] [ "foo bar" "foo\\B" <regexp> matches? ] unit-test
|
||||
! [ 3 ] [ "fooxbar" "foo\\B" <regexp> match-head ] unit-test
|
||||
! [ t ] [ "foo" "foo\\B" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "foo bar" "foo\\B bar" <regexp> matches? ] unit-test
|
||||
! [ t ] [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test
|
||||
|
||||
! [ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||
! [ t ] [ ".o" "\\.[a-z]" <regexp> matches? ] unit-test
|
||||
|
||||
! Bug in parsing word
|
||||
[ t ] [
|
||||
"a"
|
||||
R' a'
|
||||
matches?
|
||||
] unit-test
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
! ((A)(B(C)))
|
||||
! 1. ((A)(B(C)))
|
||||
|
|
|
@ -4,7 +4,7 @@ USING: accessors arrays assocs combinators kernel math
|
|||
sequences namespaces locals combinators.lib state-tables
|
||||
math.parser state-parser sets dlists unicode.categories
|
||||
math.order quotations shuffle math.ranges splitting
|
||||
symbols fry ;
|
||||
symbols fry parser ;
|
||||
IN: regexp4
|
||||
|
||||
SYMBOLS: eps start-state final-state beginning-of-text
|
||||
|
@ -544,6 +544,33 @@ ERROR: unsupported-token token ;
|
|||
<vector-table> >>nfa
|
||||
dup [ parse-raw-regexp ] [ subset-construction ] bi ;
|
||||
|
||||
! Literal syntax for regexps
|
||||
: parse-options ( string -- ? )
|
||||
#! Lame
|
||||
{
|
||||
{ "" [ f ] }
|
||||
{ "i" [ t ] }
|
||||
} case ;
|
||||
|
||||
: parse-regexp ( accum end -- accum )
|
||||
lexer get dup skip-blank
|
||||
[ [ index-from dup 1+ swap ] 2keep swapd subseq swap ] change-lexer-column
|
||||
! lexer get dup still-parsing-line?
|
||||
! [ (parse-token) parse-options ] [ drop f ] if
|
||||
<regexp> parsed ;
|
||||
|
||||
: R! CHAR: ! parse-regexp ; parsing
|
||||
: R" CHAR: " parse-regexp ; parsing
|
||||
: R# CHAR: # parse-regexp ; parsing
|
||||
: R' CHAR: ' parse-regexp ; parsing
|
||||
: R( CHAR: ) parse-regexp ; parsing
|
||||
: R/ CHAR: / parse-regexp ; parsing
|
||||
: R@ CHAR: @ parse-regexp ; parsing
|
||||
: R[ CHAR: ] parse-regexp ; parsing
|
||||
: R` CHAR: ` parse-regexp ; parsing
|
||||
: R{ CHAR: } parse-regexp ; parsing
|
||||
: R| CHAR: | parse-regexp ; parsing
|
||||
|
||||
TUPLE: dfa-traverser
|
||||
dfa
|
||||
last-state current-state
|
||||
|
@ -611,6 +638,9 @@ TUPLE: dfa-traverser
|
|||
: matches? ( string regexp -- ? )
|
||||
dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
|
||||
|
||||
: match-head ( string regexp -- end )
|
||||
match length>> ;
|
||||
|
||||
! character classes
|
||||
! TUPLE: range-class from to ;
|
||||
! TUPLE: or-class left right ;
|
||||
|
|
Loading…
Reference in New Issue