diff --git a/basis/regexp/ast/ast.factor b/basis/regexp/ast/ast.factor index 65748005f4..b804eacc09 100644 --- a/basis/regexp/ast/ast.factor +++ b/basis/regexp/ast/ast.factor @@ -13,7 +13,10 @@ C: from-to TUPLE: at-least n ; C: at-least -SINGLETON: epsilon +TUPLE: tagged-epsilon tag ; +C: tagged-epsilon + +CONSTANT: epsilon T{ tagged-epsilon } TUPLE: concatenation first second ; @@ -60,3 +63,10 @@ C: lookahead TUPLE: lookbehind term ; C: lookbehind + +TUPLE: possessive-star term ; +C: possessive-star + +: ( term -- term' ) + dup 2array ; + diff --git a/basis/regexp/classes/classes.factor b/basis/regexp/classes/classes.factor index 6e68e9e0f6..0990ac786b 100644 --- a/basis/regexp/classes/classes.factor +++ b/basis/regexp/classes/classes.factor @@ -12,8 +12,7 @@ ascii-class punctuation-class java-printable-class blank-class control-character-class hex-digit-class java-blank-class c-identifier-class unmatchable-class terminator-class word-boundary-class ; -SINGLETONS: beginning-of-input beginning-of-line -end-of-input end-of-line ; +SINGLETONS: beginning-of-input ^ end-of-input $ ; TUPLE: range from to ; C: range @@ -100,10 +99,10 @@ M: unmatchable-class class-member? ( obj class -- ? ) M: terminator-class class-member? ( obj class -- ? ) drop "\r\n\u000085\u002029\u002028" member? ; -M: beginning-of-line class-member? ( obj class -- ? ) +M: ^ class-member? ( obj class -- ? ) 2drop f ; -M: end-of-line class-member? ( obj class -- ? ) +M: $ class-member? ( obj class -- ? ) 2drop f ; M: f class-member? 2drop f ; diff --git a/basis/regexp/combinators/combinators-tests.factor b/basis/regexp/combinators/combinators-tests.factor new file mode 100644 index 0000000000..dc6b5a6567 --- /dev/null +++ b/basis/regexp/combinators/combinators-tests.factor @@ -0,0 +1,29 @@ +! Copyright (C) 2009 Daniel Ehrenberg +! See http://factorcode.org/license.txt for BSD license. +USING: regexp.combinators tools.test regexp kernel sequences ; +IN: regexp.combinators.tests + +: strings ( -- regexp ) + { "foo" "bar" "baz" } ; + +[ t t t ] [ "foo" "bar" "baz" [ strings matches? ] tri@ ] unit-test +[ f f f ] [ "food" "ibar" "ba" [ strings matches? ] tri@ ] unit-test + +: conj ( -- regexp ) + { R/ .*a/ R/ b.*/ } ; + +[ t ] [ "bljhasflsda" conj matches? ] unit-test +[ f ] [ "bsdfdfs" conj matches? ] unit-test ! why does this fail? +[ f ] [ "fsfa" conj matches? ] unit-test + +! For some reason, creating this DFA doesn't work +! [ f ] [ "bljhasflsda" conj matches? ] unit-test +! [ t ] [ "bsdfdfs" conj matches? ] unit-test +! [ t ] [ "fsfa" conj matches? ] unit-test + +[ f f ] [ "" "hi" [ matches? ] bi@ ] unit-test +[ t t ] [ "" "hi" [ matches? ] bi@ ] unit-test + +[ { t t t f } ] [ { "" "a" "aaaaa" "aab" } [ "a" matches? ] map ] unit-test +[ { f t t f } ] [ { "" "a" "aaaaa" "aab" } [ "a" matches? ] map ] unit-test +[ { t t f f } ] [ { "" "a" "aaaaa" "aab" } [ "a"