diff --git a/basis/regexp/classes/classes-tests.factor b/basis/regexp/classes/classes-tests.factor index 2253cd999a..9a210fb576 100644 --- a/basis/regexp/classes/classes-tests.factor +++ b/basis/regexp/classes/classes-tests.factor @@ -27,20 +27,23 @@ IN: regexp.classes.tests [ T{ primitive-class { class letter-class } } ] [ letter-class dup 2array ] unit-test [ T{ primitive-class { class letter-class } } ] [ letter-class dup 2array ] unit-test [ T{ or-class { seq { 2 3 1 } } } ] [ { 1 2 } { 2 3 } 2array ] unit-test +[ T{ or-class { seq { 3 2 } } } ] [ { 2 3 } 1 2array ] unit-test +[ f ] [ t ] unit-test +[ t ] [ f ] unit-test ! Making classes into nested conditionals [ V{ 1 2 3 4 } ] [ T{ and-class f { 1 T{ not-class f 2 } T{ or-class f { 3 4 } } 2 } } class>questions ] unit-test -[ { 3 } ] [ { { t 3 } } table>condition ] unit-test -[ { T{ primitive-class } } ] [ { { t 1 } { T{ primitive-class } 2 } } table>questions ] unit-test -[ { { t 1 } { t 2 } } ] [ { { t 1 } { T{ primitive-class } 2 } } T{ primitive-class } t answer ] unit-test -[ { { t 1 } } ] [ { { t 1 } { T{ primitive-class } 2 } } T{ primitive-class } f answer ] unit-test -[ T{ condition f T{ primitive-class } { 1 2 } { 1 } } ] [ { { t 1 } { T{ primitive-class } 2 } } table>condition ] unit-test +[ { 3 } ] [ { { 3 t } } table>condition ] unit-test +[ { T{ primitive-class } } ] [ { { 1 t } { 2 T{ primitive-class } } } table>questions ] unit-test +[ { { 1 t } { 2 t } } ] [ { { 1 t } { 2 T{ primitive-class } } } T{ primitive-class } t answer ] unit-test +[ { { 1 t } } ] [ { { 1 t } { 2 T{ primitive-class } } } T{ primitive-class } f answer ] unit-test +[ T{ condition f T{ primitive-class } { 1 2 } { 1 } } ] [ { { 1 t } { 2 T{ primitive-class } } } table>condition ] unit-test SYMBOL: foo SYMBOL: bar -[ T{ condition f T{ primitive-class f bar } T{ condition f T{ primitive-class f foo } { 1 2 3 } { 1 3 } } T{ condition f T{ primitive-class f foo } { 1 2 } { 1 } } } ] [ { { t 1 } { T{ primitive-class f foo } 2 } { T{ primitive-class f bar } 3 } } table>condition ] unit-test +[ T{ condition f T{ primitive-class f bar } T{ condition f T{ primitive-class f foo } { 1 2 3 } { 1 3 } } T{ condition f T{ primitive-class f foo } { 1 2 } { 1 } } } ] [ { { 1 t } { 2 T{ primitive-class f foo } } { 3 T{ primitive-class f bar } } } table>condition ] unit-test [ t ] [ foo dup t replace-question ] unit-test [ f ] [ foo dup f replace-question ] unit-test diff --git a/basis/regexp/classes/classes.factor b/basis/regexp/classes/classes.factor index 229197e507..f8fce02213 100644 --- a/basis/regexp/classes/classes.factor +++ b/basis/regexp/classes/classes.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors kernel math math.order words combinators locals ascii unicode.categories combinators.short-circuit sequences -fry macros arrays assocs sets ; +fry macros arrays assocs sets classes ; IN: regexp.classes SINGLETONS: any-char any-char-no-nl @@ -130,7 +130,13 @@ M: f combine-and nip t ; M: not-class combine-and - class>> = [ f t ] [ f f ] if ; + class>> 2dup = [ 2drop f t ] [ + dup integer? [ + 2dup swap class-member? + [ 2drop f f ] + [ drop t ] if + ] [ 2drop f f ] if + ] if ; M: integer combine-and swap 2dup class-member? [ drop t ] [ 2drop f t ] if ; @@ -151,9 +157,6 @@ M: not-class combine-or M: integer combine-or 2dup swap class-member? [ drop t ] [ 2drop f f ] if ; -MACRO: instance? ( class -- ? ) - "predicate" word-prop ; - : flatten ( seq class -- newseq ) '[ dup _ instance? [ seq>> ] [ 1array ] if ] map concat ; inline @@ -201,6 +204,9 @@ M: and-class M: or-class seq>> [ ] map ; +M: t drop f ; +M: f drop t ; + M: not-class class-member? class>> class-member? not ; @@ -230,8 +236,8 @@ M: not-class replace-question class>> replace-question ; : answer ( table question answer -- new-table ) - '[ [ _ _ replace-question ] dip ] assoc-map - [ drop ] assoc-filter ; + '[ _ _ replace-question ] assoc-map + [ nip ] assoc-filter ; DEFER: make-condition @@ -242,7 +248,7 @@ DEFER: make-condition 2dup = [ 2nip ] [ ] if ; : make-condition ( table questions -- condition ) - [ values ] [ unclip (make-condition) ] if-empty ; + [ keys ] [ unclip (make-condition) ] if-empty ; GENERIC: class>questions ( class -- questions ) : compound-questions ( class -- questions ) seq>> [ class>questions ] gather ; @@ -252,9 +258,10 @@ M: not-class class>questions class>> class>questions ; M: object class>questions 1array ; : table>questions ( table -- questions ) - keys class>questions t swap remove ; + values class>questions t swap remove ; : table>condition ( table -- condition ) + ! input table is state => class >alist dup table>questions make-condition ; : condition-map ( condition quot: ( obj -- obj' ) -- new-condition ) diff --git a/basis/regexp/compiler/compiler.factor b/basis/regexp/compiler/compiler.factor index 88fc415b42..30c9a5a5cb 100644 --- a/basis/regexp/compiler/compiler.factor +++ b/basis/regexp/compiler/compiler.factor @@ -18,9 +18,13 @@ IN: regexp.compiler [ [ 3drop ] ] [ '[ drop _ execute ] ] if-empty ] if ; -: non-literals>dispatch ( non-literal-transitions -- quot ) +: new-non-literals>dispatch ( non-literal-transitions -- quot ) table>condition condition>quot ; +: non-literals>dispatch ( non-literal-transitions -- quot ) + [ [ '[ dup _ class-member? ] ] [ '[ drop _ execute ] ] bi* ] assoc-map + [ 3drop ] suffix '[ _ cond ] ; + : expand-one-or ( or-class transition -- alist ) [ seq>> ] dip '[ _ 2array ] map ; diff --git a/basis/regexp/dfa/dfa.factor b/basis/regexp/dfa/dfa.factor index f05f5d5c7f..6ddc0396a7 100644 --- a/basis/regexp/dfa/dfa.factor +++ b/basis/regexp/dfa/dfa.factor @@ -29,7 +29,7 @@ IN: regexp.dfa '[ _ _ t epsilon-loop ] each ; : find-epsilon-closure ( states nfa -- dfa-state ) - epsilon-table [ swap ] assoc-map table>condition ; + epsilon-table table>condition ; : find-closure ( states transition nfa -- new-states ) [ find-delta ] keep find-epsilon-closure ; @@ -59,18 +59,13 @@ IN: regexp.dfa nfa dfa new-states visited-states new-transitions ] if-empty ; -: states ( hashtable -- array ) - [ keys ] - [ values [ values concat ] map concat ] bi - append ; - : set-final-states ( nfa dfa -- ) [ [ final-states>> keys ] - [ transitions>> states ] bi* + [ transitions>> keys ] bi* [ intersects? ] with filter - ] [ final-states>> ] bi - [ conjoin ] curry each ; + unique + ] keep (>>final-states) ; : initialize-dfa ( nfa -- dfa ) diff --git a/basis/regexp/minimize/minimize-tests.factor b/basis/regexp/minimize/minimize-tests.factor index c5564caa55..8cbfaf4a71 100644 --- a/basis/regexp/minimize/minimize-tests.factor +++ b/basis/regexp/minimize/minimize-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2009 Daniel Ehrenberg ! See http://factorcode.org/license.txt for BSD license. USING: tools.test regexp.minimize assocs regexp -accessors regexp.transition-tables ; +accessors regexp.transition-tables regexp.parser regexp.negation ; IN: regexp.minimize.tests [ t ] [ 1 2 H{ { { 1 2 } t } } same-partition? ] unit-test @@ -13,13 +13,16 @@ IN: regexp.minimize.tests [ { { 1 2 } { 3 4 } } ] [ H{ { "elephant" 1 } { "tiger" 3 } } H{ { "elephant" 2 } { "tiger" 4 } } assemble-values ] unit-test -[ 3 ] [ R/ ab|ac/ dfa>> transitions>> assoc-size ] unit-test -[ 3 ] [ R/ a(b|c)/ dfa>> transitions>> assoc-size ] unit-test -[ 1 ] [ R/ ((aa*)*)*/ dfa>> transitions>> assoc-size ] unit-test -[ 1 ] [ R/ a|((aa*)*)*/ dfa>> transitions>> assoc-size ] unit-test -[ 2 ] [ R/ ab|((aa*)*)*b/ dfa>> transitions>> assoc-size ] unit-test -[ 4 ] [ R/ ab|cd/ dfa>> transitions>> assoc-size ] unit-test -[ 1 ] [ R/ [a-z]*|[A-Z]*/i dfa>> transitions>> assoc-size ] unit-test +: regexp-states ( string -- n ) + parse-regexp ast>dfa transitions>> assoc-size ; + +[ 3 ] [ "ab|ac" regexp-states ] unit-test +[ 3 ] [ "a(b|c)" regexp-states ] unit-test +[ 1 ] [ "((aa*)*)*" regexp-states ] unit-test +[ 1 ] [ "a|((aa*)*)*" regexp-states ] unit-test +[ 2 ] [ "ab|((aa*)*)*b" regexp-states ] unit-test +[ 4 ] [ "ab|cd" regexp-states ] unit-test +[ 1 ] [ "(?i:[a-z]*|[A-Z]*)" regexp-states ] unit-test [ T{ transition-table diff --git a/basis/regexp/regexp.factor b/basis/regexp/regexp.factor index ab091a7682..1bd242315f 100644 --- a/basis/regexp/regexp.factor +++ b/basis/regexp/regexp.factor @@ -48,7 +48,7 @@ C: reverse-matcher ] change-reverse-dfa ; M: regexp match-index-from ( string regexp -- index/f ) - compile-regexp dfa-quot>> match-index-from ; + compile-regexp dfa>> match-index-from ; M: reverse-matcher match-index-from ( string regexp -- index/f ) [ ] [ regexp>> compile-reverse reverse-dfa>> ] bi* @@ -81,7 +81,7 @@ M: reverse-matcher match-index-from ( string regexp -- index/f ) : parsing-regexp ( accum end -- accum ) lexer get [ take-until ] [ parse-noblank-token ] bi - compile-dfa-quot parsed ; + compile-regexp parsed ; PRIVATE>