From e70748f8f10a2c5ea5a02e9facbd4650b73dbbdd Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Wed, 11 Mar 2009 19:39:35 -0500 Subject: [PATCH] Redoing class algebra so conjunction works --- basis/regexp/classes/classes-tests.factor | 8 +- basis/regexp/classes/classes.factor | 170 ++++++++++-------- .../combinators/combinators-tests.factor | 4 - basis/regexp/minimize/minimize-tests.factor | 2 +- 4 files changed, 101 insertions(+), 83 deletions(-) diff --git a/basis/regexp/classes/classes-tests.factor b/basis/regexp/classes/classes-tests.factor index 2deb944b61..e2db86f6c1 100644 --- a/basis/regexp/classes/classes-tests.factor +++ b/basis/regexp/classes/classes-tests.factor @@ -6,7 +6,7 @@ IN: regexp.classes.tests ! Class algebra [ f ] [ { 1 2 } ] unit-test -[ T{ or-class f { 2 1 } } ] [ { 1 2 } ] unit-test +[ T{ or-class f { 1 2 } } ] [ { 1 2 } ] unit-test [ 3 ] [ { 1 2 } 3 2array ] unit-test [ CHAR: A ] [ CHAR: A LETTER-class 2array ] unit-test [ CHAR: A ] [ LETTER-class CHAR: A 2array ] unit-test @@ -26,11 +26,13 @@ IN: regexp.classes.tests [ t ] [ { t t } ] unit-test [ T{ primitive-class { class letter-class } } ] [ letter-class dup 2array ] unit-test [ T{ primitive-class { class letter-class } } ] [ letter-class dup 2array ] unit-test -[ T{ or-class { seq { 2 3 1 } } } ] [ { 1 2 } { 2 3 } 2array ] unit-test -[ T{ or-class { seq { 3 2 } } } ] [ { 2 3 } 1 2array ] unit-test +[ T{ or-class { seq { 1 2 3 } } } ] [ { 1 2 } { 2 3 } 2array ] unit-test +[ T{ or-class { seq { 2 3 } } } ] [ { 2 3 } 1 2array ] unit-test [ f ] [ t ] unit-test [ t ] [ f ] unit-test [ f ] [ 1 1 t answer ] unit-test +[ t ] [ { 1 2 } 1 2 3array ] unit-test +[ f ] [ { 1 2 } 1 2 3array ] unit-test ! Making classes into nested conditionals diff --git a/basis/regexp/classes/classes.factor b/basis/regexp/classes/classes.factor index 1959a91cb5..d26ff7f69c 100644 --- a/basis/regexp/classes/classes.factor +++ b/basis/regexp/classes/classes.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors kernel math math.order words combinators locals ascii unicode.categories combinators.short-circuit sequences -fry macros arrays assocs sets classes ; +fry macros arrays assocs sets classes mirrors ; IN: regexp.classes SINGLETONS: any-char any-char-no-nl @@ -110,97 +110,116 @@ M: f class-member? 2drop f ; TUPLE: primitive-class class ; C: primitive-class +TUPLE: not-class class ; + +PREDICATE: not-integer < not-class class>> integer? ; +PREDICATE: not-primitive < not-class class>> primitive-class? ; + +M: not-class class-member? + class>> class-member? not ; + TUPLE: or-class seq ; -TUPLE: not-class class ; +M: or-class class-member? + seq>> [ class-member? ] with any? ; TUPLE: and-class seq ; -GENERIC: combine-and ( class1 class2 -- combined ? ) +M: and-class class-member? + seq>> [ class-member? ] with all? ; -: replace-if-= ( object object -- object ? ) - over = ; - -M: object combine-and replace-if-= ; - -M: t combine-and - drop t ; - -M: f combine-and - nip t ; - -M: not-class combine-and - class>> 2dup = [ 2drop f t ] [ - dup integer? [ - 2dup swap class-member? - [ 2drop f f ] - [ drop t ] if - ] [ 2drop f f ] if - ] if ; - -M: integer combine-and - swap 2dup class-member? [ drop t ] [ 2drop f t ] if ; - -GENERIC: combine-or ( class1 class2 -- combined ? ) - -M: object combine-or replace-if-= ; - -M: t combine-or - nip t ; - -M: f combine-or - drop t ; - -M: not-class combine-or - class>> = [ t t ] [ f f ] if ; - -M: integer combine-or - 2dup swap class-member? [ drop t ] [ 2drop f f ] if ; +DEFER: substitute : flatten ( seq class -- newseq ) '[ dup _ instance? [ seq>> ] [ 1array ] if ] map concat ; inline -: try-combine ( elt1 elt2 quot -- combined/f ? ) - 3dup call [ [ 3drop ] dip t ] [ drop swapd call ] if ; inline - -DEFER: answer - -:: try-cancel ( elt1 elt2 empty -- combined/f ? ) - [ elt1 elt2 empty answer dup elt1 = not ] try-combine ; - -:: prefix-combining ( seq elt quot: ( elt1 elt2 -- combined/f ? ) -- newseq ) - f :> combined! - seq [ elt quot call swap combined! ] find drop - [ seq remove-nth combined prefix ] - [ seq elt prefix ] if* ; inline - -: combine-by ( seq quot -- new-seq ) - { } swap '[ _ prefix-combining ] reduce ; inline - :: seq>instance ( seq empty class -- instance ) seq length { { 0 [ empty ] } { 1 [ seq first ] } - [ drop class new seq >>seq ] + [ drop class new seq { } like >>seq ] } case ; inline -:: combine ( seq quot: ( elt1 elt2 -- combined/f ? ) empty class -- newseq ) - seq class flatten - [ quot try-combine ] combine-by - ! [ empty try-cancel ] combine-by ! This makes the algorithm O(n^4) - empty class seq>instance ; inline +TUPLE: class-partition integers not-integers primitives not-primitives and or other ; + +: partition-classes ( seq -- class-partition ) + prune + [ integer? ] partition + [ not-integer? ] partition + [ primitive-class? ] partition ! extend primitive-class to epsilon tags + [ not-primitive? ] partition + [ and-class? ] partition + [ or-class? ] partition + class-partition boa ; + +: class-partition>seq ( class-partition -- seq ) + make-mirror values concat ; + +: repartition ( partition -- partition' ) + ! This could be made more efficient; only and and or are effected + class-partition>seq partition-classes ; + +: filter-not-integers ( partition -- partition' ) + dup + [ primitives>> ] [ not-primitives>> ] [ or>> ] tri + 3append and-class boa + '[ [ class>> _ class-member? ] filter ] change-not-integers ; + +: answer-ors ( partition -- partition' ) + dup [ not-integers>> ] [ not-primitives>> ] [ primitives>> ] tri 3append + '[ [ _ [ t substitute ] each ] map ] change-or ; + +: contradiction? ( partition -- ? ) + { + [ [ primitives>> ] [ not-primitives>> ] bi intersects? ] + [ other>> f swap member? ] + } 1|| ; + +: make-and-class ( partition -- and-class ) + answer-ors repartition + [ t swap remove ] change-other + dup contradiction? + [ drop f ] + [ filter-not-integers class-partition>seq prune t and-class seq>instance ] if ; : ( seq -- class ) - [ combine-and ] t and-class combine ; + dup and-class flatten partition-classes + dup integers>> length { + { 0 [ nip make-and-class ] } + { 1 [ integers>> first [ '[ _ swap class-member? ] all? ] keep and ] } + [ 3drop f ] + } case ; -M: and-class class-member? - seq>> [ class-member? ] with all? ; +: filter-integers ( partition -- partition' ) + dup + [ primitives>> ] [ not-primitives>> ] [ and>> ] tri + 3append or-class boa + '[ [ _ class-member? not ] filter ] change-integers ; + +: answer-ands ( partition -- partition' ) + dup [ integers>> ] [ not-primitives>> ] [ primitives>> ] tri 3append + '[ [ _ [ f substitute ] each ] map ] change-and ; + +: tautology? ( partition -- ? ) + { + [ [ primitives>> ] [ not-primitives>> ] bi intersects? ] + [ other>> t swap member? ] + } 1|| ; + +: make-or-class ( partition -- and-class ) + answer-ands repartition + [ f swap remove ] change-other + dup tautology? + [ drop t ] + [ filter-integers class-partition>seq prune f or-class seq>instance ] if ; : ( seq -- class ) - [ combine-or ] f or-class combine ; - -M: or-class class-member? - seq>> [ class-member? ] with any? ; + dup or-class flatten partition-classes + dup not-integers>> length { + { 0 [ nip make-or-class ] } + { 1 [ not-integers>> first [ class>> '[ _ swap class-member? ] any? ] keep or ] } + [ 3drop t ] + } case ; GENERIC: ( class -- inverse ) @@ -219,9 +238,6 @@ M: or-class M: t drop f ; M: f drop t ; -M: not-class class-member? - class>> class-member? not ; - M: primitive-class class-member? class>> class-member? ; @@ -247,8 +263,12 @@ M: or-class answer M: not-class answer [ class>> ] 2dip answer ; +GENERIC# substitute 1 ( class from to -- new-class ) +M: object substitute answer ; +M: not-class substitute [ ] bi@ answer ; + : assoc-answer ( table question answer -- new-table ) - '[ _ _ answer ] assoc-map + '[ _ _ substitute ] assoc-map [ nip ] assoc-filter ; : assoc-answers ( table questions answer -- new-table ) diff --git a/basis/regexp/combinators/combinators-tests.factor b/basis/regexp/combinators/combinators-tests.factor index ddfd0dcaad..85fa190bfe 100644 --- a/basis/regexp/combinators/combinators-tests.factor +++ b/basis/regexp/combinators/combinators-tests.factor @@ -9,9 +9,6 @@ IN: regexp.combinators.tests [ t t t ] [ "foo" "bar" "baz" [ strings matches? ] tri@ ] unit-test [ f f f ] [ "food" "ibar" "ba" [ strings matches? ] tri@ ] unit-test -USE: multiline -/* -! Why is conjuction broken? : conj ( -- regexp ) { R' .*a' R' b.*' } ; @@ -22,7 +19,6 @@ USE: multiline [ f ] [ "bljhasflsda" conj matches? ] unit-test [ t ] [ "bsdfdfs" conj matches? ] unit-test [ t ] [ "fsfa" conj matches? ] unit-test -*/ [ f f ] [ "" "hi" [ matches? ] bi@ ] unit-test [ t t ] [ "" "hi" [ matches? ] bi@ ] unit-test diff --git a/basis/regexp/minimize/minimize-tests.factor b/basis/regexp/minimize/minimize-tests.factor index a7a9b50327..17a1d51b88 100644 --- a/basis/regexp/minimize/minimize-tests.factor +++ b/basis/regexp/minimize/minimize-tests.factor @@ -54,5 +54,5 @@ IN: regexp.minimize.tests [ [ ] [ ] while-changes ] must-infer -[ H{ { T{ or-class f { 1 2 } } 3 } { 4 5 } } ] +[ H{ { T{ or-class f { 2 1 } } 3 } { 4 5 } } ] [ H{ { 1 3 } { 2 3 } { 4 5 } } combine-state-transitions ] unit-test