From 5cd1c8db525c38c312a00021fab843ee7a1809ae Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Sun, 8 Mar 2009 22:34:11 -0500 Subject: [PATCH] Fixing regexp minimization --- basis/regexp/minimize/minimize-tests.factor | 3 --- basis/regexp/minimize/minimize.factor | 26 ++++++++++++--------- basis/regexp/negation/negation.factor | 3 --- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/basis/regexp/minimize/minimize-tests.factor b/basis/regexp/minimize/minimize-tests.factor index 9c9f61c33c..a7a9b50327 100644 --- a/basis/regexp/minimize/minimize-tests.factor +++ b/basis/regexp/minimize/minimize-tests.factor @@ -14,8 +14,6 @@ IN: regexp.minimize.tests [ { { 1 2 } { 3 4 } } ] [ H{ { "elephant" 1 } { "tiger" 3 } } H{ { "elephant" 2 } { "tiger" 4 } } assemble-values ] unit-test -USE: multiline -/* : regexp-states ( string -- n ) parse-regexp ast>dfa transitions>> assoc-size ; @@ -26,7 +24,6 @@ USE: multiline [ 2 ] [ "ab|((aa*)*)*b" regexp-states ] unit-test [ 4 ] [ "ab|cd" regexp-states ] unit-test [ 1 ] [ "(?i:[a-z]*|[A-Z]*)" regexp-states ] unit-test -*/ [ T{ transition-table diff --git a/basis/regexp/minimize/minimize.factor b/basis/regexp/minimize/minimize.factor index e0e1585c11..bdb53c51cb 100644 --- a/basis/regexp/minimize/minimize.factor +++ b/basis/regexp/minimize/minimize.factor @@ -11,8 +11,8 @@ IN: regexp.minimize : number-states ( table -- newtable ) dup table>state-numbers transitions-at ; -: has-conditions? ( state transitions -- ? ) - at values [ condition? ] any? ; +: has-conditions? ( assoc -- ? ) + values [ condition? ] any? ; : initially-same? ( s1 s2 transition-table -- ? ) { @@ -24,9 +24,7 @@ IN: regexp.minimize :: initialize-partitions ( transition-table -- partitions ) ! Partition table is sorted-array => ? H{ } clone :> out - transition-table transitions>> keys - [ transition-table transitions>> has-conditions? ] partition :> states - [ dup 2array out conjoin ] each + transition-table transitions>> keys :> states states [| s1 | states [| s2 | s1 s2 transition-table initially-same? @@ -35,7 +33,7 @@ IN: regexp.minimize ] each out ; : same-partition? ( s1 s2 partitions -- ? ) - [ 2array natural-sort ] dip key? ; + { [ [ 2array natural-sort ] dip key? ] [ drop = ] } 3|| ; : assemble-values ( assoc1 assoc2 -- values ) dup keys '[ _ swap [ at ] curry map ] bi@ zip ; @@ -64,13 +62,19 @@ IN: regexp.minimize : while-changes ( obj quot pred -- obj' ) 3dup nip call (while-changes) ; inline -: state-classes ( transition-table -- synonyms ) +: (state-classes) ( transition-table -- partition ) [ initialize-partitions ] keep - '[ _ partition-more ] [ assoc-size ] while-changes - partition>classes ; + '[ _ partition-more ] [ assoc-size ] while-changes ; + +: assoc>set ( assoc -- keys-set ) + [ drop dup ] assoc-map ; + +: state-classes ( transition-table -- synonyms ) + clone [ [ nip has-conditions? ] assoc-partition ] change-transitions + [ assoc>set ] [ (state-classes) partition>classes ] bi* assoc-union ; : canonical-state? ( state transitions state-classes -- ? ) - '[ dup _ at = ] swap '[ _ has-conditions? ] bi or ; + '[ dup _ at = ] swap '[ _ at has-conditions? ] bi or ; : delete-duplicates ( transitions state-classes -- new-transitions ) dupd '[ drop _ _ canonical-state? ] assoc-filter ; @@ -91,5 +95,5 @@ IN: regexp.minimize : minimize ( table -- minimal-table ) clone number-states - ! combine-states + combine-states combine-transitions ; diff --git a/basis/regexp/negation/negation.factor b/basis/regexp/negation/negation.factor index fd2a4510c6..0633dca192 100644 --- a/basis/regexp/negation/negation.factor +++ b/basis/regexp/negation/negation.factor @@ -23,9 +23,6 @@ CONSTANT: fail-state -1 [ add-default-transition ] assoc-map fail-state-recurses ; -: assoc>set ( assoc -- keys-set ) - [ drop dup ] assoc-map ; - : inverse-final-states ( transition-table -- final-states ) [ transitions>> assoc>set ] [ final-states>> ] bi assoc-diff ;