Fixing regexp minimization

db4
Daniel Ehrenberg 2009-03-08 22:34:11 -05:00
parent 8418f8f39a
commit 5cd1c8db52
3 changed files with 15 additions and 17 deletions

View File

@ -14,8 +14,6 @@ IN: regexp.minimize.tests
[ { { 1 2 } { 3 4 } } ] [ H{ { "elephant" 1 } { "tiger" 3 } } H{ { "elephant" 2 } { "tiger" 4 } } assemble-values ] unit-test [ { { 1 2 } { 3 4 } } ] [ H{ { "elephant" 1 } { "tiger" 3 } } H{ { "elephant" 2 } { "tiger" 4 } } assemble-values ] unit-test
USE: multiline
/*
: regexp-states ( string -- n ) : regexp-states ( string -- n )
parse-regexp ast>dfa transitions>> assoc-size ; parse-regexp ast>dfa transitions>> assoc-size ;
@ -26,7 +24,6 @@ USE: multiline
[ 2 ] [ "ab|((aa*)*)*b" regexp-states ] unit-test [ 2 ] [ "ab|((aa*)*)*b" regexp-states ] unit-test
[ 4 ] [ "ab|cd" regexp-states ] unit-test [ 4 ] [ "ab|cd" regexp-states ] unit-test
[ 1 ] [ "(?i:[a-z]*|[A-Z]*)" regexp-states ] unit-test [ 1 ] [ "(?i:[a-z]*|[A-Z]*)" regexp-states ] unit-test
*/
[ [
T{ transition-table T{ transition-table

View File

@ -11,8 +11,8 @@ IN: regexp.minimize
: number-states ( table -- newtable ) : number-states ( table -- newtable )
dup table>state-numbers transitions-at ; dup table>state-numbers transitions-at ;
: has-conditions? ( state transitions -- ? ) : has-conditions? ( assoc -- ? )
at values [ condition? ] any? ; values [ condition? ] any? ;
: initially-same? ( s1 s2 transition-table -- ? ) : initially-same? ( s1 s2 transition-table -- ? )
{ {
@ -24,9 +24,7 @@ IN: regexp.minimize
:: initialize-partitions ( transition-table -- partitions ) :: initialize-partitions ( transition-table -- partitions )
! Partition table is sorted-array => ? ! Partition table is sorted-array => ?
H{ } clone :> out H{ } clone :> out
transition-table transitions>> keys transition-table transitions>> keys :> states
[ transition-table transitions>> has-conditions? ] partition :> states
[ dup 2array out conjoin ] each
states [| s1 | states [| s1 |
states [| s2 | states [| s2 |
s1 s2 transition-table initially-same? s1 s2 transition-table initially-same?
@ -35,7 +33,7 @@ IN: regexp.minimize
] each out ; ] each out ;
: same-partition? ( s1 s2 partitions -- ? ) : same-partition? ( s1 s2 partitions -- ? )
[ 2array natural-sort ] dip key? ; { [ [ 2array natural-sort ] dip key? ] [ drop = ] } 3|| ;
: assemble-values ( assoc1 assoc2 -- values ) : assemble-values ( assoc1 assoc2 -- values )
dup keys '[ _ swap [ at ] curry map ] bi@ zip ; dup keys '[ _ swap [ at ] curry map ] bi@ zip ;
@ -64,13 +62,19 @@ IN: regexp.minimize
: while-changes ( obj quot pred -- obj' ) : while-changes ( obj quot pred -- obj' )
3dup nip call (while-changes) ; inline 3dup nip call (while-changes) ; inline
: state-classes ( transition-table -- synonyms ) : (state-classes) ( transition-table -- partition )
[ initialize-partitions ] keep [ initialize-partitions ] keep
'[ _ partition-more ] [ assoc-size ] while-changes '[ _ partition-more ] [ assoc-size ] while-changes ;
partition>classes ;
: assoc>set ( assoc -- keys-set )
[ drop dup ] assoc-map ;
: state-classes ( transition-table -- synonyms )
clone [ [ nip has-conditions? ] assoc-partition ] change-transitions
[ assoc>set ] [ (state-classes) partition>classes ] bi* assoc-union ;
: canonical-state? ( state transitions state-classes -- ? ) : canonical-state? ( state transitions state-classes -- ? )
'[ dup _ at = ] swap '[ _ has-conditions? ] bi or ; '[ dup _ at = ] swap '[ _ at has-conditions? ] bi or ;
: delete-duplicates ( transitions state-classes -- new-transitions ) : delete-duplicates ( transitions state-classes -- new-transitions )
dupd '[ drop _ _ canonical-state? ] assoc-filter ; dupd '[ drop _ _ canonical-state? ] assoc-filter ;
@ -91,5 +95,5 @@ IN: regexp.minimize
: minimize ( table -- minimal-table ) : minimize ( table -- minimal-table )
clone clone
number-states number-states
! combine-states combine-states
combine-transitions ; combine-transitions ;

View File

@ -23,9 +23,6 @@ CONSTANT: fail-state -1
[ add-default-transition ] assoc-map [ add-default-transition ] assoc-map
fail-state-recurses ; fail-state-recurses ;
: assoc>set ( assoc -- keys-set )
[ drop dup ] assoc-map ;
: inverse-final-states ( transition-table -- final-states ) : inverse-final-states ( transition-table -- final-states )
[ transitions>> assoc>set ] [ final-states>> ] bi assoc-diff ; [ transitions>> assoc>set ] [ final-states>> ] bi assoc-diff ;