Fixing regexp minimization
parent
8418f8f39a
commit
5cd1c8db52
|
@ -14,8 +14,6 @@ IN: regexp.minimize.tests
|
||||||
|
|
||||||
[ { { 1 2 } { 3 4 } } ] [ H{ { "elephant" 1 } { "tiger" 3 } } H{ { "elephant" 2 } { "tiger" 4 } } assemble-values ] unit-test
|
[ { { 1 2 } { 3 4 } } ] [ H{ { "elephant" 1 } { "tiger" 3 } } H{ { "elephant" 2 } { "tiger" 4 } } assemble-values ] unit-test
|
||||||
|
|
||||||
USE: multiline
|
|
||||||
/*
|
|
||||||
: regexp-states ( string -- n )
|
: regexp-states ( string -- n )
|
||||||
parse-regexp ast>dfa transitions>> assoc-size ;
|
parse-regexp ast>dfa transitions>> assoc-size ;
|
||||||
|
|
||||||
|
@ -26,7 +24,6 @@ USE: multiline
|
||||||
[ 2 ] [ "ab|((aa*)*)*b" regexp-states ] unit-test
|
[ 2 ] [ "ab|((aa*)*)*b" regexp-states ] unit-test
|
||||||
[ 4 ] [ "ab|cd" regexp-states ] unit-test
|
[ 4 ] [ "ab|cd" regexp-states ] unit-test
|
||||||
[ 1 ] [ "(?i:[a-z]*|[A-Z]*)" regexp-states ] unit-test
|
[ 1 ] [ "(?i:[a-z]*|[A-Z]*)" regexp-states ] unit-test
|
||||||
*/
|
|
||||||
|
|
||||||
[
|
[
|
||||||
T{ transition-table
|
T{ transition-table
|
||||||
|
|
|
@ -11,8 +11,8 @@ IN: regexp.minimize
|
||||||
: number-states ( table -- newtable )
|
: number-states ( table -- newtable )
|
||||||
dup table>state-numbers transitions-at ;
|
dup table>state-numbers transitions-at ;
|
||||||
|
|
||||||
: has-conditions? ( state transitions -- ? )
|
: has-conditions? ( assoc -- ? )
|
||||||
at values [ condition? ] any? ;
|
values [ condition? ] any? ;
|
||||||
|
|
||||||
: initially-same? ( s1 s2 transition-table -- ? )
|
: initially-same? ( s1 s2 transition-table -- ? )
|
||||||
{
|
{
|
||||||
|
@ -24,9 +24,7 @@ IN: regexp.minimize
|
||||||
:: initialize-partitions ( transition-table -- partitions )
|
:: initialize-partitions ( transition-table -- partitions )
|
||||||
! Partition table is sorted-array => ?
|
! Partition table is sorted-array => ?
|
||||||
H{ } clone :> out
|
H{ } clone :> out
|
||||||
transition-table transitions>> keys
|
transition-table transitions>> keys :> states
|
||||||
[ transition-table transitions>> has-conditions? ] partition :> states
|
|
||||||
[ dup 2array out conjoin ] each
|
|
||||||
states [| s1 |
|
states [| s1 |
|
||||||
states [| s2 |
|
states [| s2 |
|
||||||
s1 s2 transition-table initially-same?
|
s1 s2 transition-table initially-same?
|
||||||
|
@ -35,7 +33,7 @@ IN: regexp.minimize
|
||||||
] each out ;
|
] each out ;
|
||||||
|
|
||||||
: same-partition? ( s1 s2 partitions -- ? )
|
: same-partition? ( s1 s2 partitions -- ? )
|
||||||
[ 2array natural-sort ] dip key? ;
|
{ [ [ 2array natural-sort ] dip key? ] [ drop = ] } 3|| ;
|
||||||
|
|
||||||
: assemble-values ( assoc1 assoc2 -- values )
|
: assemble-values ( assoc1 assoc2 -- values )
|
||||||
dup keys '[ _ swap [ at ] curry map ] bi@ zip ;
|
dup keys '[ _ swap [ at ] curry map ] bi@ zip ;
|
||||||
|
@ -64,13 +62,19 @@ IN: regexp.minimize
|
||||||
: while-changes ( obj quot pred -- obj' )
|
: while-changes ( obj quot pred -- obj' )
|
||||||
3dup nip call (while-changes) ; inline
|
3dup nip call (while-changes) ; inline
|
||||||
|
|
||||||
: state-classes ( transition-table -- synonyms )
|
: (state-classes) ( transition-table -- partition )
|
||||||
[ initialize-partitions ] keep
|
[ initialize-partitions ] keep
|
||||||
'[ _ partition-more ] [ assoc-size ] while-changes
|
'[ _ partition-more ] [ assoc-size ] while-changes ;
|
||||||
partition>classes ;
|
|
||||||
|
: assoc>set ( assoc -- keys-set )
|
||||||
|
[ drop dup ] assoc-map ;
|
||||||
|
|
||||||
|
: state-classes ( transition-table -- synonyms )
|
||||||
|
clone [ [ nip has-conditions? ] assoc-partition ] change-transitions
|
||||||
|
[ assoc>set ] [ (state-classes) partition>classes ] bi* assoc-union ;
|
||||||
|
|
||||||
: canonical-state? ( state transitions state-classes -- ? )
|
: canonical-state? ( state transitions state-classes -- ? )
|
||||||
'[ dup _ at = ] swap '[ _ has-conditions? ] bi or ;
|
'[ dup _ at = ] swap '[ _ at has-conditions? ] bi or ;
|
||||||
|
|
||||||
: delete-duplicates ( transitions state-classes -- new-transitions )
|
: delete-duplicates ( transitions state-classes -- new-transitions )
|
||||||
dupd '[ drop _ _ canonical-state? ] assoc-filter ;
|
dupd '[ drop _ _ canonical-state? ] assoc-filter ;
|
||||||
|
@ -91,5 +95,5 @@ IN: regexp.minimize
|
||||||
: minimize ( table -- minimal-table )
|
: minimize ( table -- minimal-table )
|
||||||
clone
|
clone
|
||||||
number-states
|
number-states
|
||||||
! combine-states
|
combine-states
|
||||||
combine-transitions ;
|
combine-transitions ;
|
||||||
|
|
|
@ -23,9 +23,6 @@ CONSTANT: fail-state -1
|
||||||
[ add-default-transition ] assoc-map
|
[ add-default-transition ] assoc-map
|
||||||
fail-state-recurses ;
|
fail-state-recurses ;
|
||||||
|
|
||||||
: assoc>set ( assoc -- keys-set )
|
|
||||||
[ drop dup ] assoc-map ;
|
|
||||||
|
|
||||||
: inverse-final-states ( transition-table -- final-states )
|
: inverse-final-states ( transition-table -- final-states )
|
||||||
[ transitions>> assoc>set ] [ final-states>> ] bi assoc-diff ;
|
[ transitions>> assoc>set ] [ final-states>> ] bi assoc-diff ;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue