regexp.minimize: a little bit simpler, a little bit faster.
parent
a3bc9cf192
commit
0e494d31f4
|
@ -5,14 +5,12 @@ accessors regexp.transition-tables regexp.parser
|
||||||
regexp.classes regexp.negation ;
|
regexp.classes regexp.negation ;
|
||||||
IN: regexp.minimize.tests
|
IN: regexp.minimize.tests
|
||||||
|
|
||||||
{ t } [ 1 2 H{ { { 1 2 } t } } same-partition? ] unit-test
|
{ t } [ 1 2 HS{ { 1 2 } } same-partition? ] unit-test
|
||||||
{ t } [ 2 1 H{ { { 1 2 } t } } same-partition? ] unit-test
|
{ t } [ 2 1 HS{ { 1 2 } } same-partition? ] unit-test
|
||||||
{ f } [ 2 3 H{ { { 1 2 } t } } same-partition? ] unit-test
|
{ f } [ 2 3 HS{ { 1 2 } } same-partition? ] unit-test
|
||||||
|
|
||||||
{ H{ { 1 1 } { 2 1 } { 3 3 } { 4 3 } } }
|
{ H{ { 1 1 } { 2 1 } { 3 3 } { 4 3 } } }
|
||||||
[ { { 1 1 } { 1 2 } { 2 2 } { 3 3 } { 3 4 } { 4 4 } } [ t ] H{ } map>assoc partition>classes ] unit-test
|
[ HS{ { 1 1 } { 1 2 } { 2 2 } { 3 3 } { 3 4 } { 4 4 } } partition>classes ] unit-test
|
||||||
|
|
||||||
{ { { 1 2 } { 3 4 } } } [ H{ { "elephant" 1 } { "tiger" 3 } } H{ { "elephant" 2 } { "tiger" 4 } } assemble-values ] unit-test
|
|
||||||
|
|
||||||
: regexp-states ( string -- n )
|
: regexp-states ( string -- n )
|
||||||
parse-regexp ast>dfa transitions>> assoc-size ;
|
parse-regexp ast>dfa transitions>> assoc-size ;
|
||||||
|
@ -52,7 +50,5 @@ IN: regexp.minimize.tests
|
||||||
} combine-states
|
} combine-states
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
[ [ ] [ ] while-changes ] must-infer
|
|
||||||
|
|
||||||
{ H{ { T{ or-class f { 2 1 } } 3 } { 4 5 } } }
|
{ H{ { T{ or-class f { 2 1 } } 3 } { 4 5 } } }
|
||||||
[ H{ { 1 3 } { 2 3 } { 4 5 } } combine-state-transitions ] unit-test
|
[ H{ { 1 3 } { 2 3 } { 4 5 } } combine-state-transitions ] unit-test
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
! Copyright (C) 2009 Daniel Ehrenberg
|
! Copyright (C) 2009 Daniel Ehrenberg
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors arrays assocs combinators.short-circuit fry
|
USING: accessors arrays assocs combinators.short-circuit fry
|
||||||
hashtables kernel locals math regexp.classes
|
hash-sets kernel locals math regexp.classes
|
||||||
regexp.transition-tables sequences sets sorting ;
|
regexp.transition-tables sequences sets sorting ;
|
||||||
IN: regexp.minimize
|
IN: regexp.minimize
|
||||||
|
|
||||||
|
@ -17,54 +17,40 @@ IN: regexp.minimize
|
||||||
: initially-same? ( s1 s2 transition-table -- ? )
|
: initially-same? ( s1 s2 transition-table -- ? )
|
||||||
{
|
{
|
||||||
[ drop <= ]
|
[ drop <= ]
|
||||||
[ transitions>> '[ _ at keys ] bi@ set= ]
|
|
||||||
[ final-states>> '[ _ in? ] bi@ = ]
|
[ final-states>> '[ _ in? ] bi@ = ]
|
||||||
|
[ transitions>> '[ _ at keys ] bi@ set= ]
|
||||||
} 3&& ;
|
} 3&& ;
|
||||||
|
|
||||||
:: initialize-partitions ( transition-table -- partitions )
|
:: initialize-partitions ( transition-table -- partitions )
|
||||||
! Partition table is sorted-array => ?
|
! Partition table is sorted-array => ?
|
||||||
H{ } clone :> out
|
transition-table transitions>> keys natural-sort :> states
|
||||||
transition-table transitions>> keys :> states
|
states length 2/ sq <hash-set> :> out
|
||||||
states [| s1 |
|
states [| s1 i1 |
|
||||||
states [| s2 |
|
states [| s2 |
|
||||||
s1 s2 transition-table initially-same?
|
s1 s2 transition-table initially-same?
|
||||||
[ s1 s2 2array out conjoin ] when
|
[ s1 s2 2array out adjoin ] when
|
||||||
] each
|
] i1 each-from
|
||||||
] each out ;
|
] each-index out ;
|
||||||
|
|
||||||
: same-partition? ( s1 s2 partitions -- ? )
|
: same-partition? ( s1 s2 partitions -- ? )
|
||||||
{ [ [ sort-pair 2array ] dip key? ] [ drop = ] } 3|| ;
|
{ [ [ sort-pair 2array ] dip in? ] [ drop = ] } 3|| ;
|
||||||
|
|
||||||
: assemble-values ( assoc1 assoc2 -- values )
|
|
||||||
dup keys '[ _ swap [ at ] curry map ] bi@ zip ;
|
|
||||||
|
|
||||||
: stay-same? ( s1 s2 transition partitions -- ? )
|
: stay-same? ( s1 s2 transition partitions -- ? )
|
||||||
[ '[ _ transitions>> at ] bi@ assemble-values ] dip
|
[ '[ _ transitions>> at ] bi@ ] dip
|
||||||
'[ _ same-partition? ] assoc-all? ;
|
'[ [ at ] dip _ same-partition? ] with assoc-all? ;
|
||||||
|
|
||||||
: partition-more ( partitions transition-table -- partitions )
|
:: partition-more ( partitions transition-table -- partitions changed? )
|
||||||
over '[ drop first2 _ _ stay-same? ] assoc-filter ;
|
partitions cardinality :> size
|
||||||
|
partitions members [
|
||||||
|
dup first2 transition-table partitions stay-same?
|
||||||
|
[ drop ] [ partitions delete ] if
|
||||||
|
] each partitions dup cardinality size = not ;
|
||||||
|
|
||||||
: partition>classes ( partitions -- synonyms ) ! old-state => new-state
|
: partition>classes ( partitions -- synonyms ) ! old-state => new-state
|
||||||
sort-keys
|
members natural-sort <reversed> [ swap ] H{ } assoc-map-as ;
|
||||||
[ drop first2 swap ] assoc-map
|
|
||||||
<reversed>
|
|
||||||
>hashtable ;
|
|
||||||
|
|
||||||
:: (while-changes) ( ..a obj quot: ( ..a obj -- ..b obj' ) comp: ( ..b obj' -- ..a key ) old-key -- ..a obj )
|
|
||||||
obj quot call :> new-obj
|
|
||||||
new-obj comp call :> new-key
|
|
||||||
new-key old-key =
|
|
||||||
[ new-obj ]
|
|
||||||
[ new-obj quot comp new-key (while-changes) ]
|
|
||||||
if ; inline recursive
|
|
||||||
|
|
||||||
: while-changes ( obj quot pred -- obj' )
|
|
||||||
3dup nip call (while-changes) ; inline
|
|
||||||
|
|
||||||
: (state-classes) ( transition-table -- partition )
|
: (state-classes) ( transition-table -- partition )
|
||||||
[ initialize-partitions ] keep
|
[ initialize-partitions ] keep '[ _ partition-more ] loop ;
|
||||||
'[ _ partition-more ] [ assoc-size ] while-changes ;
|
|
||||||
|
|
||||||
: assoc>set ( assoc -- keys-set )
|
: assoc>set ( assoc -- keys-set )
|
||||||
[ drop dup ] assoc-map ;
|
[ drop dup ] assoc-map ;
|
||||||
|
|
Loading…
Reference in New Issue