regexp.minimize: a little bit simpler, a little bit faster.

char-rename
John Benediktsson 2016-07-18 10:05:38 -07:00
parent a3bc9cf192
commit 0e494d31f4
2 changed files with 23 additions and 41 deletions

View File

@ -5,14 +5,12 @@ accessors regexp.transition-tables regexp.parser
regexp.classes regexp.negation ;
IN: regexp.minimize.tests
{ t } [ 1 2 H{ { { 1 2 } t } } same-partition? ] unit-test
{ t } [ 2 1 H{ { { 1 2 } t } } same-partition? ] unit-test
{ f } [ 2 3 H{ { { 1 2 } t } } same-partition? ] unit-test
{ t } [ 1 2 HS{ { 1 2 } } same-partition? ] unit-test
{ t } [ 2 1 HS{ { 1 2 } } same-partition? ] unit-test
{ f } [ 2 3 HS{ { 1 2 } } same-partition? ] unit-test
{ H{ { 1 1 } { 2 1 } { 3 3 } { 4 3 } } }
[ { { 1 1 } { 1 2 } { 2 2 } { 3 3 } { 3 4 } { 4 4 } } [ t ] H{ } map>assoc partition>classes ] unit-test
{ { { 1 2 } { 3 4 } } } [ H{ { "elephant" 1 } { "tiger" 3 } } H{ { "elephant" 2 } { "tiger" 4 } } assemble-values ] unit-test
[ HS{ { 1 1 } { 1 2 } { 2 2 } { 3 3 } { 3 4 } { 4 4 } } partition>classes ] unit-test
: regexp-states ( string -- n )
parse-regexp ast>dfa transitions>> assoc-size ;
@ -52,7 +50,5 @@ IN: regexp.minimize.tests
} combine-states
] unit-test
[ [ ] [ ] while-changes ] must-infer
{ H{ { T{ or-class f { 2 1 } } 3 } { 4 5 } } }
[ H{ { 1 3 } { 2 3 } { 4 5 } } combine-state-transitions ] unit-test

View File

@ -1,7 +1,7 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs combinators.short-circuit fry
hashtables kernel locals math regexp.classes
hash-sets kernel locals math regexp.classes
regexp.transition-tables sequences sets sorting ;
IN: regexp.minimize
@ -17,54 +17,40 @@ IN: regexp.minimize
: initially-same? ( s1 s2 transition-table -- ? )
{
[ drop <= ]
[ transitions>> '[ _ at keys ] bi@ set= ]
[ final-states>> '[ _ in? ] bi@ = ]
[ transitions>> '[ _ at keys ] bi@ set= ]
} 3&& ;
:: initialize-partitions ( transition-table -- partitions )
! Partition table is sorted-array => ?
H{ } clone :> out
transition-table transitions>> keys :> states
states [| s1 |
transition-table transitions>> keys natural-sort :> states
states length 2/ sq <hash-set> :> out
states [| s1 i1 |
states [| s2 |
s1 s2 transition-table initially-same?
[ s1 s2 2array out conjoin ] when
] each
] each out ;
[ s1 s2 2array out adjoin ] when
] i1 each-from
] each-index out ;
: same-partition? ( s1 s2 partitions -- ? )
{ [ [ sort-pair 2array ] dip key? ] [ drop = ] } 3|| ;
: assemble-values ( assoc1 assoc2 -- values )
dup keys '[ _ swap [ at ] curry map ] bi@ zip ;
{ [ [ sort-pair 2array ] dip in? ] [ drop = ] } 3|| ;
: stay-same? ( s1 s2 transition partitions -- ? )
[ '[ _ transitions>> at ] bi@ assemble-values ] dip
'[ _ same-partition? ] assoc-all? ;
[ '[ _ transitions>> at ] bi@ ] dip
'[ [ at ] dip _ same-partition? ] with assoc-all? ;
: partition-more ( partitions transition-table -- partitions )
over '[ drop first2 _ _ stay-same? ] assoc-filter ;
:: partition-more ( partitions transition-table -- partitions changed? )
partitions cardinality :> size
partitions members [
dup first2 transition-table partitions stay-same?
[ drop ] [ partitions delete ] if
] each partitions dup cardinality size = not ;
: partition>classes ( partitions -- synonyms ) ! old-state => new-state
sort-keys
[ drop first2 swap ] assoc-map
<reversed>
>hashtable ;
:: (while-changes) ( ..a obj quot: ( ..a obj -- ..b obj' ) comp: ( ..b obj' -- ..a key ) old-key -- ..a obj )
obj quot call :> new-obj
new-obj comp call :> new-key
new-key old-key =
[ new-obj ]
[ new-obj quot comp new-key (while-changes) ]
if ; inline recursive
: while-changes ( obj quot pred -- obj' )
3dup nip call (while-changes) ; inline
members natural-sort <reversed> [ swap ] H{ } assoc-map-as ;
: (state-classes) ( transition-table -- partition )
[ initialize-partitions ] keep
'[ _ partition-more ] [ assoc-size ] while-changes ;
[ initialize-partitions ] keep '[ _ partition-more ] loop ;
: assoc>set ( assoc -- keys-set )
[ drop dup ] assoc-map ;