DFAs are minimized now

db4
Daniel Ehrenberg 2009-02-19 00:11:45 -06:00
parent 77b069ee5c
commit fa84f4c752
6 changed files with 149 additions and 9 deletions

View File

@ -0,0 +1,5 @@
USING: regexp.dfa tools.test ;
IN: regexp.dfa.tests
[ [ ] [ ] while-changes ] must-infer

View File

@ -1,4 +1,4 @@
! Copyright (C) 2008 Doug Coleman. ! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs combinators fry kernel locals USING: accessors arrays assocs combinators fry kernel locals
math math.order regexp.nfa regexp.transition-tables sequences math math.order regexp.nfa regexp.transition-tables sequences
@ -6,9 +6,13 @@ sets sorting vectors sequences.deep ;
USING: io prettyprint threads ; USING: io prettyprint threads ;
IN: regexp.dfa IN: regexp.dfa
: (while-changes) ( obj quot: ( obj -- obj' ) pred: ( obj -- <=> ) pred-ret -- obj ) :: (while-changes) ( obj quot: ( obj -- obj' ) comp: ( obj -- key ) old-key -- obj )
[ [ dup slip ] dip pick over call ] dip dupd = obj quot call :> new-obj
[ 3drop ] [ (while-changes) ] if ; inline recursive new-obj comp call :> new-key
new-key old-key =
[ new-obj ]
[ new-obj quot comp new-key (while-changes) ]
if ; inline recursive
: while-changes ( obj quot pred -- obj' ) : while-changes ( obj quot pred -- obj' )
3dup nip call (while-changes) ; inline 3dup nip call (while-changes) ; inline

View File

@ -0,0 +1,48 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: tools.test regexp.minimize assocs regexp accessors regexp.transition-tables ;
IN: regexp.minimize.tests
[ t ] [ 1 2 H{ { { 1 2 } t } } same-partition? ] unit-test
[ t ] [ 2 1 H{ { { 1 2 } t } } same-partition? ] unit-test
[ f ] [ 2 3 H{ { { 1 2 } t } } same-partition? ] unit-test
[ H{ { 1 1 } { 2 1 } { 3 3 } { 4 3 } } ]
[ { { 1 1 } { 1 2 } { 2 2 } { 3 3 } { 3 4 } { 4 4 } } [ t ] H{ } map>assoc partition>classes ] unit-test
[ { { 1 2 } { 3 4 } } ] [ H{ { "elephant" 1 } { "tiger" 3 } } H{ { "elephant" 2 } { "tiger" 4 } } assemble-values ] unit-test
[ 3 ] [ R/ ab|ac/ dfa>> transitions>> assoc-size ] unit-test
[ 3 ] [ R/ a(b|c)/ dfa>> transitions>> assoc-size ] unit-test
[ 1 ] [ R/ ((aa*)*)*/ dfa>> transitions>> assoc-size ] unit-test
[ 1 ] [ R/ a|((aa*)*)*/ dfa>> transitions>> assoc-size ] unit-test
[ 2 ] [ R/ ab|((aa*)*)*b/ dfa>> transitions>> assoc-size ] unit-test
[ 4 ] [ R/ ab|cd/ dfa>> transitions>> assoc-size ] unit-test
[ 1 ] [ R/ [a-z]*|[A-Z]*/i dfa>> transitions>> assoc-size ] unit-test
[
T{ transition-table
{ transitions H{
{ 0 H{ { CHAR: a 1 } { CHAR: b 1 } } }
{ 1 H{ { CHAR: a 2 } { CHAR: b 2 } } }
{ 2 H{ { CHAR: c 3 } } }
{ 3 H{ } }
} }
{ start-state 0 }
{ final-states H{ { 3 3 } } }
}
] [
T{ transition-table
{ transitions H{
{ 0 H{ { CHAR: a 1 } { CHAR: b 4 } } }
{ 1 H{ { CHAR: a 2 } { CHAR: b 5 } } }
{ 2 H{ { CHAR: c 3 } } }
{ 3 H{ } }
{ 4 H{ { CHAR: a 2 } { CHAR: b 5 } } }
{ 5 H{ { CHAR: c 6 } } }
{ 6 H{ } }
} }
{ start-state 0 }
{ final-states H{ { 3 3 } { 6 6 } } }
} combine-states
] unit-test

View File

@ -0,0 +1,84 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences regexp.transition-tables fry assocs
accessors locals math sorting arrays sets hashtables regexp.dfa ;
IN: regexp.minimize
:: initialize-partitions ( transition-table -- partitions )
! Partition table is sorted-array => ?
H{ } clone :> out
transition-table transitions>> keys :> states
states [| s1 |
states [| s2 |
s1 s2 <= [
s1 s2 [ transition-table transitions>> at keys ] bi@ set=
s1 s2 [ transition-table final-states>> key? ] bi@ = and
[ t s1 s2 2array out set-at ] when
] when
] each
] each out ;
: same-partition? ( s1 s2 partitions -- ? )
[ 2array natural-sort ] dip key? ;
: assemble-values ( assoc1 assoc2 -- values )
dup keys '[ _ swap [ at ] curry map ] bi@ zip ;
: stay-same? ( s1 s2 transition partitions -- ? )
[ '[ _ transitions>> at ] bi@ assemble-values ] dip
'[ _ same-partition? ] assoc-all? ;
: partition-more ( partitions transition-table -- partitions )
! This is horribly slow!
over '[ drop first2 _ _ stay-same? ] assoc-filter ;
: partition>classes ( partitions -- synonyms ) ! old-state => new-state
>alist sort-keys
[ drop first2 swap ] assoc-map
<reversed>
>hashtable ;
: state-classes ( transition-table -- synonyms )
[ initialize-partitions ] keep
'[ _ partition-more ] [ ] while-changes
partition>classes ;
: canonical-state? ( state state-classes -- ? )
dupd at = ;
: delete-duplicates ( transitions state-classes -- new-transitions )
'[ drop _ canonical-state? ] assoc-filter ;
: rewrite-duplicates ( new-transitions state-classes -- new-transitions )
'[ [ _ at ] assoc-map ] assoc-map ;
: map-set ( assoc quot -- new-assoc )
'[ drop @ dup ] assoc-map ; inline
: combine-states ( table -- smaller-table )
dup state-classes
[
'[
_ [ delete-duplicates ]
[ rewrite-duplicates ] bi
] change-transitions
]
[ '[ [ _ at ] map-set ] change-final-states ]
[ '[ _ at ] change-start-state ]
tri ;
: number-transitions ( transitions numbering -- new-transitions )
[
[ at ]
[ '[ first _ at ] assoc-map ]
bi-curry bi*
] curry assoc-map ;
: number-states ( table -- newtable )
dup transitions>> keys <enum> [ swap ] H{ } assoc-map-as
[ '[ _ at ] change-start-state ]
[ '[ [ _ at ] map-set ] change-final-states ]
[ '[ _ number-transitions ] change-transitions ] tri ;
: minimize ( table -- minimal-table )
clone number-states combine-states ;

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors combinators kernel math sequences strings sets USING: accessors combinators kernel math sequences strings sets
assocs prettyprint.backend prettyprint.custom make lexer assocs prettyprint.backend prettyprint.custom make lexer
namespaces parser arrays fry locals namespaces parser arrays fry locals regexp.minimize
regexp.parser regexp.nfa regexp.dfa regexp.traversal regexp.parser regexp.nfa regexp.dfa regexp.traversal
regexp.transition-tables splitting sorting regexp.ast ; regexp.transition-tables splitting sorting regexp.ast ;
IN: regexp IN: regexp
@ -11,7 +11,7 @@ TUPLE: regexp raw parse-tree options dfa ;
: <optioned-regexp> ( string options -- regexp ) : <optioned-regexp> ( string options -- regexp )
[ dup parse-regexp ] [ string>options ] bi* [ dup parse-regexp ] [ string>options ] bi*
2dup <with-options> construct-nfa construct-dfa 2dup <with-options> construct-nfa construct-dfa minimize
regexp boa ; regexp boa ;
: <regexp> ( string -- regexp ) "" <optioned-regexp> ; : <regexp> ( string -- regexp ) "" <optioned-regexp> ;

View File

@ -33,7 +33,7 @@ TUPLE: dfa-traverser
: text-finished? ( dfa-traverser -- ? ) : text-finished? ( dfa-traverser -- ? )
{ {
[ current-state>> empty? ] [ current-state>> not ]
[ end-of-text? ] [ end-of-text? ]
[ match-failed?>> ] [ match-failed?>> ]
} 1|| ; } 1|| ;
@ -59,8 +59,7 @@ TUPLE: dfa-traverser
1 text-character ; 1 text-character ;
: increment-state ( dfa-traverser state -- dfa-traverser ) : increment-state ( dfa-traverser state -- dfa-traverser )
[ [ 1 + ] change-current-index ] [ [ 1 + ] change-current-index ] dip >>current-state ;
[ first ] bi* >>current-state ;
: match-literal ( transition from-state table -- to-state/f ) : match-literal ( transition from-state table -- to-state/f )
transitions>> at at ; transitions>> at at ;