DFAs are minimized now

db4
Daniel Ehrenberg 2009-02-19 00:11:45 -06:00
parent 77b069ee5c
commit fa84f4c752
6 changed files with 149 additions and 9 deletions

View File

@ -0,0 +1,5 @@
USING: regexp.dfa tools.test ;
IN: regexp.dfa.tests
[ [ ] [ ] while-changes ] must-infer

View File

@ -1,4 +1,4 @@
! Copyright (C) 2008 Doug Coleman.
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs combinators fry kernel locals
math math.order regexp.nfa regexp.transition-tables sequences
@ -6,9 +6,13 @@ sets sorting vectors sequences.deep ;
USING: io prettyprint threads ;
IN: regexp.dfa
: (while-changes) ( obj quot: ( obj -- obj' ) pred: ( obj -- <=> ) pred-ret -- obj )
[ [ dup slip ] dip pick over call ] dip dupd =
[ 3drop ] [ (while-changes) ] if ; inline recursive
:: (while-changes) ( obj quot: ( obj -- obj' ) comp: ( obj -- key ) old-key -- obj )
obj quot call :> new-obj
new-obj comp call :> new-key
new-key old-key =
[ new-obj ]
[ new-obj quot comp new-key (while-changes) ]
if ; inline recursive
: while-changes ( obj quot pred -- obj' )
3dup nip call (while-changes) ; inline

View File

@ -0,0 +1,48 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: tools.test regexp.minimize assocs regexp accessors regexp.transition-tables ;
IN: regexp.minimize.tests
[ t ] [ 1 2 H{ { { 1 2 } t } } same-partition? ] unit-test
[ t ] [ 2 1 H{ { { 1 2 } t } } same-partition? ] unit-test
[ f ] [ 2 3 H{ { { 1 2 } t } } same-partition? ] unit-test
[ H{ { 1 1 } { 2 1 } { 3 3 } { 4 3 } } ]
[ { { 1 1 } { 1 2 } { 2 2 } { 3 3 } { 3 4 } { 4 4 } } [ t ] H{ } map>assoc partition>classes ] unit-test
[ { { 1 2 } { 3 4 } } ] [ H{ { "elephant" 1 } { "tiger" 3 } } H{ { "elephant" 2 } { "tiger" 4 } } assemble-values ] unit-test
[ 3 ] [ R/ ab|ac/ dfa>> transitions>> assoc-size ] unit-test
[ 3 ] [ R/ a(b|c)/ dfa>> transitions>> assoc-size ] unit-test
[ 1 ] [ R/ ((aa*)*)*/ dfa>> transitions>> assoc-size ] unit-test
[ 1 ] [ R/ a|((aa*)*)*/ dfa>> transitions>> assoc-size ] unit-test
[ 2 ] [ R/ ab|((aa*)*)*b/ dfa>> transitions>> assoc-size ] unit-test
[ 4 ] [ R/ ab|cd/ dfa>> transitions>> assoc-size ] unit-test
[ 1 ] [ R/ [a-z]*|[A-Z]*/i dfa>> transitions>> assoc-size ] unit-test
[
T{ transition-table
{ transitions H{
{ 0 H{ { CHAR: a 1 } { CHAR: b 1 } } }
{ 1 H{ { CHAR: a 2 } { CHAR: b 2 } } }
{ 2 H{ { CHAR: c 3 } } }
{ 3 H{ } }
} }
{ start-state 0 }
{ final-states H{ { 3 3 } } }
}
] [
T{ transition-table
{ transitions H{
{ 0 H{ { CHAR: a 1 } { CHAR: b 4 } } }
{ 1 H{ { CHAR: a 2 } { CHAR: b 5 } } }
{ 2 H{ { CHAR: c 3 } } }
{ 3 H{ } }
{ 4 H{ { CHAR: a 2 } { CHAR: b 5 } } }
{ 5 H{ { CHAR: c 6 } } }
{ 6 H{ } }
} }
{ start-state 0 }
{ final-states H{ { 3 3 } { 6 6 } } }
} combine-states
] unit-test

View File

@ -0,0 +1,84 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences regexp.transition-tables fry assocs
accessors locals math sorting arrays sets hashtables regexp.dfa ;
IN: regexp.minimize
:: initialize-partitions ( transition-table -- partitions )
! Partition table is sorted-array => ?
H{ } clone :> out
transition-table transitions>> keys :> states
states [| s1 |
states [| s2 |
s1 s2 <= [
s1 s2 [ transition-table transitions>> at keys ] bi@ set=
s1 s2 [ transition-table final-states>> key? ] bi@ = and
[ t s1 s2 2array out set-at ] when
] when
] each
] each out ;
: same-partition? ( s1 s2 partitions -- ? )
[ 2array natural-sort ] dip key? ;
: assemble-values ( assoc1 assoc2 -- values )
dup keys '[ _ swap [ at ] curry map ] bi@ zip ;
: stay-same? ( s1 s2 transition partitions -- ? )
[ '[ _ transitions>> at ] bi@ assemble-values ] dip
'[ _ same-partition? ] assoc-all? ;
: partition-more ( partitions transition-table -- partitions )
! This is horribly slow!
over '[ drop first2 _ _ stay-same? ] assoc-filter ;
: partition>classes ( partitions -- synonyms ) ! old-state => new-state
>alist sort-keys
[ drop first2 swap ] assoc-map
<reversed>
>hashtable ;
: state-classes ( transition-table -- synonyms )
[ initialize-partitions ] keep
'[ _ partition-more ] [ ] while-changes
partition>classes ;
: canonical-state? ( state state-classes -- ? )
dupd at = ;
: delete-duplicates ( transitions state-classes -- new-transitions )
'[ drop _ canonical-state? ] assoc-filter ;
: rewrite-duplicates ( new-transitions state-classes -- new-transitions )
'[ [ _ at ] assoc-map ] assoc-map ;
: map-set ( assoc quot -- new-assoc )
'[ drop @ dup ] assoc-map ; inline
: combine-states ( table -- smaller-table )
dup state-classes
[
'[
_ [ delete-duplicates ]
[ rewrite-duplicates ] bi
] change-transitions
]
[ '[ [ _ at ] map-set ] change-final-states ]
[ '[ _ at ] change-start-state ]
tri ;
: number-transitions ( transitions numbering -- new-transitions )
[
[ at ]
[ '[ first _ at ] assoc-map ]
bi-curry bi*
] curry assoc-map ;
: number-states ( table -- newtable )
dup transitions>> keys <enum> [ swap ] H{ } assoc-map-as
[ '[ _ at ] change-start-state ]
[ '[ [ _ at ] map-set ] change-final-states ]
[ '[ _ number-transitions ] change-transitions ] tri ;
: minimize ( table -- minimal-table )
clone number-states combine-states ;

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license.
USING: accessors combinators kernel math sequences strings sets
assocs prettyprint.backend prettyprint.custom make lexer
namespaces parser arrays fry locals
namespaces parser arrays fry locals regexp.minimize
regexp.parser regexp.nfa regexp.dfa regexp.traversal
regexp.transition-tables splitting sorting regexp.ast ;
IN: regexp
@ -11,7 +11,7 @@ TUPLE: regexp raw parse-tree options dfa ;
: <optioned-regexp> ( string options -- regexp )
[ dup parse-regexp ] [ string>options ] bi*
2dup <with-options> construct-nfa construct-dfa
2dup <with-options> construct-nfa construct-dfa minimize
regexp boa ;
: <regexp> ( string -- regexp ) "" <optioned-regexp> ;

View File

@ -33,7 +33,7 @@ TUPLE: dfa-traverser
: text-finished? ( dfa-traverser -- ? )
{
[ current-state>> empty? ]
[ current-state>> not ]
[ end-of-text? ]
[ match-failed?>> ]
} 1|| ;
@ -59,8 +59,7 @@ TUPLE: dfa-traverser
1 text-character ;
: increment-state ( dfa-traverser state -- dfa-traverser )
[ [ 1 + ] change-current-index ]
[ first ] bi* >>current-state ;
[ [ 1 + ] change-current-index ] dip >>current-state ;
: match-literal ( transition from-state table -- to-state/f )
transitions>> at at ;