DFAs are minimized now
parent
77b069ee5c
commit
fa84f4c752
|
@ -0,0 +1,5 @@
|
|||
USING: regexp.dfa tools.test ;
|
||||
IN: regexp.dfa.tests
|
||||
|
||||
[ [ ] [ ] while-changes ] must-infer
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs combinators fry kernel locals
|
||||
math math.order regexp.nfa regexp.transition-tables sequences
|
||||
|
@ -6,9 +6,13 @@ sets sorting vectors sequences.deep ;
|
|||
USING: io prettyprint threads ;
|
||||
IN: regexp.dfa
|
||||
|
||||
: (while-changes) ( obj quot: ( obj -- obj' ) pred: ( obj -- <=> ) pred-ret -- obj )
|
||||
[ [ dup slip ] dip pick over call ] dip dupd =
|
||||
[ 3drop ] [ (while-changes) ] if ; inline recursive
|
||||
:: (while-changes) ( obj quot: ( obj -- obj' ) comp: ( obj -- key ) old-key -- obj )
|
||||
obj quot call :> new-obj
|
||||
new-obj comp call :> new-key
|
||||
new-key old-key =
|
||||
[ new-obj ]
|
||||
[ new-obj quot comp new-key (while-changes) ]
|
||||
if ; inline recursive
|
||||
|
||||
: while-changes ( obj quot pred -- obj' )
|
||||
3dup nip call (while-changes) ; inline
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: tools.test regexp.minimize assocs regexp accessors regexp.transition-tables ;
|
||||
IN: regexp.minimize.tests
|
||||
|
||||
[ t ] [ 1 2 H{ { { 1 2 } t } } same-partition? ] unit-test
|
||||
[ t ] [ 2 1 H{ { { 1 2 } t } } same-partition? ] unit-test
|
||||
[ f ] [ 2 3 H{ { { 1 2 } t } } same-partition? ] unit-test
|
||||
|
||||
[ H{ { 1 1 } { 2 1 } { 3 3 } { 4 3 } } ]
|
||||
[ { { 1 1 } { 1 2 } { 2 2 } { 3 3 } { 3 4 } { 4 4 } } [ t ] H{ } map>assoc partition>classes ] unit-test
|
||||
|
||||
[ { { 1 2 } { 3 4 } } ] [ H{ { "elephant" 1 } { "tiger" 3 } } H{ { "elephant" 2 } { "tiger" 4 } } assemble-values ] unit-test
|
||||
|
||||
[ 3 ] [ R/ ab|ac/ dfa>> transitions>> assoc-size ] unit-test
|
||||
[ 3 ] [ R/ a(b|c)/ dfa>> transitions>> assoc-size ] unit-test
|
||||
[ 1 ] [ R/ ((aa*)*)*/ dfa>> transitions>> assoc-size ] unit-test
|
||||
[ 1 ] [ R/ a|((aa*)*)*/ dfa>> transitions>> assoc-size ] unit-test
|
||||
[ 2 ] [ R/ ab|((aa*)*)*b/ dfa>> transitions>> assoc-size ] unit-test
|
||||
[ 4 ] [ R/ ab|cd/ dfa>> transitions>> assoc-size ] unit-test
|
||||
[ 1 ] [ R/ [a-z]*|[A-Z]*/i dfa>> transitions>> assoc-size ] unit-test
|
||||
|
||||
[
|
||||
T{ transition-table
|
||||
{ transitions H{
|
||||
{ 0 H{ { CHAR: a 1 } { CHAR: b 1 } } }
|
||||
{ 1 H{ { CHAR: a 2 } { CHAR: b 2 } } }
|
||||
{ 2 H{ { CHAR: c 3 } } }
|
||||
{ 3 H{ } }
|
||||
} }
|
||||
{ start-state 0 }
|
||||
{ final-states H{ { 3 3 } } }
|
||||
}
|
||||
] [
|
||||
T{ transition-table
|
||||
{ transitions H{
|
||||
{ 0 H{ { CHAR: a 1 } { CHAR: b 4 } } }
|
||||
{ 1 H{ { CHAR: a 2 } { CHAR: b 5 } } }
|
||||
{ 2 H{ { CHAR: c 3 } } }
|
||||
{ 3 H{ } }
|
||||
{ 4 H{ { CHAR: a 2 } { CHAR: b 5 } } }
|
||||
{ 5 H{ { CHAR: c 6 } } }
|
||||
{ 6 H{ } }
|
||||
} }
|
||||
{ start-state 0 }
|
||||
{ final-states H{ { 3 3 } { 6 6 } } }
|
||||
} combine-states
|
||||
] unit-test
|
|
@ -0,0 +1,84 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel sequences regexp.transition-tables fry assocs
|
||||
accessors locals math sorting arrays sets hashtables regexp.dfa ;
|
||||
IN: regexp.minimize
|
||||
|
||||
:: initialize-partitions ( transition-table -- partitions )
|
||||
! Partition table is sorted-array => ?
|
||||
H{ } clone :> out
|
||||
transition-table transitions>> keys :> states
|
||||
states [| s1 |
|
||||
states [| s2 |
|
||||
s1 s2 <= [
|
||||
s1 s2 [ transition-table transitions>> at keys ] bi@ set=
|
||||
s1 s2 [ transition-table final-states>> key? ] bi@ = and
|
||||
[ t s1 s2 2array out set-at ] when
|
||||
] when
|
||||
] each
|
||||
] each out ;
|
||||
|
||||
: same-partition? ( s1 s2 partitions -- ? )
|
||||
[ 2array natural-sort ] dip key? ;
|
||||
|
||||
: assemble-values ( assoc1 assoc2 -- values )
|
||||
dup keys '[ _ swap [ at ] curry map ] bi@ zip ;
|
||||
|
||||
: stay-same? ( s1 s2 transition partitions -- ? )
|
||||
[ '[ _ transitions>> at ] bi@ assemble-values ] dip
|
||||
'[ _ same-partition? ] assoc-all? ;
|
||||
|
||||
: partition-more ( partitions transition-table -- partitions )
|
||||
! This is horribly slow!
|
||||
over '[ drop first2 _ _ stay-same? ] assoc-filter ;
|
||||
|
||||
: partition>classes ( partitions -- synonyms ) ! old-state => new-state
|
||||
>alist sort-keys
|
||||
[ drop first2 swap ] assoc-map
|
||||
<reversed>
|
||||
>hashtable ;
|
||||
|
||||
: state-classes ( transition-table -- synonyms )
|
||||
[ initialize-partitions ] keep
|
||||
'[ _ partition-more ] [ ] while-changes
|
||||
partition>classes ;
|
||||
|
||||
: canonical-state? ( state state-classes -- ? )
|
||||
dupd at = ;
|
||||
|
||||
: delete-duplicates ( transitions state-classes -- new-transitions )
|
||||
'[ drop _ canonical-state? ] assoc-filter ;
|
||||
|
||||
: rewrite-duplicates ( new-transitions state-classes -- new-transitions )
|
||||
'[ [ _ at ] assoc-map ] assoc-map ;
|
||||
|
||||
: map-set ( assoc quot -- new-assoc )
|
||||
'[ drop @ dup ] assoc-map ; inline
|
||||
|
||||
: combine-states ( table -- smaller-table )
|
||||
dup state-classes
|
||||
[
|
||||
'[
|
||||
_ [ delete-duplicates ]
|
||||
[ rewrite-duplicates ] bi
|
||||
] change-transitions
|
||||
]
|
||||
[ '[ [ _ at ] map-set ] change-final-states ]
|
||||
[ '[ _ at ] change-start-state ]
|
||||
tri ;
|
||||
|
||||
: number-transitions ( transitions numbering -- new-transitions )
|
||||
[
|
||||
[ at ]
|
||||
[ '[ first _ at ] assoc-map ]
|
||||
bi-curry bi*
|
||||
] curry assoc-map ;
|
||||
|
||||
: number-states ( table -- newtable )
|
||||
dup transitions>> keys <enum> [ swap ] H{ } assoc-map-as
|
||||
[ '[ _ at ] change-start-state ]
|
||||
[ '[ [ _ at ] map-set ] change-final-states ]
|
||||
[ '[ _ number-transitions ] change-transitions ] tri ;
|
||||
|
||||
: minimize ( table -- minimal-table )
|
||||
clone number-states combine-states ;
|
|
@ -2,7 +2,7 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors combinators kernel math sequences strings sets
|
||||
assocs prettyprint.backend prettyprint.custom make lexer
|
||||
namespaces parser arrays fry locals
|
||||
namespaces parser arrays fry locals regexp.minimize
|
||||
regexp.parser regexp.nfa regexp.dfa regexp.traversal
|
||||
regexp.transition-tables splitting sorting regexp.ast ;
|
||||
IN: regexp
|
||||
|
@ -11,7 +11,7 @@ TUPLE: regexp raw parse-tree options dfa ;
|
|||
|
||||
: <optioned-regexp> ( string options -- regexp )
|
||||
[ dup parse-regexp ] [ string>options ] bi*
|
||||
2dup <with-options> construct-nfa construct-dfa
|
||||
2dup <with-options> construct-nfa construct-dfa minimize
|
||||
regexp boa ;
|
||||
|
||||
: <regexp> ( string -- regexp ) "" <optioned-regexp> ;
|
||||
|
|
|
@ -33,7 +33,7 @@ TUPLE: dfa-traverser
|
|||
|
||||
: text-finished? ( dfa-traverser -- ? )
|
||||
{
|
||||
[ current-state>> empty? ]
|
||||
[ current-state>> not ]
|
||||
[ end-of-text? ]
|
||||
[ match-failed?>> ]
|
||||
} 1|| ;
|
||||
|
@ -59,8 +59,7 @@ TUPLE: dfa-traverser
|
|||
1 text-character ;
|
||||
|
||||
: increment-state ( dfa-traverser state -- dfa-traverser )
|
||||
[ [ 1 + ] change-current-index ]
|
||||
[ first ] bi* >>current-state ;
|
||||
[ [ 1 + ] change-current-index ] dip >>current-state ;
|
||||
|
||||
: match-literal ( transition from-state table -- to-state/f )
|
||||
transitions>> at at ;
|
||||
|
|
Loading…
Reference in New Issue