DFAs are minimized now
parent
77b069ee5c
commit
fa84f4c752
|
@ -0,0 +1,5 @@
|
||||||
|
USING: regexp.dfa tools.test ;
|
||||||
|
IN: regexp.dfa.tests
|
||||||
|
|
||||||
|
[ [ ] [ ] while-changes ] must-infer
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
! Copyright (C) 2008 Doug Coleman.
|
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors arrays assocs combinators fry kernel locals
|
USING: accessors arrays assocs combinators fry kernel locals
|
||||||
math math.order regexp.nfa regexp.transition-tables sequences
|
math math.order regexp.nfa regexp.transition-tables sequences
|
||||||
|
@ -6,9 +6,13 @@ sets sorting vectors sequences.deep ;
|
||||||
USING: io prettyprint threads ;
|
USING: io prettyprint threads ;
|
||||||
IN: regexp.dfa
|
IN: regexp.dfa
|
||||||
|
|
||||||
: (while-changes) ( obj quot: ( obj -- obj' ) pred: ( obj -- <=> ) pred-ret -- obj )
|
:: (while-changes) ( obj quot: ( obj -- obj' ) comp: ( obj -- key ) old-key -- obj )
|
||||||
[ [ dup slip ] dip pick over call ] dip dupd =
|
obj quot call :> new-obj
|
||||||
[ 3drop ] [ (while-changes) ] if ; inline recursive
|
new-obj comp call :> new-key
|
||||||
|
new-key old-key =
|
||||||
|
[ new-obj ]
|
||||||
|
[ new-obj quot comp new-key (while-changes) ]
|
||||||
|
if ; inline recursive
|
||||||
|
|
||||||
: while-changes ( obj quot pred -- obj' )
|
: while-changes ( obj quot pred -- obj' )
|
||||||
3dup nip call (while-changes) ; inline
|
3dup nip call (while-changes) ; inline
|
||||||
|
|
|
@ -0,0 +1,48 @@
|
||||||
|
! Copyright (C) 2009 Daniel Ehrenberg
|
||||||
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
|
USING: tools.test regexp.minimize assocs regexp accessors regexp.transition-tables ;
|
||||||
|
IN: regexp.minimize.tests
|
||||||
|
|
||||||
|
[ t ] [ 1 2 H{ { { 1 2 } t } } same-partition? ] unit-test
|
||||||
|
[ t ] [ 2 1 H{ { { 1 2 } t } } same-partition? ] unit-test
|
||||||
|
[ f ] [ 2 3 H{ { { 1 2 } t } } same-partition? ] unit-test
|
||||||
|
|
||||||
|
[ H{ { 1 1 } { 2 1 } { 3 3 } { 4 3 } } ]
|
||||||
|
[ { { 1 1 } { 1 2 } { 2 2 } { 3 3 } { 3 4 } { 4 4 } } [ t ] H{ } map>assoc partition>classes ] unit-test
|
||||||
|
|
||||||
|
[ { { 1 2 } { 3 4 } } ] [ H{ { "elephant" 1 } { "tiger" 3 } } H{ { "elephant" 2 } { "tiger" 4 } } assemble-values ] unit-test
|
||||||
|
|
||||||
|
[ 3 ] [ R/ ab|ac/ dfa>> transitions>> assoc-size ] unit-test
|
||||||
|
[ 3 ] [ R/ a(b|c)/ dfa>> transitions>> assoc-size ] unit-test
|
||||||
|
[ 1 ] [ R/ ((aa*)*)*/ dfa>> transitions>> assoc-size ] unit-test
|
||||||
|
[ 1 ] [ R/ a|((aa*)*)*/ dfa>> transitions>> assoc-size ] unit-test
|
||||||
|
[ 2 ] [ R/ ab|((aa*)*)*b/ dfa>> transitions>> assoc-size ] unit-test
|
||||||
|
[ 4 ] [ R/ ab|cd/ dfa>> transitions>> assoc-size ] unit-test
|
||||||
|
[ 1 ] [ R/ [a-z]*|[A-Z]*/i dfa>> transitions>> assoc-size ] unit-test
|
||||||
|
|
||||||
|
[
|
||||||
|
T{ transition-table
|
||||||
|
{ transitions H{
|
||||||
|
{ 0 H{ { CHAR: a 1 } { CHAR: b 1 } } }
|
||||||
|
{ 1 H{ { CHAR: a 2 } { CHAR: b 2 } } }
|
||||||
|
{ 2 H{ { CHAR: c 3 } } }
|
||||||
|
{ 3 H{ } }
|
||||||
|
} }
|
||||||
|
{ start-state 0 }
|
||||||
|
{ final-states H{ { 3 3 } } }
|
||||||
|
}
|
||||||
|
] [
|
||||||
|
T{ transition-table
|
||||||
|
{ transitions H{
|
||||||
|
{ 0 H{ { CHAR: a 1 } { CHAR: b 4 } } }
|
||||||
|
{ 1 H{ { CHAR: a 2 } { CHAR: b 5 } } }
|
||||||
|
{ 2 H{ { CHAR: c 3 } } }
|
||||||
|
{ 3 H{ } }
|
||||||
|
{ 4 H{ { CHAR: a 2 } { CHAR: b 5 } } }
|
||||||
|
{ 5 H{ { CHAR: c 6 } } }
|
||||||
|
{ 6 H{ } }
|
||||||
|
} }
|
||||||
|
{ start-state 0 }
|
||||||
|
{ final-states H{ { 3 3 } { 6 6 } } }
|
||||||
|
} combine-states
|
||||||
|
] unit-test
|
|
@ -0,0 +1,84 @@
|
||||||
|
! Copyright (C) 2009 Daniel Ehrenberg
|
||||||
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
|
USING: kernel sequences regexp.transition-tables fry assocs
|
||||||
|
accessors locals math sorting arrays sets hashtables regexp.dfa ;
|
||||||
|
IN: regexp.minimize
|
||||||
|
|
||||||
|
:: initialize-partitions ( transition-table -- partitions )
|
||||||
|
! Partition table is sorted-array => ?
|
||||||
|
H{ } clone :> out
|
||||||
|
transition-table transitions>> keys :> states
|
||||||
|
states [| s1 |
|
||||||
|
states [| s2 |
|
||||||
|
s1 s2 <= [
|
||||||
|
s1 s2 [ transition-table transitions>> at keys ] bi@ set=
|
||||||
|
s1 s2 [ transition-table final-states>> key? ] bi@ = and
|
||||||
|
[ t s1 s2 2array out set-at ] when
|
||||||
|
] when
|
||||||
|
] each
|
||||||
|
] each out ;
|
||||||
|
|
||||||
|
: same-partition? ( s1 s2 partitions -- ? )
|
||||||
|
[ 2array natural-sort ] dip key? ;
|
||||||
|
|
||||||
|
: assemble-values ( assoc1 assoc2 -- values )
|
||||||
|
dup keys '[ _ swap [ at ] curry map ] bi@ zip ;
|
||||||
|
|
||||||
|
: stay-same? ( s1 s2 transition partitions -- ? )
|
||||||
|
[ '[ _ transitions>> at ] bi@ assemble-values ] dip
|
||||||
|
'[ _ same-partition? ] assoc-all? ;
|
||||||
|
|
||||||
|
: partition-more ( partitions transition-table -- partitions )
|
||||||
|
! This is horribly slow!
|
||||||
|
over '[ drop first2 _ _ stay-same? ] assoc-filter ;
|
||||||
|
|
||||||
|
: partition>classes ( partitions -- synonyms ) ! old-state => new-state
|
||||||
|
>alist sort-keys
|
||||||
|
[ drop first2 swap ] assoc-map
|
||||||
|
<reversed>
|
||||||
|
>hashtable ;
|
||||||
|
|
||||||
|
: state-classes ( transition-table -- synonyms )
|
||||||
|
[ initialize-partitions ] keep
|
||||||
|
'[ _ partition-more ] [ ] while-changes
|
||||||
|
partition>classes ;
|
||||||
|
|
||||||
|
: canonical-state? ( state state-classes -- ? )
|
||||||
|
dupd at = ;
|
||||||
|
|
||||||
|
: delete-duplicates ( transitions state-classes -- new-transitions )
|
||||||
|
'[ drop _ canonical-state? ] assoc-filter ;
|
||||||
|
|
||||||
|
: rewrite-duplicates ( new-transitions state-classes -- new-transitions )
|
||||||
|
'[ [ _ at ] assoc-map ] assoc-map ;
|
||||||
|
|
||||||
|
: map-set ( assoc quot -- new-assoc )
|
||||||
|
'[ drop @ dup ] assoc-map ; inline
|
||||||
|
|
||||||
|
: combine-states ( table -- smaller-table )
|
||||||
|
dup state-classes
|
||||||
|
[
|
||||||
|
'[
|
||||||
|
_ [ delete-duplicates ]
|
||||||
|
[ rewrite-duplicates ] bi
|
||||||
|
] change-transitions
|
||||||
|
]
|
||||||
|
[ '[ [ _ at ] map-set ] change-final-states ]
|
||||||
|
[ '[ _ at ] change-start-state ]
|
||||||
|
tri ;
|
||||||
|
|
||||||
|
: number-transitions ( transitions numbering -- new-transitions )
|
||||||
|
[
|
||||||
|
[ at ]
|
||||||
|
[ '[ first _ at ] assoc-map ]
|
||||||
|
bi-curry bi*
|
||||||
|
] curry assoc-map ;
|
||||||
|
|
||||||
|
: number-states ( table -- newtable )
|
||||||
|
dup transitions>> keys <enum> [ swap ] H{ } assoc-map-as
|
||||||
|
[ '[ _ at ] change-start-state ]
|
||||||
|
[ '[ [ _ at ] map-set ] change-final-states ]
|
||||||
|
[ '[ _ number-transitions ] change-transitions ] tri ;
|
||||||
|
|
||||||
|
: minimize ( table -- minimal-table )
|
||||||
|
clone number-states combine-states ;
|
|
@ -2,7 +2,7 @@
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors combinators kernel math sequences strings sets
|
USING: accessors combinators kernel math sequences strings sets
|
||||||
assocs prettyprint.backend prettyprint.custom make lexer
|
assocs prettyprint.backend prettyprint.custom make lexer
|
||||||
namespaces parser arrays fry locals
|
namespaces parser arrays fry locals regexp.minimize
|
||||||
regexp.parser regexp.nfa regexp.dfa regexp.traversal
|
regexp.parser regexp.nfa regexp.dfa regexp.traversal
|
||||||
regexp.transition-tables splitting sorting regexp.ast ;
|
regexp.transition-tables splitting sorting regexp.ast ;
|
||||||
IN: regexp
|
IN: regexp
|
||||||
|
@ -11,7 +11,7 @@ TUPLE: regexp raw parse-tree options dfa ;
|
||||||
|
|
||||||
: <optioned-regexp> ( string options -- regexp )
|
: <optioned-regexp> ( string options -- regexp )
|
||||||
[ dup parse-regexp ] [ string>options ] bi*
|
[ dup parse-regexp ] [ string>options ] bi*
|
||||||
2dup <with-options> construct-nfa construct-dfa
|
2dup <with-options> construct-nfa construct-dfa minimize
|
||||||
regexp boa ;
|
regexp boa ;
|
||||||
|
|
||||||
: <regexp> ( string -- regexp ) "" <optioned-regexp> ;
|
: <regexp> ( string -- regexp ) "" <optioned-regexp> ;
|
||||||
|
|
|
@ -33,7 +33,7 @@ TUPLE: dfa-traverser
|
||||||
|
|
||||||
: text-finished? ( dfa-traverser -- ? )
|
: text-finished? ( dfa-traverser -- ? )
|
||||||
{
|
{
|
||||||
[ current-state>> empty? ]
|
[ current-state>> not ]
|
||||||
[ end-of-text? ]
|
[ end-of-text? ]
|
||||||
[ match-failed?>> ]
|
[ match-failed?>> ]
|
||||||
} 1|| ;
|
} 1|| ;
|
||||||
|
@ -59,8 +59,7 @@ TUPLE: dfa-traverser
|
||||||
1 text-character ;
|
1 text-character ;
|
||||||
|
|
||||||
: increment-state ( dfa-traverser state -- dfa-traverser )
|
: increment-state ( dfa-traverser state -- dfa-traverser )
|
||||||
[ [ 1 + ] change-current-index ]
|
[ [ 1 + ] change-current-index ] dip >>current-state ;
|
||||||
[ first ] bi* >>current-state ;
|
|
||||||
|
|
||||||
: match-literal ( transition from-state table -- to-state/f )
|
: match-literal ( transition from-state table -- to-state/f )
|
||||||
transitions>> at at ;
|
transitions>> at at ;
|
||||||
|
|
Loading…
Reference in New Issue