Regexps use new sets rather than assocs for final states
parent
7364608417
commit
3ad5ca4636
|
@ -1,7 +1,7 @@
|
||||||
! Copyright (C) 2009 Daniel Ehrenberg.
|
! Copyright (C) 2009 Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: regexp.classes kernel sequences regexp.negation
|
USING: regexp.classes kernel sequences regexp.negation
|
||||||
quotations assocs fry math locals combinators
|
quotations assocs fry math locals combinators sets
|
||||||
accessors words compiler.units kernel.private strings
|
accessors words compiler.units kernel.private strings
|
||||||
sequences.private arrays namespaces unicode.breaks
|
sequences.private arrays namespaces unicode.breaks
|
||||||
regexp.transition-tables combinators.short-circuit ;
|
regexp.transition-tables combinators.short-circuit ;
|
||||||
|
@ -106,7 +106,7 @@ C: <box> box
|
||||||
|
|
||||||
: word>quot ( word dfa -- quot )
|
: word>quot ( word dfa -- quot )
|
||||||
[ transitions>> at ]
|
[ transitions>> at ]
|
||||||
[ final-states>> key? ] 2bi
|
[ final-states>> in? ] 2bi
|
||||||
transitions>quot ;
|
transitions>quot ;
|
||||||
|
|
||||||
: states>code ( words dfa -- )
|
: states>code ( words dfa -- )
|
||||||
|
|
|
@ -69,10 +69,10 @@ IN: regexp.dfa
|
||||||
|
|
||||||
: set-final-states ( nfa dfa -- )
|
: set-final-states ( nfa dfa -- )
|
||||||
[
|
[
|
||||||
[ final-states>> keys ]
|
[ final-states>> members ]
|
||||||
[ transitions>> keys ] bi*
|
[ transitions>> keys ] bi*
|
||||||
[ intersects? ] with filter
|
[ intersects? ] with filter
|
||||||
unique
|
fast-set
|
||||||
] keep (>>final-states) ;
|
] keep (>>final-states) ;
|
||||||
|
|
||||||
: initialize-dfa ( nfa -- dfa )
|
: initialize-dfa ( nfa -- dfa )
|
||||||
|
|
|
@ -34,7 +34,7 @@ IN: regexp.minimize.tests
|
||||||
{ 3 H{ } }
|
{ 3 H{ } }
|
||||||
} }
|
} }
|
||||||
{ start-state 0 }
|
{ start-state 0 }
|
||||||
{ final-states H{ { 3 3 } } }
|
{ final-states HS{ 3 } }
|
||||||
}
|
}
|
||||||
] [
|
] [
|
||||||
T{ transition-table
|
T{ transition-table
|
||||||
|
@ -48,7 +48,7 @@ IN: regexp.minimize.tests
|
||||||
{ 6 H{ } }
|
{ 6 H{ } }
|
||||||
} }
|
} }
|
||||||
{ start-state 0 }
|
{ start-state 0 }
|
||||||
{ final-states H{ { 3 3 } { 6 6 } } }
|
{ final-states HS{ 3 6 } }
|
||||||
} combine-states
|
} combine-states
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
|
|
|
@ -18,7 +18,7 @@ IN: regexp.minimize
|
||||||
{
|
{
|
||||||
[ drop <= ]
|
[ drop <= ]
|
||||||
[ transitions>> '[ _ at keys ] bi@ set= ]
|
[ transitions>> '[ _ at keys ] bi@ set= ]
|
||||||
[ final-states>> '[ _ key? ] bi@ = ]
|
[ final-states>> '[ _ in? ] bi@ = ]
|
||||||
} 3&& ;
|
} 3&& ;
|
||||||
|
|
||||||
:: initialize-partitions ( transition-table -- partitions )
|
:: initialize-partitions ( transition-table -- partitions )
|
||||||
|
|
|
@ -12,7 +12,7 @@ IN: regexp.negation.tests
|
||||||
{ -1 H{ { t -1 } } }
|
{ -1 H{ { t -1 } } }
|
||||||
} }
|
} }
|
||||||
{ start-state 0 }
|
{ start-state 0 }
|
||||||
{ final-states H{ { 0 0 } { -1 -1 } } }
|
{ final-states HS{ 0 -1 } }
|
||||||
}
|
}
|
||||||
] [
|
] [
|
||||||
! R/ a/
|
! R/ a/
|
||||||
|
@ -22,6 +22,6 @@ IN: regexp.negation.tests
|
||||||
{ 1 H{ } }
|
{ 1 H{ } }
|
||||||
} }
|
} }
|
||||||
{ start-state 0 }
|
{ start-state 0 }
|
||||||
{ final-states H{ { 1 1 } } }
|
{ final-states HS{ 1 } }
|
||||||
} negate-table
|
} negate-table
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
USING: regexp.nfa regexp.disambiguate kernel sequences
|
USING: regexp.nfa regexp.disambiguate kernel sequences
|
||||||
assocs regexp.classes hashtables accessors fry vectors
|
assocs regexp.classes hashtables accessors fry vectors
|
||||||
regexp.ast regexp.transition-tables regexp.minimize
|
regexp.ast regexp.transition-tables regexp.minimize
|
||||||
regexp.dfa namespaces ;
|
regexp.dfa namespaces sets ;
|
||||||
IN: regexp.negation
|
IN: regexp.negation
|
||||||
|
|
||||||
CONSTANT: fail-state -1
|
CONSTANT: fail-state -1
|
||||||
|
@ -21,7 +21,7 @@ CONSTANT: fail-state -1
|
||||||
fail-state-recurses ;
|
fail-state-recurses ;
|
||||||
|
|
||||||
: inverse-final-states ( transition-table -- final-states )
|
: inverse-final-states ( transition-table -- final-states )
|
||||||
[ transitions>> assoc>set ] [ final-states>> ] bi assoc-diff ;
|
[ transitions>> keys ] [ final-states>> ] bi diff fast-set ;
|
||||||
|
|
||||||
: negate-table ( transition-table -- transition-table )
|
: negate-table ( transition-table -- transition-table )
|
||||||
clone
|
clone
|
||||||
|
@ -36,14 +36,14 @@ CONSTANT: fail-state -1
|
||||||
[ [ [ 1vector ] assoc-map ] assoc-map ] change-transitions ;
|
[ [ [ 1vector ] assoc-map ] assoc-map ] change-transitions ;
|
||||||
|
|
||||||
: unify-final-state ( transition-table -- transition-table )
|
: unify-final-state ( transition-table -- transition-table )
|
||||||
dup [ final-states>> keys ] keep
|
dup [ final-states>> members ] keep
|
||||||
'[ -2 epsilon _ set-transition ] each
|
'[ -2 epsilon _ set-transition ] each
|
||||||
H{ { -2 -2 } } >>final-states ;
|
HS{ -2 } clone >>final-states ;
|
||||||
|
|
||||||
: adjoin-dfa ( transition-table -- start end )
|
: adjoin-dfa ( transition-table -- start end )
|
||||||
unify-final-state renumber-states box-transitions
|
unify-final-state renumber-states box-transitions
|
||||||
[ start-state>> ]
|
[ start-state>> ]
|
||||||
[ final-states>> keys first ]
|
[ final-states>> members first ]
|
||||||
[ nfa-table get [ transitions>> ] bi@ swap assoc-union! drop ] tri ;
|
[ nfa-table get [ transitions>> ] bi@ swap assoc-union! drop ] tri ;
|
||||||
|
|
||||||
: ast>dfa ( parse-tree -- minimal-dfa )
|
: ast>dfa ( parse-tree -- minimal-dfa )
|
||||||
|
|
|
@ -163,6 +163,6 @@ M: with-options nfa-node ( node -- start end )
|
||||||
<transition-table> nfa-table set
|
<transition-table> nfa-table set
|
||||||
nfa-node
|
nfa-node
|
||||||
nfa-table get
|
nfa-table get
|
||||||
swap dup associate >>final-states
|
swap 1array fast-set >>final-states
|
||||||
swap >>start-state
|
swap >>start-state
|
||||||
] with-scope ;
|
] with-scope ;
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
! Copyright (C) 2008 Doug Coleman.
|
! Copyright (C) 2008 Doug Coleman.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors arrays assocs fry hashtables kernel sequences
|
USING: accessors arrays assocs fry hashtables kernel sequences
|
||||||
vectors locals regexp.classes ;
|
vectors locals regexp.classes sets ;
|
||||||
IN: regexp.transition-tables
|
IN: regexp.transition-tables
|
||||||
|
|
||||||
TUPLE: transition-table transitions start-state final-states ;
|
TUPLE: transition-table transitions start-state final-states ;
|
||||||
|
@ -9,7 +9,7 @@ TUPLE: transition-table transitions start-state final-states ;
|
||||||
: <transition-table> ( -- transition-table )
|
: <transition-table> ( -- transition-table )
|
||||||
transition-table new
|
transition-table new
|
||||||
H{ } clone >>transitions
|
H{ } clone >>transitions
|
||||||
H{ } clone >>final-states ;
|
HS{ } clone >>final-states ;
|
||||||
|
|
||||||
:: (set-transition) ( from to obj hash -- )
|
:: (set-transition) ( from to obj hash -- )
|
||||||
from hash at
|
from hash at
|
||||||
|
@ -27,8 +27,8 @@ TUPLE: transition-table transitions start-state final-states ;
|
||||||
: add-transition ( from to obj transition-table -- )
|
: add-transition ( from to obj transition-table -- )
|
||||||
transitions>> (add-transition) ;
|
transitions>> (add-transition) ;
|
||||||
|
|
||||||
: map-set ( assoc quot -- new-assoc )
|
: map-set ( set quot -- new-set )
|
||||||
'[ drop @ dup ] assoc-map ; inline
|
over [ [ members ] dip map ] dip set-like ; inline
|
||||||
|
|
||||||
: number-transitions ( transitions numbering -- new-transitions )
|
: number-transitions ( transitions numbering -- new-transitions )
|
||||||
dup '[
|
dup '[
|
||||||
|
|
Loading…
Reference in New Issue