Disambiguation of overlapping regexp transitions
parent
484112ad2b
commit
be177fefa0
|
@ -1,6 +1,6 @@
|
|||
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors kernel math math.order words
|
||||
USING: accessors kernel math math.order words combinators
|
||||
ascii unicode.categories combinators.short-circuit sequences ;
|
||||
IN: regexp.classes
|
||||
|
||||
|
@ -107,20 +107,47 @@ M: end-of-line class-member? ( obj class -- ? )
|
|||
2drop f ;
|
||||
|
||||
TUPLE: or-class seq ;
|
||||
C: <or-class> or-class
|
||||
|
||||
TUPLE: not-class class ;
|
||||
C: <not-class> not-class
|
||||
|
||||
: <and-class> ( classes -- class )
|
||||
[ <not-class> ] map <or-class> <not-class> ;
|
||||
TUPLE: and-class seq ;
|
||||
|
||||
TUPLE: primitive-class class ;
|
||||
C: <primitive-class> primitive-class
|
||||
|
||||
: <and-class> ( seq -- class )
|
||||
t swap remove
|
||||
f over member? [ drop f ] [
|
||||
dup length {
|
||||
{ 0 [ drop t ] }
|
||||
{ 1 [ first ] }
|
||||
[ drop and-class boa ]
|
||||
} case
|
||||
] if ;
|
||||
|
||||
M: and-class class-member?
|
||||
seq>> [ class-member? ] with all? ;
|
||||
|
||||
: <or-class> ( seq -- class )
|
||||
f swap remove
|
||||
t over member? [ drop t ] [
|
||||
dup length {
|
||||
{ 0 [ drop f ] }
|
||||
{ 1 [ first ] }
|
||||
[ drop or-class boa ]
|
||||
} case
|
||||
] if ;
|
||||
|
||||
M: or-class class-member?
|
||||
seq>> [ class-member? ] with any? ;
|
||||
|
||||
: <not-class> ( class -- inverse )
|
||||
{
|
||||
{ t [ f ] }
|
||||
{ f [ t ] }
|
||||
[ not-class boa ]
|
||||
} case ;
|
||||
|
||||
M: not-class class-member?
|
||||
class>> class-member? not ;
|
||||
|
||||
|
|
|
@ -2,8 +2,7 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs combinators fry kernel locals
|
||||
math math.order regexp.nfa regexp.transition-tables sequences
|
||||
sets sorting vectors sequences.deep math.functions regexp.classes ;
|
||||
USING: io prettyprint threads ;
|
||||
sets sorting vectors ;
|
||||
IN: regexp.dfa
|
||||
|
||||
:: (while-changes) ( obj quot: ( obj -- obj' ) comp: ( obj -- key ) old-key -- obj )
|
||||
|
@ -17,34 +16,6 @@ IN: regexp.dfa
|
|||
: while-changes ( obj quot pred -- obj' )
|
||||
3dup nip call (while-changes) ; inline
|
||||
|
||||
TUPLE: parts in out ;
|
||||
|
||||
: make-partition ( choices classes -- partition )
|
||||
zip [ first ] partition parts boa ;
|
||||
|
||||
: powerset-partition ( classes -- partitions )
|
||||
! Here is where class algebra will happen, when I implement it
|
||||
[ length [ 2^ ] keep ] keep '[
|
||||
_ [ ] map-bits _ make-partition
|
||||
] map ;
|
||||
|
||||
: partition>class ( parts -- class )
|
||||
[ in>> ] [ out>> ] bi
|
||||
[ <or-class> ] bi@ <not-class> 2array <and-class> ;
|
||||
|
||||
: get-transitions ( partition state-transitions -- next-states )
|
||||
[ in>> ] dip '[ at ] gather ;
|
||||
|
||||
: disambiguate-overlap ( nfa -- nfa' )
|
||||
[
|
||||
[
|
||||
[ keys powerset-partition ] keep '[
|
||||
[ partition>class ]
|
||||
[ _ get-transitions ] bi
|
||||
] H{ } map>assoc
|
||||
] assoc-map
|
||||
] change-transitions ;
|
||||
|
||||
: find-delta ( states transition nfa -- new-states )
|
||||
transitions>> '[ _ swap _ at at ] gather sift ;
|
||||
|
||||
|
@ -85,7 +56,8 @@ TUPLE: parts in out ;
|
|||
|
||||
: states ( hashtable -- array )
|
||||
[ keys ]
|
||||
[ values [ values concat ] map concat append ] bi ;
|
||||
[ values [ values concat ] map concat ] bi
|
||||
append ;
|
||||
|
||||
: set-final-states ( nfa dfa -- )
|
||||
[
|
||||
|
@ -100,7 +72,6 @@ TUPLE: parts in out ;
|
|||
swap find-start-state >>start-state ;
|
||||
|
||||
: construct-dfa ( nfa -- dfa )
|
||||
disambiguate-overlap
|
||||
dup initialize-dfa
|
||||
dup start-state>> 1vector
|
||||
H{ } clone
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel accessors regexp.classes math.bits assocs sequences
|
||||
arrays sets regexp.dfa math fry regexp.minimize ;
|
||||
IN: regexp.disambiguate
|
||||
|
||||
TUPLE: parts in out ;
|
||||
|
||||
: make-partition ( choices classes -- partition )
|
||||
zip [ first ] partition [ values ] bi@ parts boa ;
|
||||
|
||||
: powerset-partition ( classes -- partitions )
|
||||
[ length [ 2^ ] keep ] keep '[
|
||||
_ <bits> _ make-partition
|
||||
] map ;
|
||||
|
||||
: partition>class ( parts -- class )
|
||||
[ in>> ] [ out>> ] bi
|
||||
[ <or-class> ] bi@ <not-class> 2array <and-class> ;
|
||||
|
||||
: get-transitions ( partition state-transitions -- next-states )
|
||||
[ in>> ] dip '[ _ at ] map prune ;
|
||||
|
||||
: disambiguate ( dfa -- nfa )
|
||||
[
|
||||
[
|
||||
[ keys powerset-partition ] keep '[
|
||||
[ partition>class ]
|
||||
[ _ get-transitions ] bi
|
||||
] H{ } map>assoc
|
||||
[ drop ] assoc-filter
|
||||
] assoc-map
|
||||
] change-transitions ;
|
||||
|
||||
: nfa>dfa ( nfa -- dfa )
|
||||
construct-dfa
|
||||
minimize disambiguate
|
||||
construct-dfa minimize ;
|
|
@ -1,12 +1,12 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: regexp.nfa regexp.dfa regexp.minimize kernel sequences
|
||||
USING: regexp.nfa regexp.disambiguate kernel sequences
|
||||
assocs regexp.classes hashtables accessors fry vectors
|
||||
regexp.ast regexp.transition-tables ;
|
||||
regexp.ast regexp.transition-tables regexp.minimize ;
|
||||
IN: regexp.negation
|
||||
|
||||
: ast>dfa ( parse-tree -- minimal-dfa )
|
||||
construct-nfa construct-dfa minimize ;
|
||||
construct-nfa nfa>dfa ;
|
||||
|
||||
CONSTANT: fail-state -1
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ M: with-options remove-lookahead
|
|||
[ tree>> remove-lookahead ] [ options>> ] bi <with-options> ;
|
||||
|
||||
M: alternation remove-lookahead
|
||||
[ first>> ] [ second>> ] bi [ remove-lookahead ] bi@ ;
|
||||
[ first>> ] [ second>> ] bi [ remove-lookahead ] bi@ alternation boa ;
|
||||
|
||||
M: concatenation remove-lookahead ;
|
||||
|
||||
|
|
Loading…
Reference in New Issue