Disambiguation of overlapping regexp transitions

db4
Daniel Ehrenberg 2009-02-21 12:09:41 -06:00
parent 484112ad2b
commit be177fefa0
5 changed files with 77 additions and 41 deletions

View File

@ -1,6 +1,6 @@
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors kernel math math.order words
USING: accessors kernel math math.order words combinators
ascii unicode.categories combinators.short-circuit sequences ;
IN: regexp.classes
@ -107,20 +107,47 @@ M: end-of-line class-member? ( obj class -- ? )
2drop f ;
! Tuple for a union of classes; `seq` holds the member classes.
TUPLE: or-class seq ;
! Positional (boa) constructor for or-class.
C: <or-class> or-class
! Tuple for the complement of a class; `class` holds the class being negated.
TUPLE: not-class class ;
! Positional (boa) constructor for not-class.
C: <not-class> not-class
! Intersection written via De Morgan: negate every class, take the union of
! the negations, then negate that union.
: <and-class> ( classes -- class )
[ <not-class> ] map <or-class> <not-class> ;
! Tuple for an intersection of classes; `seq` holds the member classes.
TUPLE: and-class seq ;
! Tuple with a single `class` slot.
! NOTE(review): how primitive-class is used is not visible in this section.
TUPLE: primitive-class class ;
! Positional (boa) constructor for primitive-class.
C: <primitive-class> primitive-class
! Smart constructor for an intersection of classes.
! Every `t` is removed from seq first, then:
!   an `f` is present  -> f
!   nothing is left    -> t
!   one class is left  -> that class itself, unwrapped
!   otherwise          -> an and-class tuple over the remaining classes
: <and-class> ( seq -- class )
    t swap remove
    {
        { [ f over member? ] [ drop f ] }
        { [ dup empty? ] [ drop t ] }
        { [ dup length 1 = ] [ first ] }
        [ and-class boa ]
    } cond ;
! An object is a member of an and-class only when every class in `seq`
! accepts it.
M: and-class class-member?
seq>> [ class-member? ] with all? ;
! Smart constructor for a union of classes.
! Every `f` is removed from seq first, then:
!   a `t` is present   -> t
!   nothing is left    -> f
!   one class is left  -> that class itself, unwrapped
!   otherwise          -> an or-class tuple over the remaining classes
: <or-class> ( seq -- class )
    f swap remove
    {
        { [ t over member? ] [ drop t ] }
        { [ dup empty? ] [ drop f ] }
        { [ dup length 1 = ] [ first ] }
        [ or-class boa ]
    } cond ;
! An object is a member of an or-class when at least one class in `seq`
! accepts it.
M: or-class class-member?
seq>> [ class-member? ] with any? ;
! Smart constructor for a complement.
! The constants fold directly (t -> f, f -> t); any other class is wrapped
! in a not-class tuple.
: <not-class> ( class -- inverse )
    {
        { [ dup t = ] [ drop f ] }
        { [ dup f = ] [ drop t ] }
        [ not-class boa ]
    } cond ;
! An object is a member of a not-class exactly when the wrapped class
! rejects it.
M: not-class class-member?
class>> class-member? not ;

View File

@ -2,8 +2,7 @@
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs combinators fry kernel locals
math math.order regexp.nfa regexp.transition-tables sequences
sets sorting vectors sequences.deep math.functions regexp.classes ;
USING: io prettyprint threads ;
sets sorting vectors ;
IN: regexp.dfa
:: (while-changes) ( obj quot: ( obj -- obj' ) comp: ( obj -- key ) old-key -- obj )
@ -17,34 +16,6 @@ IN: regexp.dfa
! Entry point for (while-changes): calls pred on obj once to obtain the
! initial key, then passes obj, quot, pred and that key to (while-changes).
! NOTE(review): presumably the helper re-applies quot until the key stops
! changing; its body is not visible here, so confirm.
: while-changes ( obj quot pred -- obj' )
3dup nip call (while-changes) ; inline
! A split of a set of classes: `in` holds the selected ones, `out` the rest.
TUPLE: parts in out ;
! Pairs each choice with its class, splits the pairs by the truth of the
! choice (the first element), and stores the two groups in a parts tuple.
! The groups are kept as { choice class } pairs.
: make-partition ( choices classes -- partition )
zip [ first ] partition parts boa ;
! Builds one partition per integer in 0 .. 2^n - 1, where n is the number
! of classes; each integer selects which classes go into `in`.
! NOTE(review): map-bits is assumed to yield the bits of the integer as the
! choices; it is defined elsewhere, so confirm.
: powerset-partition ( classes -- partitions )
! Here is where class algebra will happen, when I implement it
[ length [ 2^ ] keep ] keep '[
_ [ ] map-bits _ make-partition
] map ;
! Builds the class for a partition as
!   (union of `in`) AND NOT (union of `out`).
! NOTE(review): using a union for `in` means classes built for different
! partitions can overlap; an intersection of `in` looks like the intent.
! Confirm before relying on disjointness.
: partition>class ( parts -- class )
[ in>> ] [ out>> ] bi
[ <or-class> ] bi@ <not-class> 2array <and-class> ;
! Intended to look up each `in` entry of the partition in state-transitions
! and gather the results.
! NOTE(review): the fry quotation '[ at ] has no `_` hole, so
! state-transitions is not captured by it; verify the stack effect.
: get-transitions ( partition state-transitions -- next-states )
[ in>> ] dip '[ at ] gather ;
! Rewrites the nfa's transitions slot. For every entry of the outer table,
! the inner table is rebuilt as a hashtable: each powerset partition of its
! keys becomes a key via partition>class, and the value comes from calling
! get-transitions with that partition and the original inner table.
: disambiguate-overlap ( nfa -- nfa' )
[
[
[ keys powerset-partition ] keep '[
[ partition>class ]
[ _ get-transitions ] bi
] H{ } map>assoc
] assoc-map
] change-transitions ;
! For each state in states, looks the state up in the nfa's transitions
! table and then looks up `transition` in the resulting inner table.
! The results are gathered and f entries (missing lookups) are removed.
: find-delta ( states transition nfa -- new-states )
transitions>> '[ _ swap _ at at ] gather sift ;
@ -85,7 +56,8 @@ TUPLE: parts in out ;
! Collects the states mentioned by a transition table: the table's keys,
! followed by the concatenated values of every nested table.
: states ( hashtable -- array )
[ keys ]
[ values [ values concat ] map concat append ] bi ;
[ values [ values concat ] map concat ] bi
append ;
: set-final-states ( nfa dfa -- )
[
@ -100,7 +72,6 @@ TUPLE: parts in out ;
swap find-start-state >>start-state ;
: construct-dfa ( nfa -- dfa )
disambiguate-overlap
dup initialize-dfa
dup start-state>> 1vector
H{ } clone

View File

@ -0,0 +1,38 @@
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel accessors regexp.classes math.bits assocs sequences
arrays sets regexp.dfa math fry regexp.minimize ;
IN: regexp.disambiguate
! A split of a set of classes: `in` holds the selected ones, `out` the rest.
TUPLE: parts in out ;
! Pairs each choice with its class and splits the pairs by the truth of the
! choice (the first element). `values` then strips the choices, so `in` and
! `out` of the resulting parts tuple hold only classes.
: make-partition ( choices classes -- partition )
zip [ first ] partition [ values ] bi@ parts boa ;
! Builds one partition per integer in 0 .. 2^n - 1, where n is the number
! of classes. Each integer is viewed as n bits through <bits>, and those
! bits are the choices handed to make-partition together with the classes.
: powerset-partition ( classes -- partitions )
[ length [ 2^ ] keep ] keep '[
_ <bits> _ make-partition
] map ;
! Builds the class matched by exactly this partition: an element must
! belong to every class in `in` and to none of the classes in `out`, i.e.
!   (intersection of `in`) AND NOT (union of `out`).
! Intersecting `in` (rather than taking its union) is what makes the classes
! of different partitions pairwise disjoint.
! A partition with an empty `in` yields f, so it stays an unmatchable class
! that callers can filter out.
: partition>class ( parts -- class )
    dup in>> empty? [ drop f ] [
        [ in>> <and-class> ]
        [ out>> <or-class> <not-class> ] bi
        2array <and-class>
    ] if ;
! Looks up every class in the partition's `in` slot in state-transitions
! and returns the lookup results with duplicates pruned.
: get-transitions ( partition state-transitions -- next-states )
    swap in>> [ swap at ] with map prune ;
! Rewrites the transitions slot. For every entry of the outer table, the
! inner table is rebuilt as a hashtable: each powerset partition of its keys
! becomes a key via partition>class, and the value comes from calling
! get-transitions with that partition and the original inner table.
! The trailing assoc-filter keeps only entries whose key (the class) is
! not f.
: disambiguate ( dfa -- nfa )
[
[
[ keys powerset-partition ] keep '[
[ partition>class ]
[ _ get-transitions ] bi
] H{ } map>assoc
[ drop ] assoc-filter
] assoc-map
] change-transitions ;
! Pipeline: build a DFA and minimize it, run disambiguate over the result,
! then build a DFA and minimize once more.
: nfa>dfa ( nfa -- dfa )
construct-dfa
minimize disambiguate
construct-dfa minimize ;

View File

@ -1,12 +1,12 @@
! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: regexp.nfa regexp.dfa regexp.minimize kernel sequences
USING: regexp.nfa regexp.disambiguate kernel sequences
assocs regexp.classes hashtables accessors fry vectors
regexp.ast regexp.transition-tables ;
regexp.ast regexp.transition-tables regexp.minimize ;
IN: regexp.negation
! Turns a parse tree into a minimal DFA (per the stack effect): an NFA is
! constructed first and then passed on to the DFA-building words.
: ast>dfa ( parse-tree -- minimal-dfa )
construct-nfa construct-dfa minimize ;
construct-nfa nfa>dfa ;
! State identifier used for the failure state.
! NOTE(review): -1 is presumably chosen so it cannot collide with real
! state numbers; confirm against the callers.
CONSTANT: fail-state -1

View File

@ -20,7 +20,7 @@ M: with-options remove-lookahead
[ tree>> remove-lookahead ] [ options>> ] bi <with-options> ;
! Applies remove-lookahead to both branches (`first` and `second`) of the
! alternation; the `alternation boa` form packs the two results back into
! an alternation tuple.
M: alternation remove-lookahead
[ first>> ] [ second>> ] bi [ remove-lookahead ] bi@ ;
[ first>> ] [ second>> ] bi [ remove-lookahead ] bi@ alternation boa ;
M: concatenation remove-lookahead ;