Disambiguation of overlapping regexp transitions
parent
484112ad2b
commit
be177fefa0
|
@ -1,6 +1,6 @@
|
||||||
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
|
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors kernel math math.order words
|
USING: accessors kernel math math.order words combinators
|
||||||
ascii unicode.categories combinators.short-circuit sequences ;
|
ascii unicode.categories combinators.short-circuit sequences ;
|
||||||
IN: regexp.classes
|
IN: regexp.classes
|
||||||
|
|
||||||
|
@ -107,20 +107,47 @@ M: end-of-line class-member? ( obj class -- ? )
|
||||||
2drop f ;
|
2drop f ;
|
||||||
|
|
||||||
TUPLE: or-class seq ;
|
TUPLE: or-class seq ;
|
||||||
C: <or-class> or-class
|
|
||||||
|
|
||||||
TUPLE: not-class class ;
|
TUPLE: not-class class ;
|
||||||
C: <not-class> not-class
|
|
||||||
|
|
||||||
: <and-class> ( classes -- class )
|
TUPLE: and-class seq ;
|
||||||
[ <not-class> ] map <or-class> <not-class> ;
|
|
||||||
|
|
||||||
TUPLE: primitive-class class ;
|
TUPLE: primitive-class class ;
|
||||||
C: <primitive-class> primitive-class
|
C: <primitive-class> primitive-class
|
||||||
|
|
||||||
|
: <and-class> ( seq -- class )
|
||||||
|
t swap remove
|
||||||
|
f over member? [ drop f ] [
|
||||||
|
dup length {
|
||||||
|
{ 0 [ drop t ] }
|
||||||
|
{ 1 [ first ] }
|
||||||
|
[ drop and-class boa ]
|
||||||
|
} case
|
||||||
|
] if ;
|
||||||
|
|
||||||
|
M: and-class class-member?
|
||||||
|
seq>> [ class-member? ] with all? ;
|
||||||
|
|
||||||
|
: <or-class> ( seq -- class )
|
||||||
|
f swap remove
|
||||||
|
t over member? [ drop t ] [
|
||||||
|
dup length {
|
||||||
|
{ 0 [ drop f ] }
|
||||||
|
{ 1 [ first ] }
|
||||||
|
[ drop or-class boa ]
|
||||||
|
} case
|
||||||
|
] if ;
|
||||||
|
|
||||||
M: or-class class-member?
|
M: or-class class-member?
|
||||||
seq>> [ class-member? ] with any? ;
|
seq>> [ class-member? ] with any? ;
|
||||||
|
|
||||||
|
: <not-class> ( class -- inverse )
|
||||||
|
{
|
||||||
|
{ t [ f ] }
|
||||||
|
{ f [ t ] }
|
||||||
|
[ not-class boa ]
|
||||||
|
} case ;
|
||||||
|
|
||||||
M: not-class class-member?
|
M: not-class class-member?
|
||||||
class>> class-member? not ;
|
class>> class-member? not ;
|
||||||
|
|
||||||
|
|
|
@ -2,8 +2,7 @@
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors arrays assocs combinators fry kernel locals
|
USING: accessors arrays assocs combinators fry kernel locals
|
||||||
math math.order regexp.nfa regexp.transition-tables sequences
|
math math.order regexp.nfa regexp.transition-tables sequences
|
||||||
sets sorting vectors sequences.deep math.functions regexp.classes ;
|
sets sorting vectors ;
|
||||||
USING: io prettyprint threads ;
|
|
||||||
IN: regexp.dfa
|
IN: regexp.dfa
|
||||||
|
|
||||||
:: (while-changes) ( obj quot: ( obj -- obj' ) comp: ( obj -- key ) old-key -- obj )
|
:: (while-changes) ( obj quot: ( obj -- obj' ) comp: ( obj -- key ) old-key -- obj )
|
||||||
|
@ -17,34 +16,6 @@ IN: regexp.dfa
|
||||||
: while-changes ( obj quot pred -- obj' )
|
: while-changes ( obj quot pred -- obj' )
|
||||||
3dup nip call (while-changes) ; inline
|
3dup nip call (while-changes) ; inline
|
||||||
|
|
||||||
TUPLE: parts in out ;
|
|
||||||
|
|
||||||
: make-partition ( choices classes -- partition )
|
|
||||||
zip [ first ] partition parts boa ;
|
|
||||||
|
|
||||||
: powerset-partition ( classes -- partitions )
|
|
||||||
! Here is where class algebra will happen, when I implement it
|
|
||||||
[ length [ 2^ ] keep ] keep '[
|
|
||||||
_ [ ] map-bits _ make-partition
|
|
||||||
] map ;
|
|
||||||
|
|
||||||
: partition>class ( parts -- class )
|
|
||||||
[ in>> ] [ out>> ] bi
|
|
||||||
[ <or-class> ] bi@ <not-class> 2array <and-class> ;
|
|
||||||
|
|
||||||
: get-transitions ( partition state-transitions -- next-states )
|
|
||||||
[ in>> ] dip '[ at ] gather ;
|
|
||||||
|
|
||||||
: disambiguate-overlap ( nfa -- nfa' )
|
|
||||||
[
|
|
||||||
[
|
|
||||||
[ keys powerset-partition ] keep '[
|
|
||||||
[ partition>class ]
|
|
||||||
[ _ get-transitions ] bi
|
|
||||||
] H{ } map>assoc
|
|
||||||
] assoc-map
|
|
||||||
] change-transitions ;
|
|
||||||
|
|
||||||
: find-delta ( states transition nfa -- new-states )
|
: find-delta ( states transition nfa -- new-states )
|
||||||
transitions>> '[ _ swap _ at at ] gather sift ;
|
transitions>> '[ _ swap _ at at ] gather sift ;
|
||||||
|
|
||||||
|
@ -85,7 +56,8 @@ TUPLE: parts in out ;
|
||||||
|
|
||||||
: states ( hashtable -- array )
|
: states ( hashtable -- array )
|
||||||
[ keys ]
|
[ keys ]
|
||||||
[ values [ values concat ] map concat append ] bi ;
|
[ values [ values concat ] map concat ] bi
|
||||||
|
append ;
|
||||||
|
|
||||||
: set-final-states ( nfa dfa -- )
|
: set-final-states ( nfa dfa -- )
|
||||||
[
|
[
|
||||||
|
@ -100,7 +72,6 @@ TUPLE: parts in out ;
|
||||||
swap find-start-state >>start-state ;
|
swap find-start-state >>start-state ;
|
||||||
|
|
||||||
: construct-dfa ( nfa -- dfa )
|
: construct-dfa ( nfa -- dfa )
|
||||||
disambiguate-overlap
|
|
||||||
dup initialize-dfa
|
dup initialize-dfa
|
||||||
dup start-state>> 1vector
|
dup start-state>> 1vector
|
||||||
H{ } clone
|
H{ } clone
|
||||||
|
|
|
@ -0,0 +1,38 @@
|
||||||
|
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
|
||||||
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
|
USING: kernel accessors regexp.classes math.bits assocs sequences
|
||||||
|
arrays sets regexp.dfa math fry regexp.minimize ;
|
||||||
|
IN: regexp.disambiguate
|
||||||
|
|
||||||
|
TUPLE: parts in out ;
|
||||||
|
|
||||||
|
: make-partition ( choices classes -- partition )
|
||||||
|
zip [ first ] partition [ values ] bi@ parts boa ;
|
||||||
|
|
||||||
|
: powerset-partition ( classes -- partitions )
|
||||||
|
[ length [ 2^ ] keep ] keep '[
|
||||||
|
_ <bits> _ make-partition
|
||||||
|
] map ;
|
||||||
|
|
||||||
|
: partition>class ( parts -- class )
|
||||||
|
[ in>> ] [ out>> ] bi
|
||||||
|
[ <or-class> ] bi@ <not-class> 2array <and-class> ;
|
||||||
|
|
||||||
|
: get-transitions ( partition state-transitions -- next-states )
|
||||||
|
[ in>> ] dip '[ _ at ] map prune ;
|
||||||
|
|
||||||
|
: disambiguate ( dfa -- nfa )
|
||||||
|
[
|
||||||
|
[
|
||||||
|
[ keys powerset-partition ] keep '[
|
||||||
|
[ partition>class ]
|
||||||
|
[ _ get-transitions ] bi
|
||||||
|
] H{ } map>assoc
|
||||||
|
[ drop ] assoc-filter
|
||||||
|
] assoc-map
|
||||||
|
] change-transitions ;
|
||||||
|
|
||||||
|
: nfa>dfa ( nfa -- dfa )
|
||||||
|
construct-dfa
|
||||||
|
minimize disambiguate
|
||||||
|
construct-dfa minimize ;
|
|
@ -1,12 +1,12 @@
|
||||||
! Copyright (C) 2009 Daniel Ehrenberg.
|
! Copyright (C) 2009 Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: regexp.nfa regexp.dfa regexp.minimize kernel sequences
|
USING: regexp.nfa regexp.disambiguate kernel sequences
|
||||||
assocs regexp.classes hashtables accessors fry vectors
|
assocs regexp.classes hashtables accessors fry vectors
|
||||||
regexp.ast regexp.transition-tables ;
|
regexp.ast regexp.transition-tables regexp.minimize ;
|
||||||
IN: regexp.negation
|
IN: regexp.negation
|
||||||
|
|
||||||
: ast>dfa ( parse-tree -- minimal-dfa )
|
: ast>dfa ( parse-tree -- minimal-dfa )
|
||||||
construct-nfa construct-dfa minimize ;
|
construct-nfa nfa>dfa ;
|
||||||
|
|
||||||
CONSTANT: fail-state -1
|
CONSTANT: fail-state -1
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@ M: with-options remove-lookahead
|
||||||
[ tree>> remove-lookahead ] [ options>> ] bi <with-options> ;
|
[ tree>> remove-lookahead ] [ options>> ] bi <with-options> ;
|
||||||
|
|
||||||
M: alternation remove-lookahead
|
M: alternation remove-lookahead
|
||||||
[ first>> ] [ second>> ] bi [ remove-lookahead ] bi@ ;
|
[ first>> ] [ second>> ] bi [ remove-lookahead ] bi@ alternation boa ;
|
||||||
|
|
||||||
M: concatenation remove-lookahead ;
|
M: concatenation remove-lookahead ;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue