Fixing bug in disambiguation in regexps
parent
a28a80abcf
commit
1740b85598
|
@ -23,3 +23,4 @@ IN: regexp.classes.tests
|
|||
[ 1 ] [ { 1 1 } <or-class> ] unit-test
|
||||
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <and-class> ] unit-test
|
||||
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <or-class> ] unit-test
|
||||
[ T{ or-class { seq { 2 3 1 } } } ] [ { 1 2 } <or-class> { 2 3 } <or-class> 2array <or-class> ] unit-test
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors kernel math math.order words combinators locals
|
||||
ascii unicode.categories combinators.short-circuit sequences ;
|
||||
ascii unicode.categories combinators.short-circuit sequences
|
||||
fry macros arrays ;
|
||||
IN: regexp.classes
|
||||
|
||||
SINGLETONS: any-char any-char-no-nl
|
||||
|
@ -150,6 +151,12 @@ M: not-class combine-or
|
|||
M: integer combine-or
|
||||
2dup swap class-member? [ drop t ] [ 2drop f f ] if ;
|
||||
|
||||
MACRO: instance? ( class -- ? )
|
||||
"predicate" word-prop ;
|
||||
|
||||
: flatten ( seq class -- newseq )
|
||||
'[ dup _ instance? [ seq>> ] [ 1array ] if ] map concat ; inline
|
||||
|
||||
: try-combine ( elt1 elt2 quot -- combined/f ? )
|
||||
3dup call [ [ 3drop ] dip t ] [ drop swapd call ] if ; inline
|
||||
|
||||
|
@ -160,7 +167,8 @@ M: integer combine-or
|
|||
[ seq elt prefix ] if* ; inline
|
||||
|
||||
:: combine ( seq quot: ( elt1 elt2 -- combined/f ? ) empty class -- newseq )
|
||||
seq { } [ quot prefix-combining ] reduce
|
||||
seq class flatten
|
||||
{ } [ quot prefix-combining ] reduce
|
||||
dup length {
|
||||
{ 0 [ drop empty ] }
|
||||
{ 1 [ first ] }
|
||||
|
@ -179,12 +187,19 @@ M: and-class class-member?
|
|||
M: or-class class-member?
|
||||
seq>> [ class-member? ] with any? ;
|
||||
|
||||
: <not-class> ( class -- inverse )
|
||||
{
|
||||
{ t [ f ] }
|
||||
{ f [ t ] }
|
||||
[ dup not-class? [ class>> ] [ not-class boa ] if ]
|
||||
} case ;
|
||||
GENERIC: <not-class> ( class -- inverse )
|
||||
|
||||
M: object <not-class>
|
||||
not-class boa ;
|
||||
|
||||
M: not-class <not-class>
|
||||
class>> ;
|
||||
|
||||
M: and-class <not-class>
|
||||
seq>> [ <not-class> ] map <or-class> ;
|
||||
|
||||
M: or-class <not-class>
|
||||
seq>> [ <not-class> ] map <and-class> ;
|
||||
|
||||
M: not-class class-member?
|
||||
class>> class-member? not ;
|
||||
|
@ -192,4 +207,4 @@ M: not-class class-member?
|
|||
M: primitive-class class-member?
|
||||
class>> class-member? ;
|
||||
|
||||
UNION: class primitive-class not-class or-class range ;
|
||||
UNION: class primitive-class not-class or-class and-class range ;
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel accessors regexp.classes math.bits assocs sequences
|
||||
arrays sets regexp.dfa math fry regexp.minimize ;
|
||||
arrays sets regexp.dfa math fry regexp.minimize regexp.ast ;
|
||||
IN: regexp.disambiguate
|
||||
|
||||
TUPLE: parts in out ;
|
||||
|
@ -20,22 +20,28 @@ TUPLE: parts in out ;
|
|||
prefix <and-class> ;
|
||||
|
||||
: get-transitions ( partition state-transitions -- next-states )
|
||||
[ in>> ] dip '[ _ at ] map prune ;
|
||||
[ in>> ] dip '[ _ at ] gather sift ;
|
||||
|
||||
: disambiguate ( dfa -- nfa )
|
||||
: new-transitions ( transitions -- assoc ) ! assoc is class, partition
|
||||
values [ keys ] gather
|
||||
[ tagged-epsilon? not ] filter
|
||||
powerset-partition
|
||||
[ [ partition>class ] keep ] { } map>assoc
|
||||
[ drop ] assoc-filter ;
|
||||
|
||||
: preserving-epsilon ( state-transitions quot -- new-state-transitions )
|
||||
[ [ drop tagged-epsilon? ] assoc-filter ] bi
|
||||
assoc-union H{ } assoc-like ; inline
|
||||
|
||||
: disambiguate ( nfa -- nfa )
|
||||
[
|
||||
[
|
||||
[ keys powerset-partition ] keep '[
|
||||
[ partition>class ]
|
||||
[ _ get-transitions ] bi
|
||||
] H{ } map>assoc
|
||||
[ drop ] assoc-filter
|
||||
dup new-transitions '[
|
||||
[
|
||||
_ swap '[ _ get-transitions ] assoc-map
|
||||
[ nip empty? not ] assoc-filter
|
||||
] preserving-epsilon
|
||||
] assoc-map
|
||||
] change-transitions ;
|
||||
|
||||
USE: sorting
|
||||
|
||||
: nfa>dfa ( nfa -- dfa )
|
||||
construct-dfa minimize
|
||||
disambiguate
|
||||
construct-dfa minimize ;
|
||||
disambiguate construct-dfa minimize ;
|
||||
|
|
|
@ -11,19 +11,6 @@ IN: regexp.nfa
|
|||
! but case-insensitive matching should be done by case-folding everything
|
||||
! before processing starts
|
||||
|
||||
GENERIC: remove-lookahead ( syntax-tree -- syntax-tree' )
|
||||
! This is unfinished and does nothing right now!
|
||||
|
||||
M: object remove-lookahead ;
|
||||
|
||||
M: with-options remove-lookahead
|
||||
[ tree>> remove-lookahead ] [ options>> ] bi <with-options> ;
|
||||
|
||||
M: alternation remove-lookahead
|
||||
[ first>> ] [ second>> ] bi [ remove-lookahead ] bi@ alternation boa ;
|
||||
|
||||
M: concatenation remove-lookahead ;
|
||||
|
||||
SYMBOL: option-stack
|
||||
|
||||
SYMBOL: state
|
||||
|
@ -148,7 +135,7 @@ M: with-options nfa-node ( node -- start end )
|
|||
[
|
||||
0 state set
|
||||
<transition-table> nfa-table set
|
||||
remove-lookahead nfa-node
|
||||
nfa-node
|
||||
nfa-table get
|
||||
swap dup associate >>final-states
|
||||
swap >>start-state
|
||||
|
|
Loading…
Reference in New Issue