Fixing bug in disambiguation in regexps

db4
Daniel Ehrenberg 2009-03-03 19:22:53 -06:00
parent a28a80abcf
commit 1740b85598
4 changed files with 46 additions and 37 deletions

View File

@ -23,3 +23,4 @@ IN: regexp.classes.tests
[ 1 ] [ { 1 1 } <or-class> ] unit-test [ 1 ] [ { 1 1 } <or-class> ] unit-test
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <and-class> ] unit-test [ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <and-class> ] unit-test
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <or-class> ] unit-test [ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <or-class> ] unit-test
[ T{ or-class { seq { 2 3 1 } } } ] [ { 1 2 } <or-class> { 2 3 } <or-class> 2array <or-class> ] unit-test

View File

@ -1,7 +1,8 @@
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg. ! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors kernel math math.order words combinators locals USING: accessors kernel math math.order words combinators locals
ascii unicode.categories combinators.short-circuit sequences ; ascii unicode.categories combinators.short-circuit sequences
fry macros arrays ;
IN: regexp.classes IN: regexp.classes
SINGLETONS: any-char any-char-no-nl SINGLETONS: any-char any-char-no-nl
@ -150,6 +151,12 @@ M: not-class combine-or
M: integer combine-or M: integer combine-or
2dup swap class-member? [ drop t ] [ 2drop f f ] if ; 2dup swap class-member? [ drop t ] [ 2drop f f ] if ;
MACRO: instance? ( class -- ? )
"predicate" word-prop ;
: flatten ( seq class -- newseq )
'[ dup _ instance? [ seq>> ] [ 1array ] if ] map concat ; inline
: try-combine ( elt1 elt2 quot -- combined/f ? ) : try-combine ( elt1 elt2 quot -- combined/f ? )
3dup call [ [ 3drop ] dip t ] [ drop swapd call ] if ; inline 3dup call [ [ 3drop ] dip t ] [ drop swapd call ] if ; inline
@ -160,7 +167,8 @@ M: integer combine-or
[ seq elt prefix ] if* ; inline [ seq elt prefix ] if* ; inline
:: combine ( seq quot: ( elt1 elt2 -- combined/f ? ) empty class -- newseq ) :: combine ( seq quot: ( elt1 elt2 -- combined/f ? ) empty class -- newseq )
seq { } [ quot prefix-combining ] reduce seq class flatten
{ } [ quot prefix-combining ] reduce
dup length { dup length {
{ 0 [ drop empty ] } { 0 [ drop empty ] }
{ 1 [ first ] } { 1 [ first ] }
@ -179,12 +187,19 @@ M: and-class class-member?
M: or-class class-member? M: or-class class-member?
seq>> [ class-member? ] with any? ; seq>> [ class-member? ] with any? ;
: <not-class> ( class -- inverse ) GENERIC: <not-class> ( class -- inverse )
{
{ t [ f ] } M: object <not-class>
{ f [ t ] } not-class boa ;
[ dup not-class? [ class>> ] [ not-class boa ] if ]
} case ; M: not-class <not-class>
class>> ;
M: and-class <not-class>
seq>> [ <not-class> ] map <or-class> ;
M: or-class <not-class>
seq>> [ <not-class> ] map <and-class> ;
M: not-class class-member? M: not-class class-member?
class>> class-member? not ; class>> class-member? not ;
@ -192,4 +207,4 @@ M: not-class class-member?
M: primitive-class class-member? M: primitive-class class-member?
class>> class-member? ; class>> class-member? ;
UNION: class primitive-class not-class or-class range ; UNION: class primitive-class not-class or-class and-class range ;

View File

@ -1,7 +1,7 @@
! Copyright (C) 2009 Daniel Ehrenberg. ! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel accessors regexp.classes math.bits assocs sequences USING: kernel accessors regexp.classes math.bits assocs sequences
arrays sets regexp.dfa math fry regexp.minimize ; arrays sets regexp.dfa math fry regexp.minimize regexp.ast ;
IN: regexp.disambiguate IN: regexp.disambiguate
TUPLE: parts in out ; TUPLE: parts in out ;
@ -20,22 +20,28 @@ TUPLE: parts in out ;
prefix <and-class> ; prefix <and-class> ;
: get-transitions ( partition state-transitions -- next-states ) : get-transitions ( partition state-transitions -- next-states )
[ in>> ] dip '[ _ at ] map prune ; [ in>> ] dip '[ _ at ] gather sift ;
: disambiguate ( dfa -- nfa ) : new-transitions ( transitions -- assoc ) ! assoc is class, partition
values [ keys ] gather
[ tagged-epsilon? not ] filter
powerset-partition
[ [ partition>class ] keep ] { } map>assoc
[ drop ] assoc-filter ;
: preserving-epsilon ( state-transitions quot -- new-state-transitions )
[ [ drop tagged-epsilon? ] assoc-filter ] bi
assoc-union H{ } assoc-like ; inline
: disambiguate ( nfa -- nfa )
[ [
[ dup new-transitions '[
[ keys powerset-partition ] keep '[ [
[ partition>class ] _ swap '[ _ get-transitions ] assoc-map
[ _ get-transitions ] bi [ nip empty? not ] assoc-filter
] H{ } map>assoc ] preserving-epsilon
[ drop ] assoc-filter
] assoc-map ] assoc-map
] change-transitions ; ] change-transitions ;
USE: sorting
: nfa>dfa ( nfa -- dfa ) : nfa>dfa ( nfa -- dfa )
construct-dfa minimize disambiguate construct-dfa minimize ;
disambiguate
construct-dfa minimize ;

View File

@ -11,19 +11,6 @@ IN: regexp.nfa
! but case-insensitive matching should be done by case-folding everything ! but case-insensitive matching should be done by case-folding everything
! before processing starts ! before processing starts
GENERIC: remove-lookahead ( syntax-tree -- syntax-tree' )
! This is unfinished and does nothing right now!
M: object remove-lookahead ;
M: with-options remove-lookahead
[ tree>> remove-lookahead ] [ options>> ] bi <with-options> ;
M: alternation remove-lookahead
[ first>> ] [ second>> ] bi [ remove-lookahead ] bi@ alternation boa ;
M: concatenation remove-lookahead ;
SYMBOL: option-stack SYMBOL: option-stack
SYMBOL: state SYMBOL: state
@ -148,7 +135,7 @@ M: with-options nfa-node ( node -- start end )
[ [
0 state set 0 state set
<transition-table> nfa-table set <transition-table> nfa-table set
remove-lookahead nfa-node nfa-node
nfa-table get nfa-table get
swap dup associate >>final-states swap dup associate >>final-states
swap >>start-state swap >>start-state