diff --git a/basis/regexp/disambiguate/disambiguate.factor b/basis/regexp/disambiguate/disambiguate.factor index 67b1503f9b..876d898cb4 100644 --- a/basis/regexp/disambiguate/disambiguate.factor +++ b/basis/regexp/disambiguate/disambiguate.factor @@ -1,7 +1,8 @@ ! Copyright (C) 2009 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. USING: kernel accessors regexp.classes math.bits assocs sequences -arrays sets regexp.dfa math fry regexp.minimize regexp.ast regexp.transition-tables ; +arrays sets regexp.dfa math fry regexp.minimize regexp.ast +locals regexp.transition-tables ; IN: regexp.disambiguate TUPLE: parts in out ; @@ -9,7 +10,7 @@ TUPLE: parts in out ; : make-partition ( choices classes -- partition ) zip [ first ] partition [ values ] bi@ parts boa ; -: powerset-partition ( classes -- partitions ) +: powerset-partition ( sequence -- partitions ) [ length [ 2^ ] keep ] keep '[ _ _ make-partition ] map rest ; @@ -19,19 +20,49 @@ TUPLE: parts in out ; [ in>> ] bi prefix ; -: get-transitions ( partition state-transitions -- next-states ) - [ in>> ] dip '[ _ at ] gather sift ; +: singleton-partition ( integer non-integers -- {class,partition} ) + dupd + '[ _ [ class-member? ] with filter ] keep + prefix f parts boa + 2array ; + +: add-out ( seq partition -- partition' ) + [ out>> append ] [ in>> ] bi swap parts boa ; + +: intersection ( seq -- elts ) + [ f ] [ unclip [ intersect ] reduce ] if-empty ; + +: meaningful-integers ( partition table -- integers ) + [ [ in>> ] [ out>> ] bi ] dip + '[ [ _ at ] map intersection ] bi@ diff ; + +: class-integers ( classes integers -- table ) + '[ _ over '[ _ class-member? ] filter ] H{ } map>assoc ; + +: add-integers ( partitions classes integers -- partitions ) + class-integers '[ + [ _ meaningful-integers ] keep add-out + ] map ; + +: class-partitions ( classes -- assoc ) + [ integer? ] partition [ + dup powerset-partition spin add-integers + [ [ partition>class ] keep 2array ] map + [ first ] filter + ] [ '[ _ singleton-partition ] map ] 2bi append ; : new-transitions ( transitions -- assoc ) ! assoc is class, partition values [ keys ] gather [ tagged-epsilon? not ] filter - powerset-partition - [ [ partition>class ] keep ] { } map>assoc - [ drop ] assoc-filter ; + class-partitions ; + +: get-transitions ( partition state-transitions -- next-states ) + [ in>> ] dip '[ _ at ] gather sift ; : preserving-epsilon ( state-transitions quot -- new-state-transitions ) [ [ drop tagged-epsilon? ] assoc-filter ] bi assoc-union H{ } assoc-like ; inline + : disambiguate ( nfa -- nfa ) expand-ors [ dup new-transitions '[