diff --git a/basis/regexp/classes/classes.factor b/basis/regexp/classes/classes.factor index 516b6b4a1d..c7106c9154 100644 --- a/basis/regexp/classes/classes.factor +++ b/basis/regexp/classes/classes.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors kernel math math.order words +USING: accessors kernel math math.order words combinators ascii unicode.categories combinators.short-circuit sequences ; IN: regexp.classes @@ -107,20 +107,47 @@ M: end-of-line class-member? ( obj class -- ? ) 2drop f ; TUPLE: or-class seq ; -C: or-class TUPLE: not-class class ; -C: not-class -: ( classes -- class ) - [ ] map ; +TUPLE: and-class seq ; TUPLE: primitive-class class ; C: primitive-class +: ( seq -- class ) + t swap remove + f over member? [ drop f ] [ + dup length { + { 0 [ drop t ] } + { 1 [ first ] } + [ drop and-class boa ] + } case + ] if ; + +M: and-class class-member? + seq>> [ class-member? ] with all? ; + +: ( seq -- class ) + f swap remove + t over member? [ drop t ] [ + dup length { + { 0 [ drop f ] } + { 1 [ first ] } + [ drop or-class boa ] + } case + ] if ; + M: or-class class-member? seq>> [ class-member? ] with any? ; +: ( class -- inverse ) + { + { t [ f ] } + { f [ t ] } + [ not-class boa ] + } case ; + M: not-class class-member? class>> class-member? not ; diff --git a/basis/regexp/dfa/dfa.factor b/basis/regexp/dfa/dfa.factor index 9834ca4ca0..8c2e995163 100644 --- a/basis/regexp/dfa/dfa.factor +++ b/basis/regexp/dfa/dfa.factor @@ -2,8 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors arrays assocs combinators fry kernel locals math math.order regexp.nfa regexp.transition-tables sequences -sets sorting vectors sequences.deep math.functions regexp.classes ; -USING: io prettyprint threads ; +sets sorting vectors ; IN: regexp.dfa :: (while-changes) ( obj quot: ( obj -- obj' ) comp: ( obj -- key ) old-key -- obj ) @@ -17,34 +16,6 @@ IN: regexp.dfa : while-changes ( obj quot pred -- obj' ) 3dup nip call (while-changes) ; inline -TUPLE: parts in out ; - -: make-partition ( choices classes -- partition ) - zip [ first ] partition parts boa ; - -: powerset-partition ( classes -- partitions ) - ! Here is where class algebra will happen, when I implement it - [ length [ 2^ ] keep ] keep '[ - _ [ ] map-bits _ make-partition - ] map ; - -: partition>class ( parts -- class ) - [ in>> ] [ out>> ] bi - [ ] bi@ 2array ; - -: get-transitions ( partition state-transitions -- next-states ) - [ in>> ] dip '[ at ] gather ; - -: disambiguate-overlap ( nfa -- nfa' ) - [ - [ - [ keys powerset-partition ] keep '[ - [ partition>class ] - [ _ get-transitions ] bi - ] H{ } map>assoc - ] assoc-map - ] change-transitions ; - : find-delta ( states transition nfa -- new-states ) transitions>> '[ _ swap _ at at ] gather sift ; @@ -85,7 +56,8 @@ TUPLE: parts in out ; : states ( hashtable -- array ) [ keys ] - [ values [ values concat ] map concat append ] bi ; + [ values [ values concat ] map concat ] bi + append ; : set-final-states ( nfa dfa -- ) [ @@ -100,7 +72,6 @@ TUPLE: parts in out ; swap find-start-state >>start-state ; : construct-dfa ( nfa -- dfa ) - disambiguate-overlap dup initialize-dfa dup start-state>> 1vector H{ } clone diff --git a/basis/regexp/disambiguate/disambiguate.factor b/basis/regexp/disambiguate/disambiguate.factor new file mode 100644 index 0000000000..2e26e43625 --- /dev/null +++ b/basis/regexp/disambiguate/disambiguate.factor @@ -0,0 +1,38 @@ +! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel accessors regexp.classes math.bits assocs sequences +arrays sets regexp.dfa math fry regexp.minimize ; +IN: regexp.disambiguate + +TUPLE: parts in out ; + +: make-partition ( choices classes -- partition ) + zip [ first ] partition [ values ] bi@ parts boa ; + +: powerset-partition ( classes -- partitions ) + [ length [ 2^ ] keep ] keep '[ + _ _ make-partition + ] map ; + +: partition>class ( parts -- class ) + [ in>> ] [ out>> ] bi + [ ] bi@ 2array ; + +: get-transitions ( partition state-transitions -- next-states ) + [ in>> ] dip '[ _ at ] map prune ; + +: disambiguate ( dfa -- nfa ) + [ + [ + [ keys powerset-partition ] keep '[ + [ partition>class ] + [ _ get-transitions ] bi + ] H{ } map>assoc + [ drop ] assoc-filter + ] assoc-map + ] change-transitions ; + +: nfa>dfa ( nfa -- dfa ) + construct-dfa + minimize disambiguate + construct-dfa minimize ; diff --git a/basis/regexp/negation/negation.factor b/basis/regexp/negation/negation.factor index 6b0e6b519e..f235dc1bf5 100644 --- a/basis/regexp/negation/negation.factor +++ b/basis/regexp/negation/negation.factor @@ -1,12 +1,12 @@ ! Copyright (C) 2009 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: regexp.nfa regexp.dfa regexp.minimize kernel sequences +USING: regexp.nfa regexp.disambiguate kernel sequences assocs regexp.classes hashtables accessors fry vectors -regexp.ast regexp.transition-tables ; +regexp.ast regexp.transition-tables regexp.minimize ; IN: regexp.negation : ast>dfa ( parse-tree -- minimal-dfa ) - construct-nfa construct-dfa minimize ; + construct-nfa nfa>dfa ; CONSTANT: fail-state -1 diff --git a/basis/regexp/nfa/nfa.factor b/basis/regexp/nfa/nfa.factor index 370b354276..eff023c278 100644 --- a/basis/regexp/nfa/nfa.factor +++ b/basis/regexp/nfa/nfa.factor @@ -20,7 +20,7 @@ M: with-options remove-lookahead [ tree>> remove-lookahead ] [ options>> ] bi ; M: alternation remove-lookahead - [ first>> ] [ second>> ] bi [ remove-lookahead ] bi@ ; + [ first>> ] [ second>> ] bi [ remove-lookahead ] bi@ alternation boa ; M: concatenation remove-lookahead ;