More class algebra; fixing eliminating the DFA interpreter

db4
Daniel Ehrenberg 2009-03-04 15:54:56 -06:00
parent ca19a1b728
commit 39011fd062
6 changed files with 47 additions and 35 deletions

View File

@ -27,20 +27,23 @@ IN: regexp.classes.tests
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <and-class> ] unit-test
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <or-class> ] unit-test
[ T{ or-class { seq { 2 3 1 } } } ] [ { 1 2 } <or-class> { 2 3 } <or-class> 2array <or-class> ] unit-test
[ T{ or-class { seq { 3 2 } } } ] [ { 2 3 } <or-class> 1 <not-class> 2array <and-class> ] unit-test
[ f ] [ t <not-class> ] unit-test
[ t ] [ f <not-class> ] unit-test
! Making classes into nested conditionals
[ V{ 1 2 3 4 } ] [ T{ and-class f { 1 T{ not-class f 2 } T{ or-class f { 3 4 } } 2 } } class>questions ] unit-test
[ { 3 } ] [ { { t 3 } } table>condition ] unit-test
[ { T{ primitive-class } } ] [ { { t 1 } { T{ primitive-class } 2 } } table>questions ] unit-test
[ { { t 1 } { t 2 } } ] [ { { t 1 } { T{ primitive-class } 2 } } T{ primitive-class } t answer ] unit-test
[ { { t 1 } } ] [ { { t 1 } { T{ primitive-class } 2 } } T{ primitive-class } f answer ] unit-test
[ T{ condition f T{ primitive-class } { 1 2 } { 1 } } ] [ { { t 1 } { T{ primitive-class } 2 } } table>condition ] unit-test
[ { 3 } ] [ { { 3 t } } table>condition ] unit-test
[ { T{ primitive-class } } ] [ { { 1 t } { 2 T{ primitive-class } } } table>questions ] unit-test
[ { { 1 t } { 2 t } } ] [ { { 1 t } { 2 T{ primitive-class } } } T{ primitive-class } t answer ] unit-test
[ { { 1 t } } ] [ { { 1 t } { 2 T{ primitive-class } } } T{ primitive-class } f answer ] unit-test
[ T{ condition f T{ primitive-class } { 1 2 } { 1 } } ] [ { { 1 t } { 2 T{ primitive-class } } } table>condition ] unit-test
SYMBOL: foo
SYMBOL: bar
[ T{ condition f T{ primitive-class f bar } T{ condition f T{ primitive-class f foo } { 1 2 3 } { 1 3 } } T{ condition f T{ primitive-class f foo } { 1 2 } { 1 } } } ] [ { { t 1 } { T{ primitive-class f foo } 2 } { T{ primitive-class f bar } 3 } } table>condition ] unit-test
[ T{ condition f T{ primitive-class f bar } T{ condition f T{ primitive-class f foo } { 1 2 3 } { 1 3 } } T{ condition f T{ primitive-class f foo } { 1 2 } { 1 } } } ] [ { { 1 t } { 2 T{ primitive-class f foo } } { 3 T{ primitive-class f bar } } } table>condition ] unit-test
[ t ] [ foo <primitive-class> dup t replace-question ] unit-test
[ f ] [ foo <primitive-class> dup f replace-question ] unit-test

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license.
USING: accessors kernel math math.order words combinators locals
ascii unicode.categories combinators.short-circuit sequences
fry macros arrays assocs sets ;
fry macros arrays assocs sets classes ;
IN: regexp.classes
SINGLETONS: any-char any-char-no-nl
@ -130,7 +130,13 @@ M: f combine-and
nip t ;
M: not-class combine-and
class>> = [ f t ] [ f f ] if ;
class>> 2dup = [ 2drop f t ] [
dup integer? [
2dup swap class-member?
[ 2drop f f ]
[ drop t ] if
] [ 2drop f f ] if
] if ;
M: integer combine-and
swap 2dup class-member? [ drop t ] [ 2drop f t ] if ;
@ -151,9 +157,6 @@ M: not-class combine-or
M: integer combine-or
2dup swap class-member? [ drop t ] [ 2drop f f ] if ;
MACRO: instance? ( class -- ? )
"predicate" word-prop ;
: flatten ( seq class -- newseq )
'[ dup _ instance? [ seq>> ] [ 1array ] if ] map concat ; inline
@ -201,6 +204,9 @@ M: and-class <not-class>
M: or-class <not-class>
seq>> [ <not-class> ] map <and-class> ;
M: t <not-class> drop f ;
M: f <not-class> drop t ;
M: not-class class-member?
class>> class-member? not ;
@ -230,8 +236,8 @@ M: not-class replace-question
class>> replace-question <not-class> ;
: answer ( table question answer -- new-table )
'[ [ _ _ replace-question ] dip ] assoc-map
[ drop ] assoc-filter ;
'[ _ _ replace-question ] assoc-map
[ nip ] assoc-filter ;
DEFER: make-condition
@ -242,7 +248,7 @@ DEFER: make-condition
2dup = [ 2nip ] [ <condition> ] if ;
: make-condition ( table questions -- condition )
[ values ] [ unclip (make-condition) ] if-empty ;
[ keys ] [ unclip (make-condition) ] if-empty ;
GENERIC: class>questions ( class -- questions )
: compound-questions ( class -- questions ) seq>> [ class>questions ] gather ;
@ -252,9 +258,10 @@ M: not-class class>questions class>> class>questions ;
M: object class>questions 1array ;
: table>questions ( table -- questions )
keys <and-class> class>questions t swap remove ;
values <and-class> class>questions t swap remove ;
: table>condition ( table -- condition )
! input table is state => class
>alist dup table>questions make-condition ;
: condition-map ( condition quot: ( obj -- obj' ) -- new-condition )

View File

@ -18,9 +18,13 @@ IN: regexp.compiler
[ [ 3drop ] ] [ '[ drop _ execute ] ] if-empty
] if ;
: non-literals>dispatch ( non-literal-transitions -- quot )
: new-non-literals>dispatch ( non-literal-transitions -- quot )
table>condition condition>quot ;
: non-literals>dispatch ( non-literal-transitions -- quot )
[ [ '[ dup _ class-member? ] ] [ '[ drop _ execute ] ] bi* ] assoc-map
[ 3drop ] suffix '[ _ cond ] ;
: expand-one-or ( or-class transition -- alist )
[ seq>> ] dip '[ _ 2array ] map ;

View File

@ -29,7 +29,7 @@ IN: regexp.dfa
'[ _ _ t epsilon-loop ] each ;
: find-epsilon-closure ( states nfa -- dfa-state )
epsilon-table [ swap ] assoc-map table>condition ;
epsilon-table table>condition ;
: find-closure ( states transition nfa -- new-states )
[ find-delta ] keep find-epsilon-closure ;
@ -59,18 +59,13 @@ IN: regexp.dfa
nfa dfa new-states visited-states new-transitions
] if-empty ;
: states ( hashtable -- array )
[ keys ]
[ values [ values concat ] map concat ] bi
append ;
: set-final-states ( nfa dfa -- )
[
[ final-states>> keys ]
[ transitions>> states ] bi*
[ transitions>> keys ] bi*
[ intersects? ] with filter
] [ final-states>> ] bi
[ conjoin ] curry each ;
unique
] keep (>>final-states) ;
: initialize-dfa ( nfa -- dfa )
<transition-table>

View File

@ -1,7 +1,7 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: tools.test regexp.minimize assocs regexp
accessors regexp.transition-tables ;
accessors regexp.transition-tables regexp.parser regexp.negation ;
IN: regexp.minimize.tests
[ t ] [ 1 2 H{ { { 1 2 } t } } same-partition? ] unit-test
@ -13,13 +13,16 @@ IN: regexp.minimize.tests
[ { { 1 2 } { 3 4 } } ] [ H{ { "elephant" 1 } { "tiger" 3 } } H{ { "elephant" 2 } { "tiger" 4 } } assemble-values ] unit-test
[ 3 ] [ R/ ab|ac/ dfa>> transitions>> assoc-size ] unit-test
[ 3 ] [ R/ a(b|c)/ dfa>> transitions>> assoc-size ] unit-test
[ 1 ] [ R/ ((aa*)*)*/ dfa>> transitions>> assoc-size ] unit-test
[ 1 ] [ R/ a|((aa*)*)*/ dfa>> transitions>> assoc-size ] unit-test
[ 2 ] [ R/ ab|((aa*)*)*b/ dfa>> transitions>> assoc-size ] unit-test
[ 4 ] [ R/ ab|cd/ dfa>> transitions>> assoc-size ] unit-test
[ 1 ] [ R/ [a-z]*|[A-Z]*/i dfa>> transitions>> assoc-size ] unit-test
: regexp-states ( string -- n )
parse-regexp ast>dfa transitions>> assoc-size ;
[ 3 ] [ "ab|ac" regexp-states ] unit-test
[ 3 ] [ "a(b|c)" regexp-states ] unit-test
[ 1 ] [ "((aa*)*)*" regexp-states ] unit-test
[ 1 ] [ "a|((aa*)*)*" regexp-states ] unit-test
[ 2 ] [ "ab|((aa*)*)*b" regexp-states ] unit-test
[ 4 ] [ "ab|cd" regexp-states ] unit-test
[ 1 ] [ "(?i:[a-z]*|[A-Z]*)" regexp-states ] unit-test
[
T{ transition-table

View File

@ -48,7 +48,7 @@ C: <reverse-matcher> reverse-matcher
] change-reverse-dfa ;
M: regexp match-index-from ( string regexp -- index/f )
compile-regexp dfa-quot>> <quot-matcher> match-index-from ;
compile-regexp dfa>> <quot-matcher> match-index-from ;
M: reverse-matcher match-index-from ( string regexp -- index/f )
[ <reversed> ] [ regexp>> compile-reverse reverse-dfa>> ] bi*
@ -81,7 +81,7 @@ M: reverse-matcher match-index-from ( string regexp -- index/f )
: parsing-regexp ( accum end -- accum )
lexer get [ take-until ] [ parse-noblank-token ] bi
<optioned-regexp> compile-dfa-quot parsed ;
<optioned-regexp> compile-regexp parsed ;
PRIVATE>