More class algebra; fixing eliminating the DFA interpreter
parent
ca19a1b728
commit
39011fd062
|
@ -27,20 +27,23 @@ IN: regexp.classes.tests
|
|||
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <and-class> ] unit-test
|
||||
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <or-class> ] unit-test
|
||||
[ T{ or-class { seq { 2 3 1 } } } ] [ { 1 2 } <or-class> { 2 3 } <or-class> 2array <or-class> ] unit-test
|
||||
[ T{ or-class { seq { 3 2 } } } ] [ { 2 3 } <or-class> 1 <not-class> 2array <and-class> ] unit-test
|
||||
[ f ] [ t <not-class> ] unit-test
|
||||
[ t ] [ f <not-class> ] unit-test
|
||||
|
||||
! Making classes into nested conditionals
|
||||
|
||||
[ V{ 1 2 3 4 } ] [ T{ and-class f { 1 T{ not-class f 2 } T{ or-class f { 3 4 } } 2 } } class>questions ] unit-test
|
||||
[ { 3 } ] [ { { t 3 } } table>condition ] unit-test
|
||||
[ { T{ primitive-class } } ] [ { { t 1 } { T{ primitive-class } 2 } } table>questions ] unit-test
|
||||
[ { { t 1 } { t 2 } } ] [ { { t 1 } { T{ primitive-class } 2 } } T{ primitive-class } t answer ] unit-test
|
||||
[ { { t 1 } } ] [ { { t 1 } { T{ primitive-class } 2 } } T{ primitive-class } f answer ] unit-test
|
||||
[ T{ condition f T{ primitive-class } { 1 2 } { 1 } } ] [ { { t 1 } { T{ primitive-class } 2 } } table>condition ] unit-test
|
||||
[ { 3 } ] [ { { 3 t } } table>condition ] unit-test
|
||||
[ { T{ primitive-class } } ] [ { { 1 t } { 2 T{ primitive-class } } } table>questions ] unit-test
|
||||
[ { { 1 t } { 2 t } } ] [ { { 1 t } { 2 T{ primitive-class } } } T{ primitive-class } t answer ] unit-test
|
||||
[ { { 1 t } } ] [ { { 1 t } { 2 T{ primitive-class } } } T{ primitive-class } f answer ] unit-test
|
||||
[ T{ condition f T{ primitive-class } { 1 2 } { 1 } } ] [ { { 1 t } { 2 T{ primitive-class } } } table>condition ] unit-test
|
||||
|
||||
SYMBOL: foo
|
||||
SYMBOL: bar
|
||||
|
||||
[ T{ condition f T{ primitive-class f bar } T{ condition f T{ primitive-class f foo } { 1 2 3 } { 1 3 } } T{ condition f T{ primitive-class f foo } { 1 2 } { 1 } } } ] [ { { t 1 } { T{ primitive-class f foo } 2 } { T{ primitive-class f bar } 3 } } table>condition ] unit-test
|
||||
[ T{ condition f T{ primitive-class f bar } T{ condition f T{ primitive-class f foo } { 1 2 3 } { 1 3 } } T{ condition f T{ primitive-class f foo } { 1 2 } { 1 } } } ] [ { { 1 t } { 2 T{ primitive-class f foo } } { 3 T{ primitive-class f bar } } } table>condition ] unit-test
|
||||
|
||||
[ t ] [ foo <primitive-class> dup t replace-question ] unit-test
|
||||
[ f ] [ foo <primitive-class> dup f replace-question ] unit-test
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors kernel math math.order words combinators locals
|
||||
ascii unicode.categories combinators.short-circuit sequences
|
||||
fry macros arrays assocs sets ;
|
||||
fry macros arrays assocs sets classes ;
|
||||
IN: regexp.classes
|
||||
|
||||
SINGLETONS: any-char any-char-no-nl
|
||||
|
@ -130,7 +130,13 @@ M: f combine-and
|
|||
nip t ;
|
||||
|
||||
M: not-class combine-and
|
||||
class>> = [ f t ] [ f f ] if ;
|
||||
class>> 2dup = [ 2drop f t ] [
|
||||
dup integer? [
|
||||
2dup swap class-member?
|
||||
[ 2drop f f ]
|
||||
[ drop t ] if
|
||||
] [ 2drop f f ] if
|
||||
] if ;
|
||||
|
||||
M: integer combine-and
|
||||
swap 2dup class-member? [ drop t ] [ 2drop f t ] if ;
|
||||
|
@ -151,9 +157,6 @@ M: not-class combine-or
|
|||
M: integer combine-or
|
||||
2dup swap class-member? [ drop t ] [ 2drop f f ] if ;
|
||||
|
||||
MACRO: instance? ( class -- ? )
|
||||
"predicate" word-prop ;
|
||||
|
||||
: flatten ( seq class -- newseq )
|
||||
'[ dup _ instance? [ seq>> ] [ 1array ] if ] map concat ; inline
|
||||
|
||||
|
@ -201,6 +204,9 @@ M: and-class <not-class>
|
|||
M: or-class <not-class>
|
||||
seq>> [ <not-class> ] map <and-class> ;
|
||||
|
||||
M: t <not-class> drop f ;
|
||||
M: f <not-class> drop t ;
|
||||
|
||||
M: not-class class-member?
|
||||
class>> class-member? not ;
|
||||
|
||||
|
@ -230,8 +236,8 @@ M: not-class replace-question
|
|||
class>> replace-question <not-class> ;
|
||||
|
||||
: answer ( table question answer -- new-table )
|
||||
'[ [ _ _ replace-question ] dip ] assoc-map
|
||||
[ drop ] assoc-filter ;
|
||||
'[ _ _ replace-question ] assoc-map
|
||||
[ nip ] assoc-filter ;
|
||||
|
||||
DEFER: make-condition
|
||||
|
||||
|
@ -242,7 +248,7 @@ DEFER: make-condition
|
|||
2dup = [ 2nip ] [ <condition> ] if ;
|
||||
|
||||
: make-condition ( table questions -- condition )
|
||||
[ values ] [ unclip (make-condition) ] if-empty ;
|
||||
[ keys ] [ unclip (make-condition) ] if-empty ;
|
||||
|
||||
GENERIC: class>questions ( class -- questions )
|
||||
: compound-questions ( class -- questions ) seq>> [ class>questions ] gather ;
|
||||
|
@ -252,9 +258,10 @@ M: not-class class>questions class>> class>questions ;
|
|||
M: object class>questions 1array ;
|
||||
|
||||
: table>questions ( table -- questions )
|
||||
keys <and-class> class>questions t swap remove ;
|
||||
values <and-class> class>questions t swap remove ;
|
||||
|
||||
: table>condition ( table -- condition )
|
||||
! input table is state => class
|
||||
>alist dup table>questions make-condition ;
|
||||
|
||||
: condition-map ( condition quot: ( obj -- obj' ) -- new-condition )
|
||||
|
|
|
@ -18,9 +18,13 @@ IN: regexp.compiler
|
|||
[ [ 3drop ] ] [ '[ drop _ execute ] ] if-empty
|
||||
] if ;
|
||||
|
||||
: non-literals>dispatch ( non-literal-transitions -- quot )
|
||||
: new-non-literals>dispatch ( non-literal-transitions -- quot )
|
||||
table>condition condition>quot ;
|
||||
|
||||
: non-literals>dispatch ( non-literal-transitions -- quot )
|
||||
[ [ '[ dup _ class-member? ] ] [ '[ drop _ execute ] ] bi* ] assoc-map
|
||||
[ 3drop ] suffix '[ _ cond ] ;
|
||||
|
||||
: expand-one-or ( or-class transition -- alist )
|
||||
[ seq>> ] dip '[ _ 2array ] map ;
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ IN: regexp.dfa
|
|||
'[ _ _ t epsilon-loop ] each ;
|
||||
|
||||
: find-epsilon-closure ( states nfa -- dfa-state )
|
||||
epsilon-table [ swap ] assoc-map table>condition ;
|
||||
epsilon-table table>condition ;
|
||||
|
||||
: find-closure ( states transition nfa -- new-states )
|
||||
[ find-delta ] keep find-epsilon-closure ;
|
||||
|
@ -59,18 +59,13 @@ IN: regexp.dfa
|
|||
nfa dfa new-states visited-states new-transitions
|
||||
] if-empty ;
|
||||
|
||||
: states ( hashtable -- array )
|
||||
[ keys ]
|
||||
[ values [ values concat ] map concat ] bi
|
||||
append ;
|
||||
|
||||
: set-final-states ( nfa dfa -- )
|
||||
[
|
||||
[ final-states>> keys ]
|
||||
[ transitions>> states ] bi*
|
||||
[ transitions>> keys ] bi*
|
||||
[ intersects? ] with filter
|
||||
] [ final-states>> ] bi
|
||||
[ conjoin ] curry each ;
|
||||
unique
|
||||
] keep (>>final-states) ;
|
||||
|
||||
: initialize-dfa ( nfa -- dfa )
|
||||
<transition-table>
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: tools.test regexp.minimize assocs regexp
|
||||
accessors regexp.transition-tables ;
|
||||
accessors regexp.transition-tables regexp.parser regexp.negation ;
|
||||
IN: regexp.minimize.tests
|
||||
|
||||
[ t ] [ 1 2 H{ { { 1 2 } t } } same-partition? ] unit-test
|
||||
|
@ -13,13 +13,16 @@ IN: regexp.minimize.tests
|
|||
|
||||
[ { { 1 2 } { 3 4 } } ] [ H{ { "elephant" 1 } { "tiger" 3 } } H{ { "elephant" 2 } { "tiger" 4 } } assemble-values ] unit-test
|
||||
|
||||
[ 3 ] [ R/ ab|ac/ dfa>> transitions>> assoc-size ] unit-test
|
||||
[ 3 ] [ R/ a(b|c)/ dfa>> transitions>> assoc-size ] unit-test
|
||||
[ 1 ] [ R/ ((aa*)*)*/ dfa>> transitions>> assoc-size ] unit-test
|
||||
[ 1 ] [ R/ a|((aa*)*)*/ dfa>> transitions>> assoc-size ] unit-test
|
||||
[ 2 ] [ R/ ab|((aa*)*)*b/ dfa>> transitions>> assoc-size ] unit-test
|
||||
[ 4 ] [ R/ ab|cd/ dfa>> transitions>> assoc-size ] unit-test
|
||||
[ 1 ] [ R/ [a-z]*|[A-Z]*/i dfa>> transitions>> assoc-size ] unit-test
|
||||
: regexp-states ( string -- n )
|
||||
parse-regexp ast>dfa transitions>> assoc-size ;
|
||||
|
||||
[ 3 ] [ "ab|ac" regexp-states ] unit-test
|
||||
[ 3 ] [ "a(b|c)" regexp-states ] unit-test
|
||||
[ 1 ] [ "((aa*)*)*" regexp-states ] unit-test
|
||||
[ 1 ] [ "a|((aa*)*)*" regexp-states ] unit-test
|
||||
[ 2 ] [ "ab|((aa*)*)*b" regexp-states ] unit-test
|
||||
[ 4 ] [ "ab|cd" regexp-states ] unit-test
|
||||
[ 1 ] [ "(?i:[a-z]*|[A-Z]*)" regexp-states ] unit-test
|
||||
|
||||
[
|
||||
T{ transition-table
|
||||
|
|
|
@ -48,7 +48,7 @@ C: <reverse-matcher> reverse-matcher
|
|||
] change-reverse-dfa ;
|
||||
|
||||
M: regexp match-index-from ( string regexp -- index/f )
|
||||
compile-regexp dfa-quot>> <quot-matcher> match-index-from ;
|
||||
compile-regexp dfa>> <quot-matcher> match-index-from ;
|
||||
|
||||
M: reverse-matcher match-index-from ( string regexp -- index/f )
|
||||
[ <reversed> ] [ regexp>> compile-reverse reverse-dfa>> ] bi*
|
||||
|
@ -81,7 +81,7 @@ M: reverse-matcher match-index-from ( string regexp -- index/f )
|
|||
|
||||
: parsing-regexp ( accum end -- accum )
|
||||
lexer get [ take-until ] [ parse-noblank-token ] bi
|
||||
<optioned-regexp> compile-dfa-quot parsed ;
|
||||
<optioned-regexp> compile-regexp parsed ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
|
|
Loading…
Reference in New Issue