Disambiguation works completely in regexp
parent
88f9b3ea92
commit
ba1ac44176
|
@ -0,0 +1,25 @@
|
||||||
|
! Copyright (C) 2009 Daniel Ehrenberg.
|
||||||
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
|
USING: regexp.classes tools.test arrays kernel ;
|
||||||
|
IN: regexp.classes.tests
|
||||||
|
|
||||||
|
[ f ] [ { 1 2 } <and-class> ] unit-test
|
||||||
|
[ T{ or-class f { 2 1 } } ] [ { 1 2 } <or-class> ] unit-test
|
||||||
|
[ 3 ] [ { 1 2 } <and-class> 3 2array <or-class> ] unit-test
|
||||||
|
[ CHAR: A ] [ CHAR: A LETTER-class <primitive-class> 2array <and-class> ] unit-test
|
||||||
|
[ CHAR: A ] [ LETTER-class <primitive-class> CHAR: A 2array <and-class> ] unit-test
|
||||||
|
[ T{ primitive-class { class LETTER-class } } ] [ CHAR: A LETTER-class <primitive-class> 2array <or-class> ] unit-test
|
||||||
|
[ T{ primitive-class { class LETTER-class } } ] [ LETTER-class <primitive-class> CHAR: A 2array <or-class> ] unit-test
|
||||||
|
[ t ] [ { t 1 } <or-class> ] unit-test
|
||||||
|
[ t ] [ { 1 t } <or-class> ] unit-test
|
||||||
|
[ f ] [ { f 1 } <and-class> ] unit-test
|
||||||
|
[ f ] [ { 1 f } <and-class> ] unit-test
|
||||||
|
[ 1 ] [ { f 1 } <or-class> ] unit-test
|
||||||
|
[ 1 ] [ { 1 f } <or-class> ] unit-test
|
||||||
|
[ 1 ] [ { t 1 } <and-class> ] unit-test
|
||||||
|
[ 1 ] [ { 1 t } <and-class> ] unit-test
|
||||||
|
[ 1 ] [ 1 <not-class> <not-class> ] unit-test
|
||||||
|
[ 1 ] [ { 1 1 } <and-class> ] unit-test
|
||||||
|
[ 1 ] [ { 1 1 } <or-class> ] unit-test
|
||||||
|
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <and-class> ] unit-test
|
||||||
|
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <or-class> ] unit-test
|
|
@ -20,8 +20,7 @@ C: <range> range
|
||||||
|
|
||||||
GENERIC: class-member? ( obj class -- ? )
|
GENERIC: class-member? ( obj class -- ? )
|
||||||
|
|
||||||
! When does t get put in?
|
M: t class-member? ( obj class -- ? ) 2drop t ;
|
||||||
M: t class-member? ( obj class -- ? ) 2drop f ;
|
|
||||||
|
|
||||||
M: integer class-member? ( obj class -- ? ) = ;
|
M: integer class-member? ( obj class -- ? ) = ;
|
||||||
|
|
||||||
|
@ -120,7 +119,10 @@ TUPLE: and-class seq ;
|
||||||
|
|
||||||
m:GENERIC: combine-and ( class1 class2 -- combined ? )
|
m:GENERIC: combine-and ( class1 class2 -- combined ? )
|
||||||
|
|
||||||
m:METHOD: combine-and { object object } 2drop f f ;
|
: replace-if-= ( object object -- object ? )
|
||||||
|
over = ;
|
||||||
|
|
||||||
|
m:METHOD: combine-and { object object } replace-if-= ;
|
||||||
|
|
||||||
m:METHOD: combine-and { integer integer }
|
m:METHOD: combine-and { integer integer }
|
||||||
2dup = [ drop t ] [ 2drop f t ] if ;
|
2dup = [ drop t ] [ 2drop f t ] if ;
|
||||||
|
@ -131,12 +133,15 @@ m:METHOD: combine-and { t object }
|
||||||
m:METHOD: combine-and { f object }
|
m:METHOD: combine-and { f object }
|
||||||
drop t ;
|
drop t ;
|
||||||
|
|
||||||
|
m:METHOD: combine-and { not-class object }
|
||||||
|
[ class>> ] dip = [ f t ] [ f f ] if ;
|
||||||
|
|
||||||
m:METHOD: combine-and { integer object }
|
m:METHOD: combine-and { integer object }
|
||||||
2dup class-member? [ drop t ] [ 2drop f t ] if ;
|
2dup class-member? [ drop t ] [ 2drop f t ] if ;
|
||||||
|
|
||||||
m:GENERIC: combine-or ( class1 class2 -- combined ? )
|
m:GENERIC: combine-or ( class1 class2 -- combined ? )
|
||||||
|
|
||||||
m:METHOD: combine-or { object object } 2drop f f ;
|
m:METHOD: combine-or { object object } replace-if-= ;
|
||||||
|
|
||||||
m:METHOD: combine-or { integer integer }
|
m:METHOD: combine-or { integer integer }
|
||||||
2dup = [ drop t ] [ 2drop f f ] if ;
|
2dup = [ drop t ] [ 2drop f f ] if ;
|
||||||
|
@ -147,6 +152,9 @@ m:METHOD: combine-or { t object }
|
||||||
m:METHOD: combine-or { f object }
|
m:METHOD: combine-or { f object }
|
||||||
nip t ;
|
nip t ;
|
||||||
|
|
||||||
|
m:METHOD: combine-or { not-class object }
|
||||||
|
[ class>> ] dip = [ t t ] [ f f ] if ;
|
||||||
|
|
||||||
m:METHOD: combine-or { integer object }
|
m:METHOD: combine-or { integer object }
|
||||||
2dup class-member? [ nip t ] [ 2drop f f ] if ;
|
2dup class-member? [ nip t ] [ 2drop f f ] if ;
|
||||||
|
|
||||||
|
@ -174,7 +182,7 @@ M: and-class class-member?
|
||||||
seq>> [ class-member? ] with all? ;
|
seq>> [ class-member? ] with all? ;
|
||||||
|
|
||||||
: <or-class> ( seq -- class )
|
: <or-class> ( seq -- class )
|
||||||
[ combine-or ] t or-class combine ;
|
[ combine-or ] f or-class combine ;
|
||||||
|
|
||||||
M: or-class class-member?
|
M: or-class class-member?
|
||||||
seq>> [ class-member? ] with any? ;
|
seq>> [ class-member? ] with any? ;
|
||||||
|
@ -183,7 +191,7 @@ M: or-class class-member?
|
||||||
{
|
{
|
||||||
{ t [ f ] }
|
{ t [ f ] }
|
||||||
{ f [ t ] }
|
{ f [ t ] }
|
||||||
[ not-class boa ]
|
[ dup not-class? [ class>> ] [ not-class boa ] if ]
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
M: not-class class-member?
|
M: not-class class-member?
|
||||||
|
|
|
@ -12,11 +12,12 @@ TUPLE: parts in out ;
|
||||||
: powerset-partition ( classes -- partitions )
|
: powerset-partition ( classes -- partitions )
|
||||||
[ length [ 2^ ] keep ] keep '[
|
[ length [ 2^ ] keep ] keep '[
|
||||||
_ <bits> _ make-partition
|
_ <bits> _ make-partition
|
||||||
] map ;
|
] map rest ;
|
||||||
|
|
||||||
: partition>class ( parts -- class )
|
: partition>class ( parts -- class )
|
||||||
[ in>> ] [ out>> ] bi
|
[ out>> [ <not-class> ] map ]
|
||||||
[ <or-class> ] bi@ <not-class> 2array <and-class> ;
|
[ in>> <and-class> ] bi
|
||||||
|
prefix <and-class> ;
|
||||||
|
|
||||||
: get-transitions ( partition state-transitions -- next-states )
|
: get-transitions ( partition state-transitions -- next-states )
|
||||||
[ in>> ] dip '[ _ at ] map prune ;
|
[ in>> ] dip '[ _ at ] map prune ;
|
||||||
|
|
|
@ -7,9 +7,9 @@ IN: regexp.negation.tests
|
||||||
! R/ |[^a]|.+/
|
! R/ |[^a]|.+/
|
||||||
T{ transition-table
|
T{ transition-table
|
||||||
{ transitions H{
|
{ transitions H{
|
||||||
{ 0 H{ { CHAR: a 1 } { T{ not-class f T{ or-class f { CHAR: a } } } -1 } } }
|
{ 0 H{ { CHAR: a 1 } { T{ not-class f CHAR: a } -1 } } }
|
||||||
{ 1 H{ { T{ not-class f T{ or-class f { } } } -1 } } }
|
{ 1 H{ { t -1 } } }
|
||||||
{ -1 H{ { any-char -1 } } }
|
{ -1 H{ { t -1 } } }
|
||||||
} }
|
} }
|
||||||
{ start-state 0 }
|
{ start-state 0 }
|
||||||
{ final-states H{ { 0 0 } { -1 -1 } } }
|
{ final-states H{ { 0 0 } { -1 -1 } } }
|
||||||
|
|
|
@ -12,11 +12,11 @@ CONSTANT: fail-state -1
|
||||||
|
|
||||||
: add-default-transition ( state's-transitions -- new-state's-transitions )
|
: add-default-transition ( state's-transitions -- new-state's-transitions )
|
||||||
clone dup
|
clone dup
|
||||||
[ [ fail-state ] dip keys <or-class> <not-class> ] keep set-at ;
|
[ [ fail-state ] dip keys [ <not-class> ] map <and-class> ] keep set-at ;
|
||||||
|
|
||||||
: fail-state-recurses ( transitions -- new-transitions )
|
: fail-state-recurses ( transitions -- new-transitions )
|
||||||
clone dup
|
clone dup
|
||||||
[ fail-state any-char associate fail-state ] dip set-at ;
|
[ fail-state t associate fail-state ] dip set-at ;
|
||||||
|
|
||||||
: add-fail-state ( transitions -- new-transitions )
|
: add-fail-state ( transitions -- new-transitions )
|
||||||
[ add-default-transition ] assoc-map
|
[ add-default-transition ] assoc-map
|
||||||
|
@ -48,8 +48,8 @@ CONSTANT: fail-state -1
|
||||||
|
|
||||||
: unify-final-state ( transition-table -- transition-table )
|
: unify-final-state ( transition-table -- transition-table )
|
||||||
dup [ final-states>> keys ] keep
|
dup [ final-states>> keys ] keep
|
||||||
'[ -1 eps <literal-transition> _ add-transition ] each
|
'[ -2 eps <literal-transition> _ add-transition ] each
|
||||||
H{ { -1 -1 } } >>final-states ;
|
H{ { -2 -2 } } >>final-states ;
|
||||||
|
|
||||||
: adjoin-dfa ( transition-table -- start end )
|
: adjoin-dfa ( transition-table -- start end )
|
||||||
box-transitions unify-final-state renumber-states
|
box-transitions unify-final-state renumber-states
|
||||||
|
|
|
@ -120,7 +120,7 @@ M: not-class modify-class
|
||||||
class>> modify-class <not-class> ;
|
class>> modify-class <not-class> ;
|
||||||
|
|
||||||
M: any-char modify-class
|
M: any-char modify-class
|
||||||
[ dotall option? ] dip any-char-no-nl ? ;
|
drop dotall option? t any-char-no-nl ? ;
|
||||||
|
|
||||||
: modify-letter-class ( class -- newclass )
|
: modify-letter-class ( class -- newclass )
|
||||||
case-insensitive option? [ drop Letter-class ] when ;
|
case-insensitive option? [ drop Letter-class ] when ;
|
||||||
|
|
Loading…
Reference in New Issue