Negation almost complete in regexp
parent
478c1d2928
commit
f535b66aed
|
@ -1,12 +1,9 @@
|
||||||
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
|
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: kernel arrays accessors fry sequences ;
|
USING: kernel arrays accessors fry sequences regexp.classes ;
|
||||||
FROM: math.ranges => [a,b] ;
|
FROM: math.ranges => [a,b] ;
|
||||||
IN: regexp.ast
|
IN: regexp.ast
|
||||||
|
|
||||||
TUPLE: primitive-class class ;
|
|
||||||
C: <primitive-class> primitive-class
|
|
||||||
|
|
||||||
TUPLE: negation term ;
|
TUPLE: negation term ;
|
||||||
C: <negation> negation
|
C: <negation> negation
|
||||||
|
|
||||||
|
@ -56,4 +53,4 @@ M: from-to <times>
|
||||||
[ n>> ] [ m>> ] bi [a,b] swap '[ _ repetition ] map <alternation> ;
|
[ n>> ] [ m>> ] bi [a,b] swap '[ _ repetition ] map <alternation> ;
|
||||||
|
|
||||||
: char-class ( ranges ? -- term )
|
: char-class ( ranges ? -- term )
|
||||||
[ <alternation> ] dip [ <negation> ] when ;
|
[ <or-class> ] dip [ <not-class> ] when ;
|
||||||
|
|
|
@ -119,8 +119,14 @@ C: <or-class> or-class
|
||||||
TUPLE: not-class class ;
|
TUPLE: not-class class ;
|
||||||
C: <not-class> not-class
|
C: <not-class> not-class
|
||||||
|
|
||||||
|
TUPLE: primitive-class class ;
|
||||||
|
C: <primitive-class> primitive-class
|
||||||
|
|
||||||
M: or-class class-member?
|
M: or-class class-member?
|
||||||
seq>> [ class-member? ] with any? ;
|
seq>> [ class-member? ] with any? ;
|
||||||
|
|
||||||
M: not-class class-member?
|
M: not-class class-member?
|
||||||
class>> class-member? not ;
|
class>> class-member? not ;
|
||||||
|
|
||||||
|
M: primitive-class class-member?
|
||||||
|
class>> class-member? ;
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
! Copyright (C) 2009 Daniel Ehrenberg.
|
! Copyright (C) 2009 Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: regexp.nfa regexp.dfa regexp.minimize kernel sequences
|
USING: regexp.nfa regexp.dfa regexp.minimize kernel sequences
|
||||||
assocs regexp.classes hashtables accessors ;
|
assocs regexp.classes hashtables accessors fry vectors
|
||||||
|
regexp.ast regexp.transition-tables ;
|
||||||
IN: regexp.negation
|
IN: regexp.negation
|
||||||
|
|
||||||
: ast>dfa ( parse-tree -- minimal-dfa )
|
: ast>dfa ( parse-tree -- minimal-dfa )
|
||||||
|
@ -32,5 +33,29 @@ CONSTANT: fail-state -1
|
||||||
[ add-fail-state ] change-transitions
|
[ add-fail-state ] change-transitions
|
||||||
dup inverse-final-states >>final-states ;
|
dup inverse-final-states >>final-states ;
|
||||||
|
|
||||||
! M: negation nfa-node ( node -- )
|
: renumber-transitions ( transitions numbering -- new-transitions )
|
||||||
! ast>dfa negate-table adjoin-dfa ;
|
dup '[
|
||||||
|
[ _ at ]
|
||||||
|
[ [ [ _ at ] map ] assoc-map ] bi*
|
||||||
|
] assoc-map ;
|
||||||
|
|
||||||
|
: renumber-states ( transition-table -- transition-table )
|
||||||
|
dup transitions>> keys [ next-state ] H{ } map>assoc
|
||||||
|
[ renumber-transitions ] rewrite-transitions ;
|
||||||
|
|
||||||
|
: box-transitions ( transition-table -- transition-table )
|
||||||
|
[ [ [ 1vector ] assoc-map ] assoc-map ] change-transitions ;
|
||||||
|
|
||||||
|
: unify-final-state ( transition-table -- transition-table )
|
||||||
|
dup [ final-states>> keys ] keep
|
||||||
|
'[ -1 eps <literal-transition> _ add-transition ] each
|
||||||
|
H{ { -1 -1 } } >>final-states ;
|
||||||
|
|
||||||
|
: adjoin-dfa ( transition-table -- start end )
|
||||||
|
box-transitions unify-final-state renumber-states
|
||||||
|
[ start-state>> ]
|
||||||
|
[ final-states>> keys first ]
|
||||||
|
[ table [ transitions>> ] bi@ swap update ] tri ;
|
||||||
|
|
||||||
|
M: negation nfa-node ( node -- start end )
|
||||||
|
term>> ast>dfa negate-table adjoin-dfa ;
|
||||||
|
|
|
@ -102,9 +102,7 @@ M: integer nfa-node ( node -- start end )
|
||||||
[ literal-transition add-simple-entry ] bi@
|
[ literal-transition add-simple-entry ] bi@
|
||||||
alternate-nodes [ nip ] dip
|
alternate-nodes [ nip ] dip
|
||||||
] if
|
] if
|
||||||
] [
|
] [ literal-transition add-simple-entry ] if ;
|
||||||
literal-transition add-simple-entry
|
|
||||||
] if ;
|
|
||||||
|
|
||||||
M: primitive-class nfa-node ( node -- start end )
|
M: primitive-class nfa-node ( node -- start end )
|
||||||
class>> dup
|
class>> dup
|
||||||
|
@ -112,12 +110,15 @@ M: primitive-class nfa-node ( node -- start end )
|
||||||
[ drop Letter-class ] when
|
[ drop Letter-class ] when
|
||||||
class-transition add-simple-entry ;
|
class-transition add-simple-entry ;
|
||||||
|
|
||||||
|
M: or-class nfa-node class-transition add-simple-entry ;
|
||||||
|
M: not-class nfa-node class-transition add-simple-entry ;
|
||||||
|
|
||||||
M: any-char nfa-node ( node -- start end )
|
M: any-char nfa-node ( node -- start end )
|
||||||
[ dotall option? ] dip any-char-no-nl ?
|
[ dotall option? ] dip any-char-no-nl ?
|
||||||
class-transition add-simple-entry ;
|
class-transition add-simple-entry ;
|
||||||
|
|
||||||
M: negation nfa-node ( node -- start end )
|
! M: negation nfa-node ( node -- start end )
|
||||||
negate term>> nfa-node negate ;
|
! negate term>> nfa-node negate ;
|
||||||
|
|
||||||
M: range nfa-node ( node -- start end )
|
M: range nfa-node ( node -- start end )
|
||||||
case-insensitive option? [
|
case-insensitive option? [
|
||||||
|
|
|
@ -47,11 +47,11 @@ ERROR: bad-class name ;
|
||||||
{ CHAR: \\ [ CHAR: \\ ] }
|
{ CHAR: \\ [ CHAR: \\ ] }
|
||||||
|
|
||||||
{ CHAR: w [ c-identifier-class <primitive-class> ] }
|
{ CHAR: w [ c-identifier-class <primitive-class> ] }
|
||||||
{ CHAR: W [ c-identifier-class <primitive-class> <negation> ] }
|
{ CHAR: W [ c-identifier-class <primitive-class> <not-class> ] }
|
||||||
{ CHAR: s [ java-blank-class <primitive-class> ] }
|
{ CHAR: s [ java-blank-class <primitive-class> ] }
|
||||||
{ CHAR: S [ java-blank-class <primitive-class> <negation> ] }
|
{ CHAR: S [ java-blank-class <primitive-class> <not-class> ] }
|
||||||
{ CHAR: d [ digit-class <primitive-class> ] }
|
{ CHAR: d [ digit-class <primitive-class> ] }
|
||||||
{ CHAR: D [ digit-class <primitive-class> <negation> ] }
|
{ CHAR: D [ digit-class <primitive-class> <not-class> ] }
|
||||||
|
|
||||||
[ ]
|
[ ]
|
||||||
} case ;
|
} case ;
|
||||||
|
|
Loading…
Reference in New Issue