Negation almost complete in regexp
parent
478c1d2928
commit
f535b66aed
|
@ -1,12 +1,9 @@
|
|||
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel arrays accessors fry sequences ;
|
||||
USING: kernel arrays accessors fry sequences regexp.classes ;
|
||||
FROM: math.ranges => [a,b] ;
|
||||
IN: regexp.ast
|
||||
|
||||
TUPLE: primitive-class class ;
|
||||
C: <primitive-class> primitive-class
|
||||
|
||||
TUPLE: negation term ;
|
||||
C: <negation> negation
|
||||
|
||||
|
@ -56,4 +53,4 @@ M: from-to <times>
|
|||
[ n>> ] [ m>> ] bi [a,b] swap '[ _ repetition ] map <alternation> ;
|
||||
|
||||
: char-class ( ranges ? -- term )
|
||||
[ <alternation> ] dip [ <negation> ] when ;
|
||||
[ <or-class> ] dip [ <not-class> ] when ;
|
||||
|
|
|
@ -119,8 +119,14 @@ C: <or-class> or-class
|
|||
TUPLE: not-class class ;
|
||||
C: <not-class> not-class
|
||||
|
||||
TUPLE: primitive-class class ;
|
||||
C: <primitive-class> primitive-class
|
||||
|
||||
M: or-class class-member?
|
||||
seq>> [ class-member? ] with any? ;
|
||||
|
||||
M: not-class class-member?
|
||||
class>> class-member? not ;
|
||||
|
||||
M: primitive-class class-member?
|
||||
class>> class-member? ;
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: regexp.nfa regexp.dfa regexp.minimize kernel sequences
|
||||
assocs regexp.classes hashtables accessors ;
|
||||
assocs regexp.classes hashtables accessors fry vectors
|
||||
regexp.ast regexp.transition-tables ;
|
||||
IN: regexp.negation
|
||||
|
||||
: ast>dfa ( parse-tree -- minimal-dfa )
|
||||
|
@ -32,5 +33,29 @@ CONSTANT: fail-state -1
|
|||
[ add-fail-state ] change-transitions
|
||||
dup inverse-final-states >>final-states ;
|
||||
|
||||
! M: negation nfa-node ( node -- )
|
||||
! ast>dfa negate-table adjoin-dfa ;
|
||||
: renumber-transitions ( transitions numbering -- new-transitions )
|
||||
dup '[
|
||||
[ _ at ]
|
||||
[ [ [ _ at ] map ] assoc-map ] bi*
|
||||
] assoc-map ;
|
||||
|
||||
: renumber-states ( transition-table -- transition-table )
|
||||
dup transitions>> keys [ next-state ] H{ } map>assoc
|
||||
[ renumber-transitions ] rewrite-transitions ;
|
||||
|
||||
: box-transitions ( transition-table -- transition-table )
|
||||
[ [ [ 1vector ] assoc-map ] assoc-map ] change-transitions ;
|
||||
|
||||
: unify-final-state ( transition-table -- transition-table )
|
||||
dup [ final-states>> keys ] keep
|
||||
'[ -1 eps <literal-transition> _ add-transition ] each
|
||||
H{ { -1 -1 } } >>final-states ;
|
||||
|
||||
: adjoin-dfa ( transition-table -- start end )
|
||||
box-transitions unify-final-state renumber-states
|
||||
[ start-state>> ]
|
||||
[ final-states>> keys first ]
|
||||
[ table [ transitions>> ] bi@ swap update ] tri ;
|
||||
|
||||
M: negation nfa-node ( node -- start end )
|
||||
term>> ast>dfa negate-table adjoin-dfa ;
|
||||
|
|
|
@ -102,9 +102,7 @@ M: integer nfa-node ( node -- start end )
|
|||
[ literal-transition add-simple-entry ] bi@
|
||||
alternate-nodes [ nip ] dip
|
||||
] if
|
||||
] [
|
||||
literal-transition add-simple-entry
|
||||
] if ;
|
||||
] [ literal-transition add-simple-entry ] if ;
|
||||
|
||||
M: primitive-class nfa-node ( node -- start end )
|
||||
class>> dup
|
||||
|
@ -112,12 +110,15 @@ M: primitive-class nfa-node ( node -- start end )
|
|||
[ drop Letter-class ] when
|
||||
class-transition add-simple-entry ;
|
||||
|
||||
M: or-class nfa-node class-transition add-simple-entry ;
|
||||
M: not-class nfa-node class-transition add-simple-entry ;
|
||||
|
||||
M: any-char nfa-node ( node -- start end )
|
||||
[ dotall option? ] dip any-char-no-nl ?
|
||||
class-transition add-simple-entry ;
|
||||
|
||||
M: negation nfa-node ( node -- start end )
|
||||
negate term>> nfa-node negate ;
|
||||
! M: negation nfa-node ( node -- start end )
|
||||
! negate term>> nfa-node negate ;
|
||||
|
||||
M: range nfa-node ( node -- start end )
|
||||
case-insensitive option? [
|
||||
|
|
|
@ -47,11 +47,11 @@ ERROR: bad-class name ;
|
|||
{ CHAR: \\ [ CHAR: \\ ] }
|
||||
|
||||
{ CHAR: w [ c-identifier-class <primitive-class> ] }
|
||||
{ CHAR: W [ c-identifier-class <primitive-class> <negation> ] }
|
||||
{ CHAR: W [ c-identifier-class <primitive-class> <not-class> ] }
|
||||
{ CHAR: s [ java-blank-class <primitive-class> ] }
|
||||
{ CHAR: S [ java-blank-class <primitive-class> <negation> ] }
|
||||
{ CHAR: S [ java-blank-class <primitive-class> <not-class> ] }
|
||||
{ CHAR: d [ digit-class <primitive-class> ] }
|
||||
{ CHAR: D [ digit-class <primitive-class> <negation> ] }
|
||||
{ CHAR: D [ digit-class <primitive-class> <not-class> ] }
|
||||
|
||||
[ ]
|
||||
} case ;
|
||||
|
|
Loading…
Reference in New Issue