Negation almost complete in regexp

db4
Daniel Ehrenberg 2009-02-19 18:28:54 -06:00
parent 478c1d2928
commit f535b66aed
5 changed files with 45 additions and 16 deletions

View File

@ -1,12 +1,9 @@
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel arrays accessors fry sequences ;
USING: kernel arrays accessors fry sequences regexp.classes ;
FROM: math.ranges => [a,b] ;
IN: regexp.ast
TUPLE: primitive-class class ;
C: <primitive-class> primitive-class
TUPLE: negation term ;
C: <negation> negation
@ -56,4 +53,4 @@ M: from-to <times>
[ n>> ] [ m>> ] bi [a,b] swap '[ _ repetition ] map <alternation> ;
: char-class ( ranges ? -- term )
[ <alternation> ] dip [ <negation> ] when ;
[ <or-class> ] dip [ <not-class> ] when ;

View File

@ -119,8 +119,14 @@ C: <or-class> or-class
TUPLE: not-class class ;
C: <not-class> not-class
TUPLE: primitive-class class ;
C: <primitive-class> primitive-class
M: or-class class-member?
seq>> [ class-member? ] with any? ;
M: not-class class-member?
class>> class-member? not ;
M: primitive-class class-member?
class>> class-member? ;

View File

@ -1,7 +1,8 @@
! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: regexp.nfa regexp.dfa regexp.minimize kernel sequences
assocs regexp.classes hashtables accessors ;
assocs regexp.classes hashtables accessors fry vectors
regexp.ast regexp.transition-tables ;
IN: regexp.negation
: ast>dfa ( parse-tree -- minimal-dfa )
@ -32,5 +33,29 @@ CONSTANT: fail-state -1
[ add-fail-state ] change-transitions
dup inverse-final-states >>final-states ;
! M: negation nfa-node ( node -- )
! ast>dfa negate-table adjoin-dfa ;
: renumber-transitions ( transitions numbering -- new-transitions )
dup '[
[ _ at ]
[ [ [ _ at ] map ] assoc-map ] bi*
] assoc-map ;
: renumber-states ( transition-table -- transition-table )
dup transitions>> keys [ next-state ] H{ } map>assoc
[ renumber-transitions ] rewrite-transitions ;
: box-transitions ( transition-table -- transition-table )
[ [ [ 1vector ] assoc-map ] assoc-map ] change-transitions ;
: unify-final-state ( transition-table -- transition-table )
dup [ final-states>> keys ] keep
'[ -1 eps <literal-transition> _ add-transition ] each
H{ { -1 -1 } } >>final-states ;
: adjoin-dfa ( transition-table -- start end )
box-transitions unify-final-state renumber-states
[ start-state>> ]
[ final-states>> keys first ]
[ table [ transitions>> ] bi@ swap update ] tri ;
M: negation nfa-node ( node -- start end )
term>> ast>dfa negate-table adjoin-dfa ;

View File

@ -102,9 +102,7 @@ M: integer nfa-node ( node -- start end )
[ literal-transition add-simple-entry ] bi@
alternate-nodes [ nip ] dip
] if
] [
literal-transition add-simple-entry
] if ;
] [ literal-transition add-simple-entry ] if ;
M: primitive-class nfa-node ( node -- start end )
class>> dup
@ -112,12 +110,15 @@ M: primitive-class nfa-node ( node -- start end )
[ drop Letter-class ] when
class-transition add-simple-entry ;
M: or-class nfa-node class-transition add-simple-entry ;
M: not-class nfa-node class-transition add-simple-entry ;
M: any-char nfa-node ( node -- start end )
[ dotall option? ] dip any-char-no-nl ?
class-transition add-simple-entry ;
M: negation nfa-node ( node -- start end )
negate term>> nfa-node negate ;
! M: negation nfa-node ( node -- start end )
! negate term>> nfa-node negate ;
M: range nfa-node ( node -- start end )
case-insensitive option? [

View File

@ -47,11 +47,11 @@ ERROR: bad-class name ;
{ CHAR: \\ [ CHAR: \\ ] }
{ CHAR: w [ c-identifier-class <primitive-class> ] }
{ CHAR: W [ c-identifier-class <primitive-class> <negation> ] }
{ CHAR: W [ c-identifier-class <primitive-class> <not-class> ] }
{ CHAR: s [ java-blank-class <primitive-class> ] }
{ CHAR: S [ java-blank-class <primitive-class> <negation> ] }
{ CHAR: S [ java-blank-class <primitive-class> <not-class> ] }
{ CHAR: d [ digit-class <primitive-class> ] }
{ CHAR: D [ digit-class <primitive-class> <negation> ] }
{ CHAR: D [ digit-class <primitive-class> <not-class> ] }
[ ]
} case ;