From f535b66aedc9d79fa0da69a36017356e16d6dc15 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Thu, 19 Feb 2009 18:28:54 -0600 Subject: [PATCH] Negation almost complete in regexp --- basis/regexp/ast/ast.factor | 7 ++---- basis/regexp/classes/classes.factor | 6 ++++++ basis/regexp/negation/negation.factor | 31 ++++++++++++++++++++++++--- basis/regexp/nfa/nfa.factor | 11 +++++----- basis/regexp/parser/parser.factor | 6 +++--- 5 files changed, 45 insertions(+), 16 deletions(-) diff --git a/basis/regexp/ast/ast.factor b/basis/regexp/ast/ast.factor index ad67d76d12..e1308f0855 100644 --- a/basis/regexp/ast/ast.factor +++ b/basis/regexp/ast/ast.factor @@ -1,12 +1,9 @@ ! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel arrays accessors fry sequences ; +USING: kernel arrays accessors fry sequences regexp.classes ; FROM: math.ranges => [a,b] ; IN: regexp.ast -TUPLE: primitive-class class ; -C: primitive-class - TUPLE: negation term ; C: negation @@ -56,4 +53,4 @@ M: from-to [ n>> ] [ m>> ] bi [a,b] swap '[ _ repetition ] map ; : char-class ( ranges ? -- term ) - [ ] dip [ ] when ; + [ ] dip [ ] when ; diff --git a/basis/regexp/classes/classes.factor b/basis/regexp/classes/classes.factor index 44f33f9fcf..aaa650726c 100644 --- a/basis/regexp/classes/classes.factor +++ b/basis/regexp/classes/classes.factor @@ -119,8 +119,14 @@ C: or-class TUPLE: not-class class ; C: not-class +TUPLE: primitive-class class ; +C: primitive-class + M: or-class class-member? seq>> [ class-member? ] with any? ; M: not-class class-member? class>> class-member? not ; + +M: primitive-class class-member? + class>> class-member? ; diff --git a/basis/regexp/negation/negation.factor b/basis/regexp/negation/negation.factor index 5a9f772581..6b0e6b519e 100644 --- a/basis/regexp/negation/negation.factor +++ b/basis/regexp/negation/negation.factor @@ -1,7 +1,8 @@ ! Copyright (C) 2009 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. USING: regexp.nfa regexp.dfa regexp.minimize kernel sequences -assocs regexp.classes hashtables accessors ; +assocs regexp.classes hashtables accessors fry vectors +regexp.ast regexp.transition-tables ; IN: regexp.negation : ast>dfa ( parse-tree -- minimal-dfa ) @@ -32,5 +33,29 @@ CONSTANT: fail-state -1 [ add-fail-state ] change-transitions dup inverse-final-states >>final-states ; -! M: negation nfa-node ( node -- ) -! ast>dfa negate-table adjoin-dfa ; +: renumber-transitions ( transitions numbering -- new-transitions ) + dup '[ + [ _ at ] + [ [ [ _ at ] map ] assoc-map ] bi* + ] assoc-map ; + +: renumber-states ( transition-table -- transition-table ) + dup transitions>> keys [ next-state ] H{ } map>assoc + [ renumber-transitions ] rewrite-transitions ; + +: box-transitions ( transition-table -- transition-table ) + [ [ [ 1vector ] assoc-map ] assoc-map ] change-transitions ; + +: unify-final-state ( transition-table -- transition-table ) + dup [ final-states>> keys ] keep + '[ -1 eps _ add-transition ] each + H{ { -1 -1 } } >>final-states ; + +: adjoin-dfa ( transition-table -- start end ) + box-transitions unify-final-state renumber-states + [ start-state>> ] + [ final-states>> keys first ] + [ table [ transitions>> ] bi@ swap update ] tri ; + +M: negation nfa-node ( node -- start end ) + term>> ast>dfa negate-table adjoin-dfa ; diff --git a/basis/regexp/nfa/nfa.factor b/basis/regexp/nfa/nfa.factor index c759ffdf98..6775124e60 100644 --- a/basis/regexp/nfa/nfa.factor +++ b/basis/regexp/nfa/nfa.factor @@ -102,9 +102,7 @@ M: integer nfa-node ( node -- start end ) [ literal-transition add-simple-entry ] bi@ alternate-nodes [ nip ] dip ] if - ] [ - literal-transition add-simple-entry - ] if ; + ] [ literal-transition add-simple-entry ] if ; M: primitive-class nfa-node ( node -- start end ) class>> dup @@ -112,12 +110,15 @@ M: primitive-class nfa-node ( node -- start end ) [ drop Letter-class ] when class-transition add-simple-entry ; +M: or-class nfa-node class-transition add-simple-entry ; +M: not-class nfa-node class-transition add-simple-entry ; + M: any-char nfa-node ( node -- start end ) [ dotall option? ] dip any-char-no-nl ? class-transition add-simple-entry ; -M: negation nfa-node ( node -- start end ) - negate term>> nfa-node negate ; +! M: negation nfa-node ( node -- start end ) +! negate term>> nfa-node negate ; M: range nfa-node ( node -- start end ) case-insensitive option? [ diff --git a/basis/regexp/parser/parser.factor b/basis/regexp/parser/parser.factor index 6b2f28dbf6..3a7ba12552 100644 --- a/basis/regexp/parser/parser.factor +++ b/basis/regexp/parser/parser.factor @@ -47,11 +47,11 @@ ERROR: bad-class name ; { CHAR: \\ [ CHAR: \\ ] } { CHAR: w [ c-identifier-class ] } - { CHAR: W [ c-identifier-class ] } + { CHAR: W [ c-identifier-class ] } { CHAR: s [ java-blank-class ] } - { CHAR: S [ java-blank-class ] } + { CHAR: S [ java-blank-class ] } { CHAR: d [ digit-class ] } - { CHAR: D [ digit-class ] } + { CHAR: D [ digit-class ] } [ ] } case ;