From ba1ac44176858138cd81fe5d96b6e6dcac3a522e Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Mon, 23 Feb 2009 13:10:38 -0600 Subject: [PATCH] Disambiguation works completely in regexp --- basis/regexp/classes/classes-tests.factor | 25 +++++++++++++++++++ basis/regexp/classes/classes.factor | 20 ++++++++++----- basis/regexp/disambiguate/disambiguate.factor | 7 +++--- basis/regexp/negation/negation-tests.factor | 6 ++--- basis/regexp/negation/negation.factor | 8 +++--- basis/regexp/nfa/nfa.factor | 2 +- 6 files changed, 51 insertions(+), 17 deletions(-) create mode 100644 basis/regexp/classes/classes-tests.factor diff --git a/basis/regexp/classes/classes-tests.factor b/basis/regexp/classes/classes-tests.factor new file mode 100644 index 0000000000..4cbb2e7a57 --- /dev/null +++ b/basis/regexp/classes/classes-tests.factor @@ -0,0 +1,25 @@ +! Copyright (C) 2009 Daniel Ehrenberg. +! See http://factorcode.org/license.txt for BSD license. +USING: regexp.classes tools.test arrays kernel ; +IN: regexp.classes.tests + +[ f ] [ { 1 2 } ] unit-test +[ T{ or-class f { 2 1 } } ] [ { 1 2 } ] unit-test +[ 3 ] [ { 1 2 } 3 2array ] unit-test +[ CHAR: A ] [ CHAR: A LETTER-class 2array ] unit-test +[ CHAR: A ] [ LETTER-class CHAR: A 2array ] unit-test +[ T{ primitive-class { class LETTER-class } } ] [ CHAR: A LETTER-class 2array ] unit-test +[ T{ primitive-class { class LETTER-class } } ] [ LETTER-class CHAR: A 2array ] unit-test +[ t ] [ { t 1 } ] unit-test +[ t ] [ { 1 t } ] unit-test +[ f ] [ { f 1 } ] unit-test +[ f ] [ { 1 f } ] unit-test +[ 1 ] [ { f 1 } ] unit-test +[ 1 ] [ { 1 f } ] unit-test +[ 1 ] [ { t 1 } ] unit-test +[ 1 ] [ { 1 t } ] unit-test +[ 1 ] [ 1 ] unit-test +[ 1 ] [ { 1 1 } ] unit-test +[ 1 ] [ { 1 1 } ] unit-test +[ T{ primitive-class { class letter-class } } ] [ letter-class dup 2array ] unit-test +[ T{ primitive-class { class letter-class } } ] [ letter-class dup 2array ] unit-test diff --git a/basis/regexp/classes/classes.factor b/basis/regexp/classes/classes.factor index 8d235daf07..6e68e9e0f6 100644 --- a/basis/regexp/classes/classes.factor +++ b/basis/regexp/classes/classes.factor @@ -20,8 +20,7 @@ C: range GENERIC: class-member? ( obj class -- ? ) -! When does t get put in? -M: t class-member? ( obj class -- ? ) 2drop f ; +M: t class-member? ( obj class -- ? ) 2drop t ; M: integer class-member? ( obj class -- ? ) = ; @@ -120,7 +119,10 @@ TUPLE: and-class seq ; m:GENERIC: combine-and ( class1 class2 -- combined ? ) -m:METHOD: combine-and { object object } 2drop f f ; +: replace-if-= ( object object -- object ? ) + over = ; + +m:METHOD: combine-and { object object } replace-if-= ; m:METHOD: combine-and { integer integer } 2dup = [ drop t ] [ 2drop f t ] if ; @@ -131,12 +133,15 @@ m:METHOD: combine-and { t object } m:METHOD: combine-and { f object } drop t ; +m:METHOD: combine-and { not-class object } + [ class>> ] dip = [ f t ] [ f f ] if ; + m:METHOD: combine-and { integer object } 2dup class-member? [ drop t ] [ 2drop f t ] if ; m:GENERIC: combine-or ( class1 class2 -- combined ? ) -m:METHOD: combine-or { object object } 2drop f f ; +m:METHOD: combine-or { object object } replace-if-= ; m:METHOD: combine-or { integer integer } 2dup = [ drop t ] [ 2drop f f ] if ; @@ -147,6 +152,9 @@ m:METHOD: combine-or { t object } m:METHOD: combine-or { f object } nip t ; +m:METHOD: combine-or { not-class object } + [ class>> ] dip = [ t t ] [ f f ] if ; + m:METHOD: combine-or { integer object } 2dup class-member? [ nip t ] [ 2drop f f ] if ; @@ -174,7 +182,7 @@ M: and-class class-member? seq>> [ class-member? ] with all? ; : ( seq -- class ) - [ combine-or ] t or-class combine ; + [ combine-or ] f or-class combine ; M: or-class class-member? seq>> [ class-member? ] with any? ; @@ -183,7 +191,7 @@ M: or-class class-member? { { t [ f ] } { f [ t ] } - [ not-class boa ] + [ dup not-class? [ class>> ] [ not-class boa ] if ] } case ; M: not-class class-member? diff --git a/basis/regexp/disambiguate/disambiguate.factor b/basis/regexp/disambiguate/disambiguate.factor index 1243ab7cc1..0b63351e0c 100644 --- a/basis/regexp/disambiguate/disambiguate.factor +++ b/basis/regexp/disambiguate/disambiguate.factor @@ -12,11 +12,12 @@ TUPLE: parts in out ; : powerset-partition ( classes -- partitions ) [ length [ 2^ ] keep ] keep '[ _ _ make-partition - ] map ; + ] map rest ; : partition>class ( parts -- class ) - [ in>> ] [ out>> ] bi - [ ] bi@ 2array ; + [ out>> [ ] map ] + [ in>> ] bi + prefix ; : get-transitions ( partition state-transitions -- next-states ) [ in>> ] dip '[ _ at ] map prune ; diff --git a/basis/regexp/negation/negation-tests.factor b/basis/regexp/negation/negation-tests.factor index 2dbca2e8d8..41dfe7f493 100644 --- a/basis/regexp/negation/negation-tests.factor +++ b/basis/regexp/negation/negation-tests.factor @@ -7,9 +7,9 @@ IN: regexp.negation.tests ! R/ |[^a]|.+/ T{ transition-table { transitions H{ - { 0 H{ { CHAR: a 1 } { T{ not-class f T{ or-class f { CHAR: a } } } -1 } } } - { 1 H{ { T{ not-class f T{ or-class f { } } } -1 } } } - { -1 H{ { any-char -1 } } } + { 0 H{ { CHAR: a 1 } { T{ not-class f CHAR: a } -1 } } } + { 1 H{ { t -1 } } } + { -1 H{ { t -1 } } } } } { start-state 0 } { final-states H{ { 0 0 } { -1 -1 } } } diff --git a/basis/regexp/negation/negation.factor b/basis/regexp/negation/negation.factor index f235dc1bf5..f5a43a2a5e 100644 --- a/basis/regexp/negation/negation.factor +++ b/basis/regexp/negation/negation.factor @@ -12,11 +12,11 @@ CONSTANT: fail-state -1 : add-default-transition ( state's-transitions -- new-state's-transitions ) clone dup - [ [ fail-state ] dip keys ] keep set-at ; + [ [ fail-state ] dip keys [ ] map ] keep set-at ; : fail-state-recurses ( transitions -- new-transitions ) clone dup - [ fail-state any-char associate fail-state ] dip set-at ; + [ fail-state t associate fail-state ] dip set-at ; : add-fail-state ( transitions -- new-transitions ) [ add-default-transition ] assoc-map @@ -48,8 +48,8 @@ CONSTANT: fail-state -1 : unify-final-state ( transition-table -- transition-table ) dup [ final-states>> keys ] keep - '[ -1 eps _ add-transition ] each - H{ { -1 -1 } } >>final-states ; + '[ -2 eps _ add-transition ] each + H{ { -2 -2 } } >>final-states ; : adjoin-dfa ( transition-table -- start end ) box-transitions unify-final-state renumber-states diff --git a/basis/regexp/nfa/nfa.factor b/basis/regexp/nfa/nfa.factor index eff023c278..72ce880f8b 100644 --- a/basis/regexp/nfa/nfa.factor +++ b/basis/regexp/nfa/nfa.factor @@ -120,7 +120,7 @@ M: not-class modify-class class>> modify-class ; M: any-char modify-class - [ dotall option? ] dip any-char-no-nl ? ; + drop dotall option? t any-char-no-nl ? ; : modify-letter-class ( class -- newclass ) case-insensitive option? [ drop Letter-class ] when ;