From 271943085dcb80691735d58f9c0466e6995dfe54 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 21 Aug 2008 19:16:56 -0500 Subject: [PATCH] add reversed regexp --- extra/regexp2/parser/parser.factor | 19 +++++++++---------- extra/regexp2/regexp2-tests.factor | 7 ++++++- extra/regexp2/regexp2.factor | 5 +++++ 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/extra/regexp2/parser/parser.factor b/extra/regexp2/parser/parser.factor index fef38cc887..6eda3310d0 100644 --- a/extra/regexp2/parser/parser.factor +++ b/extra/regexp2/parser/parser.factor @@ -30,7 +30,7 @@ SINGLETON: back-anchor INSTANCE: back-anchor node TUPLE: option-on option ; INSTANCE: option-on node TUPLE: option-off option ; INSTANCE: option-off node -SINGLETONS: unix-lines dotall multiline comments case-insensitive unicode-case ; +SINGLETONS: unix-lines dotall multiline comments case-insensitive unicode-case reversed-regexp ; SINGLETONS: letter-class LETTER-class Letter-class digit-class alpha-class non-newline-blank-class @@ -49,9 +49,12 @@ left-parenthesis pipe caret dash ; : get-comments ( -- ? ) comments get-option ; : get-case-insensitive ( -- ? ) case-insensitive get-option ; : get-unicode-case ( -- ? ) unicode-case get-option ; +: get-reversed-regexp ( -- ? ) reversed-regexp get-option ; : ( obj -- negation ) negation boa ; -: ( seq -- concatenation ) >vector concatenation boa ; +: ( seq -- concatenation ) + >vector get-reversed-regexp [ reverse ] when + concatenation boa ; : ( seq -- alternation ) >vector alternation boa ; : ( obj -- capture-group ) capture-group boa ; : ( obj -- kleene-star ) kleene-star boa ; @@ -76,7 +79,7 @@ left-parenthesis pipe caret dash ; 2array [ [ from>> ] [ to>> ] bi < ] filter [ unmatchable-class ] [ first|alternation ] if-empty ] [ - dup [ from>> ] [ to>> ] bi < + 2dup < [ character-class-range boa ] [ 2drop unmatchable-class ] if ] if ; @@ -109,6 +112,7 @@ ERROR: bad-option ch ; { CHAR: i [ case-insensitive ] } { CHAR: d [ unix-lines ] } { CHAR: m [ multiline ] } + { CHAR: r [ reversed-regexp ] } { CHAR: s [ dotall ] } { CHAR: u [ unicode-case ] } { CHAR: x [ comments ] } @@ -345,13 +349,8 @@ DEFER: handle-left-bracket parse-character-class-first (parse-character-class) ; : finish-regexp-parse ( stack -- obj ) - dup length { - { 1 [ first ] } - [ - drop { pipe } split - [ first|concatenation ] map first|alternation - ] - } case ; + { pipe } split + [ first|concatenation ] map first|alternation ; : handle-right-parenthesis ( -- ) stack beginning-of-group over last-index cut rest diff --git a/extra/regexp2/regexp2-tests.factor b/extra/regexp2/regexp2-tests.factor index 5e1171a28c..54626ea165 100644 --- a/extra/regexp2/regexp2-tests.factor +++ b/extra/regexp2/regexp2-tests.factor @@ -1,4 +1,5 @@ -USING: regexp2 tools.test kernel regexp2.parser regexp2.traversal ; +USING: regexp2 tools.test kernel sequences regexp2.parser +regexp2.traversal ; IN: regexp2-tests [ f ] [ "b" "a*" matches? ] unit-test @@ -248,6 +249,10 @@ IN: regexp2-tests [ f ] [ "A" "\\p{Lower}" matches? ] unit-test [ t ] [ "A" "\\p{Lower}" matches? ] unit-test +[ t ] [ "abc" "abc" matches? ] unit-test +[ t ] [ "abc" "a[bB][cC]" matches? ] unit-test +[ t ] [ "adcbe" "a(?r)(bcd)(?-r)e" matches? ] unit-test + ! Bug in parsing word ! [ t ] [ "a" R' a' matches? ] unit-test diff --git a/extra/regexp2/regexp2.factor b/extra/regexp2/regexp2.factor index c227218450..0b8994ca2b 100644 --- a/extra/regexp2/regexp2.factor +++ b/extra/regexp2/regexp2.factor @@ -41,6 +41,11 @@ IN: regexp2 case-insensitive initial-option construct-regexp ; +: ( string -- regexp ) + default-regexp + reversed-regexp initial-option + construct-regexp ; + : R! CHAR: ! ; parsing : R" CHAR: " ; parsing : R# CHAR: # ; parsing