Lookaround and anchors work! (still need to fix some bugs)

2009-03-05 16:34:04 -06:00 · 2009-03-05 16:34:04 -06:00 · a487ed0f32
parent 39011fd062
commit a487ed0f32
12 changed files with 230 additions and 134 deletions
--- a/basis/regexp/classes/classes-tests.factor
+++ b/basis/regexp/classes/classes-tests.factor
@ -30,6 +30,7 @@ IN: regexp.classes.tests
 [ T{ or-class { seq { 3 2 } } } ] [ { 2 3 } <or-class> 1 <not-class> 2array <and-class> ] unit-test
 [ f ] [ t <not-class> ] unit-test
 [ t ] [ f <not-class> ] unit-test
 [ f ] [ 1 <not-class> 1 t replace-question ] unit-test
 ! Making classes into nested conditionals
@ -43,7 +44,7 @@ IN: regexp.classes.tests
 SYMBOL: foo
 SYMBOL: bar
-[ T{ condition f T{ primitive-class f bar } T{ condition f T{ primitive-class f foo } { 1 2 3 } { 1 3 } } T{ condition f T{ primitive-class f foo } { 1 2 } { 1 } } } ] [ { { 1 t } { 2 T{ primitive-class f foo } } { 3 T{ primitive-class f bar } } } table>condition ] unit-test
+[ T{ condition f T{ primitive-class f bar } T{ condition f T{ primitive-class f foo } { 1 3 2 } { 1 3 } } T{ condition f T{ primitive-class f foo } { 1 2 } { 1 } } } ] [ { { 1 t } { 3 T{ primitive-class f bar } } { 2 T{ primitive-class f foo } } } table>condition ] unit-test
 [ t ] [ foo <primitive-class> dup t replace-question ] unit-test
 [ f ] [ foo <primitive-class> dup f replace-question ] unit-test
--- a/basis/regexp/classes/classes.factor
+++ b/basis/regexp/classes/classes.factor
@ -12,7 +12,7 @@ ascii-class punctuation-class java-printable-class blank-class
 control-character-class hex-digit-class java-blank-class c-identifier-class
 unmatchable-class terminator-class word-boundary-class ;
-SINGLETONS: beginning-of-input ^ end-of-input $ ;
+SINGLETONS: beginning-of-input ^ end-of-input $ end-of-file ;
 TUPLE: range from to ;
 C: <range> range
@ -233,7 +233,7 @@ M: or-class replace-question
    replace-compound <or-class> ;
 M: not-class replace-question
-    class>> replace-question <not-class> ;
+    [ class>> ] 2dip replace-question <not-class> ;
 : answer ( table question answer -- new-table )
    '[ _ _ replace-question ] assoc-map
@ -258,7 +258,7 @@ M: not-class class>questions class>> class>questions ;
 M: object class>questions 1array ;
 : table>questions ( table -- questions )
-    values <and-class> class>questions t swap remove ;
+    values [ class>questions ] gather >array t swap remove ;
 : table>condition ( table -- condition )
    ! input table is state => class
@ -269,3 +269,12 @@ M: object class>questions 1array ;
        [ [ question>> ] [ yes>> ] [ no>> ] tri ] dip
        '[ _ condition-map ] bi@ <condition>
    ] [ call ] if ; inline recursive
 : condition-states ( condition -- states )
    dup condition? [
        [ yes>> ] [ no>> ] bi
        [ condition-states ] bi@ append prune
    ] [ 1array ] if ;
 : condition-at ( condition assoc -- new-condition )
    '[ _ at ] condition-map ;
--- a/basis/regexp/compiler/compiler.factor
+++ b/basis/regexp/compiler/compiler.factor
@ -3,27 +3,76 @@
 USING: regexp.classes kernel sequences regexp.negation
 quotations regexp.minimize assocs fry math locals combinators
 accessors words compiler.units kernel.private strings
-sequences.private arrays regexp.matchers call ;
+sequences.private arrays regexp.matchers call namespaces
 regexp.transition-tables combinators.short-circuit ;
 IN: regexp.compiler
-: literals>cases ( literal-transitions -- case-body )
+GENERIC: question>quot ( question -- quot )
-    [ 1quotation ] assoc-map ;
+
 <PRIVATE
 SYMBOL: shortest?
 SYMBOL: backwards?
 M: t question>quot drop [ 2drop t ] ;
 M: beginning-of-input question>quot
    drop [ drop zero? ] ;
 M: end-of-input question>quot
    drop [ length = ] ;
 M: end-of-file question>quot
    drop [
        {
            [ length swap - 2 <= ]
            [ swap tail { "\n" "\r\n" "\r" "" } member? ]
        } 2&&
        [ [ nip [ length ] keep ] when ] keep
    ] ;
 M: $ question>quot
    drop [ { [ length = ] [ ?nth "\r\n" member? ] } 2|| ] ;
 M: ^ question>quot
    drop [ { [ drop zero? ] [ [ 1- ] dip ?nth "\r\n" member? ] } 2|| ] ;
 ! Maybe the condition>quot things can be combined, given a suitable method
 ! for question>quot on classes, but maybe that'd make stack shuffling annoying
 : execution-quot ( next-state -- quot )
    ! The conditions here are for lookaround and anchors, etc
    dup condition? [
        [ question>> question>quot ] [ yes>> ] [ no>> ] tri
        [ execution-quot ] bi@
        '[ 2dup @ _ _ if ]
    ] [
        ! There shouldn't be a condition like this!
        dup sequence?
        [ [ [ 2drop ] ] [ first '[ _ execute ] ] if-empty ]
        [ '[ _ execute ] ] if
    ] if ;
 TUPLE: box contents ;
 C: <box> box
 : condition>quot ( condition -- quot )
    ! Conditions here are for different classes
    dup condition? [
        [ question>> ] [ yes>> ] [ no>> ] tri
        [ condition>quot ] bi@
        '[ dup _ class-member? _ _ if ]
    ] [
-        [ [ 3drop ] ] [ '[ drop _ execute ] ] if-empty
+        contents>>
        [ [ 3drop ] ] [ execution-quot '[ drop @ ] ] if-empty
    ] if ;
 : new-non-literals>dispatch ( non-literal-transitions -- quot )
    table>condition condition>quot ;
 : non-literals>dispatch ( non-literal-transitions -- quot )
-    [ [ '[ dup _ class-member? ] ] [ '[ drop _ execute ] ] bi* ] assoc-map
+    [ swap ] assoc-map ! we want state => predicate, and get the opposite as input
-    [ 3drop ] suffix '[ _ cond ] ;
+    table>condition [ <box> ] condition-map condition>quot ;
 : literals>cases ( literal-transitions -- case-body )
    [ execution-quot ] assoc-map ;
 : expand-one-or ( or-class transition -- alist )
    [ seq>> ] dip '[ _ 2array ] map ;
@ -38,17 +87,22 @@ IN: regexp.compiler
    >alist expand-or [ first integer? ] partition
    [ literals>cases ] [ non-literals>dispatch ] bi* ;
-:: step ( last-match index str case-body final? -- last-index/f )
+:: step ( last-match index str quot final? direction -- last-index/f )
    final? index last-match ?
    index str bounds-check? [
-        index 1+ str
+        index direction + str
        index str nth-unsafe
-        case-body case
+        quot call
    ] when ; inline
 : direction ( -- n )
    backwards? get -1 1 ? ;
 : transitions>quot ( transitions final-state? -- quot )
-    [ split-literals suffix ] dip
+    dup shortest? get and [ 2drop [ drop nip ] ] [
-    '[ { array-capacity sequence } declare _ _ step ] ;
+        [ split-literals swap case>quot ] dip direction
        '[ { array-capacity string } declare _ _ _ step ]
    ] if ;
 : word>quot ( word dfa -- quot )
    [ transitions>> at ]
@ -64,30 +118,37 @@ IN: regexp.compiler
        ] each
    ] with-compilation-unit ;
 : transitions-at ( transitions assoc -- new-transitions )
    dup '[
        [ _ at ]
        [ [ _ at ] assoc-map ] bi*
    ] assoc-map ;
 : states>words ( dfa -- words dfa )
    dup transitions>> keys [ gensym ] H{ } map>assoc
-    [ [ transitions-at ] rewrite-transitions ]
+    [ transitions-at ]
    [ values ]
    bi swap ; 
 : dfa>word ( dfa -- word )
    states>words [ states>code ] keep start-state>> ;
-: check-sequence ( string -- string )
+: check-string ( string -- string )
    ! Make this configurable
-    dup sequence? [ "String required" throw ] unless ;
+    dup string? [ "String required" throw ] unless ;
-: run-regexp ( start-index string word -- ? )
+: setup-regexp ( start-index string -- f start-index string )
-    { [ f ] [ >fixnum ] [ check-sequence ] [ execute ] } spread ; inline
+    [ f ] [ >fixnum ] [ check-string ] tri* ; inline
 PRIVATE>
 ! The quotation returned is ( start-index string -- i/f )
 : dfa>quotation ( dfa -- quot )
-    dfa>word '[ _ run-regexp ] ;
+    dfa>word execution-quot '[ setup-regexp @ ] ;
 : dfa>shortest-quotation ( dfa -- quot )
    t shortest? [ dfa>quotation ] with-variable ;
 : dfa>reverse-quotation ( dfa -- quot )
    t backwards? [ dfa>quotation ] with-variable ;
 : dfa>reverse-shortest-quotation ( dfa -- quot )
    t backwards? [ dfa>shortest-quotation ] with-variable ;
 TUPLE: quot-matcher quot ;
 C: <quot-matcher> quot-matcher
--- a/basis/regexp/dfa/dfa.factor
+++ b/basis/regexp/dfa/dfa.factor
@ -39,21 +39,26 @@ IN: regexp.dfa
 : find-transitions ( dfa-state nfa -- next-dfa-state )
    transitions>>
-    '[ _ at keys ] gather
+    '[ _ at keys [ condition-states ] map concat ] gather
-    epsilon swap remove ;
+    [ tagged-epsilon? not ] filter ;
 : add-todo-state ( state visited-states new-states -- )
    3dup drop key? [ 3drop ] [
        [ conjoin ] [ push ] bi-curry* bi
    ] if ;
 : add-todo-states ( state/condition visited-states new-states -- )
    [ condition-states ] 2dip
    '[ _ _ add-todo-state ] each ;
 :: new-transitions ( nfa dfa new-states visited-states -- nfa dfa )
    new-states [ nfa dfa ] [
        pop :> state
        state dfa transitions>> maybe-initialize-key
        state nfa find-transitions
        [| trans |
            state trans nfa find-closure :> new-state
-            new-state visited-states new-states add-todo-state
+            new-state visited-states new-states add-todo-states
            state new-state trans dfa set-transition
        ] each
        nfa dfa new-states visited-states new-transitions
@ -73,7 +78,7 @@ IN: regexp.dfa
 : construct-dfa ( nfa -- dfa )
    dup initialize-dfa
-    dup start-state>> 1vector
+    dup start-state>> condition-states >vector
    H{ } clone
    new-transitions
    [ set-final-states ] keep ;
--- a/basis/regexp/disambiguate/disambiguate.factor
+++ b/basis/regexp/disambiguate/disambiguate.factor
@ -42,6 +42,3 @@ TUPLE: parts in out ;
            ] preserving-epsilon
        ] assoc-map
    ] change-transitions ;
 : nfa>dfa ( nfa -- dfa )
    disambiguate construct-dfa minimize ;
--- a/basis/regexp/minimize/minimize.factor
+++ b/basis/regexp/minimize/minimize.factor
@ -5,29 +5,11 @@ accessors locals math sorting arrays sets hashtables regexp.dfa
 combinators.short-circuit regexp.classes ;
 IN: regexp.minimize
 : number-transitions ( transitions numbering -- new-transitions )
    dup '[
        [ _ at ]
        [ [ [ _ at ] condition-map ] assoc-map ] bi*
    ] assoc-map ;
 : table>state-numbers ( table -- assoc )
    transitions>> keys <enum> [ swap ] H{ } assoc-map-as ;
 : map-set ( assoc quot -- new-assoc )
    '[ drop @ dup ] assoc-map ; inline
 : rewrite-transitions ( transition-table assoc quot -- transition-table )
    [
        [ clone ] dip
        [ '[ _ at ] change-start-state ]
        [ '[ [ _ at ] map-set ] change-final-states ]
        [ ] tri
    ] dip '[ _ @ ] change-transitions ; inline
 : number-states ( table -- newtable )
-    dup table>state-numbers
+    dup table>state-numbers transitions-at ;
    [ number-transitions ] rewrite-transitions ;
 : no-conditions? ( state transition-table -- ? )
    transitions>> at values [ condition? ] any? not ;
@ -103,4 +85,4 @@ IN: regexp.minimize
    [ combine-transitions ] rewrite-transitions ;
 : minimize ( table -- minimal-table )
-    clone number-states combine-states ;
+    clone number-states ; ! combine-states ;
--- a/basis/regexp/negation/negation.factor
+++ b/basis/regexp/negation/negation.factor
@ -2,11 +2,12 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: regexp.nfa regexp.disambiguate kernel sequences
 assocs regexp.classes hashtables accessors fry vectors
-regexp.ast regexp.transition-tables regexp.minimize namespaces ;
+regexp.ast regexp.transition-tables regexp.minimize
 regexp.dfa namespaces ;
 IN: regexp.negation
 : ast>dfa ( parse-tree -- minimal-dfa )
-    construct-nfa nfa>dfa ;
+    construct-nfa disambiguate construct-dfa minimize ;
 CONSTANT: fail-state -1
@ -33,15 +34,9 @@ CONSTANT: fail-state -1
        [ add-fail-state ] change-transitions
        dup inverse-final-states >>final-states ;
 : renumber-transitions ( transitions numbering -- new-transitions )
    dup '[
        [ _ at ]
        [ [ [ _ at ] map ] assoc-map ] bi*
    ] assoc-map ;
 : renumber-states ( transition-table -- transition-table )
    dup transitions>> keys [ next-state ] H{ } map>assoc
-    [ renumber-transitions ] rewrite-transitions ;
+    transitions-at ;
 : box-transitions ( transition-table -- transition-table )
    [ [ [ 1vector ] assoc-map ] assoc-map ] change-transitions ;
--- a/basis/regexp/nfa/nfa.factor
+++ b/basis/regexp/nfa/nfa.factor
@ -56,9 +56,16 @@ M:: star nfa-node ( node -- start end )
    s2 s3 ;
 GENERIC: modify-epsilon ( tag -- newtag )
 ! Potential off-by-one errors when lookaround nested in lookbehind
 M: object modify-epsilon ;
 M: $ modify-epsilon
    multiline option? [ drop end-of-input ] unless ;
 M: ^ modify-epsilon
    multiline option? [ drop beginning-of-input ] unless ;
 M: tagged-epsilon nfa-node
    clone [ modify-epsilon ] change-tag add-simple-entry ;
--- a/basis/regexp/parser/parser.factor
+++ b/basis/regexp/parser/parser.factor
@ -54,6 +54,7 @@ ERROR: bad-class name ;
        { CHAR: D [ digit-class <primitive-class> <not-class> ] }
        { CHAR: z [ end-of-input <tagged-epsilon> ] }
        { CHAR: Z [ end-of-file <tagged-epsilon> ] }
        { CHAR: A [ beginning-of-input <tagged-epsilon> ] }
        [ ]
    } case ;
--- a/basis/regexp/regexp-tests.factor
+++ b/basis/regexp/regexp-tests.factor
@ -45,9 +45,9 @@ IN: regexp-tests
 ! Dotall mode -- when on, . matches newlines.
 ! Off by default.
 [ f ] [ "\n" "." <regexp> matches? ] unit-test
-! [ t ] [ "\n" "(?s)." <regexp> matches? ] unit-test
+[ t ] [ "\n" "(?s:.)" <regexp> matches? ] unit-test
 [ t ] [ "\n" R/ ./s matches? ] unit-test
-! [ f ] [ "\n\n" "(?s).(?-s)." <regexp> matches? ] unit-test
+[ f ] [ "\n\n" "(?s:.)." <regexp> matches? ] unit-test
 [ f ] [ "" ".+" <regexp> matches? ] unit-test
 [ t ] [ "a" ".+" <regexp> matches? ] unit-test
@ -221,17 +221,15 @@ IN: regexp-tests
 [ t ] [ "c" R/ [A-Z]/i matches? ] unit-test
 [ f ] [ "3" R/ [A-Z]/i matches? ] unit-test
-/*
+[ t ] [ "a" "(?i:a)" <regexp> matches? ] unit-test
-[ t ] [ "a" "(?i)a" <regexp> matches? ] unit-test
+[ t ] [ "a" "(?i:a)" <regexp> matches? ] unit-test
-[ t ] [ "a" "(?i)a" <regexp> matches? ] unit-test
+[ t ] [ "A" "(?i:a)" <regexp> matches? ] unit-test
-[ t ] [ "A" "(?i)a" <regexp> matches? ] unit-test
+[ t ] [ "A" "(?i:a)" <regexp> matches? ] unit-test
 [ t ] [ "A" "(?i)a" <regexp> matches? ] unit-test
-[ t ] [ "a" R/ (?-i)a/i matches? ] unit-test
+[ t ] [ "a" R/ (?-i:a)/i matches? ] unit-test
-[ t ] [ "a" R/ (?-i)a/i matches? ] unit-test
+[ t ] [ "a" R/ (?-i:a)/i matches? ] unit-test
-[ f ] [ "A" R/ (?-i)a/i matches? ] unit-test
+[ f ] [ "A" R/ (?-i:a)/i matches? ] unit-test
-[ f ] [ "A" R/ (?-i)a/i matches? ] unit-test
+[ f ] [ "A" R/ (?-i:a)/i matches? ] unit-test
 */
 [ f ] [ "A" "[a-z]" <regexp> matches? ] unit-test
 [ t ] [ "A" R/ [a-z]/i matches? ] unit-test
@ -242,8 +240,8 @@ IN: regexp-tests
 [ t ] [ "abc" reverse R/ abc/r matches? ] unit-test
 [ t ] [ "abc" reverse R/ a[bB][cC]/r matches? ] unit-test
-[ t ] [ "xabc" R/ abc/ <reverse-matcher> match-index-head >boolean ] unit-test
+[ t ] [ 3 "xabc" R/ abc/ <reverse-matcher> match-index-from >boolean ] unit-test
-[ t ] [ "xabc" R/ a[bB][cC]/ <reverse-matcher> match-index-head >boolean ] unit-test
+[ t ] [ 3 "xabc" R/ a[bB][cC]/ <reverse-matcher> match-index-from >boolean ] unit-test
 [ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
 [ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
@ -276,10 +274,6 @@ IN: regexp-tests
 [ "b" ] [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> match-head >string ] unit-test
 ! [ t ] [ "a:b" ".+:?" <regexp> matches? ] unit-test
 ! [ 1 ] [ "hello" ".+?" <regexp> match length ] unit-test
 [ { "1" "2" "3" "4" } ]
 [ "1ABC2DEF3GHI4" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
@ -304,18 +298,16 @@ IN: regexp-tests
 [ "-- title --" ] [ "== title ==" R/ =/ "-" re-replace ] unit-test
 /*
 [ f ] [ "ab" "a(?!b)" <regexp> match-head ] unit-test
 [ "a" ] [ "ac" "a(?!b)" <regexp> match-head >string ] unit-test
 [ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
 [ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
 [ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> match-head >string ] unit-test
-[ "a" ] [ "ba" "a(?<=b)(?<=b)" <regexp> match-head >string ] unit-test
+[ "a" ] [ "ba" "(?<=b)(?<=b)a" <regexp> match-head >string ] unit-test
-[ "a" ] [ "cab" "a(?=b)(?<=c)" <regexp> match-head >string ] unit-test
+[ "a" ] [ "cab" "(?<=c)a(?=b)" <regexp> match-head >string ] unit-test
 [ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-index-head ] unit-test
 [ f ] [ "foobxr" "foo(?=bar)" <regexp> match-index-head ] unit-test
 */
 ! Bug in parsing word
 [ t ] [ "a" R' a' matches? ] unit-test
@ -349,56 +341,70 @@ IN: regexp-tests
 [ f ] [ "foo" <regexp> dfa>> >boolean ] unit-test
 [ t ] [ R/ foo/ dfa>> >boolean ] unit-test
-! [ t ] [ "a" R/ ^a/ matches? ] unit-test
+[ t ] [ "a" R/ ^a/ matches? ] unit-test
-! [ f ] [ "\na" R/ ^a/ matches? ] unit-test
+[ f ] [ "\na" R/ ^a/ matches? ] unit-test
-! [ f ] [ "\r\na" R/ ^a/ matches? ] unit-test
+[ f ] [ "\r\na" R/ ^a/ matches? ] unit-test
-! [ f ] [ "\ra" R/ ^a/ matches? ] unit-test
+[ f ] [ "\ra" R/ ^a/ matches? ] unit-test
-! [ t ] [ "a" R/ a$/ matches? ] unit-test
+[ 1 ] [ "a" R/ ^a/ count-matches ] unit-test
-! [ f ] [ "a\n" R/ a$/ matches? ] unit-test
+[ 0 ] [ "\na" R/ ^a/ count-matches ] unit-test
-! [ f ] [ "a\r" R/ a$/ matches? ] unit-test
+[ 0 ] [ "\r\na" R/ ^a/ count-matches ] unit-test
-! [ f ] [ "a\r\n" R/ a$/ matches? ] unit-test
+[ 0 ] [ "\ra" R/ ^a/ count-matches ] unit-test
-! [ t ] [ "a" R/ a$|b$/ matches? ] unit-test
+[ t ] [ "a" R/ a$/ matches? ] unit-test
-! [ t ] [ "b" R/ a$|b$/ matches? ] unit-test
+[ f ] [ "a\n" R/ a$/ matches? ] unit-test
-! [ t ] [ "ab" R/ a$|b$/ matches? ] unit-test
+[ f ] [ "a\r" R/ a$/ matches? ] unit-test
-! [ t ] [ "ba" R/ ba$|b$/ matches? ] unit-test
+[ f ] [ "a\r\n" R/ a$/ matches? ] unit-test
-! [ t ] [ "a" R/ \Aa/ matches? ] unit-test
+[ 1 ] [ "a" R/ a$/ count-matches ] unit-test
-! [ f ] [ "\na" R/ \Aaa/ matches? ] unit-test
+[ 0 ] [ "a\n" R/ a$/ count-matches ] unit-test
-! [ f ] [ "\r\na" R/ \Aa/ matches? ] unit-test
+[ 0 ] [ "a\r" R/ a$/ count-matches ] unit-test
-! [ f ] [ "\ra" R/ \Aa/ matches? ] unit-test
+[ 0 ] [ "a\r\n" R/ a$/ count-matches ] unit-test
-! [ t ] [ "a" R/ \Aa/m matches? ] unit-test
+[ t ] [ "a" R/ a$|b$/ matches? ] unit-test
-! [ f ] [ "\na" R/ \Aaa/m matches? ] unit-test
+[ t ] [ "b" R/ a$|b$/ matches? ] unit-test
-! [ f ] [ "\r\na" R/ \Aa/m matches? ] unit-test
+[ f ] [ "ab" R/ a$|b$/ matches? ] unit-test
-! [ f ] [ "\ra" R/ \Aa/m matches? ] unit-test
+[ t ] [ "ba" R/ ba$|b$/ matches? ] unit-test
-! [ t ] [ "\r\n\n\n\nam" R/ ^am/m matches? ] unit-test
+[ t ] [ "a" R/ \Aa/ matches? ] unit-test
 [ f ] [ "\na" R/ \Aaa/ matches? ] unit-test
 [ f ] [ "\r\na" R/ \Aa/ matches? ] unit-test
 [ f ] [ "\ra" R/ \Aa/ matches? ] unit-test
-! [ t ] [ "a" R/ \Aa\z/m matches? ] unit-test
+[ t ] [ "a" R/ \Aa/m matches? ] unit-test
-! [ f ] [ "a\n" R/ \Aa\z/m matches? ] unit-test
+[ f ] [ "\na" R/ \Aaa/m matches? ] unit-test
 [ f ] [ "\r\na" R/ \Aa/m matches? ] unit-test
 [ f ] [ "\ra" R/ \Aa/m matches? ] unit-test
 [ 0 ] [ "\ra" R/ \Aa/m count-matches ] unit-test
-! [ t ] [ "a\r\n" R/ \Aa\Z/m matches? ] unit-test
+[ f ] [ "\r\n\n\n\nam" R/ ^am/m matches? ] unit-test
-! [ t ] [ "a\n" R/ \Aa\Z/m matches? ] unit-test
+[ 1 ] [ "\r\n\n\n\nam" R/ ^am/m count-matches ] unit-test
-! [ t ] [ "a" R/ \Aa\Z/m matches? ] unit-test
+[ t ] [ "a" R/ \Aa\z/m matches? ] unit-test
-! [ f ] [ "\na" R/ \Aaa\Z/m matches? ] unit-test
+[ f ] [ "a\n" R/ \Aa\z/m matches? ] unit-test
 ! [ f ] [ "\r\na" R/ \Aa\Z/m matches? ] unit-test
 ! [ f ] [ "\ra" R/ \Aa\Z/m matches? ] unit-test
-! [ t ] [ "a" R/ ^a/m matches? ] unit-test
+[ t ] [ "a\r\n" R/ \Aa\Z/m matches? ] unit-test
-! [ t ] [ "\na" R/ ^a/m matches? ] unit-test
+[ t ] [ "a\n" R/ \Aa\Z/m matches? ] unit-test
 ! [ t ] [ "\r\na" R/ ^a/m matches? ] unit-test
 ! [ t ] [ "\ra" R/ ^a/m matches? ] unit-test
-! [ t ] [ "a" R/ a$/m matches? ] unit-test
+[ t ] [ "a" R/ \Aa\Z/m matches? ] unit-test
-! [ t ] [ "a\n" R/ a$/m matches? ] unit-test
+[ f ] [ "\na" R/ \Aaa\Z/m matches? ] unit-test
-! [ t ] [ "a\r" R/ a$/m matches? ] unit-test
+[ f ] [ "\r\na" R/ \Aa\Z/m matches? ] unit-test
-! [ t ] [ "a\r\n" R/ a$/m matches? ] unit-test
+[ f ] [ "\ra" R/ \Aa\Z/m matches? ] unit-test
-! [ f ] [ "foobxr" "foo\\z" <regexp> match-index-head ] unit-test
+[ t ] [ "a" R/ ^a/m matches? ] unit-test
-! [ 3 ] [ "foo" "foo\\z" <regexp> match-index-head ] unit-test
+[ f ] [ "\na" R/ ^a/m matches? ] unit-test
 [ 1 ] [ "\na" R/ ^a/m count-matches ] unit-test
 [ 1 ] [ "\r\na" R/ ^a/m count-matches ] unit-test
 [ 1 ] [ "\ra" R/ ^a/m count-matches ] unit-test
 [ t ] [ "a" R/ a$/m matches? ] unit-test
 [ f ] [ "a\n" R/ a$/m matches? ] unit-test
 [ 1 ] [ "a\n" R/ a$/m count-matches ] unit-test
 [ 1 ] [ "a\r" R/ a$/m count-matches ] unit-test
 [ 1 ] [ "a\r\n" R/ a$/m count-matches ] unit-test
 [ f ] [ "foobxr" "foo\\z" <regexp> match-index-head ] unit-test
 [ 3 ] [ "foo" "foo\\z" <regexp> match-index-head ] unit-test
 ! [ t ] [ "foo" "\\bfoo\\b" <regexp> matches? ] unit-test
 ! [ t ] [ "afoob" "\\Bfoo\\B" <regexp> matches? ] unit-test
--- a/basis/regexp/regexp.factor
+++ b/basis/regexp/regexp.factor
@ -3,7 +3,7 @@
 USING: accessors combinators kernel math sequences strings sets
 assocs prettyprint.backend prettyprint.custom make lexer
 namespaces parser arrays fry locals regexp.minimize
-regexp.parser regexp.nfa regexp.dfa
+regexp.parser regexp.nfa regexp.dfa regexp.classes
 regexp.transition-tables splitting sorting regexp.ast
 regexp.negation regexp.matchers regexp.compiler ;
 IN: regexp
@ -27,6 +27,7 @@ TUPLE: regexp
 TUPLE: reverse-matcher regexp ;
 C: <reverse-matcher> reverse-matcher
 ! Reverse matchers won't work properly with most combinators, for now
 <PRIVATE
@ -39,21 +40,31 @@ C: <reverse-matcher> reverse-matcher
 : <reversed-option> ( ast -- reversed )
    "r" string>options <with-options> ;
 M: lookahead question>quot ! Returns ( index string -- ? )
    term>> ast>dfa dfa>shortest-quotation ;
 M: lookbehind question>quot ! Returns ( index string -- ? )
    term>> <reversed-option>
    ast>dfa dfa>reverse-shortest-quotation
    [ [ 1- ] dip ] prepose ;
 : compile-reverse ( regexp -- regexp )
    dup '[
        [
            _ get-ast <reversed-option>
-            ast>dfa dfa>quotation
+            ast>dfa dfa>reverse-quotation
        ] unless*
    ] change-reverse-dfa ;
-M: regexp match-index-from ( string regexp -- index/f )
+M: regexp match-index-from
    compile-regexp dfa>> <quot-matcher> match-index-from ;
-M: reverse-matcher match-index-from ( string regexp -- index/f )
+M: reverse-matcher match-index-from
-    [ <reversed> ] [ regexp>> compile-reverse reverse-dfa>> ] bi*
+    regexp>> compile-reverse reverse-dfa>>
    <quot-matcher> match-index-from ;
 ! The following two should do some caching
 : find-regexp-syntax ( string -- prefix suffix )
    {
        { "R/ "  "/"  }
--- a/basis/regexp/transition-tables/transition-tables.factor
+++ b/basis/regexp/transition-tables/transition-tables.factor
@ -1,7 +1,7 @@
 ! Copyright (C) 2008 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors arrays assocs fry hashtables kernel sequences
-vectors locals ;
+vectors locals regexp.classes ;
 IN: regexp.transition-tables
 TUPLE: transition-table transitions start-state final-states ;
@ -12,10 +12,11 @@ TUPLE: transition-table transitions start-state final-states ;
        H{ } clone >>final-states ;
 : maybe-initialize-key ( key hashtable -- )
    ! Why do we have to do this?
    2dup key? [ 2drop ] [ [ H{ } clone ] 2dip set-at ] if ;
 :: (set-transition) ( from to obj hash -- )
-    to hash maybe-initialize-key
+    to condition? [ to hash maybe-initialize-key ] unless
    from hash at
    [ [ to obj ] dip set-at ]
    [ to obj associate from hash set-at ] if* ;
@ -31,3 +32,23 @@ TUPLE: transition-table transitions start-state final-states ;
 : add-transition ( from to obj transition-table -- )
    transitions>> (add-transition) ;
 : map-set ( assoc quot -- new-assoc )
    '[ drop @ dup ] assoc-map ; inline
 : rewrite-transitions ( transition-table assoc quot -- transition-table )
    [
        [ clone ] dip
        [ '[ _ condition-at ] change-start-state ]
        [ '[ [ _ at ] map-set ] change-final-states ]
        [ ] tri
    ] dip '[ _ @ ] change-transitions ; inline
 : number-transitions ( transitions numbering -- new-transitions )
    dup '[
        [ _ at ]
        [ [ _ condition-at ] assoc-map ] bi*
    ] assoc-map ;
 : transitions-at ( transitions numbering -- transitions )
    [ number-transitions ] rewrite-transitions ;