Merge branch 'master' of git://factorcode.org/git/factor

2009-03-12 11:30:16 -05:00 · 2009-03-12 11:30:16 -05:00 · da41ae8fbe
parent b38348aae0 3c04d96bc4
commit da41ae8fbe
32 changed files with 277 additions and 249 deletions
--- a/basis/delegate/delegate-tests.factor
+++ b/basis/delegate/delegate-tests.factor
@ -1,7 +1,7 @@
 USING: delegate kernel arrays tools.test words math definitions
 compiler.units parser generic prettyprint io.streams.string
 accessors eval multiline generic.standard delegate.protocols
-delegate.private assocs ;
+delegate.private assocs see ;
 IN: delegate.tests

 TUPLE: hello this that ;
--- a/basis/help/definitions/definitions-tests.factor
+++ b/basis/help/definitions/definitions-tests.factor
@ -1,6 +1,6 @@
 USING: math definitions help.topics help tools.test
 prettyprint parser io.streams.string kernel source-files
-assocs namespaces words io sequences eval accessors ;
+assocs namespaces words io sequences eval accessors see ;
 IN: help.definitions.tests

 [ ] [ \ + >link see ] unit-test
--- a/basis/html/components/components-tests.factor
+++ b/basis/html/components/components-tests.factor
@ -4,6 +4,8 @@ io.streams.null accessors inspector html.streams
 html.components html.forms namespaces
 xml.writer ;

+\ render must-infer
+
 [ ] [ begin-form ] unit-test

 [ ] [ 3 "hi" set-value ] unit-test
--- a/basis/inspector/inspector-tests.factor
+++ b/basis/inspector/inspector-tests.factor
@ -8,7 +8,7 @@ f describe
 H{ } describe
 H{ } describe

-[ "fixnum instance\n" ] [ [ 3 describe ] with-string-writer ] unit-test
+[ "fixnum instance\n\n" ] [ [ 3 describe ] with-string-writer ] unit-test

 [ ] [ H{ } clone inspect ] unit-test

--- a/basis/locals/locals-tests.factor
+++ b/basis/locals/locals-tests.factor
@ -2,7 +2,7 @@ USING: locals math sequences tools.test hashtables words kernel
 namespaces arrays strings prettyprint io.streams.string parser
 accessors generic eval combinators combinators.short-circuit
 combinators.short-circuit.smart math.order math.functions
-definitions compiler.units fry lexer words.symbol ;
+definitions compiler.units fry lexer words.symbol see ;
 IN: locals.tests

 :: foo ( a b -- a a ) a a ;
--- a/basis/macros/macros-tests.factor
+++ b/basis/macros/macros-tests.factor
@ -1,6 +1,6 @@
 IN: macros.tests
 USING: tools.test macros math kernel arrays
-vectors io.streams.string prettyprint parser eval ;
+vectors io.streams.string prettyprint parser eval see ;

 MACRO: see-test ( a b -- c ) + ;

--- a/basis/memoize/memoize-tests.factor
+++ b/basis/memoize/memoize-tests.factor
@ -1,7 +1,7 @@
 ! Copyright (C) 2007, 2009 Slava Pestov, Daniel Ehrenberg.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: math kernel memoize tools.test parser generalizations
-prettyprint io.streams.string sequences eval namespaces ;
+prettyprint io.streams.string sequences eval namespaces see ;
 IN: memoize.tests

 MEMO: fib ( m -- n )
--- a/basis/opengl/textures/textures-tests.factor
+++ b/basis/opengl/textures/textures-tests.factor
@ -5,15 +5,19 @@ images kernel namespaces ;
 IN: opengl.textures.tests

 [ ] [
-    { 3 5 }
-    RGB
-    B{
-        1 2 3 4 5 6 7 8 9
-        10 11 12 13 14 15 16 17 18
-        19 20 21 22 23 24 25 26 27
-        28 29 30 31 32 33 34 35 36
-        37 38 39 40 41 42 43 44 45
-    } image boa "image" set
+    T{ image
+       { dim { 3 5 } }
+       { component-order RGB }
+       { bitmap
+         B{
+             1 2 3 4 5 6 7 8 9
+             10 11 12 13 14 15 16 17 18
+             19 20 21 22 23 24 25 26 27
+             28 29 30 31 32 33 34 35 36
+             37 38 39 40 41 42 43 44 45
+         }
+       }
+    } "image" set
 ] unit-test

 [
--- a/basis/regexp/ast/ast.factor
+++ b/basis/regexp/ast/ast.factor
@ -58,8 +58,8 @@ M: from-to <times>
 : char-class ( ranges ? -- term )
    [ <or-class> ] dip [ <not-class> ] when ;

-TUPLE: lookahead term positive? ;
+TUPLE: lookahead term ;
 C: <lookahead> lookahead

-TUPLE: lookbehind term positive? ;
+TUPLE: lookbehind term ;
 C: <lookbehind> lookbehind
--- a/basis/regexp/classes/classes-tests.factor
+++ b/basis/regexp/classes/classes-tests.factor
@ -6,7 +6,7 @@ IN: regexp.classes.tests
 ! Class algebra

 [ f ] [ { 1 2 } <and-class> ] unit-test
-[ T{ or-class f { 2 1 } } ] [ { 1 2 } <or-class> ] unit-test
+[ T{ or-class f { 1 2 } } ] [ { 1 2 } <or-class> ] unit-test
 [ 3 ] [ { 1 2 } <and-class> 3 2array <or-class> ] unit-test
 [ CHAR: A ] [ CHAR: A LETTER-class <primitive-class> 2array <and-class> ] unit-test
 [ CHAR: A ] [ LETTER-class <primitive-class> CHAR: A 2array <and-class> ] unit-test
@ -26,11 +26,13 @@ IN: regexp.classes.tests
 [ t ] [ { t t } <or-class> ] unit-test
 [ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <and-class> ] unit-test
 [ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <or-class> ] unit-test
-[ T{ or-class { seq { 2 3 1 } } } ] [ { 1 2 } <or-class> { 2 3 } <or-class> 2array <or-class> ] unit-test
-[ T{ or-class { seq { 3 2 } } } ] [ { 2 3 } <or-class> 1 <not-class> 2array <and-class> ] unit-test
+[ T{ or-class { seq { 1 2 3 } } } ] [ { 1 2 } <or-class> { 2 3 } <or-class> 2array <or-class> ] unit-test
+[ T{ or-class { seq { 2 3 } } } ] [ { 2 3 } <or-class> 1 <not-class> 2array <and-class> ] unit-test
 [ f ] [ t <not-class> ] unit-test
 [ t ] [ f <not-class> ] unit-test
 [ f ] [ 1 <not-class> 1 t answer ] unit-test
+[ t ] [ { 1 2 } <or-class> <not-class> 1 2 3array <or-class> ] unit-test
+[ f ] [ { 1 2 } <and-class> <not-class> 1 2 3array <and-class> ] unit-test

 ! Making classes into nested conditionals

--- a/basis/regexp/classes/classes.factor
+++ b/basis/regexp/classes/classes.factor
@ -2,7 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors kernel math math.order words combinators locals
 ascii unicode.categories combinators.short-circuit sequences
-fry macros arrays assocs sets classes ;
+fry macros arrays assocs sets classes mirrors ;
 IN: regexp.classes

 SINGLETONS: any-char any-char-no-nl
@ -12,7 +12,7 @@ ascii-class punctuation-class java-printable-class blank-class
 control-character-class hex-digit-class java-blank-class c-identifier-class
 unmatchable-class terminator-class word-boundary-class ;

-SINGLETONS: beginning-of-input ^ end-of-input $ end-of-file ;
+SINGLETONS: beginning-of-input ^ end-of-input $ end-of-file word-break ;

 TUPLE: range from to ;
 C: <range> range
@ -110,97 +110,116 @@ M: f class-member? 2drop f ;
 TUPLE: primitive-class class ;
 C: <primitive-class> primitive-class

+TUPLE: not-class class ;
+
+PREDICATE: not-integer < not-class class>> integer? ;
+PREDICATE: not-primitive < not-class class>> primitive-class? ;
+
+M: not-class class-member?
+    class>> class-member? not ;
+
 TUPLE: or-class seq ;

-TUPLE: not-class class ;
+M: or-class class-member?
+    seq>> [ class-member? ] with any? ;

 TUPLE: and-class seq ;

-GENERIC: combine-and ( class1 class2 -- combined ? )
+M: and-class class-member?
+    seq>> [ class-member? ] with all? ;

-: replace-if-= ( object object -- object ? )
-    over = ;
-
-M: object combine-and replace-if-= ;
-
-M: t combine-and
-    drop t ;
-
-M: f combine-and
-    nip t ;
-
-M: not-class combine-and
-    class>> 2dup = [ 2drop f t ] [
-        dup integer? [
-            2dup swap class-member?
-            [ 2drop f f ]
-            [ drop t ] if
-        ] [ 2drop f f ] if
-    ] if ;
-
-M: integer combine-and
-    swap 2dup class-member? [ drop t ] [ 2drop f t ] if ;
-
-GENERIC: combine-or ( class1 class2 -- combined ? )
-
-M: object combine-or replace-if-= ;
-
-M: t combine-or
-    nip t ;
-
-M: f combine-or
-    drop t ;
-
-M: not-class combine-or
-    class>> = [ t t ] [ f f ] if ;
-
-M: integer combine-or
-    2dup swap class-member? [ drop t ] [ 2drop f f ] if ;
+DEFER: substitute

 : flatten ( seq class -- newseq )
    '[ dup _ instance? [ seq>> ] [ 1array ] if ] map concat ; inline

-: try-combine ( elt1 elt2 quot -- combined/f ? )
-    3dup call [ [ 3drop ] dip t ] [ drop swapd call ] if ; inline
-
-DEFER: answer
-
-:: try-cancel ( elt1 elt2 empty -- combined/f ? )
-    [ elt1 elt2 empty answer dup elt1 = not ] try-combine ;
-
-:: prefix-combining ( seq elt quot: ( elt1 elt2 -- combined/f ? ) -- newseq )
-    f :> combined!
-    seq [ elt quot call swap combined! ] find drop
-    [ seq remove-nth combined prefix ]
-    [ seq elt prefix ] if* ; inline
-
-: combine-by ( seq quot -- new-seq )
-    { } swap '[ _ prefix-combining ] reduce ; inline
-
 :: seq>instance ( seq empty class -- instance )
    seq length {
        { 0 [ empty ] }
        { 1 [ seq first ] }
-        [ drop class new seq >>seq ]
+        [ drop class new seq { } like >>seq ]
    } case ; inline

-:: combine ( seq quot: ( elt1 elt2 -- combined/f ? ) empty class -- newseq )
-    seq class flatten
-    [ quot try-combine ] combine-by
-    ! [ empty try-cancel ] combine-by ! This makes the algorithm O(n^4)
-    empty class seq>instance ; inline
+TUPLE: class-partition integers not-integers primitives not-primitives and or other ;
+
+: partition-classes ( seq -- class-partition )
+    prune
+    [ integer? ] partition
+    [ not-integer? ] partition
+    [ primitive-class? ] partition ! extend primitive-class to epsilon tags
+    [ not-primitive? ] partition
+    [ and-class? ] partition
+    [ or-class? ] partition
+    class-partition boa ;
+
+: class-partition>seq ( class-partition -- seq )
+    make-mirror values concat ;
+
+: repartition ( partition -- partition' )
+    ! This could be made more efficient; only and and or are effected
+    class-partition>seq partition-classes ;
+
+: filter-not-integers ( partition -- partition' )
+    dup
+    [ primitives>> ] [ not-primitives>> ] [ or>> ] tri
+    3append and-class boa
+    '[ [ class>> _ class-member? ] filter ] change-not-integers ;
+
+: answer-ors ( partition -- partition' )
+    dup [ not-integers>> ] [ not-primitives>> ] [ primitives>> ] tri 3append
+    '[ [ _ [ t substitute ] each ] map ] change-or ;
+
+: contradiction? ( partition -- ? )
+    {
+        [ [ primitives>> ] [ not-primitives>> ] bi intersects? ]
+        [ other>> f swap member? ]
+    } 1|| ;
+
+: make-and-class ( partition -- and-class )
+    answer-ors repartition
+    [ t swap remove ] change-other
+    dup contradiction?
+    [ drop f ]
+    [ filter-not-integers class-partition>seq prune t and-class seq>instance ] if ;

 : <and-class> ( seq -- class )
-    [ combine-and ] t and-class combine ;
+    dup and-class flatten partition-classes
+    dup integers>> length {
+        { 0 [ nip make-and-class ] }
+        { 1 [ integers>> first [ '[ _ swap class-member? ] all? ] keep and ] }
+        [ 3drop f ]
+    } case ;

-M: and-class class-member?
-    seq>> [ class-member? ] with all? ;
+: filter-integers ( partition -- partition' )
+    dup
+    [ primitives>> ] [ not-primitives>> ] [ and>> ] tri
+    3append or-class boa
+    '[ [ _ class-member? not ] filter ] change-integers ;
+
+: answer-ands ( partition -- partition' )
+    dup [ integers>> ] [ not-primitives>> ] [ primitives>> ] tri 3append
+    '[ [ _ [ f substitute ] each ] map ] change-and ;
+
+: tautology? ( partition -- ? )
+    {
+        [ [ primitives>> ] [ not-primitives>> ] bi intersects? ]
+        [ other>> t swap member? ]
+    } 1|| ;
+
+: make-or-class ( partition -- and-class )
+    answer-ands repartition
+    [ f swap remove ] change-other
+    dup tautology?
+    [ drop t ]
+    [ filter-integers class-partition>seq prune f or-class seq>instance ] if ;

 : <or-class> ( seq -- class )
-    [ combine-or ] f or-class combine ;
-
-M: or-class class-member?
-    seq>> [ class-member? ] with any? ;
+    dup or-class flatten partition-classes
+    dup not-integers>> length {
+        { 0 [ nip make-or-class ] }
+        { 1 [ not-integers>> first [ class>> '[ _ swap class-member? ] any? ] keep or ] }
+        [ 3drop t ]
+    } case ;

 GENERIC: <not-class> ( class -- inverse )

@ -219,9 +238,6 @@ M: or-class <not-class>
 M: t <not-class> drop f ;
 M: f <not-class> drop t ;

-M: not-class class-member?
-    class>> class-member? not ;
-
 M: primitive-class class-member?
    class>> class-member? ;

@ -247,8 +263,12 @@ M: or-class answer
 M: not-class answer
    [ class>> ] 2dip answer <not-class> ;

+GENERIC# substitute 1 ( class from to -- new-class )
+M: object substitute answer ;
+M: not-class substitute [ <not-class> ] bi@ answer ;
+
 : assoc-answer ( table question answer -- new-table )
-    '[ _ _ answer ] assoc-map
+    '[ _ _ substitute ] assoc-map
    [ nip ] assoc-filter ;

 : assoc-answers ( table questions answer -- new-table )
--- a/basis/regexp/combinators/combinators-tests.factor
+++ b/basis/regexp/combinators/combinators-tests.factor
@ -9,9 +9,6 @@ IN: regexp.combinators.tests
 [ t t t ] [ "foo" "bar" "baz" [ strings matches? ] tri@ ] unit-test
 [ f f f ] [ "food" "ibar" "ba" [ strings matches? ] tri@ ] unit-test

-USE: multiline
-/*
-! Why is conjuction broken?
 : conj ( -- regexp )
    { R' .*a' R' b.*' } <and> ;

@ -22,7 +19,6 @@ USE: multiline
 [ f ] [ "bljhasflsda" conj <not> matches? ] unit-test
 [ t ] [ "bsdfdfs" conj <not> matches? ] unit-test
 [ t ] [ "fsfa" conj <not> matches? ] unit-test
-*/

 [ f f ] [ "" "hi" [ <nothing> matches? ] bi@ ] unit-test
 [ t t ] [ "" "hi" [ <nothing> <not> matches? ] bi@ ] unit-test
--- a/basis/regexp/compiler/compiler.factor
+++ b/basis/regexp/compiler/compiler.factor
@ -3,7 +3,7 @@
 USING: regexp.classes kernel sequences regexp.negation
 quotations assocs fry math locals combinators
 accessors words compiler.units kernel.private strings
-sequences.private arrays call namespaces
+sequences.private arrays call namespaces unicode.breaks
 regexp.transition-tables combinators.short-circuit ;
 IN: regexp.compiler

@ -15,6 +15,10 @@ SYMBOL: backwards?
 <PRIVATE

 M: t question>quot drop [ 2drop t ] ;
+M: f question>quot drop [ 2drop f ] ;
+
+M: not-class question>quot
+    class>> question>quot [ not ] compose ;

 M: beginning-of-input question>quot
    drop [ drop zero? ] ;
@ -36,6 +40,9 @@ M: $ question>quot
 M: ^ question>quot
    drop [ { [ drop zero? ] [ [ 1- ] dip ?nth "\r\n" member? ] } 2|| ] ;

+M: word-break question>quot
+    drop [ word-break-at? ] ;
+
 : (execution-quot) ( next-state -- quot )
    ! The conditions here are for lookaround and anchors, etc
    dup condition? [
@ -70,17 +77,8 @@ C: <box> box
 : literals>cases ( literal-transitions -- case-body )
    [ execution-quot ] assoc-map ;

-: expand-one-or ( or-class transition -- alist )
-    [ seq>> ] dip '[ _ 2array ] map ;
-
-: expand-or ( alist -- new-alist )
-    [
-        first2 over or-class?
-        [ expand-one-or ] [ 2array 1array ] if
-    ] map concat ;
-
 : split-literals ( transitions -- case default )
-    >alist expand-or [ first integer? ] partition
+    { } assoc-like [ first integer? ] partition
    [ [ literals>cases ] keep ] dip non-literals>dispatch ;

 :: step ( last-match index str quot final? direction -- last-index/f )
--- a/basis/regexp/disambiguate/disambiguate.factor
+++ b/basis/regexp/disambiguate/disambiguate.factor
@ -1,7 +1,7 @@
 ! Copyright (C) 2009 Daniel Ehrenberg.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: kernel accessors regexp.classes math.bits assocs sequences
-arrays sets regexp.dfa math fry regexp.minimize regexp.ast ;
+arrays sets regexp.dfa math fry regexp.minimize regexp.ast regexp.transition-tables ;
 IN: regexp.disambiguate

 TUPLE: parts in out ;
@ -32,9 +32,8 @@ TUPLE: parts in out ;
 : preserving-epsilon ( state-transitions quot -- new-state-transitions )
    [ [ drop tagged-epsilon? ] assoc-filter ] bi
    assoc-union H{ } assoc-like ; inline
-
 : disambiguate ( nfa -- nfa )  
-    [
+    expand-ors [
        dup new-transitions '[
            [
                _ swap '[ _ get-transitions ] assoc-map
--- a/basis/regexp/minimize/minimize-tests.factor
+++ b/basis/regexp/minimize/minimize-tests.factor
@ -54,5 +54,5 @@ IN: regexp.minimize.tests

 [ [ ] [ ] while-changes ] must-infer

-[ H{ { T{ or-class f { 1 2 } } 3 } { 4 5 } } ]
+[ H{ { T{ or-class f { 2 1 } } 3 } { 4 5 } } ]
 [ H{ { 1 3 } { 2 3 } { 4 5 } } combine-state-transitions ] unit-test
--- a/basis/regexp/minimize/minimize.factor
+++ b/basis/regexp/minimize/minimize.factor
@ -96,4 +96,5 @@ IN: regexp.minimize
    clone
    number-states
    combine-states
-    combine-transitions ;
+    combine-transitions
+    expand-ors ;
--- a/basis/regexp/parser/parser.factor
+++ b/basis/regexp/parser/parser.factor
@ -56,6 +56,8 @@ ERROR: bad-class name ;
        { CHAR: z [ end-of-input <tagged-epsilon> ] }
        { CHAR: Z [ end-of-file <tagged-epsilon> ] }
        { CHAR: A [ beginning-of-input <tagged-epsilon> ] }
+        { CHAR: b [ word-break <tagged-epsilon> ] }
+        { CHAR: B [ word-break <not-class> <tagged-epsilon> ] }
        [ ]
    } case ;

@ -138,10 +140,10 @@ Parenthized = "?:" Alternation:a => [[ a ]]
                => [[ a on off parse-options <with-options> ]]
            | "?#" [^)]* => [[ f ]]
            | "?~" Alternation:a => [[ a <negation> ]]
-            | "?=" Alternation:a => [[ a t <lookahead> <tagged-epsilon> ]]
-            | "?!" Alternation:a => [[ a f <lookahead> <tagged-epsilon> ]]
-            | "?<=" Alternation:a => [[ a t <lookbehind> <tagged-epsilon> ]]
-            | "?<!" Alternation:a => [[ a f <lookbehind> <tagged-epsilon> ]]
+            | "?=" Alternation:a => [[ a <lookahead> <tagged-epsilon> ]]
+            | "?!" Alternation:a => [[ a <lookahead> <not-class> <tagged-epsilon> ]]
+            | "?<=" Alternation:a => [[ a <lookbehind> <tagged-epsilon> ]]
+            | "?<!" Alternation:a => [[ a <lookbehind> <not-class> <tagged-epsilon> ]]
            | Alternation

 Element = "(" Parenthized:p ")" => [[ p ]]
--- a/basis/regexp/regexp-docs.factor
+++ b/basis/regexp/regexp-docs.factor
@ -42,8 +42,8 @@ ARTICLE: { "regexp" "operations" } "Matching operations with regular expressions
 { $subsection matches? }
 { $subsection re-contains? }
 { $subsection first-match }
-{ $subsection all-matches }
-{ $subsection re-split1 }
+{ $subsection all-matching-slices }
+{ $subsection all-matching-subseqs }
 { $subsection re-split }
 { $subsection re-replace }
 { $subsection count-matches } ;
@ -67,25 +67,21 @@ HELP: matches?
 { $values { "string" string } { "regexp" regexp } { "?" "a boolean" } }
 { $description "Tests if the string as a whole matches the given regular expression." } ;

-HELP: re-split1
-{ $values { "string" string } { "regexp" regexp } { "before" string } { "after/f" string } }
-{ $description "Searches the string for a substring which matches the pattern. If found, the input string is split on the leftmost and longest occurence of the match, and the two halves are given as output. If no match is found, then the input string and " { $link f } " are output." } ;
-
-HELP: all-matches
+HELP: all-matching-slices
 { $values { "string" string } { "regexp" regexp } { "seq" "a sequence of slices of the input" } }
 { $description "Finds a sequence of disjoint substrings which each match the pattern. It chooses this by finding the leftmost longest match, and then the leftmost longest match which starts after the end of the previous match, and so on." } ;

 HELP: count-matches
 { $values { "string" string } { "regexp" regexp } { "n" integer } }
-{ $description "Counts how many disjoint matches the regexp has in the string, as made unambiguous by " { $link all-matches } "." } ;
+{ $description "Counts how many disjoint matches the regexp has in the string, as made unambiguous by " { $link all-matching-slices } "." } ;

 HELP: re-split
 { $values { "string" string } { "regexp" regexp } { "seq" "a sequence of slices of the input" } }
-{ $description "Splits the input string into chunks separated by the regular expression. Each chunk contains no match of the regexp. The chunks are chosen by the strategy of " { $link all-matches } "." } ;
+{ $description "Splits the input string into chunks separated by the regular expression. Each chunk contains no match of the regexp. The chunks are chosen by the strategy of " { $link all-matching-slices } "." } ;

 HELP: re-replace
 { $values { "string" string } { "regexp" regexp } { "replacement" string } { "result" string } }
-{ $description "Replaces substrings which match the input regexp with the given replacement text. The boundaries of the substring are chosen by the strategy used by " { $link all-matches } "." } ;
+{ $description "Replaces substrings which match the input regexp with the given replacement text. The boundaries of the substring are chosen by the strategy used by " { $link all-matching-slices } "." } ;

 HELP: first-match
 { $values { "string" string } { "regexp" regexp } { "slice/f" "the match, if one exists" } }
--- a/basis/regexp/regexp-tests.factor
+++ b/basis/regexp/regexp-tests.factor
@ -287,7 +287,7 @@ IN: regexp-tests
 [ { "a" "" } ] [ "a=" R/ =/ re-split [ >string ] map ] unit-test

 [ { "ABC" "DEF" "GHI" } ]
-[ "1ABC2DEF3GHI4" R/ [A-Z]+/ all-matches [ >string ] map ] unit-test
+[ "1ABC2DEF3GHI4" R/ [A-Z]+/ all-matching-subseqs ] unit-test

 [ 3 ]
 [ "1ABC2DEF3GHI4" R/ [A-Z]+/ count-matches ] unit-test
@ -431,51 +431,42 @@ IN: regexp-tests
 [ f ] [ "a bar b" R/ foo/ re-contains? ] unit-test
 [ t ] [ "foo" R/ foo/ re-contains? ] unit-test

-[ { "foo" "fxx" "fab" } ] [ "fab fxx foo" R/ f../r all-matches [ >string ] map ] unit-test
+[ { "foo" "fxx" "fab" } ] [ "fab fxx foo" R/ f../r all-matching-subseqs ] unit-test

-! [ t ] [ "foo" "\\bfoo\\b" <regexp> matches? ] unit-test
-! [ t ] [ "afoob" "\\Bfoo\\B" <regexp> matches? ] unit-test
-! [ t ] [ "afoob" "\\bfoo\\b" <regexp> matches? ] unit-test
-! [ f ] [ "foo" "\\Bfoo\\B" <regexp> matches? ] unit-test
+[ t ] [ "foo" "\\bfoo\\b" <regexp> re-contains? ] unit-test
+[ t ] [ "afoob" "\\Bfoo\\B" <regexp> re-contains? ] unit-test
+[ f ] [ "afoob" "\\bfoo\\b" <regexp> re-contains? ] unit-test
+[ f ] [ "foo" "\\Bfoo\\B" <regexp> re-contains? ] unit-test

-! [ 3 ] [ "foo bar" "foo\\b" <regexp> match-index-head ] unit-test
-! [ f ] [ "fooxbar" "foo\\b" <regexp> matches? ] unit-test
-! [ t ] [ "foo" "foo\\b" <regexp> matches? ] unit-test
-! [ t ] [ "foo bar" "foo\\b bar" <regexp> matches? ] unit-test
-! [ f ] [ "fooxbar" "foo\\bxbar" <regexp> matches? ] unit-test
-! [ f ] [ "foo" "foo\\bbar" <regexp> matches? ] unit-test
+[ 3 ] [ "foo bar" "foo\\b" <regexp> first-match length ] unit-test
+[ f ] [ "fooxbar" "foo\\b" <regexp> re-contains? ] unit-test
+[ t ] [ "foo" "foo\\b" <regexp> re-contains? ] unit-test
+[ t ] [ "foo bar" "foo\\b bar" <regexp> matches? ] unit-test
+[ f ] [ "fooxbar" "foo\\bxbar" <regexp> matches? ] unit-test
+[ f ] [ "foo" "foo\\bbar" <regexp> matches? ] unit-test

-! [ f ] [ "foo bar" "foo\\B" <regexp> matches? ] unit-test
-! [ 3 ] [ "fooxbar" "foo\\B" <regexp> match-index-head ] unit-test
-! [ t ] [ "foo" "foo\\B" <regexp> matches? ] unit-test
-! [ f ] [ "foo bar" "foo\\B bar" <regexp> matches? ] unit-test
-! [ t ] [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
-! [ f ] [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test
+[ f ] [ "foo bar" "foo\\B" <regexp> re-contains? ] unit-test
+[ 3 ] [ "fooxbar" "foo\\B" <regexp> first-match length ] unit-test
+[ f ] [ "foo" "foo\\B" <regexp> re-contains? ] unit-test
+[ f ] [ "foo bar" "foo\\B bar" <regexp> matches? ] unit-test
+[ t ] [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
+[ f ] [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test

-! [ 1 ] [ "aaacb" "a+?" <regexp> match-index-head ] unit-test
-! [ 1 ] [ "aaacb" "aa??" <regexp> match-index-head ] unit-test
-! [ f ] [ "aaaab" "a++ab" <regexp> matches? ] unit-test
-! [ t ] [ "aaacb" "a++cb" <regexp> matches? ] unit-test
-! [ 3 ] [ "aacb" "aa?c" <regexp> match-index-head ] unit-test
-! [ 3 ] [ "aacb" "aa??c" <regexp> match-index-head ] unit-test
+[ t ] [ "ab" "a(?=b*)" <regexp> re-contains? ] unit-test
+[ t ] [ "abbbbbc" "a(?=b*c)" <regexp> re-contains? ] unit-test
+[ f ] [ "abbbbb" "a(?=b*c)" <regexp> re-contains? ] unit-test
+[ t ] [ "ab" "a(?=b*)" <regexp> re-contains? ] unit-test

-! "ab" "a(?=b*)" <regexp> match
-! "abbbbbc" "a(?=b*c)" <regexp> match
-! "ab" "a(?=b*)" <regexp> match
+[ "az" ] [ "baz" "(?<=b)(az)" <regexp> first-match >string ] unit-test
+[ f ] [ "chaz" "(?<=b)(az)" <regexp> re-contains? ] unit-test
+[ "a" ] [ "cbaz" "(?<=b*)a" <regexp> first-match >string ] unit-test
+[ f ] [ "baz" "a(?<=b)" <regexp> re-contains? ] unit-test

-! "baz" "(az)(?<=b)" <regexp> first-match
-! "cbaz" "a(?<=b*)" <regexp> first-match
-! "baz" "a(?<=b)" <regexp> first-match
+[ f ] [ "baz" "(?<!b)a" <regexp> re-contains? ] unit-test
+[ t ] [ "caz" "(?<!b)a" <regexp> re-contains? ] unit-test

-! "baz" "a(?<!b)" <regexp> first-match
-! "caz" "a(?<!b)" <regexp> first-match
+[ "abcd" ] [ "abcdefg" "a(?=bcdefg)bcd" <regexp> first-match >string ] unit-test
+[ t ] [ "abcdefg" "a(?#bcdefg)bcd" <regexp> re-contains? ] unit-test
+[ t ] [ "abcdefg" "a(?:bcdefg)" <regexp> matches? ] unit-test

-! "abcdefg" "a(?=bcdefg)bcd" <regexp> first-match
-! "abcdefg" "a(?#bcdefg)bcd" <regexp> first-match
-! "abcdefg" "a(?:bcdefg)" <regexp> first-match
-
-! "caba" "a(?<=b)" <regexp> first-match
-
-! capture group 1: "aaaa"  2: ""
-! "aaaa" "(a*)(a*)" <regexp> match*
-! "aaaa" "(a*)(a+)" <regexp> match*
+[ 3 ] [ "caba" "(?<=b)a" <regexp> first-match from>> ] unit-test
--- a/basis/regexp/regexp.factor
+++ b/basis/regexp/regexp.factor
@ -1,10 +1,10 @@
 ! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: accessors combinators kernel math sequences strings sets
-assocs prettyprint.backend prettyprint.custom make lexer
-namespaces parser arrays fry locals regexp.parser splitting
-sorting regexp.ast regexp.negation regexp.compiler words
-call call.private math.ranges ;
+USING: accessors combinators kernel kernel.private math sequences
+sequences.private strings sets assocs prettyprint.backend
+prettyprint.custom make lexer namespaces parser arrays fry locals
+regexp.parser splitting sorting regexp.ast regexp.negation
+regexp.compiler words call call.private math.ranges ;
 IN: regexp

 TUPLE: regexp
@ -17,21 +17,16 @@ TUPLE: reverse-regexp < regexp ;

 <PRIVATE

-: maybe-negated ( lookaround quot -- regexp-quot )
-    '[ term>> @ ] [ positive?>> [ ] [ not ] ? ] bi compose ; inline
-
 M: lookahead question>quot ! Returns ( index string -- ? )
-    [ ast>dfa dfa>shortest-word '[ f _ execute ] ] maybe-negated ;
+    term>> ast>dfa dfa>shortest-word '[ f _ execute ] ;

 : <reversed-option> ( ast -- reversed )
    "r" string>options <with-options> ;

 M: lookbehind question>quot ! Returns ( index string -- ? )
-    [
-        <reversed-option>
-        ast>dfa dfa>reverse-shortest-word
-        '[ [ 1- ] dip f _ execute ]
-    ] maybe-negated ;
+    term>> <reversed-option>
+    ast>dfa dfa>reverse-shortest-word
+    '[ [ 1- ] dip f _ execute ] ;

 : check-string ( string -- string )
    ! Make this configurable
@ -49,93 +44,82 @@ M: reverse-regexp end/start drop length 1- -1 swap ;
 PRIVATE>

 : matches? ( string regexp -- ? )
-    [ end/start ] 2keep
    [ check-string ] dip
+    [ end/start ] 2keep
    match-index-from
-    [ swap = ] [ drop f ] if* ;
+    [ = ] [ drop f ] if* ;

 <PRIVATE

-TUPLE: match { i read-only } { j read-only } { seq read-only } ;
-
-: match-slice ( i string quot -- match/f )
-    [ 2dup ] dip call
-    [ swap match boa ] [ 2drop f ] if* ; inline
+:: (next-match) ( i string regexp quot: ( i string regexp -- j ) reverse? -- i start end ? )
+    i string regexp quot call dup [| j |
+        j i j
+        reverse? [ swap [ 1+ ] bi@ ] when
+        string
+    ] [ drop f f f f ] if ; inline

 : search-range ( i string reverse? -- seq )
-    [ drop 0 [a,b] ] [ length [a,b) ] if ; inline
+    [ drop dup 1+ -1 ] [ length 1 ] if range boa ; inline

-: match>result ( match reverse? -- i start end string )
-    over [
-        [ [ i>> ] [ j>> tuck ] [ seq>> ] tri ] dip
-        [ [ swap [ 1+ ] bi@ ] dip ] when
-    ] [ 2drop f f f f ] if ; inline
-
-:: next-match ( i string quot reverse? -- i start end string )
+:: next-match ( i string regexp quot: ( i string regexp -- j ) reverse? -- i start end ? )
+    f f f f
    i string reverse? search-range
-    [ string quot match-slice ] map-find drop
-    reverse? match>result ; inline
+    [ [ 2drop 2drop ] dip string regexp quot reverse? (next-match) dup ] find 2drop ; inline

-: do-next-match ( i string regexp -- i start end string )
+: do-next-match ( i string regexp -- i start end ? )
    dup next-match>>
-    execute-unsafe( i string regexp -- i start end string ) ;
+    execute-unsafe( i string regexp -- i start end ? ) ; inline

-: next-slice ( i string regexp -- i/f slice/f )
-    do-next-match
-    [ slice boa ] [ drop ] if* ; inline
+:: (each-match) ( i string regexp quot: ( start end string -- ) -- )
+    i string regexp do-next-match [| i' start end |
+        start end string quot call
+        i' string regexp quot (each-match)
+    ] [ 3drop ] if ; inline recursive
+
+: prepare-match-iterator ( string regexp -- i string regexp )
+    [ check-string ] dip [ end/start nip ] 2keep ; inline

 PRIVATE>

-TUPLE: match-iterator
-    { string read-only }
-    { regexp read-only }
-    { i read-only }
-    { value read-only } ;
+: each-match ( string regexp quot: ( start end string -- ) -- )
+    [ prepare-match-iterator ] dip (each-match) ; inline

-: iterate ( iterator -- iterator'/f )
-    dup
-    [ i>> ] [ string>> ] [ regexp>> ] tri next-slice
-    [ [ [ string>> ] [ regexp>> ] bi ] 2dip match-iterator boa ]
-    [ 2drop f ] if* ;
+: map-matches ( string regexp quot: ( start end string -- obj ) -- seq )
+    accumulator [ each-match ] dip >array ; inline

-: value ( iterator/f -- value/f )
-    dup [ value>> ] when ;
+: all-matching-slices ( string regexp -- seq )
+    [ slice boa ] map-matches ;

-: <match-iterator> ( string regexp -- match-iterator )
-    [ check-string ] dip
-    2dup end/start nip f
-    match-iterator boa
-    iterate ; inline
-
-: all-matches ( string regexp -- seq )
-    <match-iterator> [ iterate ] follow [ value ] map ;
+: all-matching-subseqs ( string regexp -- seq )
+    [ subseq ] map-matches ;

 : count-matches ( string regexp -- n )
-    all-matches length ;
+    [ 0 ] 2dip [ 3drop 1+ ] each-match ;

 <PRIVATE

-:: split-slices ( string slices -- new-slices )
-    slices [ to>> ] map 0 prefix
-    slices [ from>> ] map string length suffix
-    [ string <slice> ] 2map ;
+:: (re-split) ( string regexp quot -- new-slices )
+    0 string regexp [| end start end' string |
+        end' ! leave it on the stack for the next iteration
+        end start string quot call
+    ] map-matches
+    ! Final chunk
+    swap string length string quot call suffix ; inline

 PRIVATE>

 : first-match ( string regexp -- slice/f )
-    <match-iterator> value ;
+    [ prepare-match-iterator do-next-match ] [ drop ] 2bi
+    '[ _ slice boa nip ] [ 3drop f ] if ;

 : re-contains? ( string regexp -- ? )
-    first-match >boolean ;
-
-: re-split1 ( string regexp -- before after/f )
-    dupd first-match [ 1array split-slices first2 ] [ f ] if* ;
+    prepare-match-iterator do-next-match [ 3drop ] dip >boolean ;

 : re-split ( string regexp -- seq )
-    dupd all-matches split-slices ;
+    [ slice boa ] (re-split) ;

 : re-replace ( string regexp replacement -- result )
-    [ re-split ] dip join ;
+    [ [ subseq ] (re-split) ] dip join ;

 <PRIVATE

@ -167,8 +151,8 @@ DEFER: compile-next-match
 : compile-next-match ( regexp -- regexp )
    dup '[
        dup \ next-initial-word = [
-            drop _ [ compile-regexp dfa>> ] [ reverse-regexp? ] bi
-            '[ _ '[ _ _ execute ] _ next-match ]
+            drop _ [ compile-regexp dfa>> def>> ] [ reverse-regexp? ] bi
+            '[ { array-capacity string regexp } declare _ _ next-match ]
            (( i string regexp -- i start end string )) simple-define-temp
        ] when
    ] change-next-match ;
--- a/basis/regexp/transition-tables/transition-tables.factor
+++ b/basis/regexp/transition-tables/transition-tables.factor
@ -47,3 +47,15 @@ TUPLE: transition-table transitions start-state final-states ;
    [ '[ _ condition-at ] change-start-state ]
    [ '[ [ _ at ] map-set ] change-final-states ]
    [ '[ _ number-transitions ] change-transitions ] tri ;
+
+: expand-one-or ( or-class transition -- alist )
+    [ seq>> ] dip '[ _ 2array ] map ;
+
+: expand-or ( state-transitions -- new-transitions )
+    >alist [
+        first2 over or-class?
+        [ expand-one-or ] [ 2array 1array ] if
+    ] map concat >hashtable ;
+
+: expand-ors ( transition-table -- transition-table )
+    [ [ expand-or ] assoc-map ] change-transitions ;
--- a/basis/ui/gadgets/panes/panes-tests.factor
+++ b/basis/ui/gadgets/panes/panes-tests.factor
@ -2,7 +2,7 @@ USING: alien ui.gadgets.panes ui.gadgets namespaces
 kernel sequences io io.styles io.streams.string tools.test
 prettyprint definitions help help.syntax help.markup
 help.stylesheet splitting tools.test.ui models math summary
-inspector accessors help.topics ;
+inspector accessors help.topics see ;
 IN: ui.gadgets.panes.tests

 : #children "pane" get children>> length ;
--- a/basis/unicode/breaks/breaks-tests.factor
+++ b/basis/unicode/breaks/breaks-tests.factor
@ -37,3 +37,5 @@ IN: unicode.breaks.tests

 grapheme-break-test parse-test-file [ >graphemes ] test
 word-break-test parse-test-file [ >words ] test
+
+[ { t f t t f t } ] [ 6 [ "as df" word-break-at? ] map ] unit-test
--- a/basis/unicode/breaks/breaks.factor
+++ b/basis/unicode/breaks/breaks.factor
@ -228,3 +228,20 @@ PRIVATE>

 : >words ( str -- words )
    [ first-word ] >pieces ;
+
+<PRIVATE
+
+: nth-next ( i str -- str[i-1] str[i] )
+    [ [ 1- ] keep ] dip '[ _ nth ] bi@ ;
+
+PRIVATE>
+
+: word-break-at? ( i str -- ? )
+    {
+        [ drop zero? ]
+        [ length = ]
+        [
+            [ nth-next [ word-break-prop ] dip ] 2keep
+            word-break-next nip
+        ]
+    } 2|| ;
--- a/basis/xmode/code2html/code2html-tests.factor
+++ b/basis/xmode/code2html/code2html-tests.factor
@ -3,6 +3,8 @@ USING: xmode.code2html xmode.catalog
 tools.test multiline splitting memoize
 kernel io.streams.string xml.writer ;

+\ htmlize-file must-infer
+
 [ ] [ \ (load-mode) reset-memoized ] unit-test

 [ ] [
--- a/core/classes/singleton/singleton-tests.factor
+++ b/core/classes/singleton/singleton-tests.factor
@ -1,4 +1,4 @@
-USING: kernel classes.singleton tools.test prettyprint io.streams.string ;
+USING: kernel classes.singleton tools.test prettyprint io.streams.string see ;
 IN: classes.singleton.tests

 [ ] [ SINGLETON: bzzt ] unit-test
--- a/core/classes/tuple/tuple-tests.factor
+++ b/core/classes/tuple/tuple-tests.factor
@ -4,7 +4,7 @@ namespaces quotations sequences.private classes continuations
 generic.standard effects classes.tuple classes.tuple.private
 arrays vectors strings compiler.units accessors classes.algebra
 calendar prettyprint io.streams.string splitting summary
-columns math.order classes.private slots slots.private eval ;
+columns math.order classes.private slots slots.private eval see ;
 IN: classes.tuple.tests

 TUPLE: rect x y w h ;
--- a/core/classes/union/union-tests.factor
+++ b/core/classes/union/union-tests.factor
@ -4,7 +4,7 @@ tools.test vectors words quotations classes
 classes.private classes.union classes.mixin classes.predicate
 classes.algebra vectors definitions source-files
 compiler.units kernel.private sorting vocabs io.streams.string
-eval ;
+eval see ;
 IN: classes.union.tests

 ! DEFER: bah
--- a/core/generic/standard/standard-tests.factor
+++ b/core/generic/standard/standard-tests.factor
@ -5,7 +5,7 @@ specialized-arrays.double byte-arrays bit-arrays parser
 namespaces make quotations stack-checker vectors growable
 hashtables sbufs prettyprint byte-vectors bit-vectors
 specialized-vectors.double definitions generic sets graphs assocs
-grouping ;
+grouping see ;

 GENERIC: lo-tag-test ( obj -- obj' )

--- a/core/kernel/kernel-docs.factor
+++ b/core/kernel/kernel-docs.factor
@ -684,7 +684,7 @@ $nl
 "This operation is efficient and does not copy the quotation." }
 { $examples
    { $example "USING: kernel prettyprint ;" "5 [ . ] curry ." "[ 5 . ]" }
-    { $example "USING: kernel prettyprint ;" "\\ = [ see ] curry ." "[ \\ = see ]" }
+    { $example "USING: kernel prettyprint see ;" "\\ = [ see ] curry ." "[ \\ = see ]" }
    { $example "USING: kernel math prettyprint sequences ;" "{ 1 2 3 } 2 [ - ] curry map ." "{ -1 0 1 }" }
 } ;

--- a/extra/descriptive/descriptive-tests.factor
+++ b/extra/descriptive/descriptive-tests.factor
@ -1,4 +1,4 @@
-USING: descriptive kernel math tools.test continuations prettyprint io.streams.string ;
+USING: descriptive kernel math tools.test continuations prettyprint io.streams.string see ;
 IN: descriptive.tests

 DESCRIPTIVE: divide ( num denom -- fraction ) / ;
--- a/extra/multi-methods/tests/syntax.factor
+++ b/extra/multi-methods/tests/syntax.factor
@ -1,7 +1,7 @@
 IN: multi-methods.tests
 USING: multi-methods tools.test math sequences namespaces system
 kernel strings definitions prettyprint debugger arrays
-hashtables continuations classes assocs accessors ;
+hashtables continuations classes assocs accessors see ;

 GENERIC: first-test