From 8b286cea4cadbfff3b9d12a7a23c74c400d8468f Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Wed, 11 Mar 2009 15:51:54 -0500 Subject: [PATCH 01/10] Adding word breaks to regexp --- basis/regexp/ast/ast.factor | 4 +-- basis/regexp/classes/classes.factor | 2 +- basis/regexp/compiler/compiler.factor | 9 ++++++- basis/regexp/parser/parser.factor | 10 +++++--- basis/regexp/regexp-tests.factor | 32 ++++++++++++------------ basis/regexp/regexp.factor | 13 +++------- basis/unicode/breaks/breaks-tests.factor | 2 ++ basis/unicode/breaks/breaks.factor | 17 +++++++++++++ 8 files changed, 56 insertions(+), 33 deletions(-) diff --git a/basis/regexp/ast/ast.factor b/basis/regexp/ast/ast.factor index 9288766888..ffaed2db62 100644 --- a/basis/regexp/ast/ast.factor +++ b/basis/regexp/ast/ast.factor @@ -58,8 +58,8 @@ M: from-to : char-class ( ranges ? -- term ) [ ] dip [ ] when ; -TUPLE: lookahead term positive? ; +TUPLE: lookahead term ; C: lookahead -TUPLE: lookbehind term positive? ; +TUPLE: lookbehind term ; C: lookbehind diff --git a/basis/regexp/classes/classes.factor b/basis/regexp/classes/classes.factor index 4ddd470189..1959a91cb5 100644 --- a/basis/regexp/classes/classes.factor +++ b/basis/regexp/classes/classes.factor @@ -12,7 +12,7 @@ ascii-class punctuation-class java-printable-class blank-class control-character-class hex-digit-class java-blank-class c-identifier-class unmatchable-class terminator-class word-boundary-class ; -SINGLETONS: beginning-of-input ^ end-of-input $ end-of-file ; +SINGLETONS: beginning-of-input ^ end-of-input $ end-of-file word-break ; TUPLE: range from to ; C: range diff --git a/basis/regexp/compiler/compiler.factor b/basis/regexp/compiler/compiler.factor index 0e0c0eaae6..c837df0f0f 100644 --- a/basis/regexp/compiler/compiler.factor +++ b/basis/regexp/compiler/compiler.factor @@ -3,7 +3,7 @@ USING: regexp.classes kernel sequences regexp.negation quotations assocs fry math locals combinators accessors words compiler.units kernel.private strings -sequences.private arrays call namespaces +sequences.private arrays call namespaces unicode.breaks regexp.transition-tables combinators.short-circuit ; IN: regexp.compiler @@ -15,6 +15,10 @@ SYMBOL: backwards? quot drop [ 2drop t ] ; +M: f question>quot drop [ 2drop f ] ; + +M: not-class question>quot + class>> question>quot [ not ] compose ; M: beginning-of-input question>quot drop [ drop zero? ] ; @@ -36,6 +40,9 @@ M: $ question>quot M: ^ question>quot drop [ { [ drop zero? ] [ [ 1- ] dip ?nth "\r\n" member? ] } 2|| ] ; +M: word-break question>quot + drop [ word-break-at? ] ; + : (execution-quot) ( next-state -- quot ) ! The conditions here are for lookaround and anchors, etc dup condition? [ diff --git a/basis/regexp/parser/parser.factor b/basis/regexp/parser/parser.factor index adbf0c53d3..c6a69f2508 100644 --- a/basis/regexp/parser/parser.factor +++ b/basis/regexp/parser/parser.factor @@ -56,6 +56,8 @@ ERROR: bad-class name ; { CHAR: z [ end-of-input ] } { CHAR: Z [ end-of-file ] } { CHAR: A [ beginning-of-input ] } + { CHAR: b [ word-break ] } + { CHAR: B [ word-break ] } [ ] } case ; @@ -138,10 +140,10 @@ Parenthized = "?:" Alternation:a => [[ a ]] => [[ a on off parse-options ]] | "?#" [^)]* => [[ f ]] | "?~" Alternation:a => [[ a ]] - | "?=" Alternation:a => [[ a t ]] - | "?!" Alternation:a => [[ a f ]] - | "?<=" Alternation:a => [[ a t ]] - | "? [[ a f ]] + | "?=" Alternation:a => [[ a ]] + | "?!" Alternation:a => [[ a ]] + | "?<=" Alternation:a => [[ a ]] + | "? [[ a ]] | Alternation Element = "(" Parenthized:p ")" => [[ p ]] diff --git a/basis/regexp/regexp-tests.factor b/basis/regexp/regexp-tests.factor index e01241552d..0b94f8296d 100644 --- a/basis/regexp/regexp-tests.factor +++ b/basis/regexp/regexp-tests.factor @@ -433,24 +433,24 @@ IN: regexp-tests [ { "foo" "fxx" "fab" } ] [ "fab fxx foo" R/ f../r all-matches [ >string ] map ] unit-test -! [ t ] [ "foo" "\\bfoo\\b" matches? ] unit-test -! [ t ] [ "afoob" "\\Bfoo\\B" matches? ] unit-test -! [ t ] [ "afoob" "\\bfoo\\b" matches? ] unit-test -! [ f ] [ "foo" "\\Bfoo\\B" matches? ] unit-test +[ t ] [ "foo" "\\bfoo\\b" re-contains? ] unit-test +[ t ] [ "afoob" "\\Bfoo\\B" re-contains? ] unit-test +[ f ] [ "afoob" "\\bfoo\\b" re-contains? ] unit-test +[ f ] [ "foo" "\\Bfoo\\B" re-contains? ] unit-test -! [ 3 ] [ "foo bar" "foo\\b" match-index-head ] unit-test -! [ f ] [ "fooxbar" "foo\\b" matches? ] unit-test -! [ t ] [ "foo" "foo\\b" matches? ] unit-test -! [ t ] [ "foo bar" "foo\\b bar" matches? ] unit-test -! [ f ] [ "fooxbar" "foo\\bxbar" matches? ] unit-test -! [ f ] [ "foo" "foo\\bbar" matches? ] unit-test +[ 3 ] [ "foo bar" "foo\\b" first-match length ] unit-test +[ f ] [ "fooxbar" "foo\\b" re-contains? ] unit-test +[ t ] [ "foo" "foo\\b" re-contains? ] unit-test +[ t ] [ "foo bar" "foo\\b bar" matches? ] unit-test +[ f ] [ "fooxbar" "foo\\bxbar" matches? ] unit-test +[ f ] [ "foo" "foo\\bbar" matches? ] unit-test -! [ f ] [ "foo bar" "foo\\B" matches? ] unit-test -! [ 3 ] [ "fooxbar" "foo\\B" match-index-head ] unit-test -! [ t ] [ "foo" "foo\\B" matches? ] unit-test -! [ f ] [ "foo bar" "foo\\B bar" matches? ] unit-test -! [ t ] [ "fooxbar" "foo\\Bxbar" matches? ] unit-test -! [ f ] [ "foo" "foo\\Bbar" matches? ] unit-test +[ f ] [ "foo bar" "foo\\B" re-contains? ] unit-test +[ 3 ] [ "fooxbar" "foo\\B" first-match length ] unit-test +[ f ] [ "foo" "foo\\B" re-contains? ] unit-test +[ f ] [ "foo bar" "foo\\B bar" matches? ] unit-test +[ t ] [ "fooxbar" "foo\\Bxbar" matches? ] unit-test +[ f ] [ "foo" "foo\\Bbar" matches? ] unit-test ! [ 1 ] [ "aaacb" "a+?" match-index-head ] unit-test ! [ 1 ] [ "aaacb" "aa??" match-index-head ] unit-test diff --git a/basis/regexp/regexp.factor b/basis/regexp/regexp.factor index 7f27a13104..a7f2fa4e12 100644 --- a/basis/regexp/regexp.factor +++ b/basis/regexp/regexp.factor @@ -17,21 +17,16 @@ TUPLE: reverse-regexp < regexp ; > @ ] [ positive?>> [ ] [ not ] ? ] bi compose ; inline - M: lookahead question>quot ! Returns ( index string -- ? ) - [ ast>dfa dfa>shortest-word '[ f _ execute ] ] maybe-negated ; + term>> ast>dfa dfa>shortest-word '[ f _ execute ] ; : ( ast -- reversed ) "r" string>options ; M: lookbehind question>quot ! Returns ( index string -- ? ) - [ - - ast>dfa dfa>reverse-shortest-word - '[ [ 1- ] dip f _ execute ] - ] maybe-negated ; + term>> + ast>dfa dfa>reverse-shortest-word + '[ [ 1- ] dip f _ execute ] ; : check-string ( string -- string ) ! Make this configurable diff --git a/basis/unicode/breaks/breaks-tests.factor b/basis/unicode/breaks/breaks-tests.factor index d8e220cf18..493c2db0c2 100644 --- a/basis/unicode/breaks/breaks-tests.factor +++ b/basis/unicode/breaks/breaks-tests.factor @@ -37,3 +37,5 @@ IN: unicode.breaks.tests grapheme-break-test parse-test-file [ >graphemes ] test word-break-test parse-test-file [ >words ] test + +[ { t f t t f t } ] [ 6 [ "as df" word-break-at? ] map ] unit-test diff --git a/basis/unicode/breaks/breaks.factor b/basis/unicode/breaks/breaks.factor index ddcb99b829..f2e9454545 100644 --- a/basis/unicode/breaks/breaks.factor +++ b/basis/unicode/breaks/breaks.factor @@ -228,3 +228,20 @@ PRIVATE> : >words ( str -- words ) [ first-word ] >pieces ; + + + +: word-break-at? ( i str -- ? ) + { + [ drop zero? ] + [ length = ] + [ + [ nth-next [ word-break-prop ] dip ] 2keep + word-break-next nip + ] + } 2|| ; From 23c8b375ccdaff42d785bce058fd2b3efc7328d8 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Wed, 11 Mar 2009 16:06:14 -0500 Subject: [PATCH 02/10] Uncommenting most remaining regexp unit tests --- basis/regexp/regexp-tests.factor | 37 ++++++++++++-------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/basis/regexp/regexp-tests.factor b/basis/regexp/regexp-tests.factor index 0b94f8296d..eedbcbbc4f 100644 --- a/basis/regexp/regexp-tests.factor +++ b/basis/regexp/regexp-tests.factor @@ -452,30 +452,21 @@ IN: regexp-tests [ t ] [ "fooxbar" "foo\\Bxbar" matches? ] unit-test [ f ] [ "foo" "foo\\Bbar" matches? ] unit-test -! [ 1 ] [ "aaacb" "a+?" match-index-head ] unit-test -! [ 1 ] [ "aaacb" "aa??" match-index-head ] unit-test -! [ f ] [ "aaaab" "a++ab" matches? ] unit-test -! [ t ] [ "aaacb" "a++cb" matches? ] unit-test -! [ 3 ] [ "aacb" "aa?c" match-index-head ] unit-test -! [ 3 ] [ "aacb" "aa??c" match-index-head ] unit-test +[ t ] [ "ab" "a(?=b*)" re-contains? ] unit-test +[ t ] [ "abbbbbc" "a(?=b*c)" re-contains? ] unit-test +[ f ] [ "abbbbb" "a(?=b*c)" re-contains? ] unit-test +[ t ] [ "ab" "a(?=b*)" re-contains? ] unit-test -! "ab" "a(?=b*)" match -! "abbbbbc" "a(?=b*c)" match -! "ab" "a(?=b*)" match +[ "az" ] [ "baz" "(?<=b)(az)" first-match >string ] unit-test +[ f ] [ "chaz" "(?<=b)(az)" re-contains? ] unit-test +[ "a" ] [ "cbaz" "(?<=b*)a" first-match >string ] unit-test +[ f ] [ "baz" "a(?<=b)" re-contains? ] unit-test -! "baz" "(az)(?<=b)" first-match -! "cbaz" "a(?<=b*)" first-match -! "baz" "a(?<=b)" first-match +[ f ] [ "baz" "(? re-contains? ] unit-test +[ t ] [ "caz" "(? re-contains? ] unit-test -! "baz" "a(? first-match -! "caz" "a(? first-match +[ "abcd" ] [ "abcdefg" "a(?=bcdefg)bcd" first-match >string ] unit-test +[ t ] [ "abcdefg" "a(?#bcdefg)bcd" re-contains? ] unit-test +[ t ] [ "abcdefg" "a(?:bcdefg)" matches? ] unit-test -! "abcdefg" "a(?=bcdefg)bcd" first-match -! "abcdefg" "a(?#bcdefg)bcd" first-match -! "abcdefg" "a(?:bcdefg)" first-match - -! "caba" "a(?<=b)" first-match - -! capture group 1: "aaaa" 2: "" -! "aaaa" "(a*)(a*)" match* -! "aaaa" "(a*)(a+)" match* +[ 3 ] [ "caba" "(?<=b)a" first-match from>> ] unit-test From 643da5f073e42af8495fd9c73fd82a07124164f5 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 11 Mar 2009 16:21:29 -0500 Subject: [PATCH 03/10] Remove match iterators for a performance boost --- basis/regexp/regexp-docs.factor | 16 ++---- basis/regexp/regexp-tests.factor | 4 +- basis/regexp/regexp.factor | 97 ++++++++++++++++---------------- 3 files changed, 55 insertions(+), 62 deletions(-) diff --git a/basis/regexp/regexp-docs.factor b/basis/regexp/regexp-docs.factor index d31b185b2f..adbeb341bb 100644 --- a/basis/regexp/regexp-docs.factor +++ b/basis/regexp/regexp-docs.factor @@ -42,8 +42,8 @@ ARTICLE: { "regexp" "operations" } "Matching operations with regular expressions { $subsection matches? } { $subsection re-contains? } { $subsection first-match } -{ $subsection all-matches } -{ $subsection re-split1 } +{ $subsection all-matching-slices } +{ $subsection all-matching-subseqs } { $subsection re-split } { $subsection re-replace } { $subsection count-matches } ; @@ -67,25 +67,21 @@ HELP: matches? { $values { "string" string } { "regexp" regexp } { "?" "a boolean" } } { $description "Tests if the string as a whole matches the given regular expression." } ; -HELP: re-split1 -{ $values { "string" string } { "regexp" regexp } { "before" string } { "after/f" string } } -{ $description "Searches the string for a substring which matches the pattern. If found, the input string is split on the leftmost and longest occurence of the match, and the two halves are given as output. If no match is found, then the input string and " { $link f } " are output." } ; - -HELP: all-matches +HELP: all-matching-slices { $values { "string" string } { "regexp" regexp } { "seq" "a sequence of slices of the input" } } { $description "Finds a sequence of disjoint substrings which each match the pattern. It chooses this by finding the leftmost longest match, and then the leftmost longest match which starts after the end of the previous match, and so on." } ; HELP: count-matches { $values { "string" string } { "regexp" regexp } { "n" integer } } -{ $description "Counts how many disjoint matches the regexp has in the string, as made unambiguous by " { $link all-matches } "." } ; +{ $description "Counts how many disjoint matches the regexp has in the string, as made unambiguous by " { $link all-matching-slices } "." } ; HELP: re-split { $values { "string" string } { "regexp" regexp } { "seq" "a sequence of slices of the input" } } -{ $description "Splits the input string into chunks separated by the regular expression. Each chunk contains no match of the regexp. The chunks are chosen by the strategy of " { $link all-matches } "." } ; +{ $description "Splits the input string into chunks separated by the regular expression. Each chunk contains no match of the regexp. The chunks are chosen by the strategy of " { $link all-matching-slices } "." } ; HELP: re-replace { $values { "string" string } { "regexp" regexp } { "replacement" string } { "result" string } } -{ $description "Replaces substrings which match the input regexp with the given replacement text. The boundaries of the substring are chosen by the strategy used by " { $link all-matches } "." } ; +{ $description "Replaces substrings which match the input regexp with the given replacement text. The boundaries of the substring are chosen by the strategy used by " { $link all-matching-slices } "." } ; HELP: first-match { $values { "string" string } { "regexp" regexp } { "slice/f" "the match, if one exists" } } diff --git a/basis/regexp/regexp-tests.factor b/basis/regexp/regexp-tests.factor index e01241552d..c6d1487d5a 100644 --- a/basis/regexp/regexp-tests.factor +++ b/basis/regexp/regexp-tests.factor @@ -287,7 +287,7 @@ IN: regexp-tests [ { "a" "" } ] [ "a=" R/ =/ re-split [ >string ] map ] unit-test [ { "ABC" "DEF" "GHI" } ] -[ "1ABC2DEF3GHI4" R/ [A-Z]+/ all-matches [ >string ] map ] unit-test +[ "1ABC2DEF3GHI4" R/ [A-Z]+/ all-matching-subseqs ] unit-test [ 3 ] [ "1ABC2DEF3GHI4" R/ [A-Z]+/ count-matches ] unit-test @@ -431,7 +431,7 @@ IN: regexp-tests [ f ] [ "a bar b" R/ foo/ re-contains? ] unit-test [ t ] [ "foo" R/ foo/ re-contains? ] unit-test -[ { "foo" "fxx" "fab" } ] [ "fab fxx foo" R/ f../r all-matches [ >string ] map ] unit-test +[ { "foo" "fxx" "fab" } ] [ "fab fxx foo" R/ f../r all-matching-subseqs ] unit-test ! [ t ] [ "foo" "\\bfoo\\b" matches? ] unit-test ! [ t ] [ "afoob" "\\Bfoo\\B" matches? ] unit-test diff --git a/basis/regexp/regexp.factor b/basis/regexp/regexp.factor index 7f27a13104..e385c515ef 100644 --- a/basis/regexp/regexp.factor +++ b/basis/regexp/regexp.factor @@ -49,93 +49,90 @@ M: reverse-regexp end/start drop length 1- -1 swap ; PRIVATE> : matches? ( string regexp -- ? ) - [ end/start ] 2keep [ check-string ] dip + [ end/start ] 2keep match-index-from - [ swap = ] [ drop f ] if* ; + [ = ] [ drop f ] if* ; ( i string quot: ( i string -- i seq j ) reverse? -- match/f ) + i string quot call dup [| j | + j i j + reverse? [ swap [ 1+ ] bi@ ] when + string match boa + ] when ; inline : search-range ( i string reverse? -- seq ) [ drop 0 [a,b] ] [ length [a,b) ] if ; inline -: match>result ( match reverse? -- i start end string ) - over [ - [ [ i>> ] [ j>> tuck ] [ seq>> ] tri ] dip - [ [ swap [ 1+ ] bi@ ] dip ] when - ] [ 2drop f f f f ] if ; inline +: match>result ( match -- i start end string ) + dup + [ { [ i>> ] [ start>> ] [ end>> ] [ string>> ] } cleave ] + [ drop f f f f ] + if ; inline -:: next-match ( i string quot reverse? -- i start end string ) +:: next-match ( i string quot reverse? -- i start end ? ) i string reverse? search-range - [ string quot match-slice ] map-find drop - reverse? match>result ; inline + [ string quot reverse? ] map-find drop + match>result ; inline -: do-next-match ( i string regexp -- i start end string ) +: do-next-match ( i string regexp -- i start end ? ) dup next-match>> - execute-unsafe( i string regexp -- i start end string ) ; + execute-unsafe( i string regexp -- i start end ? ) ; inline -: next-slice ( i string regexp -- i/f slice/f ) - do-next-match - [ slice boa ] [ drop ] if* ; inline +:: (each-match) ( i string regexp quot: ( start end string -- ) -- ) + i string regexp do-next-match [| i' start end | + start end string quot call + i' string regexp quot (each-match) + ] [ 3drop ] if ; inline recursive PRIVATE> -TUPLE: match-iterator - { string read-only } - { regexp read-only } - { i read-only } - { value read-only } ; +: prepare-match-iterator ( string regexp -- i string regexp ) + [ check-string ] dip [ end/start nip ] 2keep ; inline -: iterate ( iterator -- iterator'/f ) - dup - [ i>> ] [ string>> ] [ regexp>> ] tri next-slice - [ [ [ string>> ] [ regexp>> ] bi ] 2dip match-iterator boa ] - [ 2drop f ] if* ; +: each-match ( string regexp quot: ( start end string -- ) -- ) + [ prepare-match-iterator ] dip (each-match) ; inline -: value ( iterator/f -- value/f ) - dup [ value>> ] when ; +: map-matches ( string regexp quot: ( start end string -- obj ) -- seq ) + accumulator [ each-match ] dip >array ; inline -: ( string regexp -- match-iterator ) - [ check-string ] dip - 2dup end/start nip f - match-iterator boa - iterate ; inline +: all-matching-slices ( string regexp -- seq ) + [ slice boa ] map-matches ; -: all-matches ( string regexp -- seq ) - [ iterate ] follow [ value ] map ; +: all-matching-subseqs ( string regexp -- seq ) + [ subseq ] map-matches ; : count-matches ( string regexp -- n ) - all-matches length ; + [ 0 ] 2dip [ 3drop 1+ ] each-match ; > ] map 0 prefix - slices [ from>> ] map string length suffix - [ string ] 2map ; +:: (re-split) ( string regexp quot -- new-slices ) + 0 string regexp [| end start end' string | + end' ! leave it on the stack for the next iteration + end start string quot call + ] map-matches + ! Final chunk + swap string length string quot call suffix ; inline PRIVATE> : first-match ( string regexp -- slice/f ) - value ; + [ prepare-match-iterator do-next-match ] [ drop ] 2bi + '[ _ slice boa nip ] [ 3drop f ] if ; : re-contains? ( string regexp -- ? ) - first-match >boolean ; - -: re-split1 ( string regexp -- before after/f ) - dupd first-match [ 1array split-slices first2 ] [ f ] if* ; + prepare-match-iterator do-next-match [ 3drop ] dip >boolean ; : re-split ( string regexp -- seq ) - dupd all-matches split-slices ; + [ slice boa ] (re-split) ; : re-replace ( string regexp replacement -- result ) - [ re-split ] dip join ; + [ [ subseq ] (re-split) ] dip join ; Date: Wed, 11 Mar 2009 16:36:53 -0500 Subject: [PATCH 04/10] Get rid of match tuple --- basis/regexp/regexp.factor | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/basis/regexp/regexp.factor b/basis/regexp/regexp.factor index e385c515ef..778421b20d 100644 --- a/basis/regexp/regexp.factor +++ b/basis/regexp/regexp.factor @@ -56,28 +56,20 @@ PRIVATE> ( i string quot: ( i string -- i seq j ) reverse? -- match/f ) - i string quot call dup [| j | +:: (next-match) ( i string regexp word: ( i string -- j ) reverse? -- i start end ? ) + i string regexp word execute dup [| j | j i j reverse? [ swap [ 1+ ] bi@ ] when - string match boa - ] when ; inline + string + ] [ drop f f f f ] if ; inline : search-range ( i string reverse? -- seq ) [ drop 0 [a,b] ] [ length [a,b) ] if ; inline -: match>result ( match -- i start end string ) - dup - [ { [ i>> ] [ start>> ] [ end>> ] [ string>> ] } cleave ] - [ drop f f f f ] - if ; inline - -:: next-match ( i string quot reverse? -- i start end ? ) +:: next-match ( i string regexp word reverse? -- i start end ? ) + f f f f i string reverse? search-range - [ string quot reverse? ] map-find drop - match>result ; inline + [ [ 2drop 2drop ] dip string regexp word reverse? (next-match) dup ] find 2drop ; inline : do-next-match ( i string regexp -- i start end ? ) dup next-match>> @@ -89,11 +81,11 @@ TUPLE: match { i read-only } { start read-only } { end read-only } { string read i' string regexp quot (each-match) ] [ 3drop ] if ; inline recursive -PRIVATE> - : prepare-match-iterator ( string regexp -- i string regexp ) [ check-string ] dip [ end/start nip ] 2keep ; inline +PRIVATE> + : each-match ( string regexp quot: ( start end string -- ) -- ) [ prepare-match-iterator ] dip (each-match) ; inline @@ -165,7 +157,7 @@ DEFER: compile-next-match dup '[ dup \ next-initial-word = [ drop _ [ compile-regexp dfa>> ] [ reverse-regexp? ] bi - '[ _ '[ _ _ execute ] _ next-match ] + '[ _ _ next-match ] (( i string regexp -- i start end string )) simple-define-temp ] when ] change-next-match ; From 18ca3b34190c71de6af50443bec5c4daa5e49d44 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 11 Mar 2009 16:53:44 -0500 Subject: [PATCH 05/10] Add some declarations so that next-match is faster --- basis/regexp/regexp.factor | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/basis/regexp/regexp.factor b/basis/regexp/regexp.factor index 778421b20d..ab6accb120 100644 --- a/basis/regexp/regexp.factor +++ b/basis/regexp/regexp.factor @@ -1,10 +1,10 @@ ! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors combinators kernel math sequences strings sets -assocs prettyprint.backend prettyprint.custom make lexer -namespaces parser arrays fry locals regexp.parser splitting -sorting regexp.ast regexp.negation regexp.compiler words -call call.private math.ranges ; +USING: accessors combinators kernel kernel.private math sequences +sequences.private strings sets assocs prettyprint.backend +prettyprint.custom make lexer namespaces parser arrays fry locals +regexp.parser splitting sorting regexp.ast regexp.negation +regexp.compiler words call call.private math.ranges ; IN: regexp TUPLE: regexp @@ -56,7 +56,7 @@ PRIVATE> ] [ drop f f f f ] if ; inline : search-range ( i string reverse? -- seq ) - [ drop 0 [a,b] ] [ length [a,b) ] if ; inline + [ drop dup 1+ -1 ] [ length 1 ] if range boa ; inline :: next-match ( i string regexp word reverse? -- i start end ? ) f f f f @@ -157,7 +157,7 @@ DEFER: compile-next-match dup '[ dup \ next-initial-word = [ drop _ [ compile-regexp dfa>> ] [ reverse-regexp? ] bi - '[ _ _ next-match ] + '[ { array-capacity string regexp } declare _ _ next-match ] (( i string regexp -- i start end string )) simple-define-temp ] when ] change-next-match ; From 034bda42caede36f3afe415940cabd0331caaef3 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 11 Mar 2009 17:06:45 -0500 Subject: [PATCH 06/10] Inline initial state in next-match loop --- basis/regexp/regexp.factor | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/basis/regexp/regexp.factor b/basis/regexp/regexp.factor index 22c7e2474f..29f7e3e84e 100644 --- a/basis/regexp/regexp.factor +++ b/basis/regexp/regexp.factor @@ -51,8 +51,8 @@ PRIVATE> : search-range ( i string reverse? -- seq ) [ drop dup 1+ -1 ] [ length 1 ] if range boa ; inline -:: next-match ( i string regexp word reverse? -- i start end ? ) +:: next-match ( i string regexp quot: ( i string regexp -- j ) reverse? -- i start end ? ) f f f f i string reverse? search-range - [ [ 2drop 2drop ] dip string regexp word reverse? (next-match) dup ] find 2drop ; inline + [ [ 2drop 2drop ] dip string regexp quot reverse? (next-match) dup ] find 2drop ; inline : do-next-match ( i string regexp -- i start end ? ) dup next-match>> @@ -151,7 +151,7 @@ DEFER: compile-next-match : compile-next-match ( regexp -- regexp ) dup '[ dup \ next-initial-word = [ - drop _ [ compile-regexp dfa>> ] [ reverse-regexp? ] bi + drop _ [ compile-regexp dfa>> def>> ] [ reverse-regexp? ] bi '[ { array-capacity string regexp } declare _ _ next-match ] (( i string regexp -- i start end string )) simple-define-temp ] when From 667eca941099c6cce01d8dde4220dc9595d6d843 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 11 Mar 2009 17:33:54 -0500 Subject: [PATCH 07/10] Fix unit tests and help lint for 'see' move --- basis/delegate/delegate-tests.factor | 2 +- .../help/definitions/definitions-tests.factor | 2 +- basis/inspector/inspector-tests.factor | 2 +- basis/locals/locals-tests.factor | 2 +- basis/macros/macros-tests.factor | 2 +- basis/memoize/memoize-tests.factor | 2 +- basis/opengl/textures/textures-tests.factor | 22 +++++++++++-------- basis/ui/gadgets/panes/panes-tests.factor | 2 +- core/classes/singleton/singleton-tests.factor | 2 +- core/classes/tuple/tuple-tests.factor | 2 +- core/classes/union/union-tests.factor | 2 +- core/generic/standard/standard-tests.factor | 2 +- core/kernel/kernel-docs.factor | 2 +- extra/descriptive/descriptive-tests.factor | 2 +- extra/multi-methods/tests/syntax.factor | 2 +- 15 files changed, 27 insertions(+), 23 deletions(-) diff --git a/basis/delegate/delegate-tests.factor b/basis/delegate/delegate-tests.factor index e2bea82e68..9bf07a5330 100644 --- a/basis/delegate/delegate-tests.factor +++ b/basis/delegate/delegate-tests.factor @@ -1,7 +1,7 @@ USING: delegate kernel arrays tools.test words math definitions compiler.units parser generic prettyprint io.streams.string accessors eval multiline generic.standard delegate.protocols -delegate.private assocs ; +delegate.private assocs see ; IN: delegate.tests TUPLE: hello this that ; diff --git a/basis/help/definitions/definitions-tests.factor b/basis/help/definitions/definitions-tests.factor index d95f6988a2..5d83afae88 100644 --- a/basis/help/definitions/definitions-tests.factor +++ b/basis/help/definitions/definitions-tests.factor @@ -1,6 +1,6 @@ USING: math definitions help.topics help tools.test prettyprint parser io.streams.string kernel source-files -assocs namespaces words io sequences eval accessors ; +assocs namespaces words io sequences eval accessors see ; IN: help.definitions.tests [ ] [ \ + >link see ] unit-test diff --git a/basis/inspector/inspector-tests.factor b/basis/inspector/inspector-tests.factor index 4ce549ac83..3f3e7f13df 100644 --- a/basis/inspector/inspector-tests.factor +++ b/basis/inspector/inspector-tests.factor @@ -8,7 +8,7 @@ f describe H{ } describe H{ } describe -[ "fixnum instance\n" ] [ [ 3 describe ] with-string-writer ] unit-test +[ "fixnum instance\n\n" ] [ [ 3 describe ] with-string-writer ] unit-test [ ] [ H{ } clone inspect ] unit-test diff --git a/basis/locals/locals-tests.factor b/basis/locals/locals-tests.factor index 923f890adf..558fa78494 100644 --- a/basis/locals/locals-tests.factor +++ b/basis/locals/locals-tests.factor @@ -2,7 +2,7 @@ USING: locals math sequences tools.test hashtables words kernel namespaces arrays strings prettyprint io.streams.string parser accessors generic eval combinators combinators.short-circuit combinators.short-circuit.smart math.order math.functions -definitions compiler.units fry lexer words.symbol ; +definitions compiler.units fry lexer words.symbol see ; IN: locals.tests :: foo ( a b -- a a ) a a ; diff --git a/basis/macros/macros-tests.factor b/basis/macros/macros-tests.factor index 7b061ab2f5..7d93ce8a9e 100644 --- a/basis/macros/macros-tests.factor +++ b/basis/macros/macros-tests.factor @@ -1,6 +1,6 @@ IN: macros.tests USING: tools.test macros math kernel arrays -vectors io.streams.string prettyprint parser eval ; +vectors io.streams.string prettyprint parser eval see ; MACRO: see-test ( a b -- c ) + ; diff --git a/basis/memoize/memoize-tests.factor b/basis/memoize/memoize-tests.factor index 168a0061e3..54378bd37e 100644 --- a/basis/memoize/memoize-tests.factor +++ b/basis/memoize/memoize-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007, 2009 Slava Pestov, Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. USING: math kernel memoize tools.test parser generalizations -prettyprint io.streams.string sequences eval namespaces ; +prettyprint io.streams.string sequences eval namespaces see ; IN: memoize.tests MEMO: fib ( m -- n ) diff --git a/basis/opengl/textures/textures-tests.factor b/basis/opengl/textures/textures-tests.factor index 45b1d8f706..7141caa67d 100644 --- a/basis/opengl/textures/textures-tests.factor +++ b/basis/opengl/textures/textures-tests.factor @@ -5,15 +5,19 @@ images kernel namespaces ; IN: opengl.textures.tests [ ] [ - { 3 5 } - RGB - B{ - 1 2 3 4 5 6 7 8 9 - 10 11 12 13 14 15 16 17 18 - 19 20 21 22 23 24 25 26 27 - 28 29 30 31 32 33 34 35 36 - 37 38 39 40 41 42 43 44 45 - } image boa "image" set + T{ image + { dim { 3 5 } } + { component-order RGB } + { bitmap + B{ + 1 2 3 4 5 6 7 8 9 + 10 11 12 13 14 15 16 17 18 + 19 20 21 22 23 24 25 26 27 + 28 29 30 31 32 33 34 35 36 + 37 38 39 40 41 42 43 44 45 + } + } + } "image" set ] unit-test [ diff --git a/basis/ui/gadgets/panes/panes-tests.factor b/basis/ui/gadgets/panes/panes-tests.factor index e486bffd38..2947ce242d 100644 --- a/basis/ui/gadgets/panes/panes-tests.factor +++ b/basis/ui/gadgets/panes/panes-tests.factor @@ -2,7 +2,7 @@ USING: alien ui.gadgets.panes ui.gadgets namespaces kernel sequences io io.styles io.streams.string tools.test prettyprint definitions help help.syntax help.markup help.stylesheet splitting tools.test.ui models math summary -inspector accessors help.topics ; +inspector accessors help.topics see ; IN: ui.gadgets.panes.tests : #children "pane" get children>> length ; diff --git a/core/classes/singleton/singleton-tests.factor b/core/classes/singleton/singleton-tests.factor index 10ddde75ae..d9011ad776 100644 --- a/core/classes/singleton/singleton-tests.factor +++ b/core/classes/singleton/singleton-tests.factor @@ -1,4 +1,4 @@ -USING: kernel classes.singleton tools.test prettyprint io.streams.string ; +USING: kernel classes.singleton tools.test prettyprint io.streams.string see ; IN: classes.singleton.tests [ ] [ SINGLETON: bzzt ] unit-test diff --git a/core/classes/tuple/tuple-tests.factor b/core/classes/tuple/tuple-tests.factor index d221d28da9..f27d24e39d 100644 --- a/core/classes/tuple/tuple-tests.factor +++ b/core/classes/tuple/tuple-tests.factor @@ -4,7 +4,7 @@ namespaces quotations sequences.private classes continuations generic.standard effects classes.tuple classes.tuple.private arrays vectors strings compiler.units accessors classes.algebra calendar prettyprint io.streams.string splitting summary -columns math.order classes.private slots slots.private eval ; +columns math.order classes.private slots slots.private eval see ; IN: classes.tuple.tests TUPLE: rect x y w h ; diff --git a/core/classes/union/union-tests.factor b/core/classes/union/union-tests.factor index 97baf08874..0802c0a2d9 100644 --- a/core/classes/union/union-tests.factor +++ b/core/classes/union/union-tests.factor @@ -4,7 +4,7 @@ tools.test vectors words quotations classes classes.private classes.union classes.mixin classes.predicate classes.algebra vectors definitions source-files compiler.units kernel.private sorting vocabs io.streams.string -eval ; +eval see ; IN: classes.union.tests ! DEFER: bah diff --git a/core/generic/standard/standard-tests.factor b/core/generic/standard/standard-tests.factor index 516d408933..2cd64ac9f4 100644 --- a/core/generic/standard/standard-tests.factor +++ b/core/generic/standard/standard-tests.factor @@ -5,7 +5,7 @@ specialized-arrays.double byte-arrays bit-arrays parser namespaces make quotations stack-checker vectors growable hashtables sbufs prettyprint byte-vectors bit-vectors specialized-vectors.double definitions generic sets graphs assocs -grouping ; +grouping see ; GENERIC: lo-tag-test ( obj -- obj' ) diff --git a/core/kernel/kernel-docs.factor b/core/kernel/kernel-docs.factor index 9c5d6f56ea..c178573a0a 100644 --- a/core/kernel/kernel-docs.factor +++ b/core/kernel/kernel-docs.factor @@ -684,7 +684,7 @@ $nl "This operation is efficient and does not copy the quotation." } { $examples { $example "USING: kernel prettyprint ;" "5 [ . ] curry ." "[ 5 . ]" } - { $example "USING: kernel prettyprint ;" "\\ = [ see ] curry ." "[ \\ = see ]" } + { $example "USING: kernel prettyprint see ;" "\\ = [ see ] curry ." "[ \\ = see ]" } { $example "USING: kernel math prettyprint sequences ;" "{ 1 2 3 } 2 [ - ] curry map ." "{ -1 0 1 }" } } ; diff --git a/extra/descriptive/descriptive-tests.factor b/extra/descriptive/descriptive-tests.factor index 1582ca895d..755c57ceda 100755 --- a/extra/descriptive/descriptive-tests.factor +++ b/extra/descriptive/descriptive-tests.factor @@ -1,4 +1,4 @@ -USING: descriptive kernel math tools.test continuations prettyprint io.streams.string ; +USING: descriptive kernel math tools.test continuations prettyprint io.streams.string see ; IN: descriptive.tests DESCRIPTIVE: divide ( num denom -- fraction ) / ; diff --git a/extra/multi-methods/tests/syntax.factor b/extra/multi-methods/tests/syntax.factor index 597a1cebeb..9d9c80b214 100644 --- a/extra/multi-methods/tests/syntax.factor +++ b/extra/multi-methods/tests/syntax.factor @@ -1,7 +1,7 @@ IN: multi-methods.tests USING: multi-methods tools.test math sequences namespaces system kernel strings definitions prettyprint debugger arrays -hashtables continuations classes assocs accessors ; +hashtables continuations classes assocs accessors see ; GENERIC: first-test From e70748f8f10a2c5ea5a02e9facbd4650b73dbbdd Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Wed, 11 Mar 2009 19:39:35 -0500 Subject: [PATCH 08/10] Redoing class algebra so conjunction works --- basis/regexp/classes/classes-tests.factor | 8 +- basis/regexp/classes/classes.factor | 170 ++++++++++-------- .../combinators/combinators-tests.factor | 4 - basis/regexp/minimize/minimize-tests.factor | 2 +- 4 files changed, 101 insertions(+), 83 deletions(-) diff --git a/basis/regexp/classes/classes-tests.factor b/basis/regexp/classes/classes-tests.factor index 2deb944b61..e2db86f6c1 100644 --- a/basis/regexp/classes/classes-tests.factor +++ b/basis/regexp/classes/classes-tests.factor @@ -6,7 +6,7 @@ IN: regexp.classes.tests ! Class algebra [ f ] [ { 1 2 } ] unit-test -[ T{ or-class f { 2 1 } } ] [ { 1 2 } ] unit-test +[ T{ or-class f { 1 2 } } ] [ { 1 2 } ] unit-test [ 3 ] [ { 1 2 } 3 2array ] unit-test [ CHAR: A ] [ CHAR: A LETTER-class 2array ] unit-test [ CHAR: A ] [ LETTER-class CHAR: A 2array ] unit-test @@ -26,11 +26,13 @@ IN: regexp.classes.tests [ t ] [ { t t } ] unit-test [ T{ primitive-class { class letter-class } } ] [ letter-class dup 2array ] unit-test [ T{ primitive-class { class letter-class } } ] [ letter-class dup 2array ] unit-test -[ T{ or-class { seq { 2 3 1 } } } ] [ { 1 2 } { 2 3 } 2array ] unit-test -[ T{ or-class { seq { 3 2 } } } ] [ { 2 3 } 1 2array ] unit-test +[ T{ or-class { seq { 1 2 3 } } } ] [ { 1 2 } { 2 3 } 2array ] unit-test +[ T{ or-class { seq { 2 3 } } } ] [ { 2 3 } 1 2array ] unit-test [ f ] [ t ] unit-test [ t ] [ f ] unit-test [ f ] [ 1 1 t answer ] unit-test +[ t ] [ { 1 2 } 1 2 3array ] unit-test +[ f ] [ { 1 2 } 1 2 3array ] unit-test ! Making classes into nested conditionals diff --git a/basis/regexp/classes/classes.factor b/basis/regexp/classes/classes.factor index 1959a91cb5..d26ff7f69c 100644 --- a/basis/regexp/classes/classes.factor +++ b/basis/regexp/classes/classes.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors kernel math math.order words combinators locals ascii unicode.categories combinators.short-circuit sequences -fry macros arrays assocs sets classes ; +fry macros arrays assocs sets classes mirrors ; IN: regexp.classes SINGLETONS: any-char any-char-no-nl @@ -110,97 +110,116 @@ M: f class-member? 2drop f ; TUPLE: primitive-class class ; C: primitive-class +TUPLE: not-class class ; + +PREDICATE: not-integer < not-class class>> integer? ; +PREDICATE: not-primitive < not-class class>> primitive-class? ; + +M: not-class class-member? + class>> class-member? not ; + TUPLE: or-class seq ; -TUPLE: not-class class ; +M: or-class class-member? + seq>> [ class-member? ] with any? ; TUPLE: and-class seq ; -GENERIC: combine-and ( class1 class2 -- combined ? ) +M: and-class class-member? + seq>> [ class-member? ] with all? ; -: replace-if-= ( object object -- object ? ) - over = ; - -M: object combine-and replace-if-= ; - -M: t combine-and - drop t ; - -M: f combine-and - nip t ; - -M: not-class combine-and - class>> 2dup = [ 2drop f t ] [ - dup integer? [ - 2dup swap class-member? - [ 2drop f f ] - [ drop t ] if - ] [ 2drop f f ] if - ] if ; - -M: integer combine-and - swap 2dup class-member? [ drop t ] [ 2drop f t ] if ; - -GENERIC: combine-or ( class1 class2 -- combined ? ) - -M: object combine-or replace-if-= ; - -M: t combine-or - nip t ; - -M: f combine-or - drop t ; - -M: not-class combine-or - class>> = [ t t ] [ f f ] if ; - -M: integer combine-or - 2dup swap class-member? [ drop t ] [ 2drop f f ] if ; +DEFER: substitute : flatten ( seq class -- newseq ) '[ dup _ instance? [ seq>> ] [ 1array ] if ] map concat ; inline -: try-combine ( elt1 elt2 quot -- combined/f ? ) - 3dup call [ [ 3drop ] dip t ] [ drop swapd call ] if ; inline - -DEFER: answer - -:: try-cancel ( elt1 elt2 empty -- combined/f ? ) - [ elt1 elt2 empty answer dup elt1 = not ] try-combine ; - -:: prefix-combining ( seq elt quot: ( elt1 elt2 -- combined/f ? ) -- newseq ) - f :> combined! - seq [ elt quot call swap combined! ] find drop - [ seq remove-nth combined prefix ] - [ seq elt prefix ] if* ; inline - -: combine-by ( seq quot -- new-seq ) - { } swap '[ _ prefix-combining ] reduce ; inline - :: seq>instance ( seq empty class -- instance ) seq length { { 0 [ empty ] } { 1 [ seq first ] } - [ drop class new seq >>seq ] + [ drop class new seq { } like >>seq ] } case ; inline -:: combine ( seq quot: ( elt1 elt2 -- combined/f ? ) empty class -- newseq ) - seq class flatten - [ quot try-combine ] combine-by - ! [ empty try-cancel ] combine-by ! This makes the algorithm O(n^4) - empty class seq>instance ; inline +TUPLE: class-partition integers not-integers primitives not-primitives and or other ; + +: partition-classes ( seq -- class-partition ) + prune + [ integer? ] partition + [ not-integer? ] partition + [ primitive-class? ] partition ! extend primitive-class to epsilon tags + [ not-primitive? ] partition + [ and-class? ] partition + [ or-class? ] partition + class-partition boa ; + +: class-partition>seq ( class-partition -- seq ) + make-mirror values concat ; + +: repartition ( partition -- partition' ) + ! This could be made more efficient; only and and or are effected + class-partition>seq partition-classes ; + +: filter-not-integers ( partition -- partition' ) + dup + [ primitives>> ] [ not-primitives>> ] [ or>> ] tri + 3append and-class boa + '[ [ class>> _ class-member? ] filter ] change-not-integers ; + +: answer-ors ( partition -- partition' ) + dup [ not-integers>> ] [ not-primitives>> ] [ primitives>> ] tri 3append + '[ [ _ [ t substitute ] each ] map ] change-or ; + +: contradiction? ( partition -- ? ) + { + [ [ primitives>> ] [ not-primitives>> ] bi intersects? ] + [ other>> f swap member? ] + } 1|| ; + +: make-and-class ( partition -- and-class ) + answer-ors repartition + [ t swap remove ] change-other + dup contradiction? + [ drop f ] + [ filter-not-integers class-partition>seq prune t and-class seq>instance ] if ; : ( seq -- class ) - [ combine-and ] t and-class combine ; + dup and-class flatten partition-classes + dup integers>> length { + { 0 [ nip make-and-class ] } + { 1 [ integers>> first [ '[ _ swap class-member? ] all? ] keep and ] } + [ 3drop f ] + } case ; -M: and-class class-member? - seq>> [ class-member? ] with all? ; +: filter-integers ( partition -- partition' ) + dup + [ primitives>> ] [ not-primitives>> ] [ and>> ] tri + 3append or-class boa + '[ [ _ class-member? not ] filter ] change-integers ; + +: answer-ands ( partition -- partition' ) + dup [ integers>> ] [ not-primitives>> ] [ primitives>> ] tri 3append + '[ [ _ [ f substitute ] each ] map ] change-and ; + +: tautology? ( partition -- ? ) + { + [ [ primitives>> ] [ not-primitives>> ] bi intersects? ] + [ other>> t swap member? ] + } 1|| ; + +: make-or-class ( partition -- and-class ) + answer-ands repartition + [ f swap remove ] change-other + dup tautology? + [ drop t ] + [ filter-integers class-partition>seq prune f or-class seq>instance ] if ; : ( seq -- class ) - [ combine-or ] f or-class combine ; - -M: or-class class-member? - seq>> [ class-member? ] with any? ; + dup or-class flatten partition-classes + dup not-integers>> length { + { 0 [ nip make-or-class ] } + { 1 [ not-integers>> first [ class>> '[ _ swap class-member? ] any? ] keep or ] } + [ 3drop t ] + } case ; GENERIC: ( class -- inverse ) @@ -219,9 +238,6 @@ M: or-class M: t drop f ; M: f drop t ; -M: not-class class-member? - class>> class-member? not ; - M: primitive-class class-member? class>> class-member? ; @@ -247,8 +263,12 @@ M: or-class answer M: not-class answer [ class>> ] 2dip answer ; +GENERIC# substitute 1 ( class from to -- new-class ) +M: object substitute answer ; +M: not-class substitute [ ] bi@ answer ; + : assoc-answer ( table question answer -- new-table ) - '[ _ _ answer ] assoc-map + '[ _ _ substitute ] assoc-map [ nip ] assoc-filter ; : assoc-answers ( table questions answer -- new-table ) diff --git a/basis/regexp/combinators/combinators-tests.factor b/basis/regexp/combinators/combinators-tests.factor index ddfd0dcaad..85fa190bfe 100644 --- a/basis/regexp/combinators/combinators-tests.factor +++ b/basis/regexp/combinators/combinators-tests.factor @@ -9,9 +9,6 @@ IN: regexp.combinators.tests [ t t t ] [ "foo" "bar" "baz" [ strings matches? ] tri@ ] unit-test [ f f f ] [ "food" "ibar" "ba" [ strings matches? ] tri@ ] unit-test -USE: multiline -/* -! Why is conjuction broken? : conj ( -- regexp ) { R' .*a' R' b.*' } ; @@ -22,7 +19,6 @@ USE: multiline [ f ] [ "bljhasflsda" conj matches? ] unit-test [ t ] [ "bsdfdfs" conj matches? ] unit-test [ t ] [ "fsfa" conj matches? ] unit-test -*/ [ f f ] [ "" "hi" [ matches? ] bi@ ] unit-test [ t t ] [ "" "hi" [ matches? ] bi@ ] unit-test diff --git a/basis/regexp/minimize/minimize-tests.factor b/basis/regexp/minimize/minimize-tests.factor index a7a9b50327..17a1d51b88 100644 --- a/basis/regexp/minimize/minimize-tests.factor +++ b/basis/regexp/minimize/minimize-tests.factor @@ -54,5 +54,5 @@ IN: regexp.minimize.tests [ [ ] [ ] while-changes ] must-infer -[ H{ { T{ or-class f { 1 2 } } 3 } { 4 5 } } ] +[ H{ { T{ or-class f { 2 1 } } 3 } { 4 5 } } ] [ H{ { 1 3 } { 2 3 } { 4 5 } } combine-state-transitions ] unit-test From 03f048cce9c0ed0e5ce37b078983ea14657d8897 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 11 Mar 2009 21:51:39 -0500 Subject: [PATCH 09/10] Add a couple of must-infer tests --- basis/html/components/components-tests.factor | 2 ++ basis/xmode/code2html/code2html-tests.factor | 2 ++ 2 files changed, 4 insertions(+) diff --git a/basis/html/components/components-tests.factor b/basis/html/components/components-tests.factor index 410c3ce223..0b85455c2e 100644 --- a/basis/html/components/components-tests.factor +++ b/basis/html/components/components-tests.factor @@ -4,6 +4,8 @@ io.streams.null accessors inspector html.streams html.components html.forms namespaces xml.writer ; +\ render must-infer + [ ] [ begin-form ] unit-test [ ] [ 3 "hi" set-value ] unit-test diff --git a/basis/xmode/code2html/code2html-tests.factor b/basis/xmode/code2html/code2html-tests.factor index c0b8a1b560..241ab7ff75 100644 --- a/basis/xmode/code2html/code2html-tests.factor +++ b/basis/xmode/code2html/code2html-tests.factor @@ -3,6 +3,8 @@ USING: xmode.code2html xmode.catalog tools.test multiline splitting memoize kernel io.streams.string xml.writer ; +\ htmlize-file must-infer + [ ] [ \ (load-mode) reset-memoized ] unit-test [ ] [ From 1ca2e8196be8a9f1d681e73c0773717455305a11 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Wed, 11 Mar 2009 22:04:47 -0500 Subject: [PATCH 10/10] Making regexp generate less class algebra --- basis/regexp/compiler/compiler.factor | 11 +---------- basis/regexp/disambiguate/disambiguate.factor | 5 ++--- basis/regexp/minimize/minimize.factor | 3 ++- .../transition-tables/transition-tables.factor | 12 ++++++++++++ 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/basis/regexp/compiler/compiler.factor b/basis/regexp/compiler/compiler.factor index c837df0f0f..186d683f82 100644 --- a/basis/regexp/compiler/compiler.factor +++ b/basis/regexp/compiler/compiler.factor @@ -77,17 +77,8 @@ C: box : literals>cases ( literal-transitions -- case-body ) [ execution-quot ] assoc-map ; -: expand-one-or ( or-class transition -- alist ) - [ seq>> ] dip '[ _ 2array ] map ; - -: expand-or ( alist -- new-alist ) - [ - first2 over or-class? - [ expand-one-or ] [ 2array 1array ] if - ] map concat ; - : split-literals ( transitions -- case default ) - >alist expand-or [ first integer? ] partition + { } assoc-like [ first integer? ] partition [ [ literals>cases ] keep ] dip non-literals>dispatch ; :: step ( last-match index str quot final? direction -- last-index/f ) diff --git a/basis/regexp/disambiguate/disambiguate.factor b/basis/regexp/disambiguate/disambiguate.factor index eac9c7e81d..67b1503f9b 100644 --- a/basis/regexp/disambiguate/disambiguate.factor +++ b/basis/regexp/disambiguate/disambiguate.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2009 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. USING: kernel accessors regexp.classes math.bits assocs sequences -arrays sets regexp.dfa math fry regexp.minimize regexp.ast ; +arrays sets regexp.dfa math fry regexp.minimize regexp.ast regexp.transition-tables ; IN: regexp.disambiguate TUPLE: parts in out ; @@ -32,9 +32,8 @@ TUPLE: parts in out ; : preserving-epsilon ( state-transitions quot -- new-state-transitions ) [ [ drop tagged-epsilon? ] assoc-filter ] bi assoc-union H{ } assoc-like ; inline - : disambiguate ( nfa -- nfa ) - [ + expand-ors [ dup new-transitions '[ [ _ swap '[ _ get-transitions ] assoc-map diff --git a/basis/regexp/minimize/minimize.factor b/basis/regexp/minimize/minimize.factor index bdb53c51cb..1885144e6c 100644 --- a/basis/regexp/minimize/minimize.factor +++ b/basis/regexp/minimize/minimize.factor @@ -96,4 +96,5 @@ IN: regexp.minimize clone number-states combine-states - combine-transitions ; + combine-transitions + expand-ors ; diff --git a/basis/regexp/transition-tables/transition-tables.factor b/basis/regexp/transition-tables/transition-tables.factor index 48e84d372c..3c33ae8846 100644 --- a/basis/regexp/transition-tables/transition-tables.factor +++ b/basis/regexp/transition-tables/transition-tables.factor @@ -47,3 +47,15 @@ TUPLE: transition-table transitions start-state final-states ; [ '[ _ condition-at ] change-start-state ] [ '[ [ _ at ] map-set ] change-final-states ] [ '[ _ number-transitions ] change-transitions ] tri ; + +: expand-one-or ( or-class transition -- alist ) + [ seq>> ] dip '[ _ 2array ] map ; + +: expand-or ( state-transitions -- new-transitions ) + >alist [ + first2 over or-class? + [ expand-one-or ] [ 2array 1array ] if + ] map concat >hashtable ; + +: expand-ors ( transition-table -- transition-table ) + [ [ expand-or ] assoc-map ] change-transitions ;