Merge branch 'master' of factorcode.org:/git/factor

2008-05-24 02:03:42 -05:00 · 2008-05-24 02:03:42 -05:00 · 69ef4430d5
parent 26b07ea513 f1fe2641b5
commit 69ef4430d5
4 changed files with 141 additions and 75 deletions
--- a/core/inference/inference-tests.factor
+++ b/core/inference/inference-tests.factor
@ -549,10 +549,10 @@ ERROR: custom-error ;
 { 1 0 } [ [ ] map-children ] must-infer-as

 ! Corner case
-[ [ [ f dup ] [ dup ] [ ] unfold ] infer ] must-fail
+! [ [ [ f dup ] [ dup ] [ ] unfold ] infer ] must-fail

-[ [ [ f dup ] [ ] [ ] while ] infer ] must-fail
+! [ [ [ f dup ] [ ] [ ] while ] infer ] must-fail

-: erg's-inference-bug ( -- ) f dup [ erg's-inference-bug ] when ; inline
+! : erg's-inference-bug ( -- ) f dup [ erg's-inference-bug ] when ; inline

-[ [ erg's-inference-bug ] infer ] must-fail
+! [ [ erg's-inference-bug ] infer ] must-fail
--- a/core/kernel/kernel-docs.factor
+++ b/core/kernel/kernel-docs.factor
@ -97,6 +97,7 @@ $nl
 "Certain shuffle words can also be expressed in terms of the spread combinators. Internalizing such identities can help with understanding and writing code using spread combinators:"
 { $code
    ": dip   [ ] bi* ;"
+    ": 2dip  [ ] [ ] tri* ;"
    ""
    ": slip  [ call ] [ ] bi* ;"
    ": 2slip [ call ] [ ] [ ] tri* ;"
@ -164,8 +165,9 @@ ARTICLE: "slip-keep-combinators" "The slip and keep combinators"
 { $subsection slip }
 { $subsection 2slip }
 { $subsection 3slip }
-"The dip combinator invokes the quotation at the top of the stack, hiding the value underneath:"
+"The dip combinators invoke the quotation at the top of the stack, hiding the values underneath:"
 { $subsection dip }
+{ $subsection 2dip }
 "The keep combinators invoke a quotation which takes a number of values off the stack, and then they restore those values:"
 { $subsection keep }
 { $subsection 2keep }
@ -876,6 +878,14 @@ HELP: dip
    { $code "[ foo bar ] dip" }
 } ;

+HELP: 2dip
+{ $values { "obj1" object } { "obj2" object } { "quot" quotation } }
+{ $description "Calls " { $snippet "quot" } " with " { $snippet "obj1" } " and " { $snippet "obj2" } " hidden on the retain stack." }
+{ $notes "The following are equivalent:"
+    { $code ">r >r foo bar r> r>" }
+    { $code "[ foo bar ] 2dip" }
+} ;
+
 HELP: while
 { $values { "pred" "a quotation with stack effect " { $snippet "( -- ? )" } } { "body" "a quotation" } { "tail" "a quotation" } }
 { $description "Repeatedly calls " { $snippet "pred" } ". If it yields " { $link f } ", iteration stops, otherwise " { $snippet "body" } " is called. After iteration stops, " { $snippet "tail" } " is called." }
--- a/unmaintained/regexp4/regexp4-tests.factor
+++ b/unmaintained/regexp4/regexp4-tests.factor
@ -155,6 +155,7 @@ IN: regexp4-tests
 [ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
 [ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
 [ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
+[ t ] [ "s" "\\Qs\\E" <regexp> matches? ] unit-test

 [ t ] [ "S" "\\0123" <regexp> matches? ] unit-test
 [ t ] [ "SXY" "\\0123XY" <regexp> matches? ] unit-test
@ -236,12 +237,6 @@ IN: regexp4-tests
    matches?
 ] unit-test

-
-
-
-
-
-
 ! ((A)(B(C)))
 ! 1.  ((A)(B(C)))
 ! 2. (A)
--- a/unmaintained/regexp4/regexp4.factor
+++ b/unmaintained/regexp4/regexp4.factor
@ -4,7 +4,7 @@ USING: accessors arrays assocs combinators kernel math
 sequences namespaces locals combinators.lib state-tables
 math.parser state-parser sets dlists unicode.categories
 math.order quotations shuffle math.ranges splitting
-symbols fry parser ;
+symbols fry parser math.ranges inspector strings ;
 IN: regexp4

 SYMBOLS: eps start-state final-state beginning-of-text
@ -149,8 +149,16 @@ ERROR: unbalanced-brackets ;

 : with0 ( obj n quot -- n quot' ) swapd curry ; inline

-: copy-state ( regexp state n -- )
-    [ copy-state-rows ] with0 with0 times ;
+
+: range>state ( range -- pair )
+    [ from>> ] [ length>> ] bi over - 2array ;
+
+: copy-state ( regexp range n -- )
+    dup zero? [
+        drop range>state over stack>> push apply-question-closure
+    ] [
+        [ copy-state-rows ] with0 with0 times
+    ] if ;

 :: (exactly-n) ( regexp state n -- )
    regexp state n copy-state
@ -160,6 +168,7 @@ ERROR: unbalanced-brackets ;
    >r dup last-state r> 1- (exactly-n) ;

 : exactly-n-concatenated ( regexp state n -- )
+B
    [ (exactly-n) ] 3keep
    nip 1- [ apply-concatenation ] with0 times ;

@ -169,6 +178,9 @@ ERROR: unbalanced-brackets ;
        state regexp stack>> push
        regexp apply-kleene-closure ] ; 

+: peek-last ( regexp -- range )
+    stack>> peek first2 [a,b] ;
+
 : pop-last ( regexp -- range )
    stack>> pop first2 [a,b] ;

@ -191,7 +203,7 @@ ERROR: unbalanced-brackets ;
        [ [ nip at-most-n ] [ at-least-n ] if* ] if
    ] [ drop exactly-n ] if ;

-:: make-nontoken-nfa ( regexp obj -- )
+:: push-single-nfa ( regexp obj -- )
    [let | s0 [ regexp next-state ]
           s1 [ regexp next-state ]
           stack [ regexp stack>> ]
@ -213,18 +225,23 @@ ERROR: unbalanced-brackets ;
 : decimal-digit? ( n -- ? ) CHAR: 0 CHAR: 9 between? ;

 : hex-digit? ( n -- ? )
-    dup decimal-digit? 
-    over CHAR: a CHAR: f between? or 
-    swap CHAR: A CHAR: F between? or ;
+    [
+        [ dup decimal-digit? ]
+        [ dup CHAR: a CHAR: f between? ]
+        [ dup CHAR: A CHAR: F between? ]
+    ] || nip ;

 : control-char? ( n -- ? )
-    dup 0 HEX: 1f between? swap HEX: 7f = or ;
+    [
+        [ dup 0 HEX: 1f between? ]
+        [ dup HEX: 7f = ]
+    ] || nip ;

 : punct? ( n -- ? )
    "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" member? ;

 : c-identifier-char? ( ch -- ? ) 
-    dup alpha? swap CHAR: _ = or ;
+    [ [ dup alpha? ] [ dup CHAR: _ = ] ] || nip ;

 : java-blank? ( n -- ? )
    {   
@ -233,7 +250,7 @@ ERROR: unbalanced-brackets ;
    } member? ;

 : java-printable? ( n -- ? )
-    dup alpha? swap punct? or ;
+    [ [ dup alpha? ] [ dup punct? ] ] || nip ;

 ERROR: bad-character-class obj ;

@ -261,32 +278,26 @@ ERROR: bad-character-class obj ;

 ERROR: bad-octal number ;

-: parse-octal ( regexp -- )
+: parse-octal ( -- n )
    next get-char drop
    3 take oct>
-    dup 255 > [ bad-octal ] when
-    make-nontoken-nfa ;
+    dup 255 > [ bad-octal ] when ;

 ERROR: bad-hex number ;

-: parse-short-hex ( regexp -- )
+: parse-short-hex ( -- n )
    next 2 take hex>
-    dup number? [ bad-hex ] unless
-    make-nontoken-nfa ;
+    dup number? [ bad-hex ] unless ;

-: parse-long-hex ( regexp -- )
-    next 4 take hex>
-    dup number? [ bad-hex ] unless
-    make-nontoken-nfa ;
+: parse-long-hex ( -- n )
+    next 6 take hex>
+    dup number? [ bad-hex ] unless ;

-: parse-control-character ( regexp -- )
-    next get-char make-nontoken-nfa ;
-
-: parse-backreference ( regexp obj -- )
-    2drop ;
+: parse-control-character ( -- n )
+    next get-char ;

 : dot-construction ( regexp -- )
-    [ CHAR: \n = not ] make-nontoken-nfa ;
+    [ CHAR: \n = not ] push-single-nfa ;

 : front-anchor-construction ( regexp -- )
    drop ;
@ -299,32 +310,50 @@ ERROR: bad-hex number ;
    [ get-char CHAR: } = ] take-until
    "," split1 [ [ string>number ] bi@ ] keep >boolean ;

-: parse-escaped ( regexp -- )
-    next get-char {
-        { CHAR: \ [ [ CHAR: \ = ] make-nontoken-nfa ] }
-        { CHAR: t [ [ CHAR: \t = ] make-nontoken-nfa ] }
-        { CHAR: n [ [ CHAR: \n = ] make-nontoken-nfa ] }
-        { CHAR: r [ [ CHAR: \r = ] make-nontoken-nfa ] }
-        { CHAR: f [ [ HEX: c = ] make-nontoken-nfa ] }
-        { CHAR: a [ [ HEX: 7 = ] make-nontoken-nfa ] }
-        { CHAR: e [ [ HEX: 1b = ] make-nontoken-nfa ] }
+TUPLE: character-class members ;
+TUPLE: character-class-range from to ;
+TUPLE: negated-character-class < character-class ;
+TUPLE: negated-character-class-range < character-class-range ;
+TUPLE: intersection-class < character-class ;
+TUPLE: negated-intersection-class < intersection-class ;

-        { CHAR: d [ [ digit? ] make-nontoken-nfa ] }
-        { CHAR: D [ [ digit? not ] make-nontoken-nfa ] }
-        { CHAR: s [ [ java-blank? ] make-nontoken-nfa ] }
-        { CHAR: S [ [ java-blank? not ] make-nontoken-nfa ] }
-        { CHAR: w [ [ c-identifier-char? ] make-nontoken-nfa ] }
-        { CHAR: W [ [ c-identifier-char? not ] make-nontoken-nfa ] }
+GENERIC: character-class-contains? ( obj character-class -- ? )

-        { CHAR: p [ parse-posix-class make-nontoken-nfa ] }
-        { CHAR: P [ parse-posix-class [ not ] compose make-nontoken-nfa ] }
+: parse-escaped-until ( -- seq )
+    [ get-char CHAR: \ = get-next CHAR: E = and ] take-until
+    next ;
+
+: character-class-predicate ( seq -- quot )
+    boa '[ , character-class-contains? ] ;
+
+ERROR: unmatched-escape-sequence ;
+
+: (parse-escaped) ( regexp ? ch -- obj )
+    {
+        { CHAR: \ [ [ CHAR: \ = ] ] }
+        { CHAR: t [ [ CHAR: \t = ] ] }
+        { CHAR: n [ [ CHAR: \n = ] ] }
+        { CHAR: r [ [ CHAR: \r = ] ] }
+        { CHAR: f [ [ HEX: c = ] ] }
+        { CHAR: a [ [ HEX: 7 = ] ] }
+        { CHAR: e [ [ HEX: 1b = ] ] }
+
+        { CHAR: d [ [ digit? ] ] }
+        { CHAR: D [ [ digit? not ] ] }
+        { CHAR: s [ [ java-blank? ] ] }
+        { CHAR: S [ [ java-blank? not ] ] }
+        { CHAR: w [ [ c-identifier-char? ] ] }
+        { CHAR: W [ [ c-identifier-char? not ] ] }
+
+        { CHAR: p [ parse-posix-class ] }
+        { CHAR: P [ parse-posix-class [ not ] compose ] }
        { CHAR: x [ parse-short-hex ] }
        { CHAR: u [ parse-long-hex ] }
        { CHAR: 0 [ parse-octal ] }
        { CHAR: c [ parse-control-character ] }

-        ! { CHAR: Q [ quot til \E ] }
-        ! { CHAR: E [ should be an error, parse this in the Q if exists ] }
+        ! { CHAR: Q [ next parse-escaped-until ] }
+        ! { CHAR: E [ unmatched-escape-sequence ] }

        ! { CHAR: b [ ] } ! a word boundary
        ! { CHAR: B [ ] } ! a non-word boundary
@ -332,34 +361,57 @@ ERROR: bad-hex number ;
        ! { CHAR: G [ ] } ! end of previous match
        ! { CHAR: Z [ ] } ! end of input but for the final terminator, if any
        ! { CHAR: z [ ] } ! end of the input
-        [ dup digit? [ parse-backreference ] [ make-nontoken-nfa ] if ]
+        [ ]
    } case ;

+: parse-escaped ( regexp -- )
+    next get-char (parse-escaped) push-single-nfa ;
+
 : handle-dash ( vector -- vector )
    [ dup dash eq? [ drop CHAR: - ] when ] map ;

+M: object character-class-contains? ( obj1 obj2 -- ? )
+    = ;
+
+M: callable character-class-contains? ( obj1 callable -- ? )
+    call ;
+
+M: character-class character-class-contains? ( obj cc -- ? )
+    members>> [ character-class-contains? ] with find drop >boolean ;
+
+M: negated-character-class character-class-contains? ( obj cc -- ? )
+    call-next-method not ;
+
+M: character-class-range character-class-contains? ( obj cc -- ? )
+    [ from>> ] [ to>> ] bi between?  ;
+
+M: negated-character-class-range character-class-contains? ( obj cc -- ? )
+    call-next-method not ;
+
+M: intersection-class character-class-contains? ( obj cc -- ? )
+    members>> [ character-class-contains? not ] with find drop not ;
+
+M: negated-intersection-class character-class-contains? ( obj cc -- ? )
+    call-next-method not ;
+
 ERROR: unmatched-negated-character-class class ;

-: handle-caret ( vector -- vector ? )
+: handle-caret ( obj -- seq class )
    dup [ length 2 >= ] [ first caret eq? ] bi and [ 
-        rest t
+        rest negated-character-class
    ] [
-        f
+        character-class
    ] if ;

 : make-character-class ( regexp -- )
    left-bracket over stack>> cut-stack
    pick (>>stack)
-    handle-dash
-    handle-caret
-    >r [ dup number? [ '[ dup , = ] ] when ] map
-    [ [ drop t ] 2array ] map [ drop f ] suffix [ cond ] curry r>
-    [ [ not ] compose ] when
-    make-nontoken-nfa ;
+    handle-dash handle-caret
+    character-class-predicate push-single-nfa ;

 : apply-dash ( regexp -- )
    stack>> dup [ pop ] [ pop* ] [ pop ] tri
-    swap '[ dup , , between? ] swap push ;
+    swap character-class-range boa swap push ;

 : apply-dash? ( regexp -- ? )
    stack>> dup length 3 >=
@ -371,7 +423,7 @@ DEFER: parse-character-class
        next get-char
        {
            { CHAR: [ [
-                [ 1+ ] change-bracket-count left-bracket push-stack
+                [ 1+ ] change-bracket-count dup left-bracket push-stack
                parse-character-class
            ] }
            { CHAR: ] [
@ -381,7 +433,7 @@ DEFER: parse-character-class
            { CHAR: - [ dash push-stack ] }
            ! { CHAR: & [ ampersand push-stack ] }
            ! { CHAR: : [ semicolon push-stack ] }
-            { CHAR: \ [ parse-escaped ] }
+            { CHAR: \ [ next get-char (parse-escaped) push-stack ] }
            { f [ unbalanced-brackets ] }
            [ dupd push-stack dup apply-dash? [ apply-dash ] [ drop ] if ]
        } case
@ -393,7 +445,7 @@ DEFER: parse-character-class
 : parse-character-class-second ( regexp -- )
    get-next
    {
-        ! { CHAR: [ [ CHAR: [ push-stack next ] }
+        { CHAR: [ [ CHAR: [ push-stack next ] }
        { CHAR: ] [ CHAR: ] push-stack next ] }
        { CHAR: - [ CHAR: - push-stack next ] }
        [ 2drop ]
@ -403,7 +455,7 @@ DEFER: parse-character-class
    get-next
    {
        { CHAR: ^ [ caret dupd push-stack next parse-character-class-second ] }
-        ! { CHAR: [ [ CHAR: [ push-stack next ] }
+        { CHAR: [ [ CHAR: [ push-stack next ] }
        { CHAR: ] [ CHAR: ] push-stack next ] }
        { CHAR: - [ CHAR: - push-stack next ] }
        [ 2drop ]
@ -431,7 +483,6 @@ ERROR: unsupported-token token ;
            [ 1+ ] change-bracket-count parse-character-class
        ] }
        ! { CHAR: } [ drop drop "brace" ] }
-        ! { CHAR: ? [ drop ] }
        { CHAR: . [ drop dot-construction ] }
        { beginning-of-text [ push-stack ] }
        { end-of-text [
@ -442,7 +493,7 @@ ERROR: unsupported-token token ;
                [ set-start-state ]
            } cleave
        ] }
-        [ drop make-nontoken-nfa ]
+        [ drop push-single-nfa ]
    } case ;

 : (parse-raw-regexp) ( regexp -- )
@ -639,7 +690,7 @@ TUPLE: dfa-traverser
    dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;

 : match-head ( string regexp -- end )
-    match length>> ;
+    match length>> 1- ;

 ! character classes
 ! TUPLE: range-class from to ;
@ -647,5 +698,15 @@ TUPLE: dfa-traverser

 ! (?:a|b)*  <- does not capture
 ! (a|b)*\1  <- group captured
-! (?!abba)  negative lookahead  matches ababa but not abbaa
+! doesn't advance the current position:
 ! (?=abba)  positive lookahead  matches abbaaa but not abaaa
+! (?!abba)  negative lookahead  matches ababa but not abbaa
+! look behind.   "lookaround"
+
+! : $ ( n -- obj ) groups get nth ;
+! [
+    ! groups bound to scope here
+! ] [
+    ! error or something
+! ] if-match
+! match in a string with  .*foo.*