break vector compare intrinsics into %compare, %or, and %not instructions that map directly to cpu instructions

2009-10-07 15:27:03 -05:00 · 2009-10-07 15:27:03 -05:00 · dd691a61e8
parent f2c9eb79e2
commit dd691a61e8
5 changed files with 86 additions and 50 deletions
--- a/basis/compiler/cfg/instructions/instructions.factor
+++ b/basis/compiler/cfg/instructions/instructions.factor
@ -328,7 +328,6 @@ literal: rep ;
 PURE-INSN: ##compare-vector
 def: dst
 use: src1 src2
-temp: temp
 literal: rep cc ;

 PURE-INSN: ##test-vector
@ -816,7 +815,6 @@ UNION: kill-vreg-insn
 UNION: def-is-use-insn
 ##box-alien
 ##box-displaced-alien
-##compare-vector
 ##string-nth
 ##unbox-any-c-ptr ;

--- a/basis/compiler/cfg/intrinsics/intrinsics.factor
+++ b/basis/compiler/cfg/intrinsics/intrinsics.factor
@ -177,12 +177,12 @@ IN: compiler.cfg.intrinsics
        { math.vectors.simd.intrinsics:(simd-vor) [ [ ^^or-vector ] emit-binary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-vxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-vnot) [ [ generate-not-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-v<=) [ [ cc<= ^^compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-v<) [ [ cc< ^^compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-v=) [ [ cc= ^^compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-v>) [ [ cc> ^^compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-v>=) [ [ cc>= ^^compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vunordered?) [ [ cc/<>= ^^compare-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-v<=) [ [ cc<= generate-compare-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-v<) [ [ cc< generate-compare-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-v=) [ [ cc= generate-compare-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-v>) [ [ cc> generate-compare-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-v>=) [ [ cc>= generate-compare-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-vunordered?) [ [ cc/<>= generate-compare-vector ] emit-binary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-vany?) [ [ vcc-any ^^test-vector ] emit-unary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-vall?) [ [ vcc-all ^^test-vector ] emit-unary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-vnone?) [ [ vcc-none ^^test-vector ] emit-unary-vector-op ] }
--- a/basis/compiler/cfg/intrinsics/simd/simd.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd.factor
@ -122,6 +122,28 @@ MACRO: if-literals-match ( quots -- )
    [ ^^not-vector ]
    [ [ ^^fill-vector ] [ ^^xor-vector ] bi ] if ;

+! Emit one ^^compare-vector for a single { cc swap? } pair. When swap? is
+! true the two source operands are handed to the instruction in reverse
+! order; rep and cc are passed through unchanged.
+:: (generate-compare-vector) ( src1 src2 rep {cc,swap} -- dst )
+    {cc,swap} first2 :> reversed? :> cond
+    reversed? [ src2 src1 ] [ src1 src2 ] if
+    rep cond ^^compare-vector ;
+
+! Build a vector comparison for orig-cc out of the primitive compares the
+! backend reports through %compare-vector-ccs. That hook yields a sequence
+! of { cc swap? } pairs plus a flag saying whether the combined result has
+! to be complemented.
+!   - no pairs: the result is a constant vector (all bits set when the
+!     complement flag is on, all zero otherwise);
+!   - otherwise: the first pair is compared, every further pair is compared
+!     and OR-ed into the running result, and the result is finally inverted
+!     if the complement flag is on.
+:: generate-compare-vector ( src1 src2 rep orig-cc -- dst )
+    rep orig-cc %compare-vector-ccs :> complement? :> pairs
+
+    pairs empty? [
+        rep complement? [ ^^fill-vector ] [ ^^zero-vector ] if
+    ] [
+        pairs unclip :> head-pair :> tail-pairs
+        src1 src2 rep head-pair (generate-compare-vector) :> seed
+
+        ! Fold the remaining pairs: compare, then OR into the accumulator.
+        tail-pairs seed [
+            [ src1 src2 rep ] dip (generate-compare-vector)
+            rep ^^or-vector
+        ] reduce
+
+        complement? [ rep generate-not-vector ] when
+    ] if ;
+
 :: generate-unpack-vector-head ( src rep -- dst )
    {
        {
--- a/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@ -1,7 +1,7 @@
 ! Copyright (C) 2006, 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays generic kernel kernel.private math
-memory namespaces make sequences layouts system hashtables
+USING: accessors arrays assocs generic kernel kernel.private
+math memory namespaces make sequences layouts system hashtables
 classes alien byte-arrays combinators words sets fry ;
 IN: cpu.architecture

@ -95,6 +95,18 @@ double-rep
 vector-rep
 scalar-rep ;

+! Map an unsigned representation to the signed representation listed next
+! to it in the table below. A rep that has no entry in the table is returned
+! unchanged.
+: unsign-rep ( rep -- rep' )
+    dup {
+        { uint-4-rep           int-4-rep }
+        { ulonglong-2-rep      longlong-2-rep }
+        { ushort-8-rep         short-8-rep }
+        { uchar-16-rep         char-16-rep }
+        { uchar-scalar-rep     char-scalar-rep }
+        { ushort-scalar-rep    short-scalar-rep }
+        { uint-scalar-rep      int-scalar-rep }
+        { ulonglong-scalar-rep longlong-scalar-rep }
+    } at* [ nip ] [ drop ] if ;
+
 ! Register classes
 SINGLETONS: int-regs float-regs ;

@ -239,7 +251,7 @@ HOOK: %unpack-vector-head cpu ( dst src rep -- )
 HOOK: %unpack-vector-tail cpu ( dst src rep -- )
 HOOK: %integer>float-vector cpu ( dst src rep -- )
 HOOK: %float>integer-vector cpu ( dst src rep -- )
-HOOK: %compare-vector cpu ( dst src1 src2 temp rep cc -- )
+HOOK: %compare-vector cpu ( dst src1 src2 rep cc -- )
 HOOK: %test-vector cpu ( dst src1 temp rep vcc -- )
 HOOK: %test-vector-branch cpu ( label src1 temp rep vcc -- )
 HOOK: %add-vector cpu ( dst src1 src2 rep -- )
@ -285,6 +297,7 @@ HOOK: %unpack-vector-tail-reps cpu ( -- reps )
 HOOK: %integer>float-vector-reps cpu ( -- reps )
 HOOK: %float>integer-vector-reps cpu ( -- reps )
 HOOK: %compare-vector-reps cpu ( cc -- reps )
+HOOK: %compare-vector-ccs cpu ( rep cc -- {cc,swap?}s not? )
 HOOK: %test-vector-reps cpu ( -- reps )
 HOOK: %add-vector-reps cpu ( -- reps )
 HOOK: %saturated-add-vector-reps cpu ( -- reps )
--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@ -588,14 +588,6 @@ M: x86 %fill-vector-reps
        { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
    } available-reps ;

-: unsign-rep ( rep -- rep' )
-    {
-        { uint-4-rep      int-4-rep }
-        { ulonglong-2-rep longlong-2-rep }
-        { ushort-8-rep    short-8-rep }
-        { uchar-16-rep    char-16-rep }
-    } ?at drop ;
-
 ! M:: x86 %broadcast-vector ( dst src rep -- )
 !     rep unsign-rep {
 !         { float-4-rep [
@ -820,14 +812,10 @@ M: x86 %float>integer-vector-reps
        { sse2? { float-4-rep } }
    } available-reps ;

-:: compare-float-v-operands ( dst src1 src2 temp rep cc -- dst' src' rep cc' )
-    cc { cc> cc>= cc/> cc/>= } member?
-    [ dst src2 src1 rep two-operand rep cc swap-cc ]
-    [ dst src1 src2 rep two-operand rep cc         ] if ;
 : (%compare-float-vector) ( dst src rep double single -- )
    [ double-2-rep eq? ] 2dip if ; inline
-: %compare-float-vector ( dst src1 src2 temp rep cc -- )
-    compare-float-v-operands {
+: %compare-float-vector ( dst src rep cc -- )
+    {
        { cc<    [ [ CMPLTPD    ] [ CMPLTPS    ] (%compare-float-vector) ] }
        { cc<=   [ [ CMPLEPD    ] [ CMPLEPS    ] (%compare-float-vector) ] }
        { cc=    [ [ CMPEQPD    ] [ CMPEQPS    ] (%compare-float-vector) ] }
@ -838,16 +826,6 @@ M: x86 %float>integer-vector-reps
        { cc/<>= [ [ CMPUNORDPD ] [ CMPUNORDPS ] (%compare-float-vector) ] }
    } case ;

-:: compare-int-v-operands ( dst src1 src2 temp rep cc -- not-dst/f cmp-dst src' rep cc' )
-    cc order-cc :> occ
-    occ {
-        { cc=  [ f   dst  src1 src2 rep two-operand rep cc= ] }
-        { cc/= [ dst temp src1 src2 rep two-operand rep cc= ] }
-        { cc<= [ dst temp src1 src2 rep two-operand rep cc> ] }
-        { cc<  [ f   dst  src2 src1 rep two-operand rep cc> ] }
-        { cc>  [ f   dst  src1 src2 rep two-operand rep cc> ] }
-        { cc>= [ dst temp src2 src1 rep two-operand rep cc> ] }
-    } case ;
 :: (%compare-int-vector) ( dst src rep int64 int32 int16 int8 -- )
    rep unsign-rep :> rep'
    dst src rep' {
@ -856,18 +834,14 @@ M: x86 %float>integer-vector-reps
        { short-8-rep    [ int16 call ] }
        { char-16-rep    [ int8  call ] }
    } case ; inline
-:: (%not-vector) ( dst src rep -- )
-    dst rep %fill-vector
-    dst dst src rep %xor-vector ;
-:: %compare-int-vector ( dst src1 src2 temp rep cc -- )
-    dst src1 src2 temp rep cc compare-int-v-operands :> cc' :> rep :> src' :> cmp-dst :> not-dst
-    cmp-dst src' rep cc' {
+! Emit a packed integer compare of dst against src. Only cc= and cc> have a
+! case here; each case supplies four instruction quotations (64-, 32-, 16-
+! and 8-bit element variants) and (%compare-int-vector) picks one based on
+! rep. Other condition codes are expected to have been rewritten in terms of
+! these two before reaching this word (see %compare-int-vector-ccs).
+: %compare-int-vector ( dst src rep cc -- )
+    {
         { cc= [ [ PCMPEQQ ] [ PCMPEQD ] [ PCMPEQW ] [ PCMPEQB ] (%compare-int-vector) ] }
         { cc> [ [ PCMPGTQ ] [ PCMPGTD ] [ PCMPGTW ] [ PCMPGTB ] (%compare-int-vector) ] }
-    } case
-    not-dst [ cmp-dst rep (%not-vector) ] when* ;
+    } case ;

-M: x86 %compare-vector ( dst src1 src2 temp rep cc -- )
+! x86 implementation of %compare-vector. The ( dst src1 src2 rep ) part of
+! the inputs is first reduced to ( dst src ) by two-operand, with rep kept
+! and cc left on top, giving ( dst src rep cc ). The word then dispatches on
+! whether rep is a float vector rep.
+! NOTE(review): two-operand is defined outside this view; presumably it
+! arranges for dst to hold src1 so a destructive two-operand instruction can
+! be used -- confirm.
+M: x86 %compare-vector ( dst src1 src2 rep cc -- )
+    [ [ two-operand ] keep ] dip
     over float-vector-rep?
     [ %compare-float-vector ]
     [ %compare-int-vector ] if ;
@ -878,11 +852,6 @@ M: x86 %compare-vector ( dst src1 src2 temp rep cc -- )
        { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
        { sse4.1? { longlong-2-rep ulonglong-2-rep } }
    } available-reps ;
-: %compare-vector-unord-reps ( -- reps )
-    {
-        { sse? { float-4-rep } }
-        { sse2? { double-2-rep } }
-    } available-reps ;
 : %compare-vector-ord-reps ( -- reps )
    {
        { sse? { float-4-rep } }
@ -893,10 +862,44 @@ M: x86 %compare-vector ( dst src1 src2 temp rep cc -- )
 M: x86 %compare-vector-reps
    {
        { [ dup { cc= cc/= } memq? ] [ drop %compare-vector-eq-reps ] }
-        { [ dup { cc<>= cc/<>= } memq? ] [ drop %compare-vector-unord-reps ] }
        [ drop %compare-vector-ord-reps ]
    } cond ;

+! Decompose a floating-point condition code into the primitive compares
+! that %compare-float-vector can emit. The result is a sequence of
+! { cc swap? } pairs plus a not? flag: generate-compare-vector emits one
+! compare per pair (with the operands reversed when swap? is t), ORs the
+! results together and complements the whole thing when not? is t.
+!
+! Greater-than style codes are expressed as the matching less-than code
+! with swapped operands. cc<> needs two compares (a<b OR b<a). cc/<> is
+! the negation of cc<>, i.e. "equal or unordered", so it is cc= OR cc/<>=;
+! using cc/= there would yield "not equal or unordered", the wrong result
+! for every ordered pair of operands.
+: %compare-float-vector-ccs ( cc -- ccs not? )
+    {
+        { cc<    [ { { cc<  f   }              } f ] }
+        { cc<=   [ { { cc<= f   }              } f ] }
+        { cc>    [ { { cc<  t   }              } f ] }
+        { cc>=   [ { { cc<= t   }              } f ] }
+        { cc=    [ { { cc=  f   }              } f ] }
+        { cc<>   [ { { cc<  f   } { cc<    t } } f ] }
+        { cc<>=  [ { { cc<>= f  }              } f ] }
+        { cc/<   [ { { cc/<  f  }              } f ] }
+        { cc/<=  [ { { cc/<= f  }              } f ] }
+        { cc/>   [ { { cc/<  t  }              } f ] }
+        { cc/>=  [ { { cc/<= t  }              } f ] }
+        { cc/=   [ { { cc/=  f  }              } f ] }
+        { cc/<>  [ { { cc=   f  } { cc/<>= f } } f ] }
+        { cc/<>= [ { { cc/<>= f }              } f ] }
+    } case ;
+
+! Decompose an integer condition code into { cc swap? } pairs plus a not?
+! flag, using only the cc= and cc> primitives. The incoming cc is first
+! passed through order-cc; the t and f keys below cover the cases where
+! that word yields a boolean instead of a condition code, and produce an
+! empty pair list (a constant result selected purely by not?).
+: %compare-int-vector-ccs ( cc -- ccs not? )
+    order-cc {
+        ! Directly expressible with one compare.
+        { cc=    [ { { cc= f } } f ] }
+        { cc>    [ { { cc> f } } f ] }
+        { cc<    [ { { cc> t } } f ] }
+        ! Expressed as the complement of one compare.
+        { cc/=   [ { { cc= f } } t ] }
+        { cc<=   [ { { cc> f } } t ] }
+        { cc>=   [ { { cc> t } } t ] }
+        ! No compare needed at all.
+        { t      [ {           } t ] }
+        { f      [ {           } f ] }
+    } case ;
+
+! x86 implementation of %compare-vector-ccs ( rep cc -- ccs not? ): choose
+! the float or the integer decomposition table depending on whether rep is
+! a float vector rep; only cc is passed on to the chosen word.
+M: x86 %compare-vector-ccs
+    [ float-vector-rep? ] dip swap
+    [ %compare-float-vector-ccs ]
+    [ %compare-int-vector-ccs ] if ;
+
 :: %test-vector-mask ( dst temp mask vcc -- )
    vcc {
        { vcc-any    [ dst dst TEST dst temp \ CMOVNE %boolean ] }