break vector compare intrinsics into %compare, %or, and %not instructions that map directly to cpu instructions

db4
Joe Groff 2009-10-07 15:27:03 -05:00
parent f2c9eb79e2
commit dd691a61e8
5 changed files with 86 additions and 50 deletions

View File

@ -328,7 +328,6 @@ literal: rep ;
PURE-INSN: ##compare-vector
def: dst
use: src1 src2
temp: temp
literal: rep cc ;
PURE-INSN: ##test-vector
@ -816,7 +815,6 @@ UNION: kill-vreg-insn
UNION: def-is-use-insn
##box-alien
##box-displaced-alien
##compare-vector
##string-nth
##unbox-any-c-ptr ;

View File

@ -177,12 +177,12 @@ IN: compiler.cfg.intrinsics
{ math.vectors.simd.intrinsics:(simd-vor) [ [ ^^or-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vnot) [ [ generate-not-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v<=) [ [ cc<= ^^compare-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v<) [ [ cc< ^^compare-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v=) [ [ cc= ^^compare-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v>) [ [ cc> ^^compare-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v>=) [ [ cc>= ^^compare-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vunordered?) [ [ cc/<>= ^^compare-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v<=) [ [ cc<= generate-compare-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v<) [ [ cc< generate-compare-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v=) [ [ cc= generate-compare-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v>) [ [ cc> generate-compare-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v>=) [ [ cc>= generate-compare-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vunordered?) [ [ cc/<>= generate-compare-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vany?) [ [ vcc-any ^^test-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vall?) [ [ vcc-all ^^test-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vnone?) [ [ vcc-none ^^test-vector ] emit-unary-vector-op ] }

View File

@ -122,6 +122,28 @@ MACRO: if-literals-match ( quots -- )
[ ^^not-vector ]
[ [ ^^fill-vector ] [ ^^xor-vector ] bi ] if ;
! Emits one ^^compare-vector for a single { cc swap? } step.
! When swap? is true the two source operands are passed in reverse order.
:: (generate-compare-vector) ( src1 src2 rep {cc,swap} -- dst )
    {cc,swap} first :> cc
    {cc,swap} second :> swap?
    swap? [ src2 src1 ] [ src1 src2 ] if
    rep cc ^^compare-vector ;
! Builds a vector comparison for orig-cc out of the steps reported by
! %compare-vector-ccs for this rep.
! - No steps: the result is a constant vector (^^fill-vector when not? is
!   true, ^^zero-vector otherwise).
! - Otherwise: each step is emitted with (generate-compare-vector), the step
!   results are combined with ^^or-vector, and the combined result is passed
!   through generate-not-vector when not? is true.
:: generate-compare-vector ( src1 src2 rep orig-cc -- dst )
    rep orig-cc %compare-vector-ccs :> not? :> steps
    steps empty? [
        rep not? [ ^^fill-vector ] [ ^^zero-vector ] if
    ] [
        steps
        [ [ src1 src2 rep ] dip (generate-compare-vector) ]
        [ rep ^^or-vector ]
        map-reduce
        not? [ rep generate-not-vector ] when
    ] if ;
:: generate-unpack-vector-head ( src rep -- dst )
{
{

View File

@ -1,7 +1,7 @@
! Copyright (C) 2006, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays generic kernel kernel.private math
memory namespaces make sequences layouts system hashtables
USING: accessors arrays assocs generic kernel kernel.private
math memory namespaces make sequences layouts system hashtables
classes alien byte-arrays combinators words sets fry ;
IN: cpu.architecture
@ -95,6 +95,18 @@ double-rep
vector-rep
scalar-rep ;
! Looks rep up in a table pairing each unsigned vector/scalar rep with the
! signed rep listed next to it. A rep that is not a key in the table is
! returned unchanged.
: unsign-rep ( rep -- rep' )
    H{
        { uint-4-rep int-4-rep }
        { ulonglong-2-rep longlong-2-rep }
        { ushort-8-rep short-8-rep }
        { uchar-16-rep char-16-rep }
        { uchar-scalar-rep char-scalar-rep }
        { ushort-scalar-rep short-scalar-rep }
        { uint-scalar-rep int-scalar-rep }
        { ulonglong-scalar-rep longlong-scalar-rep }
    } ?at drop ;
! Register classes
SINGLETONS: int-regs float-regs ;
@ -239,7 +251,7 @@ HOOK: %unpack-vector-head cpu ( dst src rep -- )
HOOK: %unpack-vector-tail cpu ( dst src rep -- )
HOOK: %integer>float-vector cpu ( dst src rep -- )
HOOK: %float>integer-vector cpu ( dst src rep -- )
HOOK: %compare-vector cpu ( dst src1 src2 temp rep cc -- )
HOOK: %compare-vector cpu ( dst src1 src2 rep cc -- )
HOOK: %test-vector cpu ( dst src1 temp rep vcc -- )
HOOK: %test-vector-branch cpu ( label src1 temp rep vcc -- )
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
@ -285,6 +297,7 @@ HOOK: %unpack-vector-tail-reps cpu ( -- reps )
HOOK: %integer>float-vector-reps cpu ( -- reps )
HOOK: %float>integer-vector-reps cpu ( -- reps )
HOOK: %compare-vector-reps cpu ( cc -- reps )
HOOK: %compare-vector-ccs cpu ( rep cc -- {cc,swap?}s not? )
HOOK: %test-vector-reps cpu ( -- reps )
HOOK: %add-vector-reps cpu ( -- reps )
HOOK: %saturated-add-vector-reps cpu ( -- reps )

View File

@ -588,14 +588,6 @@ M: x86 %fill-vector-reps
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
: unsign-rep ( rep -- rep' )
{
{ uint-4-rep int-4-rep }
{ ulonglong-2-rep longlong-2-rep }
{ ushort-8-rep short-8-rep }
{ uchar-16-rep char-16-rep }
} ?at drop ;
! M:: x86 %broadcast-vector ( dst src rep -- )
! rep unsign-rep {
! { float-4-rep [
@ -820,14 +812,10 @@ M: x86 %float>integer-vector-reps
{ sse2? { float-4-rep } }
} available-reps ;
:: compare-float-v-operands ( dst src1 src2 temp rep cc -- dst' src' rep cc' )
cc { cc> cc>= cc/> cc/>= } member?
[ dst src2 src1 rep two-operand rep cc swap-cc ]
[ dst src1 src2 rep two-operand rep cc ] if ;
! Runs the `double` quotation when rep is double-2-rep and the `single`
! quotation otherwise; dst and src are left on the stack for whichever
! quotation is chosen.
:: (%compare-float-vector) ( dst src rep double single -- )
    dst src
    rep double-2-rep eq? double single if ; inline
: %compare-float-vector ( dst src1 src2 temp rep cc -- )
compare-float-v-operands {
: %compare-float-vector ( dst src rep cc -- )
{
{ cc< [ [ CMPLTPD ] [ CMPLTPS ] (%compare-float-vector) ] }
{ cc<= [ [ CMPLEPD ] [ CMPLEPS ] (%compare-float-vector) ] }
{ cc= [ [ CMPEQPD ] [ CMPEQPS ] (%compare-float-vector) ] }
@ -838,16 +826,6 @@ M: x86 %float>integer-vector-reps
{ cc/<>= [ [ CMPUNORDPD ] [ CMPUNORDPS ] (%compare-float-vector) ] }
} case ;
:: compare-int-v-operands ( dst src1 src2 temp rep cc -- not-dst/f cmp-dst src' rep cc' )
cc order-cc :> occ
occ {
{ cc= [ f dst src1 src2 rep two-operand rep cc= ] }
{ cc/= [ dst temp src1 src2 rep two-operand rep cc= ] }
{ cc<= [ dst temp src1 src2 rep two-operand rep cc> ] }
{ cc< [ f dst src2 src1 rep two-operand rep cc> ] }
{ cc> [ f dst src1 src2 rep two-operand rep cc> ] }
{ cc>= [ dst temp src2 src1 rep two-operand rep cc> ] }
} case ;
:: (%compare-int-vector) ( dst src rep int64 int32 int16 int8 -- )
rep unsign-rep :> rep'
dst src rep' {
@ -856,18 +834,14 @@ M: x86 %float>integer-vector-reps
{ short-8-rep [ int16 call ] }
{ char-16-rep [ int8 call ] }
} case ; inline
:: (%not-vector) ( dst src rep -- )
dst rep %fill-vector
dst dst src rep %xor-vector ;
:: %compare-int-vector ( dst src1 src2 temp rep cc -- )
dst src1 src2 temp rep cc compare-int-v-operands :> cc' :> rep :> src' :> cmp-dst :> not-dst
cmp-dst src' rep cc' {
: %compare-int-vector ( dst src rep cc -- )
{
{ cc= [ [ PCMPEQQ ] [ PCMPEQD ] [ PCMPEQW ] [ PCMPEQB ] (%compare-int-vector) ] }
{ cc> [ [ PCMPGTQ ] [ PCMPGTD ] [ PCMPGTW ] [ PCMPGTB ] (%compare-int-vector) ] }
} case
not-dst [ cmp-dst rep (%not-vector) ] when* ;
} case ;
M: x86 %compare-vector ( dst src1 src2 temp rep cc -- )
M: x86 %compare-vector ( dst src1 src2 rep cc -- )
[ [ two-operand ] keep ] dip
over float-vector-rep?
[ %compare-float-vector ]
[ %compare-int-vector ] if ;
@ -878,11 +852,6 @@ M: x86 %compare-vector ( dst src1 src2 temp rep cc -- )
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
{ sse4.1? { longlong-2-rep ulonglong-2-rep } }
} available-reps ;
: %compare-vector-unord-reps ( -- reps )
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep } }
} available-reps ;
: %compare-vector-ord-reps ( -- reps )
{
{ sse? { float-4-rep } }
@ -893,10 +862,44 @@ M: x86 %compare-vector ( dst src1 src2 temp rep cc -- )
M: x86 %compare-vector-reps
{
{ [ dup { cc= cc/= } memq? ] [ drop %compare-vector-eq-reps ] }
{ [ dup { cc<>= cc/<>= } memq? ] [ drop %compare-vector-unord-reps ] }
[ drop %compare-vector-ord-reps ]
} cond ;
! Maps a floating-point condition code to the compare steps that implement it.
! Outputs:
!   ccs  - sequence of { cc swap? } pairs; swap? = t means the step is issued
!          with its two source operands exchanged. The caller ORs the step
!          results together.
!   not? - whether the ORed result must be inverted afterwards (always f
!          here: every float condition is expressed directly).
!
! cc/<> is the negation of cc<> ("less or greater"), i.e. "equal or
! unordered", so it is built from cc= OR cc/<>=. Using cc/= for the first
! step would be wrong: cc/= (the negation of cc=) already holds for unordered
! operands, so cc/= OR cc/<>= is just cc/= again.
: %compare-float-vector-ccs ( cc -- ccs not? )
    {
        { cc< [ { { cc< f } } f ] }
        { cc<= [ { { cc<= f } } f ] }
        { cc> [ { { cc< t } } f ] }
        { cc>= [ { { cc<= t } } f ] }
        { cc= [ { { cc= f } } f ] }
        { cc<> [ { { cc< f } { cc< t } } f ] }
        { cc<>= [ { { cc<>= f } } f ] }
        { cc/< [ { { cc/< f } } f ] }
        { cc/<= [ { { cc/<= f } } f ] }
        { cc/> [ { { cc/< t } } f ] }
        { cc/>= [ { { cc/<= t } } f ] }
        { cc/= [ { { cc/= f } } f ] }
        { cc/<> [ { { cc= f } { cc/<>= f } } f ] }
        { cc/<>= [ { { cc/<>= f } } f ] }
    } case ;
! Maps an integer condition code to the compare steps that implement it.
! The incoming cc is first normalized with order-cc; the table below is keyed
! on its result, which includes the t and f rows as well as condition codes.
! Outputs:
!   ccs  - sequence of { cc swap? } pairs; every step uses only cc> or cc=.
!          swap? = t means the step is issued with its source operands
!          exchanged.
!   not? - whether the combined result must be inverted afterwards; this is
!          how cc<=, cc>= and cc/= are derived from cc> and cc=.
! The t and f rows return no steps at all; generate-compare-vector turns an
! empty ccs into a filled vector when not? is t and a zero vector when it is f.
: %compare-int-vector-ccs ( cc -- ccs not? )
order-cc {
{ cc< [ { { cc> t } } f ] }
{ cc<= [ { { cc> f } } t ] }
{ cc> [ { { cc> f } } f ] }
{ cc>= [ { { cc> t } } t ] }
{ cc= [ { { cc= f } } f ] }
{ cc/= [ { { cc= f } } t ] }
{ t [ { } t ] }
{ f [ { } f ] }
} case ;
! x86 implementation of %compare-vector-ccs ( rep cc -- {cc,swap?}s not? ).
! Chooses the float or integer decomposition table according to rep; rep
! itself is discarded before the table lookup, which depends only on cc.
M: x86 %compare-vector-ccs
    over float-vector-rep?
    [ nip %compare-float-vector-ccs ]
    [ nip %compare-int-vector-ccs ] if ;
:: %test-vector-mask ( dst temp mask vcc -- )
vcc {
{ vcc-any [ dst dst TEST dst temp \ CMOVNE %boolean ] }