generate unsigned vector comparison fallbacks using min/max or xor/signed compare

db4
Joe Groff 2009-10-20 22:30:57 -05:00
parent cb36111a3c
commit 8a6b0a1453
3 changed files with 67 additions and 15 deletions

View File

@ -10,8 +10,8 @@ compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
compiler.cfg.instructions compiler.cfg.registers
compiler.cfg.intrinsics.alien
specialized-arrays ;
FROM: alien.c-types => heap-size char uchar float double ;
SPECIALIZED-ARRAYS: float double ;
FROM: alien.c-types => heap-size uchar ushort uint ulonglong float double ;
SPECIALIZED-ARRAYS: uchar ushort uint ulonglong float double ;
IN: compiler.cfg.intrinsics.simd
MACRO: check-elements ( quots -- )
@ -155,28 +155,79 @@ MACRO: if-literals-match ( quots -- )
[ ^^not-vector ]
[ [ ^^fill-vector ] [ ^^xor-vector ] bi ] if ;
! Emits a single ^^compare-vector for one { cc swap? } pair.
! NOTE(review): this is a diff view with its +/- markers stripped; the two
! header lines below look like the removed and the added spelling of the same
! word (renamed to the doubly-parenthesised form) -- confirm against the repository.
:: (generate-compare-vector) ( src1 src2 rep {cc,swap} -- dst )
:: ((generate-compare-vector)) ( src1 src2 rep {cc,swap} -- dst )
! first2 pushes both elements of the pair: the first is bound to cc,
! the second to swap?.
{cc,swap} first2 :> swap? :> cc
! When swap? is true the operands are passed to ^^compare-vector exchanged.
swap?
[ src2 src1 rep cc ^^compare-vector ]
[ src1 src2 rep cc ^^compare-vector ] if ;
! Builds a vector comparison out of the condition codes that
! %compare-vector-ccs returns for rep and orig-cc.
! NOTE(review): this is a diff view with its +/- markers stripped; wherever two
! consecutive lines differ only in the number of parentheses around a word
! name, the first looks like the removed line and the second like the added
! one -- confirm against the repository.
:: generate-compare-vector ( src1 src2 rep orig-cc -- dst )
:: (generate-compare-vector) ( src1 src2 rep orig-cc -- dst )
! Two results come back: a sequence of condition codes (ccs) and a flag (not?).
rep orig-cc %compare-vector-ccs :> not? :> ccs
ccs empty?
! Nothing to compare: the result is ^^fill-vector when not? is set,
! ^^zero-vector otherwise.
[ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ]
[
! unclip splits off the first code; it seeds the reduce below.
ccs unclip :> first-cc :> rest-ccs
src1 src2 rep first-cc (generate-compare-vector) :> first-dst
src1 src2 rep first-cc ((generate-compare-vector)) :> first-dst
! Each remaining code gets its own compare, combined into the running
! result with ^^or-vector.
rest-ccs first-dst
[ [ src1 src2 rep ] dip (generate-compare-vector) rep ^^or-vector ]
[ [ src1 src2 rep ] dip ((generate-compare-vector)) rep ^^or-vector ]
reduce
! The combined result is passed through generate-not-vector when not? is set.
not? [ rep generate-not-vector ] when
] if ;
! Returns the backing byte-array of a constant vector in which only the most
! significant bit of every element is set. The rep is first passed through
! unsign-rep, so the table is keyed on char-16-rep, short-8-rep, int-4-rep and
! longlong-2-rep only; any other rep makes `case` fail.
: sign-bit-mask ( rep -- byte-array )
    unsign-rep {
        { char-16-rep [
            uchar-array{
                HEX: 80 HEX: 80 HEX: 80 HEX: 80
                HEX: 80 HEX: 80 HEX: 80 HEX: 80
                HEX: 80 HEX: 80 HEX: 80 HEX: 80
                HEX: 80 HEX: 80 HEX: 80 HEX: 80
            }
        ] }
        { short-8-rep [
            ushort-array{
                HEX: 8000 HEX: 8000 HEX: 8000 HEX: 8000
                HEX: 8000 HEX: 8000 HEX: 8000 HEX: 8000
            }
        ] }
        { int-4-rep [
            uint-array{
                HEX: 8000,0000 HEX: 8000,0000
                HEX: 8000,0000 HEX: 8000,0000
            }
        ] }
        { longlong-2-rep [
            ulonglong-array{
                HEX: 8000,0000,0000,0000
                HEX: 8000,0000,0000,0000
            }
        ] }
    } case
    ! Every branch leaves a specialized array; take its byte-array once here.
    underlying>> ;
! Expresses an ordered comparison as "min or max of the operands, then an
! equality test against src1":
!   src1 <  src2   <=>   max(src1,src2) /= src1
!   src1 <= src2   <=>   min(src1,src2)  = src1
!   src1 >  src2   <=>   min(src1,src2) /= src1
!   src1 >= src2   <=>   max(src1,src2)  = src1
! orig-cc is normalised with order-cc first; a code outside these four makes
! `case` fail.
:: (generate-minmax-compare-vector) ( src1 src2 rep orig-cc -- dst )
    ! Each branch emits the min/max instruction and leaves the resulting
    ! vector plus the equality condition code to test it with.
    orig-cc order-cc {
        { cc<  [ src1 src2 rep ^^max-vector cc/= ] }
        { cc<= [ src1 src2 rep ^^min-vector cc=  ] }
        { cc>  [ src1 src2 rep ^^min-vector cc/= ] }
        { cc>= [ src1 src2 rep ^^max-vector cc=  ] }
    } case :> eq-cc :> extremum
    extremum src1 rep eq-cc (generate-compare-vector) ;
! Picks a strategy for comparing src1 and src2 under orig-cc; the first
! clause whose test succeeds wins:
!   1. rep is listed by %compare-vector-reps for orig-cc: compare directly.
!   2. rep is listed by %min-vector-reps: use the min/max + equality rewrite.
!   3. the unsign-rep of rep is listed by %compare-vector-reps: XOR both
!      operands with the sign-bit mask and compare under that rep.
! There is no default clause, so `cond` fails when none applies.
! NOTE(review): clause 2 consults only %min-vector-reps although
! (generate-minmax-compare-vector) also emits ^^max-vector -- confirm that the
! min and max rep lists always agree on every backend.
:: generate-compare-vector ( src1 src2 rep orig-cc -- dst )
    {
        { [ rep orig-cc %compare-vector-reps member? ] [
            src1 src2 rep orig-cc (generate-compare-vector)
        ] }
        { [ rep %min-vector-reps member? ] [
            src1 src2 rep orig-cc (generate-minmax-compare-vector)
        ] }
        { [ rep unsign-rep orig-cc %compare-vector-reps member? ] [
            rep sign-bit-mask ^^load-constant :> sign-bits
            ! Flip the top bit of every element of both operands.
            src1 sign-bits rep ^^xor-vector :> biased1
            src2 sign-bits rep ^^xor-vector :> biased2
            biased1 biased2 rep unsign-rep orig-cc (generate-compare-vector)
        ] }
    } cond ;
:: generate-unpack-vector-head ( src rep -- dst )
{
{
@ -278,3 +329,4 @@ MACRO: if-literals-match ( quots -- )
[ cc> generate-compare-vector ]
[ generate-blend-vector ] 3bi
] if ;

View File

@ -893,7 +893,7 @@ M: x86 %compare-vector ( dst src1 src2 rep cc -- )
! x86 method of %compare-vector-reps: chooses between two rep lists based on
! the condition code on top of the stack.
M: x86 %compare-vector-reps
{
! NOTE(review): this is a diff view with its +/- markers stripped; the next
! two clauses look like the removed and the added version of the same line.
! The second also sends cc/<>= and cc<>= to %compare-vector-eq-reps --
! confirm against the repository.
{ [ dup { cc= cc/= } memq? ] [ drop %compare-vector-eq-reps ] }
{ [ dup { cc= cc/= cc/<>= cc<>= } memq? ] [ drop %compare-vector-eq-reps ] }
! Every other condition code falls through to %compare-vector-ord-reps.
[ drop %compare-vector-ord-reps ]
} cond ;
@ -1098,7 +1098,7 @@ M: x86 %min-vector ( dst src1 src2 rep -- )
! x86 method of %min-vector-reps: a table pairing the words sse?, sse2? and
! sse4.1? with lists of vector reps, handed to available-reps.
! NOTE(review): presumably available-reps keeps the lists whose test word
! succeeds -- its definition is not visible here, confirm.
M: x86 %min-vector-reps
{
{ sse? { float-4-rep } }
! NOTE(review): this is a diff view with its +/- markers stripped; the next
! two lines look like the removed and the added version of the sse2? entry.
! The second drops the repeated short-8-rep and uchar-16-rep.
{ sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
{ sse2? { uchar-16-rep short-8-rep double-2-rep } }
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
@ -1118,7 +1118,7 @@ M: x86 %max-vector ( dst src1 src2 rep -- )
! x86 method of %max-vector-reps: a table pairing the words sse?, sse2? and
! sse4.1? with lists of vector reps, handed to available-reps.
! NOTE(review): presumably available-reps keeps the lists whose test word
! succeeds -- its definition is not visible here, confirm.
M: x86 %max-vector-reps
{
{ sse? { float-4-rep } }
! NOTE(review): this is a diff view with its +/- markers stripped; the next
! two lines look like the removed and the added version of the sse2? entry.
! The second drops the repeated short-8-rep and uchar-16-rep.
{ sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
{ sse2? { uchar-16-rep short-8-rep double-2-rep } }
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;

View File

@ -193,12 +193,12 @@ M: vector-rep supported-simd-op?
{ \ (simd-(vpack-unsigned)) [ %unsigned-pack-vector-reps ] }
{ \ (simd-(vunpack-head)) [ (%unpack-reps) ] }
{ \ (simd-(vunpack-tail)) [ (%unpack-reps) ] }
{ \ (simd-v<=) [ cc<= %compare-vector-reps ] }
{ \ (simd-v<) [ cc< %compare-vector-reps ] }
{ \ (simd-v=) [ cc= %compare-vector-reps ] }
{ \ (simd-v>) [ cc> %compare-vector-reps ] }
{ \ (simd-v>=) [ cc>= %compare-vector-reps ] }
{ \ (simd-vunordered?) [ cc/<>= %compare-vector-reps ] }
{ \ (simd-v<=) [ unsign-rep cc<= %compare-vector-reps ] }
{ \ (simd-v<) [ unsign-rep cc< %compare-vector-reps ] }
{ \ (simd-v=) [ unsign-rep cc= %compare-vector-reps ] }
{ \ (simd-v>) [ unsign-rep cc> %compare-vector-reps ] }
{ \ (simd-v>=) [ unsign-rep cc>= %compare-vector-reps ] }
{ \ (simd-vunordered?) [ unsign-rep cc/<>= %compare-vector-reps ] }
{ \ (simd-gather-2) [ %gather-vector-2-reps ] }
{ \ (simd-gather-4) [ %gather-vector-4-reps ] }
{ \ (simd-vany?) [ %test-vector-reps ] }