Generate better code for vabs when the instruction isn't available, instead of using the software fallback. The generated code is: andn with -0.0 for floats, x > 0 ? x : -x for signed ints, and a no-op for unsigned ints.

db4
Joe Groff 2009-10-09 14:14:44 -05:00
parent 01f5d392be
commit 471c86a110
4 changed files with 45 additions and 5 deletions

View File

@ -166,7 +166,7 @@ IN: compiler.cfg.intrinsics
{ math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v.) [ [ ^^dot-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vabs) [ [ ^^abs-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vabs) [ [ generate-abs-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vbitandn) [ [ ^^andn-vector ] emit-binary-vector-op ] }

View File

@ -180,10 +180,44 @@ MACRO: if-literals-match ( quots -- )
]
} cond ;
:: generate-neg-vector ( src rep -- dst )
! Produce the constant vector for rep: -0.0 in every lane for the two float
! reps (loaded from the array's underlying bytes with ^^load-constant), and
! ^^zero-vector for every other rep.
:: generate-load-neg-zero-vector ( rep -- dst )
rep {
{ float-4-rep [ float-array{ -0.0 -0.0 -0.0 -0.0 } underlying>> ^^load-constant ] }
{ double-2-rep [ double-array{ -0.0 -0.0 } underlying>> ^^load-constant ] }
! Default branch: discard the unmatched value left on the stack by case.
[ drop rep ^^zero-vector ]
! NOTE(review): the bare `} case` below looks like the pre-change line of the
! diff (this view carries no +/- markers); only `} case ;` closes the word.
} case
} case ;
! Negate src lane-wise by subtracting it from the vector produced by
! generate-load-neg-zero-vector for rep.
:: generate-neg-vector ( src rep -- dst )
    rep generate-load-neg-zero-vector :> minuend
    minuend src rep ^^sub-vector ;
! Combine two vectors under a mask: the and-vector of mask with true is
! or-ed with the andn-vector of mask with false.
:: generate-blend-vector ( mask true false rep -- dst )
    mask true rep ^^and-vector :> picked
    mask false rep ^^andn-vector :> remainder
    picked remainder rep ^^or-vector ;
! Emit code for the lane-wise absolute value of src, choosing a strategy
! from rep. Clauses are tried top to bottom; the last one is the default.
:: generate-abs-vector ( src rep -- dst )
    {
        ! Unsigned integer reps: the result is src itself, nothing is emitted.
        { [ rep unsigned-int-vector-rep? ] [ src ] }
        ! rep is listed in %abs-vector-reps: use ^^abs-vector directly.
        { [ rep %abs-vector-reps member? ] [ src rep ^^abs-vector ] }
        ! Float reps: andn src against the all -0.0 constant.
        {
            [ rep float-vector-rep? ]
            [
                rep generate-load-neg-zero-vector :> sign-bits
                sign-bits src rep ^^andn-vector
            ]
        }
        ! Everything else: compute 0 - src, compare 0 cc> src, and blend so
        ! lanes where the comparison holds take the negated value.
        [
            rep ^^zero-vector :> zeros
            zeros src rep ^^sub-vector :> negated
            zeros src rep cc> ^^compare-vector :> negative?
            negative? negated src rep generate-blend-vector
        ]
    } cond ;

View File

@ -9,13 +9,13 @@ IN: compiler.tree.propagation.simd
(simd-v+)
(simd-v-)
(simd-vneg)
(simd-vabs)
(simd-v+-)
(simd-v*)
(simd-v/)
(simd-vmin)
(simd-vmax)
(simd-sum)
(simd-vabs)
(simd-vsqrt)
(simd-vbitand)
(simd-vbitandn)

View File

@ -142,6 +142,12 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
%merge-vector-reps [ int-vector-rep? ] filter
%unpack-vector-head-reps union ;
! Reps for which (simd-vabs) is reported as supported. The set is the union
! of four groups, each matching one code path of generate-abs-vector:
!   - reps in %abs-vector-reps (the ^^abs-vector path);
!   - integer reps that support a cc> ^^compare-vector (the compare/blend path);
!   - float reps that support ^^andn-vector (the andn with -0.0 path, which
!     emits andn rather than xor);
!   - the four unsigned integer reps, for which abs returns its input.
: (%abs-reps) ( -- reps )
    cc> %compare-vector-reps [ int-vector-rep? ] filter
    %andn-vector-reps [ float-vector-rep? ] filter
    union
    { uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep } union
    %abs-vector-reps union ;
M: vector-rep supported-simd-op?
{
{ \ (simd-v+) [ %add-vector-reps ] }
@ -158,7 +164,7 @@ M: vector-rep supported-simd-op?
{ \ (simd-v.) [ %dot-vector-reps ] }
{ \ (simd-vsqrt) [ %sqrt-vector-reps ] }
{ \ (simd-sum) [ %horizontal-add-vector-reps ] }
{ \ (simd-vabs) [ %abs-vector-reps ] }
{ \ (simd-vabs) [ (%abs-reps) ] }
{ \ (simd-vbitand) [ %and-vector-reps ] }
{ \ (simd-vbitandn) [ %andn-vector-reps ] }
{ \ (simd-vbitor) [ %or-vector-reps ] }