Generate better code for vabs when the hardware instruction isn't available, instead of using the software fallback: andn with -0.0 for floats, x > 0 ? x : -x for signed ints, and a no-op for unsigned ints.
parent
01f5d392be
commit
471c86a110
|
@ -166,7 +166,7 @@ IN: compiler.cfg.intrinsics
|
|||
{ math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v.) [ [ ^^dot-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vabs) [ [ ^^abs-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vabs) [ [ generate-abs-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vbitandn) [ [ ^^andn-vector ] emit-binary-vector-op ] }
|
||||
|
|
|
@ -180,10 +180,44 @@ MACRO: if-literals-match ( quots -- )
|
|||
]
|
||||
} cond ;
|
||||
|
||||
:: generate-neg-vector ( src rep -- dst )
|
||||
! generate-load-neg-zero-vector ( rep -- dst )
! Emits the "negative zero" vector for the representation rep:
!   float-4-rep  -> ^^load-constant of the bytes of four -0.0 floats
!   double-2-rep -> ^^load-constant of the bytes of two -0.0 doubles
!   any other    -> ^^zero-vector for rep
! NOTE(review): this text is a rendered diff; the bare "} case" line near the
! end looks like the removed side of the hunk, and "} case ;" is the live
! terminator. Confirm against the actual source file.
:: generate-load-neg-zero-vector ( rep -- dst )
|
||||
rep {
|
||||
{ float-4-rep [ float-array{ -0.0 -0.0 -0.0 -0.0 } underlying>> ^^load-constant ] }
|
||||
{ double-2-rep [ double-array{ -0.0 -0.0 } underlying>> ^^load-constant ] }
|
||||
[ drop rep ^^zero-vector ]
|
||||
} case
|
||||
} case ;
|
||||
|
||||
! generate-neg-vector ( src rep -- dst )
! Negates src by subtracting it from the vector produced by
! generate-load-neg-zero-vector for the same representation.
:: generate-neg-vector ( src rep -- dst )
    rep generate-load-neg-zero-vector :> neg-zero
    neg-zero src rep ^^sub-vector ;
|
||||
|
||||
! generate-blend-vector ( mask true false rep -- dst )
! Combines two vectors under a mask: (mask and true) or (mask andn false).
! NOTE(review): presumably ^^andn-vector complements its first operand, so
! the result takes `true` where mask bits are set and `false` elsewhere --
! confirm against the instruction definition.
:: generate-blend-vector ( mask true false rep -- dst )
    mask true rep ^^and-vector :> true-part
    mask false rep ^^andn-vector :> false-part
    true-part false-part rep ^^or-vector ;
|
||||
|
||||
! generate-abs-vector ( src rep -- dst )
! Emits code for the element-wise absolute value of src. The first matching
! strategy wins:
!   1. unsigned integer reps: src is returned unchanged;
!   2. reps listed in %abs-vector-reps: a single ^^abs-vector;
!   3. other float reps: andn of the -0.0 constant with src;
!   4. everything else: compute 0 - src and blend it with src using the
!      mask from comparing 0 against src with cc>.
:: generate-abs-vector ( src rep -- dst )
    {
        { [ rep unsigned-int-vector-rep? ] [ src ] }
        { [ rep %abs-vector-reps member? ] [ src rep ^^abs-vector ] }
        {
            [ rep float-vector-rep? ]
            [
                rep generate-load-neg-zero-vector :> sign-bits
                sign-bits src rep ^^andn-vector
            ]
        }
        [
            rep ^^zero-vector :> zeros
            zeros src rep ^^sub-vector :> negated
            ! Mask of the lanes where 0 > src, i.e. where src is negative.
            zeros src rep cc> ^^compare-vector :> is-negative
            is-negative negated src rep generate-blend-vector
        ]
    } cond ;
|
||||
|
||||
|
|
|
@ -9,13 +9,13 @@ IN: compiler.tree.propagation.simd
|
|||
(simd-v+)
|
||||
(simd-v-)
|
||||
(simd-vneg)
|
||||
(simd-vabs)
|
||||
(simd-v+-)
|
||||
(simd-v*)
|
||||
(simd-v/)
|
||||
(simd-vmin)
|
||||
(simd-vmax)
|
||||
(simd-sum)
|
||||
(simd-vabs)
|
||||
(simd-vsqrt)
|
||||
(simd-vbitand)
|
||||
(simd-vbitandn)
|
||||
|
|
|
@ -142,6 +142,12 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
|
|||
%merge-vector-reps [ int-vector-rep? ] filter
|
||||
%unpack-vector-head-reps union ;
|
||||
|
||||
! (%abs-reps) ( -- reps )
! Representations for which (simd-vabs) is reported as supported. This is the
! union of the cases generate-abs-vector handles:
!   - reps with a native abs instruction (%abs-vector-reps);
!   - integer reps that support a cc> vector compare (compare-and-blend path);
!   - float reps that support andn (the float path emits ^^andn-vector
!     against a -0.0 constant, so andn, not xor, is the operation required);
!   - the unsigned integer reps, for which abs is the identity.
: (%abs-reps) ( -- reps )
    %abs-vector-reps
    cc> %compare-vector-reps [ int-vector-rep? ] filter union
    %andn-vector-reps [ float-vector-rep? ] filter union
    { uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep } union ;
|
||||
|
||||
M: vector-rep supported-simd-op?
|
||||
{
|
||||
{ \ (simd-v+) [ %add-vector-reps ] }
|
||||
|
@ -158,7 +164,7 @@ M: vector-rep supported-simd-op?
|
|||
{ \ (simd-v.) [ %dot-vector-reps ] }
|
||||
{ \ (simd-vsqrt) [ %sqrt-vector-reps ] }
|
||||
{ \ (simd-sum) [ %horizontal-add-vector-reps ] }
|
||||
{ \ (simd-vabs) [ %abs-vector-reps ] }
|
||||
{ \ (simd-vabs) [ (%abs-reps) ] }
|
||||
{ \ (simd-vbitand) [ %and-vector-reps ] }
|
||||
{ \ (simd-vbitandn) [ %andn-vector-reps ] }
|
||||
{ \ (simd-vbitor) [ %or-vector-reps ] }
|
||||
|
|
Loading…
Reference in New Issue