simd intrinsic implementation for v*high, v*hs+, vavg, and vsad
parent
1845915dc6
commit
104c29aabc
|
@ -382,6 +382,16 @@ def: dst
|
|||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
! Two-input vector instruction: defines dst, reads src1 and src2, and
! carries the vector representation as the literal rep.
! Lowered through the %mul-high-vector hook.
PURE-INSN: ##mul-high-vector
def: dst
use: src1 src2
literal: rep ;
|
||||
|
||||
! Two-input vector instruction: defines dst, reads src1 and src2, and
! carries the vector representation as the literal rep.
! Lowered through the %mul-horizontal-add-vector hook.
PURE-INSN: ##mul-horizontal-add-vector
def: dst
use: src1 src2
literal: rep ;
|
||||
|
||||
PURE-INSN: ##saturated-mul-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
|
@ -402,11 +412,21 @@ def: dst
|
|||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
! Two-input vector instruction: defines dst, reads src1 and src2, and
! carries the vector representation as the literal rep.
! Lowered through the %avg-vector hook.
PURE-INSN: ##avg-vector
def: dst
use: src1 src2
literal: rep ;
|
||||
|
||||
! Two-input vector instruction whose result is declared as
! dst/scalar-rep rather than a plain dst; reads src1 and src2 and
! carries the vector representation as the literal rep.
! Lowered through the %dot-vector hook.
PURE-INSN: ##dot-vector
def: dst/scalar-rep
use: src1 src2
literal: rep ;
|
||||
|
||||
! Two-input vector instruction: defines dst, reads src1 and src2, and
! carries the vector representation as the literal rep.
! Note that dst is a plain (vector) dst here, unlike ##dot-vector.
! Lowered through the %sad-vector hook.
PURE-INSN: ##sad-vector
def: dst
use: src1 src2
literal: rep ;
|
||||
|
||||
PURE-INSN: ##horizontal-add-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
|
|
|
@ -57,6 +57,12 @@ IN: compiler.cfg.intrinsics.simd
|
|||
{ longlong-2-rep [ longlong-array{ -1 0 } underlying>> ^^load-constant ] }
|
||||
} case ;
|
||||
|
||||
! Emits a load of a constant vector whose elements are all 0.5, for the
! given float representation. Only float-4-rep and double-2-rep are
! handled; any other rep falls out of the case.
: ^load-half-vector ( rep -- dst )
    {
        { float-4-rep [ float-array{ 0.5 0.5 0.5 0.5 } ] }
        { double-2-rep [ double-array{ 0.5 0.5 } ] }
    } case
    ! Both branches load the array's underlying bytes as the constant.
    underlying>> ^^load-constant ;
|
||||
|
||||
: >variable-shuffle ( shuffle rep -- shuffle' )
|
||||
rep-component-type heap-size
|
||||
[ dup <repetition> >byte-array ]
|
||||
|
@ -336,6 +342,16 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
|
|||
[ ^^mul-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
! Intrinsic expansion for (simd-v*high): a single ##mul-high-vector,
! with no alternative expansion for other representations.
: emit-simd-v*high ( node -- )
    { [ ^^mul-high-vector ] } emit-vv-vector-op ;
|
||||
|
||||
! Intrinsic expansion for (simd-v*hs+): a single
! ##mul-horizontal-add-vector, with no alternative expansion.
: emit-simd-v*hs+ ( node -- )
    { [ ^^mul-horizontal-add-vector ] } emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-v/ ( node -- )
|
||||
{
|
||||
[ ^^div-vector ]
|
||||
|
@ -359,12 +375,26 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
|
|||
]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
! Intrinsic expansion for (simd-vavg).
! Two alternatives are handed to emit-vv-vector-op:
!   - a direct ##avg-vector;
!   - for float-vector-rep, (src1 + src2) * 0.5 built from
!     ##add-vector, a constant vector of halves and ##mul-vector.
: emit-simd-vavg ( node -- )
    {
        [ ^^avg-vector ]
        {
            float-vector-rep
            ! ( src1 src2 rep -- dst ): add first, then load the 0.5
            ! constant, then multiply, all at the same rep.
            [ [ [ ^^add-vector ] [ ^load-half-vector ] bi ] keep ^^mul-vector ]
        }
    } emit-vv-vector-op ;
|
||||
|
||||
! Intrinsic expansion for (simd-v.).
! Two alternatives are handed to emit-vv-vector-op:
!   - a direct ##dot-vector;
!   - for float-vector-rep, an element-wise ##mul-vector followed by
!     ^sum-vector at the same rep.
: emit-simd-v. ( node -- )
    {
        [ ^^dot-vector ]
        { float-vector-rep [ [ ^^mul-vector ] keep ^sum-vector ] }
    } emit-vv-vector-op ;
|
||||
|
||||
! Intrinsic expansion for (simd-vsad): sum of absolute differences of
! two vectors, delivered as a scalar.
!
! ##sad-vector is lowered to PSADBW on x86 (see %sad-vector). Per the
! Intel reference, PSADBW on a 128-bit register produces TWO partial
! sums, one in the low bits of each 64-bit half. Extracting the low
! scalar straight from that result would only cover the first 8 bytes,
! so the two halves are swapped with a shuffle and added before the
! scalar is extracted from the widened representation.
!
! NOTE(review): relies on ^^shuffle-vector-imm ( src shuffle rep -- dst )
! being available to this vocabulary - confirm against the instruction
! definitions.
: emit-simd-vsad ( node -- )
    {
        [
            [
                ^^sad-vector
                ! Fold the high-half partial sum onto the low half.
                dup { 2 3 0 1 } int-4-rep ^^shuffle-vector-imm
                int-4-rep ^^add-vector
            ]
            [ widen-vector-rep ^^vector>scalar ] bi
        ]
    } emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vsqrt ( node -- )
|
||||
{
|
||||
[ ^^sqrt-vector ]
|
||||
|
@ -580,10 +610,14 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
|
|||
{ (simd-vs-) [ emit-simd-vs- ] }
|
||||
{ (simd-vs*) [ emit-simd-vs* ] }
|
||||
{ (simd-v*) [ emit-simd-v* ] }
|
||||
{ (simd-v*high) [ emit-simd-v*high ] }
|
||||
{ (simd-v*hs+) [ emit-simd-v*hs+ ] }
|
||||
{ (simd-v/) [ emit-simd-v/ ] }
|
||||
{ (simd-vmin) [ emit-simd-vmin ] }
|
||||
{ (simd-vmax) [ emit-simd-vmax ] }
|
||||
{ (simd-vavg) [ emit-simd-vavg ] }
|
||||
{ (simd-v.) [ emit-simd-v. ] }
|
||||
{ (simd-vsad) [ emit-simd-vsad ] }
|
||||
{ (simd-vsqrt) [ emit-simd-vsqrt ] }
|
||||
{ (simd-sum) [ emit-simd-sum ] }
|
||||
{ (simd-vabs) [ emit-simd-vabs ] }
|
||||
|
|
|
@ -173,11 +173,15 @@ CODEGEN: ##add-sub-vector %add-sub-vector
|
|||
! Each line pairs a vector instruction (##...) with the backend word
! (%...) of the same name.

! subtraction and multiplication
CODEGEN: ##sub-vector %sub-vector
CODEGEN: ##saturated-sub-vector %saturated-sub-vector
CODEGEN: ##mul-vector %mul-vector
CODEGEN: ##mul-high-vector %mul-high-vector
CODEGEN: ##mul-horizontal-add-vector %mul-horizontal-add-vector
CODEGEN: ##saturated-mul-vector %saturated-mul-vector

! division, min/max, average
CODEGEN: ##div-vector %div-vector
CODEGEN: ##min-vector %min-vector
CODEGEN: ##max-vector %max-vector
CODEGEN: ##avg-vector %avg-vector

! reductions, square root, horizontal ops
CODEGEN: ##dot-vector %dot-vector
CODEGEN: ##sad-vector %sad-vector
CODEGEN: ##sqrt-vector %sqrt-vector
CODEGEN: ##horizontal-add-vector %horizontal-add-vector
CODEGEN: ##horizontal-sub-vector %horizontal-sub-vector
|
||||
|
|
|
@ -16,9 +16,12 @@ CONSTANT: vector>vector-intrinsics
|
|||
(simd-vs-)
|
||||
(simd-vs*)
|
||||
(simd-v*)
|
||||
(simd-v*high)
|
||||
(simd-v*hs+)
|
||||
(simd-v/)
|
||||
(simd-vmin)
|
||||
(simd-vmax)
|
||||
(simd-vavg)
|
||||
(simd-vsqrt)
|
||||
(simd-vabs)
|
||||
(simd-vbitand)
|
||||
|
@ -60,6 +63,7 @@ CONSTANT: vector>vector-intrinsics
|
|||
CONSTANT: vector-other-intrinsics
|
||||
{
|
||||
(simd-v.)
|
||||
(simd-vsad)
|
||||
(simd-sum)
|
||||
(simd-vany?)
|
||||
(simd-vall?)
|
||||
|
|
|
@ -283,11 +283,15 @@ HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
|
|||
! Code-generation hooks for vector instructions, dispatched on `cpu`.
! All take ( dst src1 src2 rep -- ) except %sqrt-vector, which has a
! single source.

! subtraction and multiplication
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %saturated-sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
HOOK: %mul-high-vector cpu ( dst src1 src2 rep -- )
HOOK: %mul-horizontal-add-vector cpu ( dst src1 src2 rep -- )
HOOK: %saturated-mul-vector cpu ( dst src1 src2 rep -- )

! division, min/max, average
HOOK: %div-vector cpu ( dst src1 src2 rep -- )
HOOK: %min-vector cpu ( dst src1 src2 rep -- )
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
HOOK: %avg-vector cpu ( dst src1 src2 rep -- )

! reductions, square root, horizontal ops
HOOK: %dot-vector cpu ( dst src1 src2 rep -- )
HOOK: %sad-vector cpu ( dst src1 src2 rep -- )
HOOK: %sqrt-vector cpu ( dst src rep -- )
HOOK: %horizontal-add-vector cpu ( dst src1 src2 rep -- )
HOOK: %horizontal-sub-vector cpu ( dst src1 src2 rep -- )
|
||||
|
@ -332,11 +336,15 @@ HOOK: %add-sub-vector-reps cpu ( -- reps )
|
|||
! Companion hooks, dispatched on `cpu`: each takes no inputs and
! returns the representations ( -- reps ) for the corresponding
! vector hook.

! subtraction and multiplication
HOOK: %sub-vector-reps cpu ( -- reps )
HOOK: %saturated-sub-vector-reps cpu ( -- reps )
HOOK: %mul-vector-reps cpu ( -- reps )
HOOK: %mul-high-vector-reps cpu ( -- reps )
HOOK: %mul-horizontal-add-vector-reps cpu ( -- reps )
HOOK: %saturated-mul-vector-reps cpu ( -- reps )

! division, min/max, average
HOOK: %div-vector-reps cpu ( -- reps )
HOOK: %min-vector-reps cpu ( -- reps )
HOOK: %max-vector-reps cpu ( -- reps )
HOOK: %avg-vector-reps cpu ( -- reps )

! reductions, square root, horizontal ops
HOOK: %dot-vector-reps cpu ( -- reps )
HOOK: %sad-vector-reps cpu ( -- reps )
HOOK: %sqrt-vector-reps cpu ( -- reps )
HOOK: %horizontal-add-vector-reps cpu ( -- reps )
HOOK: %horizontal-sub-vector-reps cpu ( -- reps )
|
||||
|
|
|
@ -1106,6 +1106,33 @@ M: x86 %mul-vector-reps
|
|||
{ sse4.1? { int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
! x86 lowering of ##mul-high-vector.
! After `two-operand` has processed the operands (rep is kept for the
! dispatch), the opcode is chosen by representation:
!   ushort-8-rep -> PMULHUW
!   short-8-rep  -> PMULHW
M: x86 %mul-high-vector ( dst src1 src2 rep -- )
    [ two-operand ] keep
    {
        { ushort-8-rep [ PMULHUW ] }
        { short-8-rep [ PMULHW ] }
    } case ;
|
||||
|
||||
! Representations accepted by %mul-high-vector on x86: both 16-bit
! integer reps, listed under the sse2? predicate.
M: x86 %mul-high-vector-reps
    { { sse2? { short-8-rep ushort-8-rep } } } available-reps ;
|
||||
|
||||
! x86 lowering of ##mul-horizontal-add-vector.
! After `two-operand` has processed the operands (rep is kept for the
! dispatch), the opcode is chosen by element width only:
!   8-bit reps  (char-16-rep, uchar-16-rep)  -> PMADDUBSW
!   16-bit reps (short-8-rep, ushort-8-rep)  -> PMADDWD
!
! NOTE(review): the signed and unsigned rep of each width share one
! opcode. Per the Intel reference PMADDWD multiplies signed words and
! PMADDUBSW multiplies unsigned bytes of one operand by signed bytes
! of the other (with saturation) - confirm this is the intended
! meaning for every rep listed here.
M: x86 %mul-horizontal-add-vector ( dst src1 src2 rep -- )
    [ two-operand ] keep
    {
        ! 16-bit elements
        { short-8-rep [ PMADDWD ] }
        { ushort-8-rep [ PMADDWD ] }
        ! 8-bit elements
        { char-16-rep [ PMADDUBSW ] }
        { uchar-16-rep [ PMADDUBSW ] }
    } case ;
|
||||
|
||||
! Representations accepted by %mul-horizontal-add-vector on x86:
! the 16-bit reps are listed under sse2?, the 8-bit reps under ssse3?
! (matching the PMADDWD / PMADDUBSW split in the lowering).
M: x86 %mul-horizontal-add-vector-reps
    {
        { sse2? { short-8-rep ushort-8-rep } }
        { ssse3? { char-16-rep uchar-16-rep } }
    } available-reps ;
|
||||
|
||||
M: x86 %div-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
|
@ -1159,6 +1186,18 @@ M: x86 %max-vector-reps
|
|||
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
! x86 lowering of ##avg-vector. Only unsigned integer reps are handled:
!   ushort-8-rep -> PAVGW
!   uchar-16-rep -> PAVGB
M: x86 %avg-vector ( dst src1 src2 rep -- )
    [ two-operand ] keep
    {
        { ushort-8-rep [ PAVGW ] }
        { uchar-16-rep [ PAVGB ] }
    } case ;
|
||||
|
||||
! Representations accepted by %avg-vector on x86: the two unsigned
! reps handled by the lowering, listed under the sse2? predicate.
M: x86 %avg-vector-reps
    { { sse2? { uchar-16-rep ushort-8-rep } } } available-reps ;
|
||||
|
||||
M: x86 %dot-vector
|
||||
[ two-operand ] keep
|
||||
{
|
||||
|
@ -1171,6 +1210,18 @@ M: x86 %dot-vector-reps
|
|||
{ sse4.1? { float-4-rep double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
! x86 lowering of ##sad-vector.
! PSADBW sums absolute differences of UNSIGNED bytes (Intel reference),
! so it is only correct for uchar-16-rep. Signed char-16-rep input
! would be misinterpreted (e.g. -1 vs 1 would count as 254, not 2), so
! it is neither lowered here nor advertised in the reps word below.
M: x86 %sad-vector ( dst src1 src2 rep -- )
    [ two-operand ] keep
    {
        { uchar-16-rep [ PSADBW ] }
    } case ;

! Representations accepted by %sad-vector on x86. Must stay in sync
! with the case table in %sad-vector above.
M: x86 %sad-vector-reps
    {
        { sse2? { uchar-16-rep } }
    } available-reps ;
|
||||
|
||||
M: x86 %horizontal-add-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
signed-rep {
|
||||
|
|
|
@ -168,7 +168,6 @@ M: A vs- \ A-rep [ (simd-vs-) ] [ call-next-method ]
|
|||
M: A vs* \ A-rep [ (simd-vs*) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A v* \ A-rep [ (simd-v*) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A v*high \ A-rep [ (simd-v*high) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A v*hs+ \ A-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A v/ \ A-rep [ (simd-v/) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A vavg \ A-rep [ (simd-vavg) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A vmin \ A-rep [ (simd-vmin) ] [ call-next-method ] vv->v-op ; inline
|
||||
|
@ -273,6 +272,19 @@ SIMD-128: double-2
|
|||
! vshuffle on any simd-128 value is implemented by vshuffle-bytes.
M: simd-128 vshuffle ( u perm -- v ) vshuffle-bytes ; inline
|
||||
|
||||
! v*hs+ specializations for the 128-bit integer vector types.
! Each one runs vv->v-op with the type's own rep, the (simd-v*hs+)
! intrinsic and call-next-method as the alternative, then casts the
! result to the type with half as many, twice-as-wide elements.

! unsigned: uchar-16 -> ushort-8 -> uint-4 -> ulonglong-2
M: uchar-16 v*hs+
    uchar-16-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op
    ushort-8-cast ; inline

M: ushort-8 v*hs+
    ushort-8-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op
    uint-4-cast ; inline

M: uint-4 v*hs+
    uint-4-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op
    ulonglong-2-cast ; inline

! signed: char-16 -> short-8 -> int-4 -> longlong-2
M: char-16 v*hs+
    char-16-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op
    short-8-cast ; inline

M: short-8 v*hs+
    short-8-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op
    int-4-cast ; inline

M: int-4 v*hs+
    int-4-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op
    longlong-2-cast ; inline
|
||||
|
||||
! Require math.vectors.simd.mirrors only when `"mirrors" vocab`
! yields a true value.
"mirrors" vocab [ "math.vectors.simd.mirrors" require ] when
|
||||
|
|
Loading…
Reference in New Issue