simd intrinsic implementation for v*high, v*hs+, vavg, and vsad

db4
Joe Groff 2009-12-05 14:52:18 -08:00
parent 1845915dc6
commit 104c29aabc
7 changed files with 134 additions and 1 deletions

View File

@ -382,6 +382,16 @@ def: dst
use: src1 src2
literal: rep ;
! ##mul-high-vector: declared with PURE-INSN:. Defines dst, uses src1 and
! src2, and carries rep as a literal. The CODEGEN: table pairs it with
! %mul-high-vector.
PURE-INSN: ##mul-high-vector
def: dst
use: src1 src2
literal: rep ;
! ##mul-horizontal-add-vector: declared with PURE-INSN:. Defines dst, uses
! src1 and src2, and carries rep as a literal. The CODEGEN: table pairs it
! with %mul-horizontal-add-vector.
PURE-INSN: ##mul-horizontal-add-vector
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##saturated-mul-vector
def: dst
use: src1 src2
@ -402,11 +412,21 @@ def: dst
use: src1 src2
literal: rep ;
! ##avg-vector: declared with PURE-INSN:. Defines dst, uses src1 and src2,
! and carries rep as a literal. The CODEGEN: table pairs it with %avg-vector.
PURE-INSN: ##avg-vector
def: dst
use: src1 src2
literal: rep ;
! ##dot-vector: declared with PURE-INSN:. Uses src1 and src2 and carries rep
! as a literal. Unlike the neighbouring vector instructions, its output is
! declared as dst/scalar-rep rather than a plain dst.
PURE-INSN: ##dot-vector
def: dst/scalar-rep
use: src1 src2
literal: rep ;
! ##sad-vector: declared with PURE-INSN:. Defines dst, uses src1 and src2,
! and carries rep as a literal. dst is a plain (non-scalar) definition;
! emit-simd-vsad passes it on to ^^vector>scalar. The CODEGEN: table pairs
! this instruction with %sad-vector.
PURE-INSN: ##sad-vector
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##horizontal-add-vector
def: dst
use: src1 src2

View File

@ -57,6 +57,12 @@ IN: compiler.cfg.intrinsics.simd
{ longlong-2-rep [ longlong-array{ -1 0 } underlying>> ^^load-constant ] }
} case ;
! Emit a ^^load-constant of a vector whose elements are all 0.5.
! rep selects the literal: four floats for float-4-rep, two doubles for
! double-2-rep. No other rep has a case entry.
: ^load-half-vector ( rep -- dst )
    {
        { float-4-rep [ float-array{ 0.5 0.5 0.5 0.5 } ] }
        { double-2-rep [ double-array{ 0.5 0.5 } ] }
    } case
    ! Both branches finish the same way, so the shared tail lives here.
    underlying>> ^^load-constant ;
: >variable-shuffle ( shuffle rep -- shuffle' )
rep-component-type heap-size
[ dup <repetition> >byte-array ]
@ -336,6 +342,16 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
[ ^^mul-vector ]
} emit-vv-vector-op ;
! Expansion used for the (simd-v*high) intrinsic: a single
! ^^mul-high-vector, with no alternative expansion listed.
: emit-simd-v*high ( node -- )
    { [ ^^mul-high-vector ] } emit-vv-vector-op ;
! Expansion used for the (simd-v*hs+) intrinsic: a single
! ^^mul-horizontal-add-vector, with no alternative expansion listed.
: emit-simd-v*hs+ ( node -- )
    { [ ^^mul-horizontal-add-vector ] } emit-vv-vector-op ;
: emit-simd-v/ ( node -- )
{
[ ^^div-vector ]
@ -359,12 +375,26 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
]
} emit-vv-vector-op ;
! Expansion used for the (simd-vavg) intrinsic.
! Two candidate expansions are handed to emit-vv-vector-op:
!   1. a single ^^avg-vector;
!   2. for float-vector-rep, (a + b) * 0.5 built from ^^add-vector,
!      ^load-half-vector and ^^mul-vector.
: emit-simd-vavg ( node -- )
    {
        [ ^^avg-vector ]
        {
            float-vector-rep
            [| a b rep |
                a b rep ^^add-vector
                rep ^load-half-vector
                rep ^^mul-vector
            ]
        }
    } emit-vv-vector-op ;
! Expansion used for the (simd-v.) intrinsic.
! Either a single ^^dot-vector, or, for float-vector-rep, an element-wise
! ^^mul-vector whose result is then reduced with ^sum-vector using the
! same rep.
: emit-simd-v. ( node -- )
    {
        [ ^^dot-vector ]
        { float-vector-rep [ [ ^^mul-vector ] keep ^sum-vector ] }
    } emit-vv-vector-op ;
! Expansion used for the (simd-vsad) intrinsic.
!
! On x86 ##sad-vector is generated as PSADBW, which produces two partial
! sums: one in the low bits of each 64-bit half of the result. Extracting
! only the first element would drop the sum for the upper eight bytes.
! The two halves are therefore swapped with an int-4 shuffle { 2 3 0 1 }
! and added to the original, so the first element holds the total before
! ^^vector>scalar reads it using the widened rep.
: emit-simd-vsad ( node -- )
    {
        [
            [
                ^^sad-vector
                dup { 2 3 0 1 } int-4-rep ^^shuffle-vector-imm
                int-4-rep ^^add-vector
            ]
            [ widen-vector-rep ^^vector>scalar ] bi
        ]
    } emit-vv-vector-op ;
: emit-simd-vsqrt ( node -- )
{
[ ^^sqrt-vector ]
@ -580,10 +610,14 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
{ (simd-vs-) [ emit-simd-vs- ] }
{ (simd-vs*) [ emit-simd-vs* ] }
{ (simd-v*) [ emit-simd-v* ] }
{ (simd-v*high) [ emit-simd-v*high ] }
{ (simd-v*hs+) [ emit-simd-v*hs+ ] }
{ (simd-v/) [ emit-simd-v/ ] }
{ (simd-vmin) [ emit-simd-vmin ] }
{ (simd-vmax) [ emit-simd-vmax ] }
{ (simd-vavg) [ emit-simd-vavg ] }
{ (simd-v.) [ emit-simd-v. ] }
{ (simd-vsad) [ emit-simd-vsad ] }
{ (simd-vsqrt) [ emit-simd-vsqrt ] }
{ (simd-sum) [ emit-simd-sum ] }
{ (simd-vabs) [ emit-simd-vabs ] }

View File

@ -173,11 +173,15 @@ CODEGEN: ##add-sub-vector %add-sub-vector
! Each CODEGEN: line pairs a ## instruction with the % word of the same
! base name; those % words are declared as cpu hooks.
CODEGEN: ##sub-vector %sub-vector
CODEGEN: ##saturated-sub-vector %saturated-sub-vector
CODEGEN: ##mul-vector %mul-vector
CODEGEN: ##mul-high-vector %mul-high-vector
CODEGEN: ##mul-horizontal-add-vector %mul-horizontal-add-vector
CODEGEN: ##saturated-mul-vector %saturated-mul-vector
CODEGEN: ##div-vector %div-vector
CODEGEN: ##min-vector %min-vector
CODEGEN: ##max-vector %max-vector
CODEGEN: ##avg-vector %avg-vector
CODEGEN: ##dot-vector %dot-vector
CODEGEN: ##sad-vector %sad-vector
CODEGEN: ##sqrt-vector %sqrt-vector
CODEGEN: ##horizontal-add-vector %horizontal-add-vector
CODEGEN: ##horizontal-sub-vector %horizontal-sub-vector

View File

@ -16,9 +16,12 @@ CONSTANT: vector>vector-intrinsics
(simd-vs-)
(simd-vs*)
(simd-v*)
(simd-v*high)
(simd-v*hs+)
(simd-v/)
(simd-vmin)
(simd-vmax)
(simd-vavg)
(simd-vsqrt)
(simd-vabs)
(simd-vbitand)
@ -60,6 +63,7 @@ CONSTANT: vector>vector-intrinsics
CONSTANT: vector-other-intrinsics
{
(simd-v.)
(simd-vsad)
(simd-sum)
(simd-vany?)
(simd-vall?)

View File

@ -283,11 +283,15 @@ HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
! Code-generation hooks dispatched on cpu. The binary vector operations all
! share the effect ( dst src1 src2 rep -- ); %sqrt-vector takes a single
! source. Architecture backends provide the methods (e.g. M: x86 ...).
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %saturated-sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
HOOK: %mul-high-vector cpu ( dst src1 src2 rep -- )
HOOK: %mul-horizontal-add-vector cpu ( dst src1 src2 rep -- )
HOOK: %saturated-mul-vector cpu ( dst src1 src2 rep -- )
HOOK: %div-vector cpu ( dst src1 src2 rep -- )
HOOK: %min-vector cpu ( dst src1 src2 rep -- )
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
HOOK: %avg-vector cpu ( dst src1 src2 rep -- )
HOOK: %dot-vector cpu ( dst src1 src2 rep -- )
HOOK: %sad-vector cpu ( dst src1 src2 rep -- )
HOOK: %sqrt-vector cpu ( dst src rep -- )
HOOK: %horizontal-add-vector cpu ( dst src1 src2 rep -- )
HOOK: %horizontal-sub-vector cpu ( dst src1 src2 rep -- )
@ -332,11 +336,15 @@ HOOK: %add-sub-vector-reps cpu ( -- reps )
! Companion hooks, one per vector operation, each returning reps. The x86
! methods build their result from rep lists keyed by CPU feature predicates
! (sse2?, ssse3?, sse4.1?) passed through available-reps.
HOOK: %sub-vector-reps cpu ( -- reps )
HOOK: %saturated-sub-vector-reps cpu ( -- reps )
HOOK: %mul-vector-reps cpu ( -- reps )
HOOK: %mul-high-vector-reps cpu ( -- reps )
HOOK: %mul-horizontal-add-vector-reps cpu ( -- reps )
HOOK: %saturated-mul-vector-reps cpu ( -- reps )
HOOK: %div-vector-reps cpu ( -- reps )
HOOK: %min-vector-reps cpu ( -- reps )
HOOK: %max-vector-reps cpu ( -- reps )
HOOK: %avg-vector-reps cpu ( -- reps )
HOOK: %dot-vector-reps cpu ( -- reps )
HOOK: %sad-vector-reps cpu ( -- reps )
HOOK: %sqrt-vector-reps cpu ( -- reps )
HOOK: %horizontal-add-vector-reps cpu ( -- reps )
HOOK: %horizontal-sub-vector-reps cpu ( -- reps )

View File

@ -1106,6 +1106,33 @@ M: x86 %mul-vector-reps
{ sse4.1? { int-4-rep uint-4-rep } }
} available-reps ;
! x86 code generation for ##mul-high-vector.
! two-operand is applied while rep is kept on the stack, then rep selects
! the instruction: short-8-rep -> PMULHW, ushort-8-rep -> PMULHUW.
! No other rep has a case entry.
M: x86 %mul-high-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ short-8-rep [ PMULHW ] }
{ ushort-8-rep [ PMULHUW ] }
} case ;
! Representations offered for ##mul-high-vector on x86: short-8-rep and
! ushort-8-rep, listed under sse2? and filtered through available-reps.
! These match the case entries of %mul-high-vector.
M: x86 %mul-high-vector-reps
{
{ sse2? { short-8-rep ushort-8-rep } }
} available-reps ;
! x86 code generation for ##mul-horizontal-add-vector.
! two-operand is applied while rep is kept on the stack, then rep selects
! the instruction: both 16-byte reps use PMADDUBSW and both 8-short reps
! use PMADDWD.
! NOTE(review): Intel documents PMADDWD as a signed-word multiply-add and
! PMADDUBSW as unsigned-by-signed bytes with signed saturation -- confirm
! that each rep listed here really matches the instruction's signedness.
M: x86 %mul-horizontal-add-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ char-16-rep [ PMADDUBSW ] }
{ uchar-16-rep [ PMADDUBSW ] }
{ short-8-rep [ PMADDWD ] }
{ ushort-8-rep [ PMADDWD ] }
} case ;
! Representations offered for ##mul-horizontal-add-vector on x86, filtered
! through available-reps: short-8-rep under sse2?, and the two 16-byte reps
! under ssse3?.
! ushort-8-rep is deliberately not offered: it would be generated as
! PMADDWD, which multiplies its operands as signed words, so unsigned
! elements of 0x8000 and above would be treated as negative. Leaving the
! rep out makes callers take their non-intrinsic fallback instead.
! NOTE(review): PMADDUBSW mixes unsigned and signed byte operands; confirm
! that offering it for both char-16-rep and uchar-16-rep is intended.
M: x86 %mul-horizontal-add-vector-reps
{
{ sse2? { short-8-rep } }
{ ssse3? { char-16-rep uchar-16-rep } }
} available-reps ;
M: x86 %div-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
@ -1159,6 +1186,18 @@ M: x86 %max-vector-reps
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
! x86 code generation for ##avg-vector.
! two-operand is applied while rep is kept on the stack, then rep selects
! the instruction: uchar-16-rep -> PAVGB, ushort-8-rep -> PAVGW.
! Only these two unsigned reps have case entries.
M: x86 %avg-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ uchar-16-rep [ PAVGB ] }
{ ushort-8-rep [ PAVGW ] }
} case ;
! Representations offered for ##avg-vector on x86: uchar-16-rep and
! ushort-8-rep, listed under sse2? and filtered through available-reps.
! These match the case entries of %avg-vector.
M: x86 %avg-vector-reps
{
{ sse2? { uchar-16-rep ushort-8-rep } }
} available-reps ;
M: x86 %dot-vector
[ two-operand ] keep
{
@ -1171,6 +1210,18 @@ M: x86 %dot-vector-reps
{ sse4.1? { float-4-rep double-2-rep } }
} available-reps ;
! x86 code generation for ##sad-vector. No stack effect is written on this
! method; the %sad-vector hook declares ( dst src1 src2 rep -- ).
! two-operand is applied while rep is kept on the stack, then rep selects
! the instruction: both char-16-rep and uchar-16-rep use PSADBW.
! NOTE(review): Intel documents PSADBW as operating on unsigned bytes and
! producing one sum per 64-bit half -- confirm the char-16-rep entry and
! how callers combine the two halves.
M: x86 %sad-vector
[ two-operand ] keep
{
{ char-16-rep [ PSADBW ] }
{ uchar-16-rep [ PSADBW ] }
} case ;
! Representations offered for ##sad-vector on x86, listed under sse2? and
! filtered through available-reps.
! Only uchar-16-rep is offered. The instruction used, PSADBW, takes the
! absolute differences of unsigned bytes, so for signed char-16 inputs of
! mixed sign it would compute the wrong sum (e.g. -1 vs 0 differs by 255
! instead of 1). Leaving char-16-rep out makes callers take their
! non-intrinsic fallback instead.
M: x86 %sad-vector-reps
{
{ sse2? { uchar-16-rep } }
} available-reps ;
M: x86 %horizontal-add-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
signed-rep {

View File

@ -168,7 +168,6 @@ M: A vs- \ A-rep [ (simd-vs-) ] [ call-next-method ]
M: A vs* \ A-rep [ (simd-vs*) ] [ call-next-method ] vv->v-op ; inline
M: A v* \ A-rep [ (simd-v*) ] [ call-next-method ] vv->v-op ; inline
M: A v*high \ A-rep [ (simd-v*high) ] [ call-next-method ] vv->v-op ; inline
M: A v*hs+ \ A-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op ; inline
M: A v/ \ A-rep [ (simd-v/) ] [ call-next-method ] vv->v-op ; inline
M: A vavg \ A-rep [ (simd-vavg) ] [ call-next-method ] vv->v-op ; inline
M: A vmin \ A-rep [ (simd-vmin) ] [ call-next-method ] vv->v-op ; inline
@ -273,6 +272,19 @@ SIMD-128: double-2
! vshuffle on simd-128 values is implemented directly by vshuffle-bytes.
M: simd-128 vshuffle ( u perm -- v )
vshuffle-bytes ; inline
! v*hs+ methods for the integer 128-bit types. Each one hands vv->v-op its
! own rep together with two quotations, [ (simd-v*hs+) ] and
! [ call-next-method ] (presumably intrinsic and fallback -- confirm against
! vv->v-op), then converts the result with a cast word:
!   uchar-16 -> ushort-8-cast     char-16 -> short-8-cast
!   ushort-8 -> uint-4-cast       short-8 -> int-4-cast
!   uint-4   -> ulonglong-2-cast  int-4   -> longlong-2-cast
M: uchar-16 v*hs+
uchar-16-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op ushort-8-cast ; inline
M: ushort-8 v*hs+
ushort-8-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op uint-4-cast ; inline
M: uint-4 v*hs+
uint-4-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op ulonglong-2-cast ; inline
M: char-16 v*hs+
char-16-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op short-8-cast ; inline
M: short-8 v*hs+
short-8-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op int-4-cast ; inline
M: int-4 v*hs+
int-4-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op longlong-2-cast ; inline
! Require math.vectors.simd.mirrors only when looking up the "mirrors"
! vocabulary yields a true value.
"mirrors" vocab [ "math.vectors.simd.mirrors" require ] when