SSE integer gather and broadcast
parent
97785c0c09
commit
965730efbe
|
@ -578,40 +578,66 @@ MACRO: available-reps ( alist -- )
|
||||||
reverse [ { } ] suffix
|
reverse [ { } ] suffix
|
||||||
'[ _ cond ] ;
|
'[ _ cond ] ;
|
||||||
|
|
||||||
M: x86 %broadcast-vector ( dst src rep -- )
|
: unsign-rep ( rep -- rep' )
|
||||||
{
|
dup {
|
||||||
{ float-4-rep [ [ float-4-rep %copy ] [ drop dup 0 SHUFPS ] 2bi ] }
|
{ uint-4-rep int-4-rep }
|
||||||
{ double-2-rep [ [ double-2-rep %copy ] [ drop dup UNPCKLPD ] 2bi ] }
|
{ ulonglong-2-rep longlong-2-rep }
|
||||||
|
{ ushort-8-rep short-8-rep }
|
||||||
|
{ uchar-16-rep char-16-rep }
|
||||||
|
} at* [ nip ] [ drop ] if ;
|
||||||
|
|
||||||
|
M:: x86 %broadcast-vector ( dst src rep -- )
|
||||||
|
rep unsign-rep {
|
||||||
|
{ float-4-rep [
|
||||||
|
dst src float-4-rep %copy
|
||||||
|
dst dst 0 SHUFPS
|
||||||
|
] }
|
||||||
|
{ double-2-rep [
|
||||||
|
dst src double-2-rep %copy
|
||||||
|
dst dst UNPCKLPD
|
||||||
|
] }
|
||||||
|
{ longlong-2-rep [ dst src BIN: 01000100 PSHUFD ] }
|
||||||
|
{ int-4-rep [ dst src 0 PSHUFD ] }
|
||||||
|
{ short-8-rep [
|
||||||
|
dst src 0 PSHUFLW
|
||||||
|
dst dst PUNPCKLQDQ
|
||||||
|
] }
|
||||||
|
{ char-16-rep [
|
||||||
|
dst src char-16-rep %copy
|
||||||
|
dst dst PUNPCKLBW
|
||||||
|
dst dst 0 PSHUFLW
|
||||||
|
dst dst PUNPCKLQDQ
|
||||||
|
] }
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
M: x86 %broadcast-vector-reps
|
M: x86 %broadcast-vector-reps
|
||||||
{
|
{
|
||||||
! Can't do this with sse1 since it will want to unbox
|
! Can't do this with sse1 since it will want to unbox
|
||||||
! a double-precision float and convert to single precision
|
! a double-precision float and convert to single precision
|
||||||
{ sse2? { float-4-rep double-2-rep } }
|
{ sse2? {
|
||||||
|
float-4-rep double-2-rep
|
||||||
|
longlong-2-rep ulonglong-2-rep
|
||||||
|
int-4-rep uint-4-rep
|
||||||
|
short-8-rep ushort-8-rep
|
||||||
|
char-16-rep uchar-16-rep
|
||||||
|
} }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
|
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
|
||||||
{
|
rep unsign-rep {
|
||||||
{
|
{ float-4-rep [
|
||||||
[ rep float-4-rep eq? ]
|
dst src1 float-4-rep %copy
|
||||||
[
|
dst src2 UNPCKLPS
|
||||||
dst src1 float-4-rep %copy
|
src3 src4 UNPCKLPS
|
||||||
dst src2 UNPCKLPS
|
dst src3 MOVLHPS
|
||||||
src3 src4 UNPCKLPS
|
] }
|
||||||
dst src3 MOVLHPS
|
{ int-4-rep [
|
||||||
]
|
dst src1 int-4-rep %copy
|
||||||
}
|
dst src2 PUNPCKLDQ
|
||||||
{
|
src3 src4 PUNPCKLDQ
|
||||||
[ rep { int-4-rep uint-4-rep } memq? ]
|
dst src3 PUNPCKLQDQ
|
||||||
[
|
] }
|
||||||
dst src1 int-4-rep %copy
|
} case ;
|
||||||
dst src2 PUNPCKLDQ
|
|
||||||
src3 src4 PUNPCKLDQ
|
|
||||||
dst src3 PUNPCKLQDQ
|
|
||||||
]
|
|
||||||
}
|
|
||||||
} cond ;
|
|
||||||
|
|
||||||
M: x86 %gather-vector-4-reps
|
M: x86 %gather-vector-4-reps
|
||||||
{
|
{
|
||||||
|
@ -621,19 +647,20 @@ M: x86 %gather-vector-4-reps
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
|
M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
|
||||||
rep {
|
rep unsign-rep {
|
||||||
{
|
{ double-2-rep [
|
||||||
double-2-rep
|
dst src1 double-2-rep %copy
|
||||||
[
|
dst src2 UNPCKLPD
|
||||||
dst src1 double-2-rep %copy
|
] }
|
||||||
dst src2 UNPCKLPD
|
{ longlong-2-rep [
|
||||||
]
|
dst src1 longlong-2-rep %copy
|
||||||
}
|
dst src2 PUNPCKLQDQ
|
||||||
|
] }
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
M: x86 %gather-vector-2-reps
|
M: x86 %gather-vector-2-reps
|
||||||
{
|
{
|
||||||
{ sse2? { double-2-rep } }
|
{ sse2? { double-2-rep longlong-2-rep ulonglong-2-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %add-vector ( dst src1 src2 rep -- )
|
M: x86 %add-vector ( dst src1 src2 rep -- )
|
||||||
|
|
Loading…
Reference in New Issue