change ##horizontal-add-vector insn to better match what the HADD SSE instructions do (add adjacent pairs, pack results)
parent
b98742be30
commit
f6643a1c72
|
@ -408,13 +408,13 @@ use: src1 src2
|
||||||
literal: rep ;
|
literal: rep ;
|
||||||
|
|
||||||
PURE-INSN: ##horizontal-add-vector
|
PURE-INSN: ##horizontal-add-vector
|
||||||
def: dst/scalar-rep
|
def: dst
|
||||||
use: src
|
use: src1 src2
|
||||||
literal: rep ;
|
literal: rep ;
|
||||||
|
|
||||||
PURE-INSN: ##horizontal-sub-vector
|
PURE-INSN: ##horizontal-sub-vector
|
||||||
def: dst/scalar-rep
|
def: dst
|
||||||
use: src
|
use: src1 src2
|
||||||
literal: rep ;
|
literal: rep ;
|
||||||
|
|
||||||
PURE-INSN: ##horizontal-shl-vector-imm
|
PURE-INSN: ##horizontal-shl-vector-imm
|
||||||
|
|
|
@ -277,8 +277,8 @@ HOOK: %min-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %dot-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %dot-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %sqrt-vector cpu ( dst src rep -- )
|
HOOK: %sqrt-vector cpu ( dst src rep -- )
|
||||||
HOOK: %horizontal-add-vector cpu ( dst src rep -- )
|
HOOK: %horizontal-add-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %horizontal-sub-vector cpu ( dst src rep -- )
|
HOOK: %horizontal-sub-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %abs-vector cpu ( dst src rep -- )
|
HOOK: %abs-vector cpu ( dst src rep -- )
|
||||||
HOOK: %and-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %and-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %andn-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %andn-vector cpu ( dst src1 src2 rep -- )
|
||||||
|
|
|
@ -1134,14 +1134,25 @@ M: x86 %dot-vector
|
||||||
{ float-4-rep [
|
{ float-4-rep [
|
||||||
sse4.1?
|
sse4.1?
|
||||||
[ HEX: ff DPPS ]
|
[ HEX: ff DPPS ]
|
||||||
[ [ MULPS ] [ drop dup float-4-rep %horizontal-add-vector ] 2bi ]
|
[
|
||||||
if
|
[ MULPS ] [
|
||||||
|
drop 2dup float-4-rep
|
||||||
|
[ %horizontal-add-vector ]
|
||||||
|
[ %horizontal-add-vector ]
|
||||||
|
[ nip %vector>scalar ] 3tri
|
||||||
|
] 2bi
|
||||||
|
] if
|
||||||
] }
|
] }
|
||||||
{ double-2-rep [
|
{ double-2-rep [
|
||||||
sse4.1?
|
sse4.1?
|
||||||
[ HEX: ff DPPD ]
|
[ HEX: ff DPPD ]
|
||||||
[ [ MULPD ] [ drop dup double-2-rep %horizontal-add-vector ] 2bi ]
|
[
|
||||||
if
|
[ MULPD ] [
|
||||||
|
drop 2dup double-2-rep
|
||||||
|
[ %horizontal-add-vector ]
|
||||||
|
[ nip %vector>scalar ] 3bi
|
||||||
|
] 2bi
|
||||||
|
] if
|
||||||
] }
|
] }
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
|
@ -1150,15 +1161,19 @@ M: x86 %dot-vector-reps
|
||||||
{ sse3? { float-4-rep double-2-rep } }
|
{ sse3? { float-4-rep double-2-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %horizontal-add-vector ( dst src rep -- )
|
M: x86 %horizontal-add-vector ( dst src1 src2 rep -- )
|
||||||
{
|
[ two-operand ] keep
|
||||||
{ float-4-rep [ [ float-4-rep %copy ] [ HADDPS ] [ HADDPS ] 2tri ] }
|
unsign-rep {
|
||||||
{ double-2-rep [ [ double-2-rep %copy ] [ HADDPD ] 2bi ] }
|
{ float-4-rep [ HADDPS ] }
|
||||||
|
{ double-2-rep [ HADDPD ] }
|
||||||
|
{ int-4-rep [ PHADDD ] }
|
||||||
|
{ short-8-rep [ PHADDW ] }
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
M: x86 %horizontal-add-vector-reps
|
M: x86 %horizontal-add-vector-reps
|
||||||
{
|
{
|
||||||
{ sse3? { float-4-rep double-2-rep } }
|
{ sse3? { float-4-rep double-2-rep } }
|
||||||
|
{ ssse3? { int-4-rep uint-4-rep short-8-rep ushort-8-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %horizontal-shl-vector-imm ( dst src1 src2 rep -- )
|
M: x86 %horizontal-shl-vector-imm ( dst src1 src2 rep -- )
|
||||||
|
|
Loading…
Reference in New Issue