change ##horizontal-add-vector insn to better match what the HADD SSE instructions do (add adjacent pairs, pack results)

db4
Joe Groff 2009-11-04 12:18:01 -06:00
parent b98742be30
commit f6643a1c72
3 changed files with 29 additions and 14 deletions
basis
compiler/cfg/instructions

View File

@ -408,13 +408,13 @@ use: src1 src2
literal: rep ;
PURE-INSN: ##horizontal-add-vector
def: dst/scalar-rep
use: src
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##horizontal-sub-vector
def: dst/scalar-rep
use: src
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##horizontal-shl-vector-imm

View File

@ -277,8 +277,8 @@ HOOK: %min-vector cpu ( dst src1 src2 rep -- )
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
HOOK: %dot-vector cpu ( dst src1 src2 rep -- )
HOOK: %sqrt-vector cpu ( dst src rep -- )
HOOK: %horizontal-add-vector cpu ( dst src rep -- )
HOOK: %horizontal-sub-vector cpu ( dst src rep -- )
HOOK: %horizontal-add-vector cpu ( dst src1 src2 rep -- )
HOOK: %horizontal-sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %abs-vector cpu ( dst src rep -- )
HOOK: %and-vector cpu ( dst src1 src2 rep -- )
HOOK: %andn-vector cpu ( dst src1 src2 rep -- )

View File

@ -1134,14 +1134,25 @@ M: x86 %dot-vector
{ float-4-rep [
sse4.1?
[ HEX: ff DPPS ]
[ [ MULPS ] [ drop dup float-4-rep %horizontal-add-vector ] 2bi ]
if
[
[ MULPS ] [
drop 2dup float-4-rep
[ %horizontal-add-vector ]
[ %horizontal-add-vector ]
[ nip %vector>scalar ] 3tri
] 2bi
] if
] }
{ double-2-rep [
sse4.1?
[ HEX: ff DPPD ]
[ [ MULPD ] [ drop dup double-2-rep %horizontal-add-vector ] 2bi ]
if
[
[ MULPD ] [
drop 2dup double-2-rep
[ %horizontal-add-vector ]
[ nip %vector>scalar ] 3bi
] 2bi
] if
] }
} case ;
@ -1150,15 +1161,19 @@ M: x86 %dot-vector-reps
{ sse3? { float-4-rep double-2-rep } }
} available-reps ;
M: x86 %horizontal-add-vector ( dst src rep -- )
{
{ float-4-rep [ [ float-4-rep %copy ] [ HADDPS ] [ HADDPS ] 2tri ] }
{ double-2-rep [ [ double-2-rep %copy ] [ HADDPD ] 2bi ] }
! x86 method for %horizontal-add-vector, declared stack effect ( dst src1 src2 rep -- ).
! Picks one instruction word by representation: HADDPS / HADDPD for the float
! reps, PHADDD / PHADDW for the integer reps.
M: x86 %horizontal-add-vector ( dst src1 src2 rep -- )
! keep runs two-operand and then puts rep back on top for the dispatch below.
! NOTE(review): presumably two-operand leaves a dst/src pair for the instruction
! word; its definition is not visible here -- confirm.
[ two-operand ] keep
! NOTE(review): the table below has no unsigned entries, so unsign-rep presumably
! normalizes unsigned reps to their signed counterparts before dispatch -- confirm.
unsign-rep {
{ float-4-rep [ HADDPS ] }
{ double-2-rep [ HADDPD ] }
{ int-4-rep [ PHADDD ] }
{ short-8-rep [ PHADDW ] }
} case ;
! x86 method for %horizontal-add-vector-reps: a table pairing a CPU feature
! predicate with the representations listed under it, passed to available-reps.
! sse3? covers the float reps; ssse3? covers the signed and unsigned integer reps.
! NOTE(review): how available-reps evaluates and combines the entries is not
! visible here -- confirm against its definition.
M: x86 %horizontal-add-vector-reps
{
{ sse3? { float-4-rep double-2-rep } }
{ ssse3? { int-4-rep uint-4-rep short-8-rep ushort-8-rep } }
} available-reps ;
M: x86 %horizontal-shl-vector-imm ( dst src1 src2 rep -- )