diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index d4d84a088a..30fe8b590e 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -408,13 +408,13 @@ use: src1 src2 literal: rep ; PURE-INSN: ##horizontal-add-vector -def: dst/scalar-rep -use: src +def: dst +use: src1 src2 literal: rep ; PURE-INSN: ##horizontal-sub-vector -def: dst/scalar-rep -use: src +def: dst +use: src1 src2 literal: rep ; PURE-INSN: ##horizontal-shl-vector-imm diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 75fbb85542..81aea67eb5 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -277,8 +277,8 @@ HOOK: %min-vector cpu ( dst src1 src2 rep -- ) HOOK: %max-vector cpu ( dst src1 src2 rep -- ) HOOK: %dot-vector cpu ( dst src1 src2 rep -- ) HOOK: %sqrt-vector cpu ( dst src rep -- ) -HOOK: %horizontal-add-vector cpu ( dst src rep -- ) -HOOK: %horizontal-sub-vector cpu ( dst src rep -- ) +HOOK: %horizontal-add-vector cpu ( dst src1 src2 rep -- ) +HOOK: %horizontal-sub-vector cpu ( dst src1 src2 rep -- ) HOOK: %abs-vector cpu ( dst src rep -- ) HOOK: %and-vector cpu ( dst src1 src2 rep -- ) HOOK: %andn-vector cpu ( dst src1 src2 rep -- ) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index b0a5dc0897..68c2fb0438 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -1134,14 +1134,25 @@ M: x86 %dot-vector { float-4-rep [ sse4.1? [ HEX: ff DPPS ] - [ [ MULPS ] [ drop dup float-4-rep %horizontal-add-vector ] 2bi ] - if + [ + [ MULPS ] [ + drop 2dup float-4-rep + [ %horizontal-add-vector ] + [ %horizontal-add-vector ] + [ nip %vector>scalar ] 3tri + ] 2bi + ] if ] } { double-2-rep [ sse4.1? [ HEX: ff DPPD ] - [ [ MULPD ] [ drop dup double-2-rep %horizontal-add-vector ] 2bi ] - if + [ + [ MULPD ] [ + drop 2dup double-2-rep + [ %horizontal-add-vector ] + [ nip %vector>scalar ] 3bi + ] 2bi + ] if ] } } case ; @@ -1150,15 +1161,19 @@ M: x86 %dot-vector-reps { sse3? { float-4-rep double-2-rep } } } available-reps ; -M: x86 %horizontal-add-vector ( dst src rep -- ) - { - { float-4-rep [ [ float-4-rep %copy ] [ HADDPS ] [ HADDPS ] 2tri ] } - { double-2-rep [ [ double-2-rep %copy ] [ HADDPD ] 2bi ] } +M: x86 %horizontal-add-vector ( dst src1 src2 rep -- ) + [ two-operand ] keep + unsign-rep { + { float-4-rep [ HADDPS ] } + { double-2-rep [ HADDPD ] } + { int-4-rep [ PHADDD ] } + { short-8-rep [ PHADDW ] } } case ; M: x86 %horizontal-add-vector-reps { { sse3? { float-4-rep double-2-rep } } + { ssse3? { int-4-rep uint-4-rep short-8-rep ushort-8-rep } } } available-reps ; M: x86 %horizontal-shl-vector-imm ( dst src1 src2 rep -- )