add %merge-vector-head and %merge-vector-tail instructions to back vmerge
parent
05c722ea0c
commit
0c9c3d4859
|
@ -280,6 +280,16 @@ def: dst
|
||||||
use: src
|
use: src
|
||||||
literal: shuffle rep ;
|
literal: shuffle rep ;
|
||||||
|
|
||||||
|
! ##merge-vector-head: vector instruction with one output (dst), two
! inputs (src1, src2) and a literal vector representation (rep).
! It is emitted for the (simd-vmerge-head) intrinsic and is tied to the
! %merge-vector-head hook by a CODEGEN: line.
PURE-INSN: ##merge-vector-head
|
||||||
|
def: dst
|
||||||
|
use: src1 src2
|
||||||
|
literal: rep ;
|
||||||
|
|
||||||
|
! ##merge-vector-tail: vector instruction with one output (dst), two
! inputs (src1, src2) and a literal vector representation (rep).
! It is emitted for the (simd-vmerge-tail) intrinsic and is tied to the
! %merge-vector-tail hook by a CODEGEN: line.
PURE-INSN: ##merge-vector-tail
|
||||||
|
def: dst
|
||||||
|
use: src1 src2
|
||||||
|
literal: rep ;
|
||||||
|
|
||||||
PURE-INSN: ##compare-vector
|
PURE-INSN: ##compare-vector
|
||||||
def: dst
|
def: dst
|
||||||
use: src1 src2
|
use: src1 src2
|
||||||
|
|
|
@ -194,6 +194,8 @@ IN: compiler.cfg.intrinsics
|
||||||
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
|
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
|
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] }
|
{ math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] }
|
||||||
|
{ math.vectors.simd.intrinsics:(simd-vmerge-head) [ [ ^^merge-vector-head ] emit-binary-vector-op ] }
|
||||||
|
{ math.vectors.simd.intrinsics:(simd-vmerge-tail) [ [ ^^merge-vector-tail ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] }
|
{ math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
|
{ math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
|
||||||
|
|
|
@ -163,6 +163,8 @@ CODEGEN: ##zero-vector %zero-vector
|
||||||
CODEGEN: ##gather-vector-2 %gather-vector-2
|
CODEGEN: ##gather-vector-2 %gather-vector-2
|
||||||
CODEGEN: ##gather-vector-4 %gather-vector-4
|
CODEGEN: ##gather-vector-4 %gather-vector-4
|
||||||
CODEGEN: ##shuffle-vector %shuffle-vector
|
CODEGEN: ##shuffle-vector %shuffle-vector
|
||||||
|
! Pair each merge instruction with the CPU hook of the matching name.
! NOTE(review): presumably CODEGEN: passes the instruction's slots
! (dst src1 src2 rep) to the hook in declaration order -- confirm.
CODEGEN: ##merge-vector-head %merge-vector-head
|
||||||
|
CODEGEN: ##merge-vector-tail %merge-vector-tail
|
||||||
CODEGEN: ##compare-vector %compare-vector
|
CODEGEN: ##compare-vector %compare-vector
|
||||||
CODEGEN: ##test-vector %test-vector
|
CODEGEN: ##test-vector %test-vector
|
||||||
CODEGEN: ##add-vector %add-vector
|
CODEGEN: ##add-vector %add-vector
|
||||||
|
|
|
@ -31,6 +31,8 @@ IN: compiler.tree.propagation.simd
|
||||||
(simd-hlshift)
|
(simd-hlshift)
|
||||||
(simd-hrshift)
|
(simd-hrshift)
|
||||||
(simd-vshuffle)
|
(simd-vshuffle)
|
||||||
|
(simd-vmerge-head)
|
||||||
|
(simd-vmerge-tail)
|
||||||
(simd-v<=)
|
(simd-v<=)
|
||||||
(simd-v<)
|
(simd-v<)
|
||||||
(simd-v=)
|
(simd-v=)
|
||||||
|
|
|
@ -218,6 +218,8 @@ HOOK: %fill-vector cpu ( dst rep -- )
|
||||||
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
|
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
|
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
|
||||||
HOOK: %shuffle-vector cpu ( dst src shuffle rep -- )
|
HOOK: %shuffle-vector cpu ( dst src shuffle rep -- )
|
||||||
|
! Hooks dispatched on cpu that back ##merge-vector-head and
! ##merge-vector-tail. Each takes dst src1 src2 rep and leaves nothing
! on the stack.
HOOK: %merge-vector-head cpu ( dst src1 src2 rep -- )
|
||||||
|
HOOK: %merge-vector-tail cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %compare-vector cpu ( dst src1 src2 temp rep cc -- )
|
HOOK: %compare-vector cpu ( dst src1 src2 temp rep cc -- )
|
||||||
HOOK: %test-vector cpu ( dst src1 temp rep vcc -- )
|
HOOK: %test-vector cpu ( dst src1 temp rep vcc -- )
|
||||||
HOOK: %test-vector-branch cpu ( label src1 temp rep vcc -- )
|
HOOK: %test-vector-branch cpu ( label src1 temp rep vcc -- )
|
||||||
|
@ -256,6 +258,7 @@ HOOK: %fill-vector-reps cpu ( -- reps )
|
||||||
HOOK: %gather-vector-2-reps cpu ( -- reps )
|
HOOK: %gather-vector-2-reps cpu ( -- reps )
|
||||||
HOOK: %gather-vector-4-reps cpu ( -- reps )
|
HOOK: %gather-vector-4-reps cpu ( -- reps )
|
||||||
HOOK: %shuffle-vector-reps cpu ( -- reps )
|
HOOK: %shuffle-vector-reps cpu ( -- reps )
|
||||||
|
! Returns the vector reps the current cpu supports for merging. The same
! hook is consulted for both (simd-vmerge-head) and (simd-vmerge-tail)
! in supported-simd-op?.
HOOK: %merge-vector-reps cpu ( -- reps )
|
||||||
HOOK: %compare-vector-reps cpu ( cc -- reps )
|
HOOK: %compare-vector-reps cpu ( cc -- reps )
|
||||||
HOOK: %test-vector-reps cpu ( -- reps )
|
HOOK: %test-vector-reps cpu ( -- reps )
|
||||||
HOOK: %add-vector-reps cpu ( -- reps )
|
HOOK: %add-vector-reps cpu ( -- reps )
|
||||||
|
|
|
@ -262,6 +262,7 @@ M: ppc %fill-vector-reps { } ;
|
||||||
M: ppc %gather-vector-2-reps { } ;
|
M: ppc %gather-vector-2-reps { } ;
|
||||||
M: ppc %gather-vector-4-reps { } ;
|
M: ppc %gather-vector-4-reps { } ;
|
||||||
M: ppc %shuffle-vector-reps { } ;
|
M: ppc %shuffle-vector-reps { } ;
|
||||||
|
! The ppc backend reports an empty rep list for vector merge, like the
! neighbouring ppc *-reps methods.
M: ppc %merge-vector-reps { } ;
|
||||||
M: ppc %compare-vector-reps drop { } ;
|
M: ppc %compare-vector-reps drop { } ;
|
||||||
M: ppc %test-vector-reps { } ;
|
M: ppc %test-vector-reps { } ;
|
||||||
M: ppc %add-vector-reps { } ;
|
M: ppc %add-vector-reps { } ;
|
||||||
|
|
|
@ -721,6 +721,34 @@ M: x86 %shuffle-vector-reps
|
||||||
{ sse2? { double-2-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
{ sse2? { double-2-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
|
! x86 method for the %merge-vector-head hook ( dst src1 src2 rep -- ).
! Runs two-operand on the inputs while keeping rep, passes rep through
! unsign-rep, then dispatches on the result to one of the UNPCKL* /
! PUNPCKL* instructions listed below.
! NOTE(review): only signed rep names appear in the case table, while
! %merge-vector-reps also advertises the unsigned ones; presumably
! unsign-rep maps each unsigned rep onto its signed counterpart -- confirm.
M: x86 %merge-vector-head
|
||||||
|
[ two-operand ] keep
|
||||||
|
unsign-rep {
|
||||||
|
{ double-2-rep [ UNPCKLPD ] }
|
||||||
|
{ float-4-rep [ UNPCKLPS ] }
|
||||||
|
{ longlong-2-rep [ PUNPCKLQDQ ] }
|
||||||
|
{ int-4-rep [ PUNPCKLDQ ] }
|
||||||
|
{ short-8-rep [ PUNPCKLWD ] }
|
||||||
|
{ char-16-rep [ PUNPCKLBW ] }
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
! x86 method for the %merge-vector-tail hook ( dst src1 src2 rep -- ).
! Same shape as the head variant: runs two-operand while keeping rep,
! passes rep through unsign-rep, then dispatches on the result to one of
! the UNPCKH* / PUNPCKH* instructions listed below.
! NOTE(review): only signed rep names appear in the case table, while
! %merge-vector-reps also advertises the unsigned ones; presumably
! unsign-rep maps each unsigned rep onto its signed counterpart -- confirm.
M: x86 %merge-vector-tail
|
||||||
|
[ two-operand ] keep
|
||||||
|
unsign-rep {
|
||||||
|
{ double-2-rep [ UNPCKHPD ] }
|
||||||
|
{ float-4-rep [ UNPCKHPS ] }
|
||||||
|
{ longlong-2-rep [ PUNPCKHQDQ ] }
|
||||||
|
{ int-4-rep [ PUNPCKHDQ ] }
|
||||||
|
{ short-8-rep [ PUNPCKHWD ] }
|
||||||
|
{ char-16-rep [ PUNPCKHBW ] }
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
! x86 method for %merge-vector-reps ( -- reps ).
! float-4-rep is listed under sse?; double-2-rep and all the integer
! reps (signed and unsigned) are listed under sse2?. The table is handed
! to available-reps, in the same way as %shuffle-vector-reps above.
M: x86 %merge-vector-reps
|
||||||
|
{
|
||||||
|
{ sse? { float-4-rep } }
|
||||||
|
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
:: compare-float-v-operands ( dst src1 src2 temp rep cc -- dst' src' rep cc' )
|
:: compare-float-v-operands ( dst src1 src2 temp rep cc -- dst' src' rep cc' )
|
||||||
cc { cc> cc>= cc/> cc/>= } member?
|
cc { cc> cc>= cc/> cc/>= } member?
|
||||||
[ dst src2 src1 rep two-operand rep cc swap-cc ]
|
[ dst src2 src1 rep two-operand rep cc swap-cc ]
|
||||||
|
|
|
@ -325,6 +325,8 @@ A-v.-op DEFINES-PRIVATE ${A}-v.-op
|
||||||
A-sum-op DEFINES-PRIVATE ${A}-sum-op
|
A-sum-op DEFINES-PRIVATE ${A}-sum-op
|
||||||
A-vany-op DEFINES-PRIVATE ${A}-vany-op
|
A-vany-op DEFINES-PRIVATE ${A}-vany-op
|
||||||
A-vall-op DEFINES-PRIVATE ${A}-vall-op
|
A-vall-op DEFINES-PRIVATE ${A}-vall-op
|
||||||
|
A-vmerge-head-op DEFINES-PRIVATE ${A}-vmerge-head-op
|
||||||
|
A-vmerge-tail-op DEFINES-PRIVATE ${A}-vmerge-tail-op
|
||||||
|
|
||||||
WHERE
|
WHERE
|
||||||
|
|
||||||
|
@ -419,6 +421,20 @@ INSTANCE: A sequence
|
||||||
: A-vall-op ( v1 quot -- n )
|
: A-vall-op ( v1 quot -- n )
|
||||||
[ (simd-vbitand) ] (A-v->n-op) ; inline
|
[ (simd-vbitand) ] (A-v->n-op) ; inline
|
||||||
|
|
||||||
|
! Special wrapper registered for vmerge-head in >>special-wrappers.
! Discards quot, reads underlying1>> from both v1 and v2, and applies
! both (simd-vmerge-head) and (simd-vmerge-tail) (with A-rep) to that
! pair. The two results are passed to boa to build the new A.
! NOTE(review): presumably A stores its data as two halves
! (underlying1 / underlying2), so the head merge of the whole vector is
! the head and tail merge of the first halves -- confirm.
: A-vmerge-head-op ( v1 v2 quot -- v )
|
||||||
|
drop
|
||||||
|
[ underlying1>> ] bi@
|
||||||
|
[ A-rep (simd-vmerge-head) ]
|
||||||
|
[ A-rep (simd-vmerge-tail) ] 2bi
|
||||||
|
\ A boa ;
|
||||||
|
|
||||||
|
! Special wrapper registered for vmerge-tail in >>special-wrappers.
! Discards quot, reads underlying2>> from both v1 and v2, and applies
! both (simd-vmerge-head) and (simd-vmerge-tail) (with A-rep) to that
! pair. The two results are passed to boa to build the new A.
! NOTE(review): presumably A stores its data as two halves
! (underlying1 / underlying2), so the tail merge of the whole vector is
! the head and tail merge of the second halves -- confirm.
: A-vmerge-tail-op ( v1 v2 quot -- v )
|
||||||
|
drop
|
||||||
|
[ underlying2>> ] bi@
|
||||||
|
[ A-rep (simd-vmerge-head) ]
|
||||||
|
[ A-rep (simd-vmerge-tail) ] 2bi
|
||||||
|
\ A boa ;
|
||||||
|
|
||||||
simd new
|
simd new
|
||||||
\ A >>class
|
\ A >>class
|
||||||
\ A-with >>ctor
|
\ A-with >>ctor
|
||||||
|
@ -429,6 +445,8 @@ simd new
|
||||||
{ vnone? A-vany-op }
|
{ vnone? A-vany-op }
|
||||||
{ vany? A-vany-op }
|
{ vany? A-vany-op }
|
||||||
{ vall? A-vall-op }
|
{ vall? A-vall-op }
|
||||||
|
{ vmerge-head A-vmerge-head-op }
|
||||||
|
{ vmerge-tail A-vmerge-tail-op }
|
||||||
} >>special-wrappers
|
} >>special-wrappers
|
||||||
{
|
{
|
||||||
{ { +vector+ +vector+ -> +vector+ } A-vv->v-op }
|
{ { +vector+ +vector+ -> +vector+ } A-vv->v-op }
|
||||||
|
|
|
@ -55,6 +55,8 @@ SIMD-OP: vrshift
|
||||||
SIMD-OP: hlshift
|
SIMD-OP: hlshift
|
||||||
SIMD-OP: hrshift
|
SIMD-OP: hrshift
|
||||||
SIMD-OP: vshuffle
|
SIMD-OP: vshuffle
|
||||||
|
SIMD-OP: vmerge-head
|
||||||
|
SIMD-OP: vmerge-tail
|
||||||
SIMD-OP: v<=
|
SIMD-OP: v<=
|
||||||
SIMD-OP: v<
|
SIMD-OP: v<
|
||||||
SIMD-OP: v=
|
SIMD-OP: v=
|
||||||
|
@ -118,44 +120,46 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
|
||||||
|
|
||||||
M: vector-rep supported-simd-op?
|
M: vector-rep supported-simd-op?
|
||||||
{
|
{
|
||||||
{ \ (simd-v+) [ %add-vector-reps ] }
|
{ \ (simd-v+) [ %add-vector-reps ] }
|
||||||
{ \ (simd-vs+) [ %saturated-add-vector-reps ] }
|
{ \ (simd-vs+) [ %saturated-add-vector-reps ] }
|
||||||
{ \ (simd-v+-) [ %add-sub-vector-reps ] }
|
{ \ (simd-v+-) [ %add-sub-vector-reps ] }
|
||||||
{ \ (simd-v-) [ %sub-vector-reps ] }
|
{ \ (simd-v-) [ %sub-vector-reps ] }
|
||||||
{ \ (simd-vs-) [ %saturated-sub-vector-reps ] }
|
{ \ (simd-vs-) [ %saturated-sub-vector-reps ] }
|
||||||
{ \ (simd-v*) [ %mul-vector-reps ] }
|
{ \ (simd-v*) [ %mul-vector-reps ] }
|
||||||
{ \ (simd-vs*) [ %saturated-mul-vector-reps ] }
|
{ \ (simd-vs*) [ %saturated-mul-vector-reps ] }
|
||||||
{ \ (simd-v/) [ %div-vector-reps ] }
|
{ \ (simd-v/) [ %div-vector-reps ] }
|
||||||
{ \ (simd-vmin) [ %min-vector-reps ] }
|
{ \ (simd-vmin) [ %min-vector-reps ] }
|
||||||
{ \ (simd-vmax) [ %max-vector-reps ] }
|
{ \ (simd-vmax) [ %max-vector-reps ] }
|
||||||
{ \ (simd-v.) [ %dot-vector-reps ] }
|
{ \ (simd-v.) [ %dot-vector-reps ] }
|
||||||
{ \ (simd-vsqrt) [ %sqrt-vector-reps ] }
|
{ \ (simd-vsqrt) [ %sqrt-vector-reps ] }
|
||||||
{ \ (simd-sum) [ %horizontal-add-vector-reps ] }
|
{ \ (simd-sum) [ %horizontal-add-vector-reps ] }
|
||||||
{ \ (simd-vabs) [ %abs-vector-reps ] }
|
{ \ (simd-vabs) [ %abs-vector-reps ] }
|
||||||
{ \ (simd-vbitand) [ %and-vector-reps ] }
|
{ \ (simd-vbitand) [ %and-vector-reps ] }
|
||||||
{ \ (simd-vbitandn) [ %andn-vector-reps ] }
|
{ \ (simd-vbitandn) [ %andn-vector-reps ] }
|
||||||
{ \ (simd-vbitor) [ %or-vector-reps ] }
|
{ \ (simd-vbitor) [ %or-vector-reps ] }
|
||||||
{ \ (simd-vbitxor) [ %xor-vector-reps ] }
|
{ \ (simd-vbitxor) [ %xor-vector-reps ] }
|
||||||
{ \ (simd-vbitnot) [ %not-vector-reps ] }
|
{ \ (simd-vbitnot) [ %not-vector-reps ] }
|
||||||
{ \ (simd-vand) [ %and-vector-reps ] }
|
{ \ (simd-vand) [ %and-vector-reps ] }
|
||||||
{ \ (simd-vandn) [ %andn-vector-reps ] }
|
{ \ (simd-vandn) [ %andn-vector-reps ] }
|
||||||
{ \ (simd-vor) [ %or-vector-reps ] }
|
{ \ (simd-vor) [ %or-vector-reps ] }
|
||||||
{ \ (simd-vxor) [ %xor-vector-reps ] }
|
{ \ (simd-vxor) [ %xor-vector-reps ] }
|
||||||
{ \ (simd-vnot) [ %not-vector-reps ] }
|
{ \ (simd-vnot) [ %not-vector-reps ] }
|
||||||
{ \ (simd-vlshift) [ %shl-vector-reps ] }
|
{ \ (simd-vlshift) [ %shl-vector-reps ] }
|
||||||
{ \ (simd-vrshift) [ %shr-vector-reps ] }
|
{ \ (simd-vrshift) [ %shr-vector-reps ] }
|
||||||
{ \ (simd-hlshift) [ %horizontal-shl-vector-reps ] }
|
{ \ (simd-hlshift) [ %horizontal-shl-vector-reps ] }
|
||||||
{ \ (simd-hrshift) [ %horizontal-shr-vector-reps ] }
|
{ \ (simd-hrshift) [ %horizontal-shr-vector-reps ] }
|
||||||
{ \ (simd-vshuffle) [ %shuffle-vector-reps ] }
|
{ \ (simd-vshuffle) [ %shuffle-vector-reps ] }
|
||||||
{ \ (simd-v<=) [ cc<= %compare-vector-reps ] }
|
{ \ (simd-vmerge-head) [ %merge-vector-reps ] }
|
||||||
{ \ (simd-v<) [ cc< %compare-vector-reps ] }
|
{ \ (simd-vmerge-tail) [ %merge-vector-reps ] }
|
||||||
{ \ (simd-v=) [ cc= %compare-vector-reps ] }
|
{ \ (simd-v<=) [ cc<= %compare-vector-reps ] }
|
||||||
{ \ (simd-v>) [ cc> %compare-vector-reps ] }
|
{ \ (simd-v<) [ cc< %compare-vector-reps ] }
|
||||||
{ \ (simd-v>=) [ cc>= %compare-vector-reps ] }
|
{ \ (simd-v=) [ cc= %compare-vector-reps ] }
|
||||||
{ \ (simd-vunordered?) [ cc/<>= %compare-vector-reps ] }
|
{ \ (simd-v>) [ cc> %compare-vector-reps ] }
|
||||||
{ \ (simd-gather-2) [ %gather-vector-2-reps ] }
|
{ \ (simd-v>=) [ cc>= %compare-vector-reps ] }
|
||||||
{ \ (simd-gather-4) [ %gather-vector-4-reps ] }
|
{ \ (simd-vunordered?) [ cc/<>= %compare-vector-reps ] }
|
||||||
{ \ (simd-vany?) [ %test-vector-reps ] }
|
{ \ (simd-gather-2) [ %gather-vector-2-reps ] }
|
||||||
{ \ (simd-vall?) [ %test-vector-reps ] }
|
{ \ (simd-gather-4) [ %gather-vector-4-reps ] }
|
||||||
{ \ (simd-vnone?) [ %test-vector-reps ] }
|
{ \ (simd-vany?) [ %test-vector-reps ] }
|
||||||
|
{ \ (simd-vall?) [ %test-vector-reps ] }
|
||||||
|
{ \ (simd-vnone?) [ %test-vector-reps ] }
|
||||||
} case member? ;
|
} case member? ;
|
||||||
|
|
|
@ -98,6 +98,8 @@ H{
|
||||||
{ hrshift { +vector+ +literal+ -> +vector+ } }
|
{ hrshift { +vector+ +literal+ -> +vector+ } }
|
||||||
{ vshuffle { +vector+ +literal+ -> +vector+ } }
|
{ vshuffle { +vector+ +literal+ -> +vector+ } }
|
||||||
{ vbroadcast { +vector+ +literal+ -> +vector+ } }
|
{ vbroadcast { +vector+ +literal+ -> +vector+ } }
|
||||||
|
{ vmerge-head { +vector+ +vector+ -> +vector+ } }
|
||||||
|
{ vmerge-tail { +vector+ +vector+ -> +vector+ } }
|
||||||
{ v<= { +vector+ +vector+ -> +vector+ } }
|
{ v<= { +vector+ +vector+ -> +vector+ } }
|
||||||
{ v< { +vector+ +vector+ -> +vector+ } }
|
{ v< { +vector+ +vector+ -> +vector+ } }
|
||||||
{ v= { +vector+ +vector+ -> +vector+ } }
|
{ v= { +vector+ +vector+ -> +vector+ } }
|
||||||
|
|
Loading…
Reference in New Issue