From 0c9c3d485951c0413c3977b63d862d9839cd4298 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sat, 3 Oct 2009 21:48:53 -0500 Subject: [PATCH] add %merge-vector-head and %merge-vector-tail instructions to back vmerge --- .../cfg/instructions/instructions.factor | 10 +++ .../compiler/cfg/intrinsics/intrinsics.factor | 2 + basis/compiler/codegen/codegen.factor | 2 + .../tree/propagation/simd/simd.factor | 2 + basis/cpu/architecture/architecture.factor | 3 + basis/cpu/ppc/ppc.factor | 1 + basis/cpu/x86/x86.factor | 28 +++++++ .../math/vectors/simd/functor/functor.factor | 18 ++++ .../vectors/simd/intrinsics/intrinsics.factor | 84 ++++++++++--------- .../specialization/specialization.factor | 2 + 10 files changed, 112 insertions(+), 40 deletions(-) diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index a4ebde304e..b6881b61b6 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -280,6 +280,16 @@ def: dst use: src literal: shuffle rep ; +PURE-INSN: ##merge-vector-head +def: dst +use: src1 src2 +literal: rep ; + +PURE-INSN: ##merge-vector-tail +def: dst +use: src1 src2 +literal: rep ; + PURE-INSN: ##compare-vector def: dst use: src1 src2 diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index b320489080..124aac5b18 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -194,6 +194,8 @@ IN: compiler.cfg.intrinsics { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] } { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] } { math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] } + { math.vectors.simd.intrinsics:(simd-vmerge-head) [ [ ^^merge-vector-head ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vmerge-tail) [ [ ^^merge-vector-tail ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] } { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] } diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index ff1c8e0b0b..05f50771f6 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -163,6 +163,8 @@ CODEGEN: ##zero-vector %zero-vector CODEGEN: ##gather-vector-2 %gather-vector-2 CODEGEN: ##gather-vector-4 %gather-vector-4 CODEGEN: ##shuffle-vector %shuffle-vector +CODEGEN: ##merge-vector-head %merge-vector-head +CODEGEN: ##merge-vector-tail %merge-vector-tail CODEGEN: ##compare-vector %compare-vector CODEGEN: ##test-vector %test-vector CODEGEN: ##add-vector %add-vector diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index 06d96ef28e..462e5d6e0b 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -31,6 +31,8 @@ IN: compiler.tree.propagation.simd (simd-hlshift) (simd-hrshift) (simd-vshuffle) + (simd-vmerge-head) + (simd-vmerge-tail) (simd-v<=) (simd-v<) (simd-v=) diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 1d683ffbe3..0fb69120da 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -218,6 +218,8 @@ HOOK: %fill-vector cpu ( dst rep -- ) HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- ) HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) HOOK: %shuffle-vector cpu ( dst src shuffle rep -- ) +HOOK: %merge-vector-head cpu ( dst src1 src2 rep -- ) +HOOK: %merge-vector-tail cpu ( dst src1 src2 rep -- ) HOOK: %compare-vector cpu ( dst src1 src2 temp rep cc -- ) HOOK: %test-vector cpu ( dst src1 temp rep vcc -- ) HOOK: %test-vector-branch cpu ( label src1 temp rep vcc -- ) @@ -256,6 +258,7 @@ HOOK: %fill-vector-reps cpu ( -- reps ) HOOK: %gather-vector-2-reps cpu ( -- reps ) HOOK: %gather-vector-4-reps cpu ( -- reps ) HOOK: %shuffle-vector-reps cpu ( -- reps ) +HOOK: %merge-vector-reps cpu ( -- reps ) HOOK: %compare-vector-reps cpu ( cc -- reps ) HOOK: %test-vector-reps cpu ( -- reps ) HOOK: %add-vector-reps cpu ( -- reps ) diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor index 2f00ee0591..f604efe64d 100644 --- a/basis/cpu/ppc/ppc.factor +++ b/basis/cpu/ppc/ppc.factor @@ -262,6 +262,7 @@ M: ppc %fill-vector-reps { } ; M: ppc %gather-vector-2-reps { } ; M: ppc %gather-vector-4-reps { } ; M: ppc %shuffle-vector-reps { } ; +M: ppc %merge-vector-reps { } ; M: ppc %compare-vector-reps drop { } ; M: ppc %test-vector-reps { } ; M: ppc %add-vector-reps { } ; diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index f7d1aabfdd..7c025707fc 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -721,6 +721,34 @@ M: x86 %shuffle-vector-reps { sse2? { double-2-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; +M: x86 %merge-vector-head + [ two-operand ] keep + unsign-rep { + { double-2-rep [ UNPCKLPD ] } + { float-4-rep [ UNPCKLPS ] } + { longlong-2-rep [ PUNPCKLQDQ ] } + { int-4-rep [ PUNPCKLDQ ] } + { short-8-rep [ PUNPCKLWD ] } + { char-16-rep [ PUNPCKLBW ] } + } case ; + +M: x86 %merge-vector-tail + [ two-operand ] keep + unsign-rep { + { double-2-rep [ UNPCKHPD ] } + { float-4-rep [ UNPCKHPS ] } + { longlong-2-rep [ PUNPCKHQDQ ] } + { int-4-rep [ PUNPCKHDQ ] } + { short-8-rep [ PUNPCKHWD ] } + { char-16-rep [ PUNPCKHBW ] } + } case ; + +M: x86 %merge-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } + } available-reps ; + :: compare-float-v-operands ( dst src1 src2 temp rep cc -- dst' src' rep cc' ) cc { cc> cc>= cc/> cc/>= } member? [ dst src2 src1 rep two-operand rep cc swap-cc ] diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index 878d4aea70..fb8326fde2 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -325,6 +325,8 @@ A-v.-op DEFINES-PRIVATE ${A}-v.-op A-sum-op DEFINES-PRIVATE ${A}-sum-op A-vany-op DEFINES-PRIVATE ${A}-vany-op A-vall-op DEFINES-PRIVATE ${A}-vall-op +A-vmerge-head-op DEFINES-PRIVATE ${A}-vmerge-head-op +A-vmerge-tail-op DEFINES-PRIVATE ${A}-vmerge-tail-op WHERE @@ -419,6 +421,20 @@ INSTANCE: A sequence : A-vall-op ( v1 quot -- n ) [ (simd-vbitand) ] (A-v->n-op) ; inline +: A-vmerge-head-op ( v1 v2 quot -- v ) + drop + [ underlying1>> ] bi@ + [ A-rep (simd-vmerge-head) ] + [ A-rep (simd-vmerge-tail) ] 2bi + \ A boa ; + +: A-vmerge-tail-op ( v1 v2 quot -- v ) + drop + [ underlying2>> ] bi@ + [ A-rep (simd-vmerge-head) ] + [ A-rep (simd-vmerge-tail) ] 2bi + \ A boa ; + simd new \ A >>class \ A-with >>ctor @@ -429,6 +445,8 @@ simd new { vnone? A-vany-op } { vany? A-vany-op } { vall? A-vall-op } + { vmerge-head A-vmerge-head-op } + { vmerge-tail A-vmerge-tail-op } } >>special-wrappers { { { +vector+ +vector+ -> +vector+ } A-vv->v-op } diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index 0b6b897c4b..dd87d4aaa9 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -55,6 +55,8 @@ SIMD-OP: vrshift SIMD-OP: hlshift SIMD-OP: hrshift SIMD-OP: vshuffle +SIMD-OP: vmerge-head +SIMD-OP: vmerge-tail SIMD-OP: v<= SIMD-OP: v< SIMD-OP: v= @@ -118,44 +120,46 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? ) M: vector-rep supported-simd-op? { - { \ (simd-v+) [ %add-vector-reps ] } - { \ (simd-vs+) [ %saturated-add-vector-reps ] } - { \ (simd-v+-) [ %add-sub-vector-reps ] } - { \ (simd-v-) [ %sub-vector-reps ] } - { \ (simd-vs-) [ %saturated-sub-vector-reps ] } - { \ (simd-v*) [ %mul-vector-reps ] } - { \ (simd-vs*) [ %saturated-mul-vector-reps ] } - { \ (simd-v/) [ %div-vector-reps ] } - { \ (simd-vmin) [ %min-vector-reps ] } - { \ (simd-vmax) [ %max-vector-reps ] } - { \ (simd-v.) [ %dot-vector-reps ] } - { \ (simd-vsqrt) [ %sqrt-vector-reps ] } - { \ (simd-sum) [ %horizontal-add-vector-reps ] } - { \ (simd-vabs) [ %abs-vector-reps ] } - { \ (simd-vbitand) [ %and-vector-reps ] } - { \ (simd-vbitandn) [ %andn-vector-reps ] } - { \ (simd-vbitor) [ %or-vector-reps ] } - { \ (simd-vbitxor) [ %xor-vector-reps ] } - { \ (simd-vbitnot) [ %not-vector-reps ] } - { \ (simd-vand) [ %and-vector-reps ] } - { \ (simd-vandn) [ %andn-vector-reps ] } - { \ (simd-vor) [ %or-vector-reps ] } - { \ (simd-vxor) [ %xor-vector-reps ] } - { \ (simd-vnot) [ %not-vector-reps ] } - { \ (simd-vlshift) [ %shl-vector-reps ] } - { \ (simd-vrshift) [ %shr-vector-reps ] } - { \ (simd-hlshift) [ %horizontal-shl-vector-reps ] } - { \ (simd-hrshift) [ %horizontal-shr-vector-reps ] } - { \ (simd-vshuffle) [ %shuffle-vector-reps ] } - { \ (simd-v<=) [ cc<= %compare-vector-reps ] } - { \ (simd-v<) [ cc< %compare-vector-reps ] } - { \ (simd-v=) [ cc= %compare-vector-reps ] } - { \ (simd-v>) [ cc> %compare-vector-reps ] } - { \ (simd-v>=) [ cc>= %compare-vector-reps ] } - { \ (simd-vunordered?) [ cc/<>= %compare-vector-reps ] } - { \ (simd-gather-2) [ %gather-vector-2-reps ] } - { \ (simd-gather-4) [ %gather-vector-4-reps ] } - { \ (simd-vany?) [ %test-vector-reps ] } - { \ (simd-vall?) [ %test-vector-reps ] } - { \ (simd-vnone?) [ %test-vector-reps ] } + { \ (simd-v+) [ %add-vector-reps ] } + { \ (simd-vs+) [ %saturated-add-vector-reps ] } + { \ (simd-v+-) [ %add-sub-vector-reps ] } + { \ (simd-v-) [ %sub-vector-reps ] } + { \ (simd-vs-) [ %saturated-sub-vector-reps ] } + { \ (simd-v*) [ %mul-vector-reps ] } + { \ (simd-vs*) [ %saturated-mul-vector-reps ] } + { \ (simd-v/) [ %div-vector-reps ] } + { \ (simd-vmin) [ %min-vector-reps ] } + { \ (simd-vmax) [ %max-vector-reps ] } + { \ (simd-v.) [ %dot-vector-reps ] } + { \ (simd-vsqrt) [ %sqrt-vector-reps ] } + { \ (simd-sum) [ %horizontal-add-vector-reps ] } + { \ (simd-vabs) [ %abs-vector-reps ] } + { \ (simd-vbitand) [ %and-vector-reps ] } + { \ (simd-vbitandn) [ %andn-vector-reps ] } + { \ (simd-vbitor) [ %or-vector-reps ] } + { \ (simd-vbitxor) [ %xor-vector-reps ] } + { \ (simd-vbitnot) [ %not-vector-reps ] } + { \ (simd-vand) [ %and-vector-reps ] } + { \ (simd-vandn) [ %andn-vector-reps ] } + { \ (simd-vor) [ %or-vector-reps ] } + { \ (simd-vxor) [ %xor-vector-reps ] } + { \ (simd-vnot) [ %not-vector-reps ] } + { \ (simd-vlshift) [ %shl-vector-reps ] } + { \ (simd-vrshift) [ %shr-vector-reps ] } + { \ (simd-hlshift) [ %horizontal-shl-vector-reps ] } + { \ (simd-hrshift) [ %horizontal-shr-vector-reps ] } + { \ (simd-vshuffle) [ %shuffle-vector-reps ] } + { \ (simd-vmerge-head) [ %merge-vector-reps ] } + { \ (simd-vmerge-tail) [ %merge-vector-reps ] } + { \ (simd-v<=) [ cc<= %compare-vector-reps ] } + { \ (simd-v<) [ cc< %compare-vector-reps ] } + { \ (simd-v=) [ cc= %compare-vector-reps ] } + { \ (simd-v>) [ cc> %compare-vector-reps ] } + { \ (simd-v>=) [ cc>= %compare-vector-reps ] } + { \ (simd-vunordered?) [ cc/<>= %compare-vector-reps ] } + { \ (simd-gather-2) [ %gather-vector-2-reps ] } + { \ (simd-gather-4) [ %gather-vector-4-reps ] } + { \ (simd-vany?) [ %test-vector-reps ] } + { \ (simd-vall?) [ %test-vector-reps ] } + { \ (simd-vnone?) [ %test-vector-reps ] } } case member? ; diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor index 217849ab3d..8d9d1b49cb 100644 --- a/basis/math/vectors/specialization/specialization.factor +++ b/basis/math/vectors/specialization/specialization.factor @@ -98,6 +98,8 @@ H{ { hrshift { +vector+ +literal+ -> +vector+ } } { vshuffle { +vector+ +literal+ -> +vector+ } } { vbroadcast { +vector+ +literal+ -> +vector+ } } + { vmerge-head { +vector+ +vector+ -> +vector+ } } + { vmerge-tail { +vector+ +vector+ -> +vector+ } } { v<= { +vector+ +vector+ -> +vector+ } } { v< { +vector+ +vector+ -> +vector+ } } { v= { +vector+ +vector+ -> +vector+ } }