# Review summary (comment preamble only; the patch text below is unchanged).
#
# Purpose: introduce immediate-count variants of the SIMD vector shift
# instructions and rename the horizontal shifts to carry an -imm suffix.
#
# basis/compiler/cfg/instructions/instructions.factor
#   - Renames ##horizontal-shl-vector / ##horizontal-shr-vector to
#     ##horizontal-shl-vector-imm / ##horizontal-shr-vector-imm.
#   - Adds PURE-INSN ##shl-vector-imm and ##shr-vector-imm. Both define dst,
#     use src1, and take src2 and rep as literals, whereas the existing
#     ##shl-vector takes src2 as a `use:` operand (src2/int-scalar-rep).
#
# basis/compiler/cfg/intrinsics/intrinsics.factor
#   - (simd-vlshift) / (simd-vrshift) now call emit-shift-vector-op with two
#     quotations: the -imm emitter and the register-count emitter.
#   - (simd-hlshift) / (simd-hrshift) call emit-shift-vector-imm-op with the
#     renamed horizontal instructions.
#
# basis/compiler/cfg/intrinsics/simd/simd.factor
#   - emit-horizontal-shift is renamed to emit-shift-vector-imm-op; its body
#     (literal match on [ integer? ] [ representation? ]) is unchanged.
#   - New emit-shift-vector-op looks at the second-from-last input info of the
#     node; if its literal>> is an integer it takes the immediate path,
#     otherwise it falls back to emit-binary-vector-op.
#   - Two tables gain a branch: when `rep widen-vector-rep` is a member of
#     %shr-vector-imm-reps, src is merged with itself (^^merge-vector-head in
#     the first table, ^^merge-vector-tail in the second) and the result is
#     passed to ^^shr-vector-imm with a count of
#     `rep rep-component-type heap-size 8 *` in the widened rep. The existing
#     fallback (zero vector + cc> compare) stays as the final branch.
#     NOTE(review): the enclosing words are outside the hunks; presumably the
#     signed unpack-head / unpack-tail helpers -- confirm.
#
# basis/compiler/codegen/codegen.factor
#   - CODEGEN: entries follow the renames and add ##shl-vector-imm /
#     ##shr-vector-imm.
#
# basis/cpu/architecture/architecture.factor
#   - Adds widen-vector-rep, a lookup table from char-16/short-8/int-4 (and
#     the unsigned counterparts) to the next wider rep.
#     NOTE(review): it uses plain `at`, so a rep not in the table presumably
#     yields f rather than an error -- confirm this is intended by callers.
#   - Adds HOOKs %shl-vector-imm / %shr-vector-imm and their -reps hooks,
#     renames the horizontal hooks, and gives every new -reps hook an
#     `M: object ... { }` default.
#
# basis/cpu/x86/x86.factor
#   - Renames the horizontal shift methods (still two-operand PSLLDQ /
#     PSRLDQ).
#   - %shl-vector-imm, %shr-vector-imm and their -reps methods simply call the
#     existing %shl-vector / %shr-vector and %shl-vector-reps /
#     %shr-vector-reps.
#
# basis/math/vectors/simd/intrinsics/intrinsics.factor
#   - supported-simd-op? uses the renamed -imm-reps hooks for (simd-hlshift)
#     and (simd-hrshift).
#     NOTE(review): (simd-vlshift) / (simd-vrshift) are still keyed on
#     %shl-vector-reps / %shr-vector-reps even though the immediate path emits
#     the -imm instructions. On x86 the -imm-reps delegate to the same lists;
#     other back ends would have to keep the two in sync -- confirm.
#
diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index 5712455988..d4d84a088a 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -417,12 +417,12 @@ def: dst/scalar-rep use: src literal: rep ; -PURE-INSN: ##horizontal-shl-vector +PURE-INSN: ##horizontal-shl-vector-imm def: dst use: src1 literal: src2 rep ; -PURE-INSN: ##horizontal-shr-vector +PURE-INSN: ##horizontal-shr-vector-imm def: dst use: src1 literal: src2 rep ; @@ -462,6 +462,16 @@ def: dst use: src literal: rep ; +PURE-INSN: ##shl-vector-imm +def: dst +use: src1 +literal: src2 rep ; + +PURE-INSN: ##shr-vector-imm +def: dst +use: src1 +literal: src2 rep ; + PURE-INSN: ##shl-vector def: dst use: src1 src2/int-scalar-rep diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index 2af810ba49..a03f04f182 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -187,10 +187,10 @@ IN: compiler.cfg.intrinsics { math.vectors.simd.intrinsics:(simd-vany?) [ [ vcc-any ^^test-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vall?) [ [ vcc-all ^^test-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vnone?) 
[ [ vcc-none ^^test-vector ] emit-unary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vlshift) [ [ ^^shl-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vrshift) [ [ ^^shr-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-hlshift) [ [ ^^horizontal-shl-vector ] emit-horizontal-shift ] } - { math.vectors.simd.intrinsics:(simd-hrshift) [ [ ^^horizontal-shr-vector ] emit-horizontal-shift ] } + { math.vectors.simd.intrinsics:(simd-vlshift) [ [ ^^shl-vector-imm ] [ ^^shl-vector ] emit-shift-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vrshift) [ [ ^^shr-vector-imm ] [ ^^shr-vector ] emit-shift-vector-op ] } + { math.vectors.simd.intrinsics:(simd-hlshift) [ [ ^^horizontal-shl-vector-imm ] emit-shift-vector-imm-op ] } + { math.vectors.simd.intrinsics:(simd-hrshift) [ [ ^^horizontal-shr-vector-imm ] emit-shift-vector-imm-op ] } { math.vectors.simd.intrinsics:(simd-with) [ [ ^^with-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] } { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] } diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index 84646be78b..a8dfaab2dd 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -55,10 +55,15 @@ MACRO: if-literals-match ( quots -- ) : [unary/param] ( quot -- quot' ) '[ [ -2 inc-d ds-pop ] 2dip @ ds-push ] ; inline -: emit-horizontal-shift ( node quot -- ) +: emit-shift-vector-imm-op ( node quot -- ) [unary/param] { [ integer? ] [ representation? ] } if-literals-match ; inline +:: emit-shift-vector-op ( node imm-quot var-quot -- ) + node node-input-infos 2 tail-slice* first literal>> integer? 
+ [ node imm-quot emit-shift-vector-imm-op ] + [ node var-quot emit-binary-vector-op ] if ; inline + : emit-gather-vector-2 ( node -- ) [ ^^gather-vector-2 ] emit-binary-vector-op ; @@ -241,6 +246,14 @@ MACRO: if-literals-match ( quots -- ) src zero rep ^^merge-vector-head ] } + { + [ rep widen-vector-rep %shr-vector-imm-reps member? ] + [ + src src rep ^^merge-vector-head + rep rep-component-type + heap-size 8 * rep widen-vector-rep ^^shr-vector-imm + ] + } [ rep ^^zero-vector :> zero zero src rep cc> ^^compare-vector :> sign @@ -268,6 +281,14 @@ MACRO: if-literals-match ( quots -- ) src zero rep ^^merge-vector-tail ] } + { + [ rep widen-vector-rep %shr-vector-imm-reps member? ] + [ + src src rep ^^merge-vector-tail + rep rep-component-type + heap-size 8 * rep widen-vector-rep ^^shr-vector-imm + ] + } [ rep ^^zero-vector :> zero zero src rep cc> ^^compare-vector :> sign diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index e8f3ca7d64..15c4e14ac1 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -181,14 +181,16 @@ CODEGEN: ##dot-vector %dot-vector CODEGEN: ##sqrt-vector %sqrt-vector CODEGEN: ##horizontal-add-vector %horizontal-add-vector CODEGEN: ##horizontal-sub-vector %horizontal-sub-vector -CODEGEN: ##horizontal-shl-vector %horizontal-shl-vector -CODEGEN: ##horizontal-shr-vector %horizontal-shr-vector +CODEGEN: ##horizontal-shl-vector-imm %horizontal-shl-vector-imm +CODEGEN: ##horizontal-shr-vector-imm %horizontal-shr-vector-imm CODEGEN: ##abs-vector %abs-vector CODEGEN: ##and-vector %and-vector CODEGEN: ##andn-vector %andn-vector CODEGEN: ##or-vector %or-vector CODEGEN: ##xor-vector %xor-vector CODEGEN: ##not-vector %not-vector +CODEGEN: ##shl-vector-imm %shl-vector-imm +CODEGEN: ##shr-vector-imm %shr-vector-imm CODEGEN: ##shl-vector %shl-vector CODEGEN: ##shr-vector %shr-vector CODEGEN: ##integer>scalar %integer>scalar diff --git 
a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index c411d97558..75fbb85542 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -107,6 +107,16 @@ scalar-rep ; { ulonglong-scalar-rep longlong-scalar-rep } } ?at drop ; +: widen-vector-rep ( rep -- rep' ) + { + { char-16-rep short-8-rep } + { short-8-rep int-4-rep } + { int-4-rep longlong-2-rep } + { uchar-16-rep ushort-8-rep } + { ushort-8-rep uint-4-rep } + { uint-4-rep ulonglong-2-rep } + } at ; + ! Register classes SINGLETONS: int-regs float-regs ; @@ -277,8 +287,10 @@ HOOK: %xor-vector cpu ( dst src1 src2 rep -- ) HOOK: %not-vector cpu ( dst src rep -- ) HOOK: %shl-vector cpu ( dst src1 src2 rep -- ) HOOK: %shr-vector cpu ( dst src1 src2 rep -- ) -HOOK: %horizontal-shl-vector cpu ( dst src1 src2 rep -- ) -HOOK: %horizontal-shr-vector cpu ( dst src1 src2 rep -- ) +HOOK: %shl-vector-imm cpu ( dst src1 src2 rep -- ) +HOOK: %shr-vector-imm cpu ( dst src1 src2 rep -- ) +HOOK: %horizontal-shl-vector-imm cpu ( dst src1 src2 rep -- ) +HOOK: %horizontal-shr-vector-imm cpu ( dst src1 src2 rep -- ) HOOK: %integer>scalar cpu ( dst src rep -- ) HOOK: %scalar>integer cpu ( dst src rep -- ) @@ -324,8 +336,10 @@ HOOK: %xor-vector-reps cpu ( -- reps ) HOOK: %not-vector-reps cpu ( -- reps ) HOOK: %shl-vector-reps cpu ( -- reps ) HOOK: %shr-vector-reps cpu ( -- reps ) -HOOK: %horizontal-shl-vector-reps cpu ( -- reps ) -HOOK: %horizontal-shr-vector-reps cpu ( -- reps ) +HOOK: %shl-vector-imm-reps cpu ( -- reps ) +HOOK: %shr-vector-imm-reps cpu ( -- reps ) +HOOK: %horizontal-shl-vector-imm-reps cpu ( -- reps ) +HOOK: %horizontal-shr-vector-imm-reps cpu ( -- reps ) M: object %zero-vector-reps { } ; M: object %fill-vector-reps { } ; @@ -366,8 +380,10 @@ M: object %xor-vector-reps { } ; M: object %not-vector-reps { } ; M: object %shl-vector-reps { } ; M: object %shr-vector-reps { } ; -M: object %horizontal-shl-vector-reps { } ; -M: object 
%horizontal-shr-vector-reps { } ; +M: object %shl-vector-imm-reps { } ; +M: object %shr-vector-imm-reps { } ; +M: object %horizontal-shl-vector-imm-reps { } ; +M: object %horizontal-shr-vector-imm-reps { } ; HOOK: %unbox-alien cpu ( dst src -- ) HOOK: %unbox-any-c-ptr cpu ( dst src temp -- ) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 07b21c9612..869f973b30 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -1155,18 +1155,18 @@ M: x86 %horizontal-add-vector-reps { sse3? { float-4-rep double-2-rep } } } available-reps ; -M: x86 %horizontal-shl-vector ( dst src1 src2 rep -- ) +M: x86 %horizontal-shl-vector-imm ( dst src1 src2 rep -- ) two-operand PSLLDQ ; -M: x86 %horizontal-shl-vector-reps +M: x86 %horizontal-shl-vector-imm-reps { { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; -M: x86 %horizontal-shr-vector ( dst src1 src2 rep -- ) +M: x86 %horizontal-shr-vector-imm ( dst src1 src2 rep -- ) two-operand PSRLDQ ; -M: x86 %horizontal-shr-vector-reps +M: x86 %horizontal-shr-vector-imm-reps { { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; @@ -1282,6 +1282,11 @@ M: x86 %shr-vector-reps { sse2? { short-8-rep ushort-8-rep int-4-rep uint-4-rep ulonglong-2-rep } } } available-reps ; +M: x86 %shl-vector-imm %shl-vector ; +M: x86 %shl-vector-imm-reps %shl-vector-reps ; +M: x86 %shr-vector-imm %shr-vector ; +M: x86 %shr-vector-imm-reps %shr-vector-reps ; + : scalar-sized-reg ( reg rep -- reg' ) rep-size 8 * n-bit-version-of ; diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index 649e444915..003b42fe83 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -181,8 +181,8 @@ M: vector-rep supported-simd-op? 
{ \ (simd-vnot) [ %xor-vector-reps ] } { \ (simd-vlshift) [ %shl-vector-reps ] } { \ (simd-vrshift) [ %shr-vector-reps ] } - { \ (simd-hlshift) [ %horizontal-shl-vector-reps ] } - { \ (simd-hrshift) [ %horizontal-shr-vector-reps ] } + { \ (simd-hlshift) [ %horizontal-shl-vector-imm-reps ] } + { \ (simd-hrshift) [ %horizontal-shr-vector-imm-reps ] } { \ (simd-vshuffle-elements) [ (%shuffle-imm-reps) ] } { \ (simd-vshuffle-bytes) [ %shuffle-vector-reps ] } { \ (simd-(vmerge-head)) [ %merge-vector-reps ] }