add ##shl-vector-imm and ##shr-vector-imm insn variants. use merge/shr instead of compare/merge to do signed unpacks
parent
eccf3fba9d
commit
b858860a67
|
@ -417,12 +417,12 @@ def: dst/scalar-rep
|
||||||
use: src
|
use: src
|
||||||
literal: rep ;
|
literal: rep ;
|
||||||
|
|
||||||
! Diff rows (old cell first, new cell second): the instruction is renamed from
! ##horizontal-shl-vector to ##horizontal-shl-vector-imm. Its slots are unchanged:
! dst is defined, src1 is used, and src2 and rep are literal slots.
PURE-INSN: ##horizontal-shl-vector
|
PURE-INSN: ##horizontal-shl-vector-imm
|
||||||
def: dst
|
def: dst
|
||||||
use: src1
|
use: src1
|
||||||
literal: src2 rep ;
|
literal: src2 rep ;
|
||||||
|
|
||||||
! Diff rows (old cell first, new cell second): the instruction is renamed from
! ##horizontal-shr-vector to ##horizontal-shr-vector-imm. Its slots are unchanged:
! dst is defined, src1 is used, and src2 and rep are literal slots.
PURE-INSN: ##horizontal-shr-vector
|
PURE-INSN: ##horizontal-shr-vector-imm
|
||||||
def: dst
|
def: dst
|
||||||
use: src1
|
use: src1
|
||||||
literal: src2 rep ;
|
literal: src2 rep ;
|
||||||
|
@ -462,6 +462,16 @@ def: dst
|
||||||
use: src
|
use: src
|
||||||
literal: rep ;
|
literal: rep ;
|
||||||
|
|
||||||
|
! New instruction: vector shift-left variant whose second operand (src2) is a
! literal slot rather than a used value, as it is in ##shl-vector.
! dst is defined, src1 is used, and src2 and rep are literal slots.
! Code generation for it is routed to the %shl-vector-imm hook.
PURE-INSN: ##shl-vector-imm
|
||||||
|
def: dst
|
||||||
|
use: src1
|
||||||
|
literal: src2 rep ;
|
||||||
|
|
||||||
|
! New instruction: vector shift-right variant whose second operand (src2) is a
! literal slot. dst is defined, src1 is used, and src2 and rep are literal slots.
! Code generation for it is routed to the %shr-vector-imm hook; the signed
! unpack paths in this commit also emit it via ^^shr-vector-imm.
PURE-INSN: ##shr-vector-imm
|
||||||
|
def: dst
|
||||||
|
use: src1
|
||||||
|
literal: src2 rep ;
|
||||||
|
|
||||||
PURE-INSN: ##shl-vector
|
PURE-INSN: ##shl-vector
|
||||||
def: dst
|
def: dst
|
||||||
use: src1 src2/int-scalar-rep
|
use: src1 src2/int-scalar-rep
|
||||||
|
|
|
@ -187,10 +187,10 @@ IN: compiler.cfg.intrinsics
|
||||||
{ math.vectors.simd.intrinsics:(simd-vany?) [ [ vcc-any ^^test-vector ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vany?) [ [ vcc-any ^^test-vector ] emit-unary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vall?) [ [ vcc-all ^^test-vector ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vall?) [ [ vcc-all ^^test-vector ] emit-unary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vnone?) [ [ vcc-none ^^test-vector ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vnone?) [ [ vcc-none ^^test-vector ] emit-unary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vlshift) [ [ ^^shl-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vlshift) [ [ ^^shl-vector-imm ] [ ^^shl-vector ] emit-shift-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vrshift) [ [ ^^shr-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vrshift) [ [ ^^shr-vector-imm ] [ ^^shr-vector ] emit-shift-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-hlshift) [ [ ^^horizontal-shl-vector ] emit-horizontal-shift ] }
|
{ math.vectors.simd.intrinsics:(simd-hlshift) [ [ ^^horizontal-shl-vector-imm ] emit-shift-vector-imm-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-hrshift) [ [ ^^horizontal-shr-vector ] emit-horizontal-shift ] }
|
{ math.vectors.simd.intrinsics:(simd-hrshift) [ [ ^^horizontal-shr-vector-imm ] emit-shift-vector-imm-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-with) [ [ ^^with-vector ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-with) [ [ ^^with-vector ] emit-unary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
|
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
|
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
|
||||||
|
|
|
@ -55,10 +55,15 @@ MACRO: if-literals-match ( quots -- )
|
||||||
: [unary/param] ( quot -- quot' )
|
: [unary/param] ( quot -- quot' )
|
||||||
'[ [ -2 inc-d ds-pop ] 2dip @ ds-push ] ; inline
|
'[ [ -2 inc-d ds-pop ] 2dip @ ds-push ] ; inline
|
||||||
|
|
||||||
! Diff rows (old cell first, new cell second): emit-horizontal-shift is renamed
! to emit-shift-vector-imm-op; the body is unchanged.
! Wraps quot with [unary/param] and passes the result to if-literals-match
! together with the predicate list { integer? representation? }.
! NOTE(review): presumably the predicates are checked against the node's
! trailing literal inputs (shift count, then rep) -- confirm in if-literals-match.
: emit-horizontal-shift ( node quot -- )
|
: emit-shift-vector-imm-op ( node quot -- )
|
||||||
[unary/param]
|
[unary/param]
|
||||||
{ [ integer? ] [ representation? ] } if-literals-match ; inline
|
{ [ integer? ] [ representation? ] } if-literals-match ; inline
|
||||||
|
|
||||||
|
! Chooses between the immediate and the variable form of a vector shift.
!   node     - the call node being compiled
!   imm-quot - quotation emitting the immediate-count instruction
!   var-quot - quotation emitting the variable-count instruction
:: emit-shift-vector-op ( node imm-quot var-quot -- )
|
||||||
|
! Take the first of the last two input infos (the second-from-last input)
! and test whether its literal>> slot holds an integer.
node node-input-infos 2 tail-slice* first literal>> integer?
|
||||||
|
! Integer literal: go through emit-shift-vector-imm-op with imm-quot.
[ node imm-quot emit-shift-vector-imm-op ]
|
||||||
|
! Otherwise: treat it as an ordinary binary vector op with var-quot.
[ node var-quot emit-binary-vector-op ] if ; inline
|
||||||
|
|
||||||
! Unchanged context (identical in the old and new cells): emits node through
! emit-binary-vector-op using ^^gather-vector-2 as the instruction quotation.
: emit-gather-vector-2 ( node -- )
|
: emit-gather-vector-2 ( node -- )
|
||||||
[ ^^gather-vector-2 ] emit-binary-vector-op ;
|
[ ^^gather-vector-2 ] emit-binary-vector-op ;
|
||||||
|
|
||||||
|
@ -241,6 +246,14 @@ MACRO: if-literals-match ( quots -- )
|
||||||
src zero rep ^^merge-vector-head
|
src zero rep ^^merge-vector-head
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
[ rep widen-vector-rep %shr-vector-imm-reps member? ]
|
||||||
|
[
|
||||||
|
src src rep ^^merge-vector-head
|
||||||
|
rep rep-component-type
|
||||||
|
heap-size 8 * rep widen-vector-rep ^^shr-vector-imm
|
||||||
|
]
|
||||||
|
}
|
||||||
[
|
[
|
||||||
rep ^^zero-vector :> zero
|
rep ^^zero-vector :> zero
|
||||||
zero src rep cc> ^^compare-vector :> sign
|
zero src rep cc> ^^compare-vector :> sign
|
||||||
|
@ -268,6 +281,14 @@ MACRO: if-literals-match ( quots -- )
|
||||||
src zero rep ^^merge-vector-tail
|
src zero rep ^^merge-vector-tail
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
[ rep widen-vector-rep %shr-vector-imm-reps member? ]
|
||||||
|
[
|
||||||
|
src src rep ^^merge-vector-tail
|
||||||
|
rep rep-component-type
|
||||||
|
heap-size 8 * rep widen-vector-rep ^^shr-vector-imm
|
||||||
|
]
|
||||||
|
}
|
||||||
[
|
[
|
||||||
rep ^^zero-vector :> zero
|
rep ^^zero-vector :> zero
|
||||||
zero src rep cc> ^^compare-vector :> sign
|
zero src rep cc> ^^compare-vector :> sign
|
||||||
|
|
|
@ -181,14 +181,16 @@ CODEGEN: ##dot-vector %dot-vector
|
||||||
CODEGEN: ##sqrt-vector %sqrt-vector
|
CODEGEN: ##sqrt-vector %sqrt-vector
|
||||||
CODEGEN: ##horizontal-add-vector %horizontal-add-vector
|
CODEGEN: ##horizontal-add-vector %horizontal-add-vector
|
||||||
CODEGEN: ##horizontal-sub-vector %horizontal-sub-vector
|
CODEGEN: ##horizontal-sub-vector %horizontal-sub-vector
|
||||||
CODEGEN: ##horizontal-shl-vector %horizontal-shl-vector
|
CODEGEN: ##horizontal-shl-vector-imm %horizontal-shl-vector-imm
|
||||||
CODEGEN: ##horizontal-shr-vector %horizontal-shr-vector
|
CODEGEN: ##horizontal-shr-vector-imm %horizontal-shr-vector-imm
|
||||||
CODEGEN: ##abs-vector %abs-vector
|
CODEGEN: ##abs-vector %abs-vector
|
||||||
CODEGEN: ##and-vector %and-vector
|
CODEGEN: ##and-vector %and-vector
|
||||||
CODEGEN: ##andn-vector %andn-vector
|
CODEGEN: ##andn-vector %andn-vector
|
||||||
CODEGEN: ##or-vector %or-vector
|
CODEGEN: ##or-vector %or-vector
|
||||||
CODEGEN: ##xor-vector %xor-vector
|
CODEGEN: ##xor-vector %xor-vector
|
||||||
CODEGEN: ##not-vector %not-vector
|
CODEGEN: ##not-vector %not-vector
|
||||||
|
CODEGEN: ##shl-vector-imm %shl-vector-imm
|
||||||
|
CODEGEN: ##shr-vector-imm %shr-vector-imm
|
||||||
CODEGEN: ##shl-vector %shl-vector
|
CODEGEN: ##shl-vector %shl-vector
|
||||||
CODEGEN: ##shr-vector %shr-vector
|
CODEGEN: ##shr-vector %shr-vector
|
||||||
CODEGEN: ##integer>scalar %integer>scalar
|
CODEGEN: ##integer>scalar %integer>scalar
|
||||||
|
|
|
@ -107,6 +107,16 @@ scalar-rep ;
|
||||||
{ ulonglong-scalar-rep longlong-scalar-rep }
|
{ ulonglong-scalar-rep longlong-scalar-rep }
|
||||||
} ?at drop ;
|
} ?at drop ;
|
||||||
|
|
||||||
|
! Looks up rep in a fixed table of integer vector reps and returns the paired
! rep. Going by the rep names, each pair keeps the signedness and moves to the
! next wider component with half as many elements (char-16 -> short-8 ->
! int-4 -> longlong-2, and likewise for the unsigned reps).
! The lookup uses `at`, so a rep that is not a key in the table (for example a
! longlong or float rep) yields f instead of raising an error.
: widen-vector-rep ( rep -- rep' )
|
||||||
|
{
|
||||||
|
{ char-16-rep short-8-rep }
|
||||||
|
{ short-8-rep int-4-rep }
|
||||||
|
{ int-4-rep longlong-2-rep }
|
||||||
|
{ uchar-16-rep ushort-8-rep }
|
||||||
|
{ ushort-8-rep uint-4-rep }
|
||||||
|
{ uint-4-rep ulonglong-2-rep }
|
||||||
|
} at ;
|
||||||
|
|
||||||
! Register classes
|
! Register classes
|
||||||
SINGLETONS: int-regs float-regs ;
|
SINGLETONS: int-regs float-regs ;
|
||||||
|
|
||||||
|
@ -277,8 +287,10 @@ HOOK: %xor-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %not-vector cpu ( dst src rep -- )
|
HOOK: %not-vector cpu ( dst src rep -- )
|
||||||
HOOK: %shl-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %shl-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %shr-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %shr-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %horizontal-shl-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %shl-vector-imm cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %horizontal-shr-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %shr-vector-imm cpu ( dst src1 src2 rep -- )
|
||||||
|
HOOK: %horizontal-shl-vector-imm cpu ( dst src1 src2 rep -- )
|
||||||
|
HOOK: %horizontal-shr-vector-imm cpu ( dst src1 src2 rep -- )
|
||||||
|
|
||||||
HOOK: %integer>scalar cpu ( dst src rep -- )
|
HOOK: %integer>scalar cpu ( dst src rep -- )
|
||||||
HOOK: %scalar>integer cpu ( dst src rep -- )
|
HOOK: %scalar>integer cpu ( dst src rep -- )
|
||||||
|
@ -324,8 +336,10 @@ HOOK: %xor-vector-reps cpu ( -- reps )
|
||||||
HOOK: %not-vector-reps cpu ( -- reps )
|
HOOK: %not-vector-reps cpu ( -- reps )
|
||||||
HOOK: %shl-vector-reps cpu ( -- reps )
|
HOOK: %shl-vector-reps cpu ( -- reps )
|
||||||
HOOK: %shr-vector-reps cpu ( -- reps )
|
HOOK: %shr-vector-reps cpu ( -- reps )
|
||||||
HOOK: %horizontal-shl-vector-reps cpu ( -- reps )
|
HOOK: %shl-vector-imm-reps cpu ( -- reps )
|
||||||
HOOK: %horizontal-shr-vector-reps cpu ( -- reps )
|
HOOK: %shr-vector-imm-reps cpu ( -- reps )
|
||||||
|
HOOK: %horizontal-shl-vector-imm-reps cpu ( -- reps )
|
||||||
|
HOOK: %horizontal-shr-vector-imm-reps cpu ( -- reps )
|
||||||
|
|
||||||
M: object %zero-vector-reps { } ;
|
M: object %zero-vector-reps { } ;
|
||||||
M: object %fill-vector-reps { } ;
|
M: object %fill-vector-reps { } ;
|
||||||
|
@ -366,8 +380,10 @@ M: object %xor-vector-reps { } ;
|
||||||
M: object %not-vector-reps { } ;
|
M: object %not-vector-reps { } ;
|
||||||
M: object %shl-vector-reps { } ;
|
M: object %shl-vector-reps { } ;
|
||||||
M: object %shr-vector-reps { } ;
|
M: object %shr-vector-reps { } ;
|
||||||
M: object %horizontal-shl-vector-reps { } ;
|
M: object %shl-vector-imm-reps { } ;
|
||||||
M: object %horizontal-shr-vector-reps { } ;
|
M: object %shr-vector-imm-reps { } ;
|
||||||
|
M: object %horizontal-shl-vector-imm-reps { } ;
|
||||||
|
M: object %horizontal-shr-vector-imm-reps { } ;
|
||||||
|
|
||||||
HOOK: %unbox-alien cpu ( dst src -- )
|
HOOK: %unbox-alien cpu ( dst src -- )
|
||||||
HOOK: %unbox-any-c-ptr cpu ( dst src temp -- )
|
HOOK: %unbox-any-c-ptr cpu ( dst src temp -- )
|
||||||
|
|
|
@ -1155,18 +1155,18 @@ M: x86 %horizontal-add-vector-reps
|
||||||
{ sse3? { float-4-rep double-2-rep } }
|
{ sse3? { float-4-rep double-2-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
! Diff rows (old cell first, new cell second): the x86 method is renamed from
! %horizontal-shl-vector to %horizontal-shl-vector-imm; the body is unchanged.
! Runs two-operand on the inputs and then emits PSLLDQ.
! NOTE(review): two-operand presumably rewrites ( dst src1 src2 rep ) into the
! destructive two-operand x86 form -- confirm against its definition.
M: x86 %horizontal-shl-vector ( dst src1 src2 rep -- )
|
M: x86 %horizontal-shl-vector-imm ( dst src1 src2 rep -- )
|
||||||
two-operand PSLLDQ ;
|
two-operand PSLLDQ ;
|
||||||
|
|
||||||
! Diff rows (old cell first, new cell second): renamed from
! %horizontal-shl-vector-reps to %horizontal-shl-vector-imm-reps; the table is
! unchanged. Passes a table to available-reps that lists all eight integer
! vector reps under the sse2? entry.
M: x86 %horizontal-shl-vector-reps
|
M: x86 %horizontal-shl-vector-imm-reps
|
||||||
{
|
{
|
||||||
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
! Diff rows (old cell first, new cell second): the x86 method is renamed from
! %horizontal-shr-vector to %horizontal-shr-vector-imm; the body is unchanged.
! Runs two-operand on the inputs and then emits PSRLDQ.
! NOTE(review): two-operand presumably rewrites ( dst src1 src2 rep ) into the
! destructive two-operand x86 form -- confirm against its definition.
M: x86 %horizontal-shr-vector ( dst src1 src2 rep -- )
|
M: x86 %horizontal-shr-vector-imm ( dst src1 src2 rep -- )
|
||||||
two-operand PSRLDQ ;
|
two-operand PSRLDQ ;
|
||||||
|
|
||||||
! Diff rows (old cell first, new cell second): renamed from
! %horizontal-shr-vector-reps to %horizontal-shr-vector-imm-reps; the table is
! unchanged. Passes a table to available-reps that lists all eight integer
! vector reps under the sse2? entry.
M: x86 %horizontal-shr-vector-reps
|
M: x86 %horizontal-shr-vector-imm-reps
|
||||||
{
|
{
|
||||||
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||||
} available-reps ;
|
||||||
|
@ -1282,6 +1282,11 @@ M: x86 %shr-vector-reps
|
||||||
{ sse2? { short-8-rep ushort-8-rep int-4-rep uint-4-rep ulonglong-2-rep } }
|
{ sse2? { short-8-rep ushort-8-rep int-4-rep uint-4-rep ulonglong-2-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
|
! On x86 the immediate-shift hooks reuse the variable-shift implementations:
! each -imm method simply calls its non-imm counterpart with the same
! ( dst src1 src2 rep ) inputs.
! NOTE(review): this relies on %shl-vector / %shr-vector accepting an integer
! src2 as well as a register -- confirm in their definitions.
M: x86 %shl-vector-imm %shl-vector ;
|
||||||
|
! The supported reps for the immediate form are exactly those of %shl-vector.
M: x86 %shl-vector-imm-reps %shl-vector-reps ;
|
||||||
|
M: x86 %shr-vector-imm %shr-vector ;
|
||||||
|
! The supported reps for the immediate form are exactly those of %shr-vector
! (the visible tail of that table lists short-8, ushort-8, int-4, uint-4 and
! ulonglong-2 under sse2?).
M: x86 %shr-vector-imm-reps %shr-vector-reps ;
|
||||||
|
|
||||||
! Unchanged context (identical in the old and new cells): multiplies the
! rep-size of rep by 8 and passes reg and that number to n-bit-version-of.
! NOTE(review): presumably rep-size is in bytes, so the product is a bit width
! selecting the matching register variant -- confirm against n-bit-version-of.
: scalar-sized-reg ( reg rep -- reg' )
|
: scalar-sized-reg ( reg rep -- reg' )
|
||||||
rep-size 8 * n-bit-version-of ;
|
rep-size 8 * n-bit-version-of ;
|
||||||
|
|
||||||
|
|
|
@ -181,8 +181,8 @@ M: vector-rep supported-simd-op?
|
||||||
{ \ (simd-vnot) [ %xor-vector-reps ] }
|
{ \ (simd-vnot) [ %xor-vector-reps ] }
|
||||||
{ \ (simd-vlshift) [ %shl-vector-reps ] }
|
{ \ (simd-vlshift) [ %shl-vector-reps ] }
|
||||||
{ \ (simd-vrshift) [ %shr-vector-reps ] }
|
{ \ (simd-vrshift) [ %shr-vector-reps ] }
|
||||||
{ \ (simd-hlshift) [ %horizontal-shl-vector-reps ] }
|
{ \ (simd-hlshift) [ %horizontal-shl-vector-imm-reps ] }
|
||||||
{ \ (simd-hrshift) [ %horizontal-shr-vector-reps ] }
|
{ \ (simd-hrshift) [ %horizontal-shr-vector-imm-reps ] }
|
||||||
{ \ (simd-vshuffle-elements) [ (%shuffle-imm-reps) ] }
|
{ \ (simd-vshuffle-elements) [ (%shuffle-imm-reps) ] }
|
||||||
{ \ (simd-vshuffle-bytes) [ %shuffle-vector-reps ] }
|
{ \ (simd-vshuffle-bytes) [ %shuffle-vector-reps ] }
|
||||||
{ \ (simd-(vmerge-head)) [ %merge-vector-reps ] }
|
{ \ (simd-(vmerge-head)) [ %merge-vector-reps ] }
|
||||||
|
|
Loading…
Reference in New Issue