decompose %unpack-vector-head/tail into %compare-vector/%merge-vector-head/tail or %tail>head-vector/%unpack-vector-head insns when there isn't an actual unpack insn; get rid of fake x86 implementations
parent
fd6f370119
commit
f2c9eb79e2
|
@ -280,6 +280,11 @@ def: dst
|
|||
use: src
|
||||
literal: shuffle rep ;
|
||||
|
||||
PURE-INSN: ##tail>head-vector
|
||||
def: dst
|
||||
use: src
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##merge-vector-head
|
||||
def: dst
|
||||
use: src1 src2
|
||||
|
@ -303,13 +308,11 @@ literal: rep ;
|
|||
PURE-INSN: ##unpack-vector-head
|
||||
def: dst
|
||||
use: src
|
||||
temp: temp
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##unpack-vector-tail
|
||||
def: dst
|
||||
use: src
|
||||
temp: temp
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##integer>float-vector
|
||||
|
|
|
@ -200,8 +200,8 @@ IN: compiler.cfg.intrinsics
|
|||
{ math.vectors.simd.intrinsics:(simd-(v>integer)) [ [ ^^float>integer-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-(vpack-signed)) [ [ ^^signed-pack-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-(vpack-unsigned)) [ [ ^^unsigned-pack-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-(vunpack-head)) [ [ ^^unpack-vector-head ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-(vunpack-tail)) [ [ ^^unpack-vector-tail ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-(vunpack-head)) [ [ generate-unpack-vector-head ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-(vunpack-tail)) [ [ generate-unpack-vector-tail ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
|
||||
|
|
|
@ -2,8 +2,9 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors byte-arrays fry cpu.architecture kernel math
|
||||
sequences math.vectors.simd.intrinsics macros generalizations
|
||||
combinators combinators.short-circuit arrays
|
||||
combinators combinators.short-circuit arrays locals
|
||||
compiler.tree.propagation.info compiler.cfg.builder.blocks
|
||||
compiler.cfg.comparisons
|
||||
compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
|
||||
compiler.cfg.instructions compiler.cfg.registers
|
||||
compiler.cfg.intrinsics.alien ;
|
||||
|
@ -121,3 +122,36 @@ MACRO: if-literals-match ( quots -- )
|
|||
[ ^^not-vector ]
|
||||
[ [ ^^fill-vector ] [ ^^xor-vector ] bi ] if ;
|
||||
|
||||
! Emit CFG instructions that widen the head half of the vector in src.
!   src -- virtual register holding the input vector
!   rep -- vector representation of src
!   dst -- virtual register holding the widened result
! Strategy, first match wins:
!   1. rep is listed in %unpack-vector-head-reps: emit a single
!      ##unpack-vector-head.
!   2. rep is a signed integer rep: build a per-element sign mask by
!      comparing a zero vector against src with cc>, then merge src with
!      that mask.
!   3. otherwise: merge src with a zero vector (zero extension).
! Case 3 keeps unsigned reps away from the signed comparison; they need
! zero-filled upper halves, not a sign mask.
! NOTE(review): assumes signed-int-vector-rep? is visible through
! cpu.architecture, which this vocabulary already uses -- confirm.
:: generate-unpack-vector-head ( src rep -- dst )
    {
        {
            [ rep %unpack-vector-head-reps member? ]
            [ src rep ^^unpack-vector-head ]
        }
        {
            [ rep signed-int-vector-rep? ]
            [
                rep ^^zero-vector :> zero
                zero src rep cc> ^^compare-vector :> sign
                src sign rep ^^merge-vector-head
            ]
        }
        [
            rep ^^zero-vector :> zero
            src zero rep ^^merge-vector-head
        ]
    } cond ;
|
||||
|
||||
! Emit CFG instructions that widen the tail half of the vector in src.
!   src -- virtual register holding the input vector
!   rep -- vector representation of src
!   dst -- virtual register holding the widened result
! Strategy, first match wins:
!   1. rep is listed in %unpack-vector-tail-reps: emit a single
!      ##unpack-vector-tail.
!   2. rep is listed in %unpack-vector-head-reps: move the tail into the
!      head with ##tail>head-vector, then emit ##unpack-vector-head.
!   3. rep is a signed integer rep: build a per-element sign mask by
!      comparing a zero vector against src with cc>, then merge src with
!      that mask.
!   4. otherwise: merge src with a zero vector (zero extension).
! Case 4 keeps unsigned reps away from the signed comparison; they need
! zero-filled upper halves, not a sign mask.
! NOTE(review): assumes signed-int-vector-rep? is visible through
! cpu.architecture, which this vocabulary already uses -- confirm.
:: generate-unpack-vector-tail ( src rep -- dst )
    {
        {
            [ rep %unpack-vector-tail-reps member? ]
            [ src rep ^^unpack-vector-tail ]
        }
        {
            [ rep %unpack-vector-head-reps member? ]
            [
                src rep ^^tail>head-vector :> tail
                tail rep ^^unpack-vector-head
            ]
        }
        {
            [ rep signed-int-vector-rep? ]
            [
                rep ^^zero-vector :> zero
                zero src rep cc> ^^compare-vector :> sign
                src sign rep ^^merge-vector-tail
            ]
        }
        [
            rep ^^zero-vector :> zero
            src zero rep ^^merge-vector-tail
        ]
    } cond ;
|
||||
|
||||
|
|
|
@ -164,6 +164,7 @@ CODEGEN: ##fill-vector %fill-vector
|
|||
CODEGEN: ##gather-vector-2 %gather-vector-2
|
||||
CODEGEN: ##gather-vector-4 %gather-vector-4
|
||||
CODEGEN: ##shuffle-vector %shuffle-vector
|
||||
CODEGEN: ##tail>head-vector %tail>head-vector
|
||||
CODEGEN: ##merge-vector-head %merge-vector-head
|
||||
CODEGEN: ##merge-vector-tail %merge-vector-tail
|
||||
CODEGEN: ##signed-pack-vector %signed-pack-vector
|
||||
|
|
|
@ -230,12 +230,13 @@ HOOK: %fill-vector cpu ( dst rep -- )
|
|||
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
|
||||
HOOK: %shuffle-vector cpu ( dst src shuffle rep -- )
|
||||
HOOK: %tail>head-vector cpu ( dst src rep -- )
|
||||
HOOK: %merge-vector-head cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %merge-vector-tail cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %signed-pack-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %unsigned-pack-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %unpack-vector-head cpu ( dst src temp rep -- )
|
||||
HOOK: %unpack-vector-tail cpu ( dst src temp rep -- )
|
||||
HOOK: %unpack-vector-head cpu ( dst src rep -- )
|
||||
HOOK: %unpack-vector-tail cpu ( dst src rep -- )
|
||||
HOOK: %integer>float-vector cpu ( dst src rep -- )
|
||||
HOOK: %float>integer-vector cpu ( dst src rep -- )
|
||||
HOOK: %compare-vector cpu ( dst src1 src2 temp rep cc -- )
|
||||
|
@ -279,7 +280,8 @@ HOOK: %shuffle-vector-reps cpu ( -- reps )
|
|||
HOOK: %merge-vector-reps cpu ( -- reps )
|
||||
HOOK: %signed-pack-vector-reps cpu ( -- reps )
|
||||
HOOK: %unsigned-pack-vector-reps cpu ( -- reps )
|
||||
HOOK: %unpack-vector-reps cpu ( -- reps )
|
||||
HOOK: %unpack-vector-head-reps cpu ( -- reps )
|
||||
HOOK: %unpack-vector-tail-reps cpu ( -- reps )
|
||||
HOOK: %integer>float-vector-reps cpu ( -- reps )
|
||||
HOOK: %float>integer-vector-reps cpu ( -- reps )
|
||||
HOOK: %compare-vector-reps cpu ( cc -- reps )
|
||||
|
|
|
@ -774,45 +774,32 @@ M: x86 %unsigned-pack-vector-reps
|
|||
{ sse4.1? { int-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
:: %sign-extension-vector ( dst src rep -- )
|
||||
dst rep %zero-vector
|
||||
dst src rep {
|
||||
{ char-16-rep [ PCMPGTB ] }
|
||||
{ short-8-rep [ PCMPGTW ] }
|
||||
{ int-4-rep [ PCMPGTD ] }
|
||||
{ longlong-2-rep [ PCMPGTQ ] }
|
||||
! x86 code generator for ##tail>head-vector.
!   dst -- destination vector register
!   src -- source vector register
!   rep -- vector representation, selects the instruction
! float-4-rep emits MOVHLPS directly on dst and src, with no copy first.
! double-2-rep copies src into dst and then emits UNPCKHPD.
! Every other rep copies src into dst and then emits PUNPCKHQDQ.
M:: x86 %tail>head-vector ( dst src rep -- )
    rep {
        { float-4-rep [ dst src MOVHLPS ] }
        { double-2-rep [
            dst src rep %copy
            dst src UNPCKHPD
        ] }
        [
            ! The default branch of case receives the unmatched rep.
            drop
            dst src rep %copy
            dst src PUNPCKHQDQ
        ]
    } case ;
|
||||
|
||||
:: (%unpack-vector-signs) ( dst src rep -- )
|
||||
dst rep signed-int-vector-rep?
|
||||
[ src rep %sign-extension-vector ]
|
||||
[ rep %zero-vector ] if ;
|
||||
|
||||
M:: x86 %unpack-vector-head ( dst src temp rep -- )
|
||||
temp src rep (%unpack-vector-signs)
|
||||
dst src rep %copy
|
||||
dst temp rep unsign-rep {
|
||||
{ char-16-rep [ PUNPCKLBW ] }
|
||||
{ short-8-rep [ PUNPCKLWD ] }
|
||||
{ int-4-rep [ PUNPCKLDQ ] }
|
||||
{ longlong-2-rep [ PUNPCKLQDQ ] }
|
||||
} case ;
|
||||
|
||||
M:: x86 %unpack-vector-tail ( dst src temp rep -- )
|
||||
temp src rep (%unpack-vector-signs)
|
||||
dst src rep %copy
|
||||
dst temp rep unsign-rep {
|
||||
{ char-16-rep [ PUNPCKHBW ] }
|
||||
{ short-8-rep [ PUNPCKHWD ] }
|
||||
{ int-4-rep [ PUNPCKHDQ ] }
|
||||
{ longlong-2-rep [ PUNPCKHQDQ ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %unpack-vector-reps ( -- reps )
|
||||
M: x86 %unpack-vector-head ( dst src rep -- )
|
||||
{
|
||||
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
{ char-16-rep [ PMOVSXBW ] }
|
||||
{ uchar-16-rep [ PMOVZXBW ] }
|
||||
{ short-8-rep [ PMOVSXWD ] }
|
||||
{ ushort-8-rep [ PMOVZXWD ] }
|
||||
{ int-4-rep [ PMOVSXDQ ] }
|
||||
{ uint-4-rep [ PMOVZXDQ ] }
|
||||
{ float-4-rep [ CVTPS2PD ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %unpack-vector-head-reps ( -- reps )
|
||||
{
|
||||
{ sse2? { float-4-rep } }
|
||||
{ sse4.1? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %unpack-vector-tail-reps ( -- reps ) { } ;
|
||||
|
||||
M: x86 %integer>float-vector ( dst src rep -- )
|
||||
{
|
||||
{ int-4-rep [ CVTDQ2PS ] }
|
||||
|
|
|
@ -4,7 +4,7 @@ USING: alien alien.c-types alien.data assocs combinators
|
|||
cpu.architecture compiler.cfg.comparisons fry generalizations
|
||||
kernel libc macros math
|
||||
math.vectors.conversion.backend
|
||||
sequences effects accessors namespaces
|
||||
sequences sets effects accessors namespaces
|
||||
lexer parser vocabs.parser words arrays math.vectors ;
|
||||
IN: math.vectors.simd.intrinsics
|
||||
|
||||
|
@ -137,6 +137,10 @@ MACRO: (simd-boa) ( rep -- quot )
|
|||
|
||||
GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
|
||||
|
||||
! Vector reps reported as supported for the (simd-(vunpack-head)) and
! (simd-(vunpack-tail)) intrinsics in supported-simd-op?.
! Result: the reps from %merge-vector-reps that satisfy int-vector-rep?,
! unioned with every rep in %unpack-vector-head-reps.
: (%unpack-reps) ( -- reps )
|
||||
%merge-vector-reps [ int-vector-rep? ] filter
|
||||
%unpack-vector-head-reps union ;
|
||||
|
||||
M: vector-rep supported-simd-op?
|
||||
{
|
||||
{ \ (simd-v+) [ %add-vector-reps ] }
|
||||
|
@ -174,8 +178,8 @@ M: vector-rep supported-simd-op?
|
|||
{ \ (simd-(v>integer)) [ %float>integer-vector-reps ] }
|
||||
{ \ (simd-(vpack-signed)) [ %signed-pack-vector-reps ] }
|
||||
{ \ (simd-(vpack-unsigned)) [ %unsigned-pack-vector-reps ] }
|
||||
{ \ (simd-(vunpack-head)) [ %unpack-vector-reps ] }
|
||||
{ \ (simd-(vunpack-tail)) [ %unpack-vector-reps ] }
|
||||
{ \ (simd-(vunpack-head)) [ (%unpack-reps) ] }
|
||||
{ \ (simd-(vunpack-tail)) [ (%unpack-reps) ] }
|
||||
{ \ (simd-v<=) [ cc<= %compare-vector-reps ] }
|
||||
{ \ (simd-v<) [ cc< %compare-vector-reps ] }
|
||||
{ \ (simd-v=) [ cc= %compare-vector-reps ] }
|
||||
|
|
Loading…
Reference in New Issue