decompose %unpack-vector-head/tail into %compare-vector/%merge-vector-head/tail or %tail>head-vector/%unpack-vector-head insns when there isn't an actual unpack insn; get rid of fake x86 implementations

db4
Joe Groff 2009-10-07 14:09:46 -05:00
parent fd6f370119
commit f2c9eb79e2
7 changed files with 76 additions and 45 deletions

View File

@ -280,6 +280,11 @@ def: dst
use: src use: src
literal: shuffle rep ; literal: shuffle rep ;
! Pure vector instruction with one destination vreg, one source vreg and a
! literal vector representation. generate-unpack-vector-tail emits it to
! reposition a vector before applying ##unpack-vector-head when the backend
! has no native unpack-tail instruction for the rep.
PURE-INSN: ##tail>head-vector
def: dst
use: src
literal: rep ;
PURE-INSN: ##merge-vector-head PURE-INSN: ##merge-vector-head
def: dst def: dst
use: src1 src2 use: src1 src2
@ -303,13 +308,11 @@ literal: rep ;
PURE-INSN: ##unpack-vector-head PURE-INSN: ##unpack-vector-head
def: dst def: dst
use: src use: src
temp: temp
literal: rep ; literal: rep ;
PURE-INSN: ##unpack-vector-tail PURE-INSN: ##unpack-vector-tail
def: dst def: dst
use: src use: src
temp: temp
literal: rep ; literal: rep ;
PURE-INSN: ##integer>float-vector PURE-INSN: ##integer>float-vector

View File

@ -200,8 +200,8 @@ IN: compiler.cfg.intrinsics
{ math.vectors.simd.intrinsics:(simd-(v>integer)) [ [ ^^float>integer-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-(v>integer)) [ [ ^^float>integer-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-(vpack-signed)) [ [ ^^signed-pack-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-(vpack-signed)) [ [ ^^signed-pack-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-(vpack-unsigned)) [ [ ^^unsigned-pack-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-(vpack-unsigned)) [ [ ^^unsigned-pack-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-(vunpack-head)) [ [ ^^unpack-vector-head ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-(vunpack-head)) [ [ generate-unpack-vector-head ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-(vunpack-tail)) [ [ ^^unpack-vector-tail ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-(vunpack-tail)) [ [ generate-unpack-vector-tail ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] } { math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] }
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] } { math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }

View File

@ -2,8 +2,9 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors byte-arrays fry cpu.architecture kernel math USING: accessors byte-arrays fry cpu.architecture kernel math
sequences math.vectors.simd.intrinsics macros generalizations sequences math.vectors.simd.intrinsics macros generalizations
combinators combinators.short-circuit arrays combinators combinators.short-circuit arrays locals
compiler.tree.propagation.info compiler.cfg.builder.blocks compiler.tree.propagation.info compiler.cfg.builder.blocks
compiler.cfg.comparisons
compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
compiler.cfg.instructions compiler.cfg.registers compiler.cfg.instructions compiler.cfg.registers
compiler.cfg.intrinsics.alien ; compiler.cfg.intrinsics.alien ;
@ -121,3 +122,36 @@ MACRO: if-literals-match ( quots -- )
[ ^^not-vector ] [ ^^not-vector ]
[ [ ^^fill-vector ] [ ^^xor-vector ] bi ] if ; [ [ ^^fill-vector ] [ ^^xor-vector ] bi ] if ;
! Emit CFG instructions that unpack (widen) the head elements of the vector
! in vreg src, whose representation is rep, and return the result vreg.
!
! Strategy, in priority order:
!   1. If the backend lists rep in %unpack-vector-head-reps, emit a single
!      ##unpack-vector-head.
!   2. If rep is a signed integer rep, build a sign mask by comparing a zero
!      vector against src with cc> and interleave src with that mask using
!      ##merge-vector-head.
!   3. Otherwise (unsigned integer reps) interleave src with a zero vector.
!      No comparison is needed, since the upper part of every widened
!      element is always zero. This also avoids asking the backend for a
!      cc> comparison on a rep it may not support.
:: generate-unpack-vector-head ( src rep -- dst )
    {
        {
            [ rep %unpack-vector-head-reps member? ]
            [ src rep ^^unpack-vector-head ]
        }
        {
            [ rep signed-int-vector-rep? ]
            [
                rep ^^zero-vector :> zero
                zero src rep cc> ^^compare-vector :> sign
                src sign rep ^^merge-vector-head
            ]
        }
        [
            rep ^^zero-vector :> zero
            src zero rep ^^merge-vector-head
        ]
    } cond ;
! Emit CFG instructions that unpack (widen) the tail elements of the vector
! in vreg src, whose representation is rep, and return the result vreg.
!
! Strategy, in priority order:
!   1. If the backend lists rep in %unpack-vector-tail-reps, emit a single
!      ##unpack-vector-tail.
!   2. If the backend only has a native unpack-head for rep, apply
!      ##tail>head-vector to src first and then ##unpack-vector-head.
!   3. If rep is a signed integer rep, build a sign mask by comparing a zero
!      vector against src with cc> and interleave src with that mask using
!      ##merge-vector-tail.
!   4. Otherwise (unsigned integer reps) interleave src with a zero vector.
!      No comparison is needed, since the upper part of every widened
!      element is always zero. This also avoids asking the backend for a
!      cc> comparison on a rep it may not support.
:: generate-unpack-vector-tail ( src rep -- dst )
    {
        {
            [ rep %unpack-vector-tail-reps member? ]
            [ src rep ^^unpack-vector-tail ]
        }
        {
            [ rep %unpack-vector-head-reps member? ]
            [
                src rep ^^tail>head-vector :> tail
                tail rep ^^unpack-vector-head
            ]
        }
        {
            [ rep signed-int-vector-rep? ]
            [
                rep ^^zero-vector :> zero
                zero src rep cc> ^^compare-vector :> sign
                src sign rep ^^merge-vector-tail
            ]
        }
        [
            rep ^^zero-vector :> zero
            src zero rep ^^merge-vector-tail
        ]
    } cond ;

View File

@ -164,6 +164,7 @@ CODEGEN: ##fill-vector %fill-vector
CODEGEN: ##gather-vector-2 %gather-vector-2 CODEGEN: ##gather-vector-2 %gather-vector-2
CODEGEN: ##gather-vector-4 %gather-vector-4 CODEGEN: ##gather-vector-4 %gather-vector-4
CODEGEN: ##shuffle-vector %shuffle-vector CODEGEN: ##shuffle-vector %shuffle-vector
CODEGEN: ##tail>head-vector %tail>head-vector
CODEGEN: ##merge-vector-head %merge-vector-head CODEGEN: ##merge-vector-head %merge-vector-head
CODEGEN: ##merge-vector-tail %merge-vector-tail CODEGEN: ##merge-vector-tail %merge-vector-tail
CODEGEN: ##signed-pack-vector %signed-pack-vector CODEGEN: ##signed-pack-vector %signed-pack-vector

View File

@ -230,12 +230,13 @@ HOOK: %fill-vector cpu ( dst rep -- )
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- ) HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
HOOK: %shuffle-vector cpu ( dst src shuffle rep -- ) HOOK: %shuffle-vector cpu ( dst src shuffle rep -- )
HOOK: %tail>head-vector cpu ( dst src rep -- )
HOOK: %merge-vector-head cpu ( dst src1 src2 rep -- ) HOOK: %merge-vector-head cpu ( dst src1 src2 rep -- )
HOOK: %merge-vector-tail cpu ( dst src1 src2 rep -- ) HOOK: %merge-vector-tail cpu ( dst src1 src2 rep -- )
HOOK: %signed-pack-vector cpu ( dst src1 src2 rep -- ) HOOK: %signed-pack-vector cpu ( dst src1 src2 rep -- )
HOOK: %unsigned-pack-vector cpu ( dst src1 src2 rep -- ) HOOK: %unsigned-pack-vector cpu ( dst src1 src2 rep -- )
HOOK: %unpack-vector-head cpu ( dst src temp rep -- ) HOOK: %unpack-vector-head cpu ( dst src rep -- )
HOOK: %unpack-vector-tail cpu ( dst src temp rep -- ) HOOK: %unpack-vector-tail cpu ( dst src rep -- )
HOOK: %integer>float-vector cpu ( dst src rep -- ) HOOK: %integer>float-vector cpu ( dst src rep -- )
HOOK: %float>integer-vector cpu ( dst src rep -- ) HOOK: %float>integer-vector cpu ( dst src rep -- )
HOOK: %compare-vector cpu ( dst src1 src2 temp rep cc -- ) HOOK: %compare-vector cpu ( dst src1 src2 temp rep cc -- )
@ -279,7 +280,8 @@ HOOK: %shuffle-vector-reps cpu ( -- reps )
HOOK: %merge-vector-reps cpu ( -- reps ) HOOK: %merge-vector-reps cpu ( -- reps )
HOOK: %signed-pack-vector-reps cpu ( -- reps ) HOOK: %signed-pack-vector-reps cpu ( -- reps )
HOOK: %unsigned-pack-vector-reps cpu ( -- reps ) HOOK: %unsigned-pack-vector-reps cpu ( -- reps )
HOOK: %unpack-vector-reps cpu ( -- reps ) HOOK: %unpack-vector-head-reps cpu ( -- reps )
HOOK: %unpack-vector-tail-reps cpu ( -- reps )
HOOK: %integer>float-vector-reps cpu ( -- reps ) HOOK: %integer>float-vector-reps cpu ( -- reps )
HOOK: %float>integer-vector-reps cpu ( -- reps ) HOOK: %float>integer-vector-reps cpu ( -- reps )
HOOK: %compare-vector-reps cpu ( cc -- reps ) HOOK: %compare-vector-reps cpu ( cc -- reps )

View File

@ -774,45 +774,32 @@ M: x86 %unsigned-pack-vector-reps
{ sse4.1? { int-4-rep } } { sse4.1? { int-4-rep } }
} available-reps ; } available-reps ;
:: %sign-extension-vector ( dst src rep -- ) M: x86 %tail>head-vector ( dst src rep -- )
dst rep %zero-vector dup {
dst src rep { { float-4-rep [ drop MOVHLPS ] }
{ char-16-rep [ PCMPGTB ] } { double-2-rep [ [ %copy ] [ drop UNPCKHPD ] 3bi ] }
{ short-8-rep [ PCMPGTW ] } [ drop [ %copy ] [ drop PUNPCKHQDQ ] 3bi ]
{ int-4-rep [ PCMPGTD ] }
{ longlong-2-rep [ PCMPGTQ ] }
} case ; } case ;
:: (%unpack-vector-signs) ( dst src rep -- ) M: x86 %unpack-vector-head ( dst src rep -- )
dst rep signed-int-vector-rep?
[ src rep %sign-extension-vector ]
[ rep %zero-vector ] if ;
M:: x86 %unpack-vector-head ( dst src temp rep -- )
temp src rep (%unpack-vector-signs)
dst src rep %copy
dst temp rep unsign-rep {
{ char-16-rep [ PUNPCKLBW ] }
{ short-8-rep [ PUNPCKLWD ] }
{ int-4-rep [ PUNPCKLDQ ] }
{ longlong-2-rep [ PUNPCKLQDQ ] }
} case ;
M:: x86 %unpack-vector-tail ( dst src temp rep -- )
temp src rep (%unpack-vector-signs)
dst src rep %copy
dst temp rep unsign-rep {
{ char-16-rep [ PUNPCKHBW ] }
{ short-8-rep [ PUNPCKHWD ] }
{ int-4-rep [ PUNPCKHDQ ] }
{ longlong-2-rep [ PUNPCKHQDQ ] }
} case ;
M: x86 %unpack-vector-reps ( -- reps )
{ {
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } { char-16-rep [ PMOVSXBW ] }
{ uchar-16-rep [ PMOVZXBW ] }
{ short-8-rep [ PMOVSXWD ] }
{ ushort-8-rep [ PMOVZXWD ] }
{ int-4-rep [ PMOVSXDQ ] }
{ uint-4-rep [ PMOVZXDQ ] }
{ float-4-rep [ CVTPS2PD ] }
} case ;
M: x86 %unpack-vector-head-reps ( -- reps )
{
{ sse2? { float-4-rep } }
{ sse4.1? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ; } available-reps ;
! x86 reports no representations with a native unpack-tail instruction, so
! the `%unpack-vector-tail-reps member?` test in generate-unpack-vector-tail
! never succeeds on this backend and one of its other strategies is used.
M: x86 %unpack-vector-tail-reps ( -- reps ) { } ;
M: x86 %integer>float-vector ( dst src rep -- ) M: x86 %integer>float-vector ( dst src rep -- )
{ {
{ int-4-rep [ CVTDQ2PS ] } { int-4-rep [ CVTDQ2PS ] }

View File

@ -4,7 +4,7 @@ USING: alien alien.c-types alien.data assocs combinators
cpu.architecture compiler.cfg.comparisons fry generalizations cpu.architecture compiler.cfg.comparisons fry generalizations
kernel libc macros math kernel libc macros math
math.vectors.conversion.backend math.vectors.conversion.backend
sequences effects accessors namespaces sequences sets effects accessors namespaces
lexer parser vocabs.parser words arrays math.vectors ; lexer parser vocabs.parser words arrays math.vectors ;
IN: math.vectors.simd.intrinsics IN: math.vectors.simd.intrinsics
@ -137,6 +137,10 @@ MACRO: (simd-boa) ( rep -- quot )
GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? ) GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
! Representations for which the vunpack-head/vunpack-tail intrinsics are
! reported as supported: the integer reps among %merge-vector-reps, plus
! every rep in %unpack-vector-head-reps.
: (%unpack-reps) ( -- reps )
%merge-vector-reps [ int-vector-rep? ] filter
%unpack-vector-head-reps union ;
M: vector-rep supported-simd-op? M: vector-rep supported-simd-op?
{ {
{ \ (simd-v+) [ %add-vector-reps ] } { \ (simd-v+) [ %add-vector-reps ] }
@ -174,8 +178,8 @@ M: vector-rep supported-simd-op?
{ \ (simd-(v>integer)) [ %float>integer-vector-reps ] } { \ (simd-(v>integer)) [ %float>integer-vector-reps ] }
{ \ (simd-(vpack-signed)) [ %signed-pack-vector-reps ] } { \ (simd-(vpack-signed)) [ %signed-pack-vector-reps ] }
{ \ (simd-(vpack-unsigned)) [ %unsigned-pack-vector-reps ] } { \ (simd-(vpack-unsigned)) [ %unsigned-pack-vector-reps ] }
{ \ (simd-(vunpack-head)) [ %unpack-vector-reps ] } { \ (simd-(vunpack-head)) [ (%unpack-reps) ] }
{ \ (simd-(vunpack-tail)) [ %unpack-vector-reps ] } { \ (simd-(vunpack-tail)) [ (%unpack-reps) ] }
{ \ (simd-v<=) [ cc<= %compare-vector-reps ] } { \ (simd-v<=) [ cc<= %compare-vector-reps ] }
{ \ (simd-v<) [ cc< %compare-vector-reps ] } { \ (simd-v<) [ cc< %compare-vector-reps ] }
{ \ (simd-v=) [ cc= %compare-vector-reps ] } { \ (simd-v=) [ cc= %compare-vector-reps ] }