Rename ##shuffle-vector to ##shuffle-vector-imm, and add a new ##shuffle-vector for dynamic shuffles. Have vshuffle use ##shuffle-vector to do word and byte shuffles on x86.
parent
5429b2132a
commit
3bc097f6ff
|
@ -277,6 +277,11 @@ literal: rep ;
|
|||
|
||||
! Shuffle instruction that takes the shuffle as a second vreg input
! alongside src; only rep is a literal. Defines dst.
PURE-INSN: ##shuffle-vector
|
||||
def: dst
|
||||
use: src shuffle
|
||||
literal: rep ;
|
||||
|
||||
! Shuffle instruction whose shuffle is a literal stored in the instruction
! together with rep; src is the only vreg input. Defines dst.
PURE-INSN: ##shuffle-vector-imm
|
||||
def: dst
|
||||
use: src
|
||||
literal: shuffle rep ;
|
||||
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors byte-arrays fry cpu.architecture kernel math
|
||||
sequences math.vectors.simd.intrinsics macros generalizations
|
||||
combinators combinators.short-circuit arrays locals
|
||||
USING: accessors alien byte-arrays fry cpu.architecture kernel math
|
||||
sequences math.vectors math.vectors.simd.intrinsics macros
|
||||
generalizations combinators combinators.short-circuit arrays locals
|
||||
compiler.tree.propagation.info compiler.cfg.builder.blocks
|
||||
compiler.cfg.comparisons
|
||||
compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
|
||||
compiler.cfg.instructions compiler.cfg.registers
|
||||
compiler.cfg.intrinsics.alien
|
||||
specialized-arrays ;
|
||||
FROM: alien.c-types => float double ;
|
||||
FROM: alien.c-types => heap-size char uchar float double ;
|
||||
SPECIALIZED-ARRAYS: float double ;
|
||||
IN: compiler.cfg.intrinsics.simd
|
||||
|
||||
|
@ -21,7 +21,7 @@ MACRO: check-elements ( quots -- )
|
|||
|
||||
MACRO: if-literals-match ( quots -- )
|
||||
[ length ] [ ] [ length ] tri
|
||||
! n quots n n
|
||||
! n quots n
|
||||
'[
|
||||
! node quot
|
||||
[
|
||||
|
@ -75,17 +75,46 @@ MACRO: if-literals-match ( quots -- )
|
|||
ds-push
|
||||
] emit-vector-op ;
|
||||
|
||||
: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
|
||||
! Predicate for shuffles that are not compile-time integer arrays.
! It discards its input and always answers f.
: variable-shuffle? ( obj -- ? )
|
||||
! the vshuffle intrinsic currently doesn't allow variable shuffles
|
||||
drop f ;
|
||||
|
||||
! Answers t when obj is an array and every one of its elements is an integer.
: immediate-shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
|
||||
|
||||
! Answers t when obj satisfies variable-shuffle? or immediate-shuffle?.
: shuffle? ( obj -- ? ) { [ variable-shuffle? ] [ immediate-shuffle? ] } 1|| ;
|
||||
|
||||
! Expands an element-index shuffle into a byte-index shuffle.
! With size = heap-size of rep's component type, each element n of shuffle
! becomes the size consecutive byte indices n*size .. n*size+size-1:
! n is multiplied into a byte-array of size copies of size, and the
! byte-array 0..size-1 is added. The per-element results are concatenated.
: (>variable-shuffle) ( shuffle rep -- shuffle )
|
||||
rep-component-type heap-size
|
||||
[ dup <repetition> >byte-array ]
|
||||
[ iota >byte-array ] bi
|
||||
'[ _ n*v _ v+ ] map concat ;
|
||||
|
||||
! If shuffle is an immediate-shuffle?, converts it with (>variable-shuffle);
! otherwise drops rep and leaves shuffle unchanged.
: >variable-shuffle ( shuffle rep -- shuffle' )
|
||||
over immediate-shuffle? [ (>variable-shuffle) ] [ drop ] if ;
|
||||
|
||||
! Answers true only when both conditions hold: shuffle is an
! immediate-shuffle?, and rep is a member of %shuffle-vector-imm-reps.
: generate-shuffle-vector-imm? ( shuffle rep -- ? )
|
||||
{
|
||||
[ drop immediate-shuffle? ]
|
||||
[ nip %shuffle-vector-imm-reps member? ]
|
||||
} 2&& ;
|
||||
|
||||
! Emits a shuffle of src and leaves the result vreg.
! When generate-shuffle-vector-imm? holds for shuffle and rep, emits
! ^^shuffle-vector-imm. Otherwise converts shuffle with >variable-shuffle,
! loads the result via ^^load-constant, and emits ^^shuffle-vector with
! that loaded value as the shuffle input.
: generate-shuffle-vector ( src shuffle rep -- dst )
|
||||
2dup generate-shuffle-vector-imm?
|
||||
[ ^^shuffle-vector-imm ]
|
||||
[
|
||||
[ >variable-shuffle ^^load-constant ] keep
|
||||
^^shuffle-vector
|
||||
] if ;
|
||||
|
||||
: emit-shuffle-vector ( node -- )
|
||||
! Pad the permutation with zeroes if its too short, since we
|
||||
! Pad the permutation with zeroes if it's too short, since we
|
||||
! can't throw an error at this point.
|
||||
[ [ rep-components 0 pad-tail ] keep ^^shuffle-vector ] [unary/param]
|
||||
[ [ rep-components 0 pad-tail ] keep generate-shuffle-vector ] [unary/param]
|
||||
{ [ shuffle? ] [ representation? ] } if-literals-match ;
|
||||
|
||||
: ^^broadcast-vector ( src n rep -- dst )
|
||||
[ rep-components swap <array> ] keep
|
||||
^^shuffle-vector ;
|
||||
generate-shuffle-vector ;
|
||||
|
||||
: emit-broadcast-vector ( node -- )
|
||||
[ ^^broadcast-vector ] [unary/param]
|
||||
|
|
|
@ -450,26 +450,26 @@ M: ##set-alien-vector rewrite rewrite-alien-addressing ;
|
|||
! Some lame constant folding for SIMD intrinsics. Eventually this
|
||||
! should be redone completely.
|
||||
|
||||
: rewrite-shuffle-vector ( insn expr -- insn' )
|
||||
: rewrite-shuffle-vector-imm ( insn expr -- insn' )
|
||||
2dup [ rep>> ] bi@ eq? [
|
||||
[ [ dst>> ] [ src>> vn>vreg ] bi* ]
|
||||
[ [ shuffle>> ] bi@ nths ]
|
||||
[ drop rep>> ]
|
||||
2tri \ ##shuffle-vector new-insn
|
||||
2tri \ ##shuffle-vector-imm new-insn
|
||||
] [ 2drop f ] if ;
|
||||
|
||||
: (fold-shuffle-vector) ( shuffle bytes -- bytes' )
|
||||
: (fold-shuffle-vector-imm) ( shuffle bytes -- bytes' )
|
||||
2dup length swap length /i group nths concat ;
|
||||
|
||||
: fold-shuffle-vector ( insn expr -- insn' )
|
||||
: fold-shuffle-vector-imm ( insn expr -- insn' )
|
||||
[ [ dst>> ] [ shuffle>> ] bi ] dip value>>
|
||||
(fold-shuffle-vector) \ ##load-constant new-insn ;
|
||||
(fold-shuffle-vector-imm) \ ##load-constant new-insn ;
|
||||
|
||||
M: ##shuffle-vector rewrite
|
||||
M: ##shuffle-vector-imm rewrite
|
||||
dup src>> vreg>expr {
|
||||
{ [ dup shuffle-vector-expr? ] [ rewrite-shuffle-vector ] }
|
||||
{ [ dup reference-expr? ] [ fold-shuffle-vector ] }
|
||||
{ [ dup constant-expr? ] [ fold-shuffle-vector ] }
|
||||
{ [ dup shuffle-vector-imm-expr? ] [ rewrite-shuffle-vector-imm ] }
|
||||
{ [ dup reference-expr? ] [ fold-shuffle-vector-imm ] }
|
||||
{ [ dup constant-expr? ] [ fold-shuffle-vector-imm ] }
|
||||
[ 2drop f ]
|
||||
} cond ;
|
||||
|
||||
|
|
|
@ -136,7 +136,7 @@ M: scalar>vector-expr simplify*
|
|||
[ drop f ]
|
||||
} cond ;
|
||||
|
||||
M: shuffle-vector-expr simplify*
|
||||
M: shuffle-vector-imm-expr simplify*
|
||||
[ src>> ] [ shuffle>> ] [ rep>> rep-components iota ] tri
|
||||
sequence= [ drop f ] unless ;
|
||||
|
||||
|
|
|
@ -1215,31 +1215,31 @@ cell 8 = [
|
|||
}
|
||||
] [
|
||||
{
|
||||
T{ ##shuffle-vector f 1 0 { 0 1 2 3 } float-4-rep }
|
||||
T{ ##shuffle-vector-imm f 1 0 { 0 1 2 3 } float-4-rep }
|
||||
} value-numbering-step
|
||||
] unit-test
|
||||
|
||||
[
|
||||
{
|
||||
T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep }
|
||||
T{ ##shuffle-vector f 2 0 { 0 2 3 1 } float-4-rep }
|
||||
T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep }
|
||||
T{ ##shuffle-vector-imm f 2 0 { 0 2 3 1 } float-4-rep }
|
||||
}
|
||||
] [
|
||||
{
|
||||
T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep }
|
||||
T{ ##shuffle-vector f 2 1 { 3 1 2 0 } float-4-rep }
|
||||
T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep }
|
||||
T{ ##shuffle-vector-imm f 2 1 { 3 1 2 0 } float-4-rep }
|
||||
} value-numbering-step
|
||||
] unit-test
|
||||
|
||||
[
|
||||
{
|
||||
T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep }
|
||||
T{ ##shuffle-vector f 2 1 { 1 0 } double-2-rep }
|
||||
T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep }
|
||||
T{ ##shuffle-vector-imm f 2 1 { 1 0 } double-2-rep }
|
||||
}
|
||||
] [
|
||||
{
|
||||
T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep }
|
||||
T{ ##shuffle-vector f 2 1 { 1 0 } double-2-rep }
|
||||
T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep }
|
||||
T{ ##shuffle-vector-imm f 2 1 { 1 0 } double-2-rep }
|
||||
} value-numbering-step
|
||||
] unit-test
|
||||
|
||||
|
@ -1253,7 +1253,7 @@ cell 8 = [
|
|||
{
|
||||
T{ ##load-constant f 0 $[ 55 tag-fixnum ] }
|
||||
T{ ##scalar>vector f 1 0 int-4-rep }
|
||||
T{ ##shuffle-vector f 2 1 { 0 0 0 0 } float-4-rep }
|
||||
T{ ##shuffle-vector-imm f 2 1 { 0 0 0 0 } float-4-rep }
|
||||
} value-numbering-step
|
||||
] unit-test
|
||||
|
||||
|
@ -1267,7 +1267,7 @@ cell 8 = [
|
|||
{
|
||||
T{ ##load-constant f 0 1.25 }
|
||||
T{ ##scalar>vector f 1 0 float-4-rep }
|
||||
T{ ##shuffle-vector f 2 1 { 0 0 0 0 } float-4-rep }
|
||||
T{ ##shuffle-vector-imm f 2 1 { 0 0 0 0 } float-4-rep }
|
||||
} value-numbering-step
|
||||
] unit-test
|
||||
|
||||
|
|
|
@ -154,6 +154,7 @@ CODEGEN: ##zero-vector %zero-vector
|
|||
CODEGEN: ##fill-vector %fill-vector
|
||||
CODEGEN: ##gather-vector-2 %gather-vector-2
|
||||
CODEGEN: ##gather-vector-4 %gather-vector-4
|
||||
CODEGEN: ##shuffle-vector-imm %shuffle-vector-imm
|
||||
CODEGEN: ##shuffle-vector %shuffle-vector
|
||||
CODEGEN: ##tail>head-vector %tail>head-vector
|
||||
CODEGEN: ##merge-vector-head %merge-vector-head
|
||||
|
|
|
@ -242,6 +242,7 @@ HOOK: %fill-vector cpu ( dst rep -- )
|
|||
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
|
||||
HOOK: %shuffle-vector cpu ( dst src shuffle rep -- )
|
||||
HOOK: %shuffle-vector-imm cpu ( dst src shuffle rep -- )
|
||||
HOOK: %tail>head-vector cpu ( dst src rep -- )
|
||||
HOOK: %merge-vector-head cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %merge-vector-tail cpu ( dst src1 src2 rep -- )
|
||||
|
@ -289,6 +290,7 @@ HOOK: %fill-vector-reps cpu ( -- reps )
|
|||
HOOK: %gather-vector-2-reps cpu ( -- reps )
|
||||
HOOK: %gather-vector-4-reps cpu ( -- reps )
|
||||
HOOK: %shuffle-vector-reps cpu ( -- reps )
|
||||
HOOK: %shuffle-vector-imm-reps cpu ( -- reps )
|
||||
HOOK: %merge-vector-reps cpu ( -- reps )
|
||||
HOOK: %signed-pack-vector-reps cpu ( -- reps )
|
||||
HOOK: %unsigned-pack-vector-reps cpu ( -- reps )
|
||||
|
@ -329,6 +331,7 @@ M: object %fill-vector-reps { } ;
|
|||
M: object %gather-vector-2-reps { } ;
|
||||
M: object %gather-vector-4-reps { } ;
|
||||
M: object %shuffle-vector-reps { } ;
|
||||
M: object %shuffle-vector-imm-reps { } ;
|
||||
M: object %merge-vector-reps { } ;
|
||||
M: object %signed-pack-vector-reps { } ;
|
||||
M: object %unsigned-pack-vector-reps { } ;
|
||||
|
|
|
@ -698,7 +698,7 @@ M: x86 %gather-vector-2-reps
|
|||
! Rewrites a 2-element shuffle as a 4-element one and hands it to
! int-4-shuffle: each of the first two elements n becomes the pair 2n, 2n+1.
: longlong-2-shuffle ( dst shuffle -- )
|
||||
first2 [ 2 * dup 1 + ] bi@ 4array int-4-shuffle ;
|
||||
|
||||
M:: x86 %shuffle-vector ( dst src shuffle rep -- )
|
||||
M:: x86 %shuffle-vector-imm ( dst src shuffle rep -- )
|
||||
dst src rep %copy
|
||||
dst shuffle rep unsign-rep {
|
||||
{ double-2-rep [ double-2-shuffle ] }
|
||||
|
@ -707,12 +707,20 @@ M:: x86 %shuffle-vector ( dst src shuffle rep -- )
|
|||
{ longlong-2-rep [ longlong-2-shuffle ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %shuffle-vector-reps
|
||||
M: x86 %shuffle-vector-imm-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
! x86 method for %shuffle-vector: runs two-operand on the inputs, then
! emits PSHUFB on what it leaves on the stack.
! NOTE(review): two-operand is defined outside this view; presumably it
! arranges for dst to hold src so PSHUFB can operate in place - confirm.
M: x86 %shuffle-vector ( dst src shuffle rep -- )
|
||||
two-operand PSHUFB ;
|
||||
|
||||
! Reps offered for %shuffle-vector on x86. All ten vector reps are listed
! under the single ssse3? entry, and the table is passed to available-reps.
M: x86 %shuffle-vector-reps
|
||||
{
|
||||
{ ssse3? { float-4-rep double-2-rep longlong-2-rep ulonglong-2-rep int-4-rep uint-4-rep short-8-rep ushort-8-rep char-16-rep uchar-16-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %merge-vector-head
|
||||
[ two-operand ] keep
|
||||
unsign-rep {
|
||||
|
@ -790,8 +798,6 @@ M: x86 %unpack-vector-head-reps ( -- reps )
|
|||
{ sse4.1? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %unpack-vector-tail-reps ( -- reps ) { } ;
|
||||
|
||||
M: x86 %integer>float-vector ( dst src rep -- )
|
||||
{
|
||||
{ int-4-rep [ CVTDQ2PS ] }
|
||||
|
@ -1037,10 +1043,6 @@ M: x86 %mul-vector-reps
|
|||
{ sse4.1? { int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %saturated-mul-vector-reps
|
||||
! No multiplication with saturation on x86
|
||||
{ } ;
|
||||
|
||||
M: x86 %div-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
|
@ -1223,8 +1225,6 @@ M: x86 %xor-vector-reps
|
|||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %not-vector-reps { } ;
|
||||
|
||||
M: x86 %shl-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
|
@ -1271,6 +1271,29 @@ M:: x86 %scalar>integer ( dst src rep -- )
|
|||
{ uint-scalar-rep [
|
||||
dst 32-bit-version-of src MOVD
|
||||
] }
|
||||
{ short-scalar-rep [
|
||||
dst 32-bit-version-of src MOVD
|
||||
dst dst 16-bit-version-of MOVSX
|
||||
] }
|
||||
{ ushort-scalar-rep [
|
||||
dst 32-bit-version-of src MOVD
|
||||
dst dst 16-bit-version-of MOVZX
|
||||
] }
|
||||
{ char-scalar-rep [
|
||||
dst 32-bit-version-of src MOVD
|
||||
dst { } 8 [| tmp-dst |
|
||||
tmp-dst dst int-rep %copy
|
||||
tmp-dst tmp-dst 8-bit-version-of MOVSX
|
||||
dst tmp-dst int-rep %copy
|
||||
] with-small-register
|
||||
] }
|
||||
{ uchar-scalar-rep [
|
||||
dst { } 8 [| tmp-dst |
|
||||
tmp-dst dst int-rep %copy
|
||||
tmp-dst tmp-dst 8-bit-version-of MOVZX
|
||||
dst tmp-dst int-rep %copy
|
||||
] with-small-register
|
||||
] }
|
||||
} case ;
|
||||
|
||||
! On x86, %vector>scalar is implemented by calling %copy directly.
M: x86 %vector>scalar %copy ;
|
||||
|
|
|
@ -148,6 +148,9 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
|
|||
union
|
||||
{ uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep } union ;
|
||||
|
||||
! Union of the reps reported by %shuffle-vector-reps and
! %shuffle-vector-imm-reps.
: (%shuffle-reps) ( -- reps )
|
||||
%shuffle-vector-reps %shuffle-vector-imm-reps union ;
|
||||
|
||||
M: vector-rep supported-simd-op?
|
||||
{
|
||||
{ \ (simd-v+) [ %add-vector-reps ] }
|
||||
|
@ -179,7 +182,7 @@ M: vector-rep supported-simd-op?
|
|||
{ \ (simd-vrshift) [ %shr-vector-reps ] }
|
||||
{ \ (simd-hlshift) [ %horizontal-shl-vector-reps ] }
|
||||
{ \ (simd-hrshift) [ %horizontal-shr-vector-reps ] }
|
||||
{ \ (simd-vshuffle) [ %shuffle-vector-reps ] }
|
||||
{ \ (simd-vshuffle) [ (%shuffle-reps) ] }
|
||||
{ \ (simd-(vmerge-head)) [ %merge-vector-reps ] }
|
||||
{ \ (simd-(vmerge-tail)) [ %merge-vector-reps ] }
|
||||
{ \ (simd-(v>float)) [ %integer>float-vector-reps ] }
|
||||
|
|
Loading…
Reference in New Issue