Rename ##shuffle-vector to ##shuffle-vector-imm, and add a new ##shuffle-vector for dynamic shuffles. Have vshuffle use ##shuffle-vector to do word and byte shuffles on x86.

db4
Joe Groff 2009-10-09 20:46:52 -05:00
parent 5429b2132a
commit 3bc097f6ff
9 changed files with 105 additions and 41 deletions

View File

@ -277,6 +277,11 @@ literal: rep ;
! Shuffle whose permutation is an input operand: shuffle is listed under
! use: next to src, and only rep is a literal. Compare ##shuffle-vector-imm,
! where the shuffle is a literal slot.
PURE-INSN: ##shuffle-vector
def: dst
use: src shuffle
literal: rep ;
! Shuffle whose permutation is known when the instruction is built:
! shuffle is a literal slot alongside rep, and src is the only use: operand.
PURE-INSN: ##shuffle-vector-imm
def: dst
use: src
literal: shuffle rep ;

View File

@ -1,15 +1,15 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors byte-arrays fry cpu.architecture kernel math
sequences math.vectors.simd.intrinsics macros generalizations
combinators combinators.short-circuit arrays locals
USING: accessors alien byte-arrays fry cpu.architecture kernel math
sequences math.vectors math.vectors.simd.intrinsics macros
generalizations combinators combinators.short-circuit arrays locals
compiler.tree.propagation.info compiler.cfg.builder.blocks
compiler.cfg.comparisons
compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
compiler.cfg.instructions compiler.cfg.registers
compiler.cfg.intrinsics.alien
specialized-arrays ;
FROM: alien.c-types => float double ;
FROM: alien.c-types => heap-size char uchar float double ;
SPECIALIZED-ARRAYS: float double ;
IN: compiler.cfg.intrinsics.simd
@ -21,7 +21,7 @@ MACRO: check-elements ( quots -- )
MACRO: if-literals-match ( quots -- )
[ length ] [ ] [ length ] tri
! n quots n n
! n quots n
'[
! node quot
[
@ -75,17 +75,46 @@ MACRO: if-literals-match ( quots -- )
ds-push
] emit-vector-op ;
: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
! Placeholder predicate for non-literal shuffles: it discards its argument
! and always answers f.
: variable-shuffle? ( obj -- ? )
! the vshuffle intrinsic currently doesn't allow variable shuffles
drop f ;
! Answers t only for an array whose elements are all integers; any
! non-array object answers f without its elements being inspected.
: immediate-shuffle? ( obj -- ? )
    dup array? [ [ integer? ] all? ] [ drop f ] if ;
! An object is an acceptable shuffle when either variable-shuffle? or
! immediate-shuffle? accepts it. Both predicates are side-effect free,
! so evaluating them eagerly gives the same answer as short-circuiting.
: shuffle? ( obj -- ? )
    [ variable-shuffle? ] [ immediate-shuffle? ] bi or ;
! Expand a shuffle of element indices into a shuffle of byte indices.
! Let size be the value of "rep-component-type heap-size". Each element
! index i of the input becomes the size consecutive values
!     i*size, i*size+1, ..., i*size+(size-1)
! and the per-element results are concatenated. For example, with size 4
! the shuffle { 1 0 } becomes the bytes 4 5 6 7 0 1 2 3.
! NOTE(review): presumably size is the byte width of one vector component
! and the result is used as a byte-shuffle control mask -- confirm against
! rep-component-type and the callers of ^^shuffle-vector.
: (>variable-shuffle) ( shuffle rep -- shuffle )
rep-component-type heap-size
! size copies of size, as bytes: the multiplier applied to each index
[ dup <repetition> >byte-array ]
! the bytes 0..size-1: the offsets within one element
[ iota >byte-array ] bi
'[ _ n*v _ v+ ] map concat ;
! Convert shuffle with (>variable-shuffle) only when it satisfies
! immediate-shuffle?; otherwise rep is dropped and shuffle is returned
! unchanged.
: >variable-shuffle ( shuffle rep -- shuffle' )
over immediate-shuffle? [ (>variable-shuffle) ] [ drop ] if ;
! Decide whether the immediate form of the shuffle instruction can be
! used: the shuffle must satisfy immediate-shuffle? and rep must be one of
! %shuffle-vector-imm-reps. The rep lookup is skipped when the shuffle is
! not immediate.
: generate-shuffle-vector-imm? ( shuffle rep -- ? )
    swap immediate-shuffle?
    [ %shuffle-vector-imm-reps member? ]
    [ drop f ] if ;
! Emit the instruction(s) that apply shuffle to src and leave the
! destination on the stack. When generate-shuffle-vector-imm? holds, the
! shuffle is handed directly to ^^shuffle-vector-imm. Otherwise it is run
! through >variable-shuffle, materialized with ^^load-constant, and the
! loaded value becomes the shuffle operand of ^^shuffle-vector.
: generate-shuffle-vector ( src shuffle rep -- dst )
2dup generate-shuffle-vector-imm?
[ ^^shuffle-vector-imm ]
[
! keep restores rep for ^^shuffle-vector after >variable-shuffle consumes it
[ >variable-shuffle ^^load-constant ] keep
^^shuffle-vector
] if ;
: emit-shuffle-vector ( node -- )
! Pad the permutation with zeroes if its too short, since we
! Pad the permutation with zeroes if it's too short, since we
! can't throw an error at this point.
[ [ rep-components 0 pad-tail ] keep ^^shuffle-vector ] [unary/param]
[ [ rep-components 0 pad-tail ] keep generate-shuffle-vector ] [unary/param]
{ [ shuffle? ] [ representation? ] } if-literals-match ;
: ^^broadcast-vector ( src n rep -- dst )
[ rep-components swap <array> ] keep
^^shuffle-vector ;
generate-shuffle-vector ;
: emit-broadcast-vector ( node -- )
[ ^^broadcast-vector ] [unary/param]

View File

@ -450,26 +450,26 @@ M: ##set-alien-vector rewrite rewrite-alien-addressing ;
! Some lame constant folding for SIMD intrinsics. Eventually this
! should be redone completely.
: rewrite-shuffle-vector ( insn expr -- insn' )
: rewrite-shuffle-vector-imm ( insn expr -- insn' )
2dup [ rep>> ] bi@ eq? [
[ [ dst>> ] [ src>> vn>vreg ] bi* ]
[ [ shuffle>> ] bi@ nths ]
[ drop rep>> ]
2tri \ ##shuffle-vector new-insn
2tri \ ##shuffle-vector-imm new-insn
] [ 2drop f ] if ;
: (fold-shuffle-vector) ( shuffle bytes -- bytes' )
: (fold-shuffle-vector-imm) ( shuffle bytes -- bytes' )
2dup length swap length /i group nths concat ;
: fold-shuffle-vector ( insn expr -- insn' )
: fold-shuffle-vector-imm ( insn expr -- insn' )
[ [ dst>> ] [ shuffle>> ] bi ] dip value>>
(fold-shuffle-vector) \ ##load-constant new-insn ;
(fold-shuffle-vector-imm) \ ##load-constant new-insn ;
M: ##shuffle-vector rewrite
M: ##shuffle-vector-imm rewrite
dup src>> vreg>expr {
{ [ dup shuffle-vector-expr? ] [ rewrite-shuffle-vector ] }
{ [ dup reference-expr? ] [ fold-shuffle-vector ] }
{ [ dup constant-expr? ] [ fold-shuffle-vector ] }
{ [ dup shuffle-vector-imm-expr? ] [ rewrite-shuffle-vector-imm ] }
{ [ dup reference-expr? ] [ fold-shuffle-vector-imm ] }
{ [ dup constant-expr? ] [ fold-shuffle-vector-imm ] }
[ 2drop f ]
} cond ;

View File

@ -136,7 +136,7 @@ M: scalar>vector-expr simplify*
[ drop f ]
} cond ;
M: shuffle-vector-expr simplify*
M: shuffle-vector-imm-expr simplify*
[ src>> ] [ shuffle>> ] [ rep>> rep-components iota ] tri
sequence= [ drop f ] unless ;

View File

@ -1215,31 +1215,31 @@ cell 8 = [
}
] [
{
T{ ##shuffle-vector f 1 0 { 0 1 2 3 } float-4-rep }
T{ ##shuffle-vector-imm f 1 0 { 0 1 2 3 } float-4-rep }
} value-numbering-step
] unit-test
[
{
T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep }
T{ ##shuffle-vector f 2 0 { 0 2 3 1 } float-4-rep }
T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep }
T{ ##shuffle-vector-imm f 2 0 { 0 2 3 1 } float-4-rep }
}
] [
{
T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep }
T{ ##shuffle-vector f 2 1 { 3 1 2 0 } float-4-rep }
T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep }
T{ ##shuffle-vector-imm f 2 1 { 3 1 2 0 } float-4-rep }
} value-numbering-step
] unit-test
[
{
T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep }
T{ ##shuffle-vector f 2 1 { 1 0 } double-2-rep }
T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep }
T{ ##shuffle-vector-imm f 2 1 { 1 0 } double-2-rep }
}
] [
{
T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep }
T{ ##shuffle-vector f 2 1 { 1 0 } double-2-rep }
T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep }
T{ ##shuffle-vector-imm f 2 1 { 1 0 } double-2-rep }
} value-numbering-step
] unit-test
@ -1253,7 +1253,7 @@ cell 8 = [
{
T{ ##load-constant f 0 $[ 55 tag-fixnum ] }
T{ ##scalar>vector f 1 0 int-4-rep }
T{ ##shuffle-vector f 2 1 { 0 0 0 0 } float-4-rep }
T{ ##shuffle-vector-imm f 2 1 { 0 0 0 0 } float-4-rep }
} value-numbering-step
] unit-test
@ -1267,7 +1267,7 @@ cell 8 = [
{
T{ ##load-constant f 0 1.25 }
T{ ##scalar>vector f 1 0 float-4-rep }
T{ ##shuffle-vector f 2 1 { 0 0 0 0 } float-4-rep }
T{ ##shuffle-vector-imm f 2 1 { 0 0 0 0 } float-4-rep }
} value-numbering-step
] unit-test

View File

@ -154,6 +154,7 @@ CODEGEN: ##zero-vector %zero-vector
CODEGEN: ##fill-vector %fill-vector
CODEGEN: ##gather-vector-2 %gather-vector-2
CODEGEN: ##gather-vector-4 %gather-vector-4
CODEGEN: ##shuffle-vector-imm %shuffle-vector-imm
CODEGEN: ##shuffle-vector %shuffle-vector
CODEGEN: ##tail>head-vector %tail>head-vector
CODEGEN: ##merge-vector-head %merge-vector-head

View File

@ -242,6 +242,7 @@ HOOK: %fill-vector cpu ( dst rep -- )
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
! Code generator for ##shuffle-vector, whose shuffle is a use: operand.
HOOK: %shuffle-vector cpu ( dst src shuffle rep -- )
! Code generator for ##shuffle-vector-imm, whose shuffle is a literal.
HOOK: %shuffle-vector-imm cpu ( dst src shuffle rep -- )
HOOK: %tail>head-vector cpu ( dst src rep -- )
HOOK: %merge-vector-head cpu ( dst src1 src2 rep -- )
HOOK: %merge-vector-tail cpu ( dst src1 src2 rep -- )
@ -289,6 +290,7 @@ HOOK: %fill-vector-reps cpu ( -- reps )
HOOK: %gather-vector-2-reps cpu ( -- reps )
HOOK: %gather-vector-4-reps cpu ( -- reps )
HOOK: %shuffle-vector-reps cpu ( -- reps )
HOOK: %shuffle-vector-imm-reps cpu ( -- reps )
HOOK: %merge-vector-reps cpu ( -- reps )
HOOK: %signed-pack-vector-reps cpu ( -- reps )
HOOK: %unsigned-pack-vector-reps cpu ( -- reps )
@ -329,6 +331,7 @@ M: object %fill-vector-reps { } ;
M: object %gather-vector-2-reps { } ;
M: object %gather-vector-4-reps { } ;
M: object %shuffle-vector-reps { } ;
M: object %shuffle-vector-imm-reps { } ;
M: object %merge-vector-reps { } ;
M: object %signed-pack-vector-reps { } ;
M: object %unsigned-pack-vector-reps { } ;

View File

@ -698,7 +698,7 @@ M: x86 %gather-vector-2-reps
! Rewrite a two-element shuffle as a four-element one and delegate to
! int-4-shuffle: each of the first two indices i is replaced by the pair
! 2i, 2i+1. For example { 1 0 } becomes { 2 3 0 1 }.
: longlong-2-shuffle ( dst shuffle -- )
first2 [ 2 * dup 1 + ] bi@ 4array int-4-shuffle ;
M:: x86 %shuffle-vector ( dst src shuffle rep -- )
M:: x86 %shuffle-vector-imm ( dst src shuffle rep -- )
dst src rep %copy
dst shuffle rep unsign-rep {
{ double-2-rep [ double-2-shuffle ] }
@ -707,12 +707,20 @@ M:: x86 %shuffle-vector ( dst src shuffle rep -- )
{ longlong-2-rep [ longlong-2-shuffle ] }
} case ;
M: x86 %shuffle-vector-reps
M: x86 %shuffle-vector-imm-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
! x86 implementation of the shuffle that takes its permutation as an
! operand: the four inputs are passed through two-operand and the result
! is assembled as a PSHUFB.
! NOTE(review): presumably two-operand copies src into dst and leaves
! ( dst shuffle ) for PSHUFB -- confirm against two-operand's definition.
M: x86 %shuffle-vector ( dst src shuffle rep -- )
two-operand PSHUFB ;
! Representations accepted by the x86 %shuffle-vector. All of them are
! gated on ssse3?, and the list includes the short-8 and char-16 reps in
! addition to the float, double, int and longlong reps.
M: x86 %shuffle-vector-reps
{
{ ssse3? { float-4-rep double-2-rep longlong-2-rep ulonglong-2-rep int-4-rep uint-4-rep short-8-rep ushort-8-rep char-16-rep uchar-16-rep } }
} available-reps ;
M: x86 %merge-vector-head
[ two-operand ] keep
unsign-rep {
@ -790,8 +798,6 @@ M: x86 %unpack-vector-head-reps ( -- reps )
{ sse4.1? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %unpack-vector-tail-reps ( -- reps ) { } ;
M: x86 %integer>float-vector ( dst src rep -- )
{
{ int-4-rep [ CVTDQ2PS ] }
@ -1037,10 +1043,6 @@ M: x86 %mul-vector-reps
{ sse4.1? { int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %saturated-mul-vector-reps
! No multiplication with saturation on x86
{ } ;
M: x86 %div-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
@ -1223,8 +1225,6 @@ M: x86 %xor-vector-reps
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
M: x86 %not-vector-reps { } ;
M: x86 %shl-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
@ -1271,6 +1271,29 @@ M:: x86 %scalar>integer ( dst src rep -- )
{ uint-scalar-rep [
dst 32-bit-version-of src MOVD
] }
{ short-scalar-rep [
dst 32-bit-version-of src MOVD
dst dst 16-bit-version-of MOVSX
] }
{ ushort-scalar-rep [
dst 32-bit-version-of src MOVD
dst dst 16-bit-version-of MOVZX
] }
{ char-scalar-rep [
dst 32-bit-version-of src MOVD
dst { } 8 [| tmp-dst |
tmp-dst dst int-rep %copy
tmp-dst tmp-dst 8-bit-version-of MOVSX
dst tmp-dst int-rep %copy
] with-small-register
] }
{ uchar-scalar-rep [
    ! Load the low 32 bits of src into dst first, exactly as the
    ! char-scalar-rep, short-scalar-rep and ushort-scalar-rep cases do.
    ! Without this move the zero-extension below would operate on
    ! whatever dst held beforehand and src would never be read.
    dst 32-bit-version-of src MOVD
    ! Zero-extend the low byte through a temporary obtained from
    ! with-small-register, then copy the result back into dst.
    dst { } 8 [| tmp-dst |
        tmp-dst dst int-rep %copy
        tmp-dst tmp-dst 8-bit-version-of MOVZX
        dst tmp-dst int-rep %copy
    ] with-small-register
] }
} case ;
M: x86 %vector>scalar %copy ;

View File

@ -148,6 +148,9 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
union
{ uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep } union ;
! Representations for which a shuffle can be generated by either backend
! hook: the union of %shuffle-vector-reps and %shuffle-vector-imm-reps.
: (%shuffle-reps) ( -- reps )
%shuffle-vector-reps %shuffle-vector-imm-reps union ;
M: vector-rep supported-simd-op?
{
{ \ (simd-v+) [ %add-vector-reps ] }
@ -179,7 +182,7 @@ M: vector-rep supported-simd-op?
{ \ (simd-vrshift) [ %shr-vector-reps ] }
{ \ (simd-hlshift) [ %horizontal-shl-vector-reps ] }
{ \ (simd-hrshift) [ %horizontal-shr-vector-reps ] }
{ \ (simd-vshuffle) [ %shuffle-vector-reps ] }
{ \ (simd-vshuffle) [ (%shuffle-reps) ] }
{ \ (simd-(vmerge-head)) [ %merge-vector-reps ] }
{ \ (simd-(vmerge-tail)) [ %merge-vector-reps ] }
{ \ (simd-(v>float)) [ %integer>float-vector-reps ] }