math.vectors.simd.intrinsics: add (simd-vshuffle2-elements), an intrinsic that creates a vector by selecting elements from two input vectors. Use ##shuffle-vector-halves-imm to implement it for double-2s with SSE.

db4
Joe Groff 2010-05-14 01:16:29 -07:00
parent 3766abd65e
commit 9f9e145819
3 changed files with 96 additions and 63 deletions

View File

@ -22,6 +22,7 @@ M: ##gather-vector-4 insn-available? rep>> %gather-vector-4-reps member? ;
! Each method below answers insn-available? for one instruction class by reading
! the instruction's rep slot and testing whether it is a member of the matching
! %...-reps sequence. ##merge-vector-head and ##merge-vector-tail share
! %merge-vector-reps.
! NOTE(review): the %...-reps words are defined elsewhere; presumably they list
! the representations the target CPU supports for that instruction -- confirm.
M: ##store-memory-imm insn-available? rep>> %alien-vector-reps member? ;
M: ##shuffle-vector insn-available? rep>> %shuffle-vector-reps member? ;
M: ##shuffle-vector-imm insn-available? rep>> %shuffle-vector-imm-reps member? ;
M: ##shuffle-vector-halves-imm insn-available? rep>> %shuffle-vector-halves-imm-reps member? ;
M: ##merge-vector-head insn-available? rep>> %merge-vector-reps member? ;
M: ##merge-vector-tail insn-available? rep>> %merge-vector-reps member? ;
M: ##signed-pack-vector insn-available? rep>> %signed-pack-vector-reps member? ;
@ -84,6 +85,8 @@ MACRO: v-vector-op ( trials -- )
[ 1 2 >vector-op-cond ] map '[ f f _ cond ] ;
! Expands to the quotation [ f f <clauses> cond ], where <clauses> is the result
! of running every trial through `1 3 >vector-op-cond`.
! NOTE(review): the meaning of the 1 and 3 arguments is defined by
! >vector-op-cond, which is not visible here -- confirm before changing them.
MACRO: vl-vector-op ( trials -- )
[ 1 3 >vector-op-cond ] map '[ f f _ cond ] ;
! Expands to the quotation [ f f <clauses> cond ], where <clauses> is the result
! of running every trial through `1 4 >vector-op-cond`.
! NOTE(review): the meaning of the 1 and 4 arguments is defined by
! >vector-op-cond, which is not visible here; presumably 4 reflects the
! ( src1 src2 literal rep ) inputs of a two-vector-plus-literal op -- confirm.
MACRO: vvl-vector-op ( trials -- )
[ 1 4 >vector-op-cond ] map '[ f f _ cond ] ;
! Expands to the quotation [ f f <clauses> cond ], where <clauses> is the result
! of running every trial through `1 3 >vector-op-cond`.
! NOTE(review): the meaning of the 1 and 3 arguments is defined by
! >vector-op-cond, which is not visible here -- confirm before changing them.
MACRO: vv-vector-op ( trials -- )
[ 1 3 >vector-op-cond ] map '[ f f _ cond ] ;
MACRO: vv-cc-vector-op ( trials -- )
@ -121,6 +124,7 @@ MACRO: if-literals-match ( quots -- )
! Literal quotations that are handed to [emit-vector-op] by the
! emit-*-vector-op macros in this file.
! The /param variants wrap their work in `dip` and start with `-2 inc-d`
! instead of `ds-drop`; [binary] and [binary/param] use `2inputs` where the
! unary forms use `ds-pop`.
! NOTE(review): presumably the /param forms discard two literal stack slots
! (the parameter and the rep) where the plain forms discard one -- confirm
! against the definitions of ds-drop and inc-d.
CONSTANT: [unary] [ ds-drop ds-pop ]
CONSTANT: [unary/param] [ [ -2 inc-d ds-pop ] dip ]
CONSTANT: [binary] [ ds-drop 2inputs ]
CONSTANT: [binary/param] [ [ -2 inc-d 2inputs ] dip ]
CONSTANT: [quaternary]
[
ds-drop
@ -141,6 +145,8 @@ MACRO: emit-vl-vector-op ( trials literal-pred -- )
[ [unary/param] [ vl-vector-op ] { [ representation? ] } ] dip prefix [emit-vector-op] ;
! Passes the trials already on the stack, followed by [binary],
! [ vv-vector-op ] and the one-element array { [ representation? ] },
! to [emit-vector-op].
MACRO: emit-vv-vector-op ( trials -- )
[binary] [ vv-vector-op ] { [ representation? ] } [emit-vector-op] ;
! Like the other emit-*-vector-op macros, but takes an extra literal-pred.
! `dip` slips [binary/param], [ vvl-vector-op ] and { [ representation? ] }
! underneath literal-pred; `prefix` then prepends literal-pred to that array,
! giving { literal-pred [ representation? ] } as the last argument passed to
! [emit-vector-op].
MACRO: emit-vvl-vector-op ( trials literal-pred -- )
[ [binary/param] [ vvl-vector-op ] { [ representation? ] } ] dip prefix [emit-vector-op] ;
! Passes the trials already on the stack, followed by [quaternary],
! [ vvvv-vector-op ] and the one-element array { [ representation? ] },
! to [emit-vector-op].
MACRO: emit-vvvv-vector-op ( trials -- )
[quaternary] [ vvvv-vector-op ] { [ representation? ] } [emit-vector-op] ;

View File

@ -275,6 +275,26 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
[ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] bi ]
} vl-vector-op ;
! Produces dst by picking elements out of src1 and src2 according to an
! immediate shuffle. The shuffle is first zero-padded at the tail up to
! rep-length. The only trial listed is for double-2-rep, where shuffle indices
! below 2 select from src1 and indices of 2 and up select from src2 (the
! branches subtract 2 before indexing into src2).
! NOTE(review): indices of 4 or more are not range-checked in this word --
! confirm that callers only pass values in 0..3.
: ^shuffle-2-vectors-imm ( src1 src2 shuffle rep -- dst )
[ rep-length 0 pad-tail ] keep {
{ double-2-rep [| src1 src2 shuffle rep |
! i and j are the first two shuffle indices.
shuffle first2 :> ( i j )
{
! Both elements come from src1: a single-vector shuffle suffices.
{ [ i j [ 2 < ] both? ] [
src1 shuffle rep ^shuffle-vector-imm
] }
! Both elements come from src2: rebase the indices and shuffle src2 alone.
{ [ i j [ 2 >= ] both? ] [
src2 shuffle [ 2 - ] map rep ^shuffle-vector-imm
] }
! First element from src1, second from src2 (j rebased by 2).
{ [ i 2 < ] [
src1 src2 i j 2 - 2array rep ^^shuffle-vector-halves-imm
] }
! Default clause; the tests above leave only i >= 2 with j < 2, so the
! first element comes from src2 (i rebased by 2) and the second from src1.
! The source operands are therefore passed in swapped order.
[ src2 src1 i 2 - j 2array rep ^^shuffle-vector-halves-imm ]
} cond
] }
} vvl-vector-op ;
! Builds a shuffle array holding rep-length copies of the index n and applies
! it to src with ^shuffle-vector-imm.
:: ^broadcast-vector ( src n rep -- dst )
    src
    rep rep-length n <array>
    rep ^shuffle-vector-imm ;
@ -475,6 +495,11 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
[ ^shuffle-vector-imm ]
} [ shuffle? ] emit-vl-vector-op ;
! Emitter for (simd-vshuffle2-elements) call nodes. The single trial is
! ^shuffle-2-vectors-imm, and [ shuffle? ] is supplied as the literal-pred
! argument of emit-vvl-vector-op.
: emit-simd-vshuffle2-elements ( node -- )
{
[ ^shuffle-2-vectors-imm ]
} [ shuffle? ] emit-vvl-vector-op ;
: emit-simd-vshuffle-bytes ( node -- )
{
[ ^^shuffle-vector ]
@ -639,6 +664,7 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
{ (simd-hlshift) [ emit-simd-hlshift ] }
{ (simd-hrshift) [ emit-simd-hrshift ] }
{ (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] }
{ (simd-vshuffle2-elements) [ emit-simd-vshuffle2-elements ] }
{ (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] }
{ (simd-vmerge-head) [ emit-simd-vmerge-head ] }
{ (simd-vmerge-tail) [ emit-simd-vmerge-tail ] }

View File

@ -186,6 +186,8 @@ PRIVATE>
! Drops the first n elements of a and zero-pads the tail of what remains back
! up to 16 elements. The rep argument is accepted but not used.
:: (simd-hrshift) ( a n rep -- c )
    a n tail-slice
    16 0 pad-tail ;
! Zero-pads the shuffle sequence n at the tail up to rep-length, then hands
! a, the padded shuffle and rep to (vshuffle).
:: (simd-vshuffle-elements) ( a n rep -- c )
    a
    n rep rep-length 0 pad-tail
    rep (vshuffle) ;
! Joins a and b with cord-append, zero-pads the shuffle sequence n at the tail
! up to rep-length, then hands the joined input, the padded shuffle and rep
! to (vshuffle).
:: (simd-vshuffle2-elements) ( a b n rep -- c )
    a b cord-append
    n rep rep-length 0 pad-tail
    rep (vshuffle) ;
! Ignores the rep that was passed in and calls (vshuffle) on a and b with
! uchar-16-rep instead.
:: (simd-vshuffle-bytes) ( a b rep -- c )
    a b uchar-16-rep (vshuffle) ;
:: (simd-vmerge-head) ( a b rep -- c )
a b rep 2>rep-array :> ( a' b' )
@ -252,4 +254,3 @@ PRIVATE>
"compiler.cfg.intrinsics.simd" require
"compiler.tree.propagation.simd" require
"compiler.cfg.value-numbering.simd" require