math.vectors.simd.intrinsics: (simd-vshuffle2-elements) intrinsic that creates a vector by selecting elements from two input vectors. use ##shuffle-vector-halves-imm to implement for double-2s with SSE

db4
Joe Groff 2010-05-14 01:16:29 -07:00
parent 3766abd65e
commit 9f9e145819
3 changed files with 96 additions and 63 deletions

View File

@ -22,6 +22,7 @@ M: ##gather-vector-4 insn-available? rep>> %gather-vector-4-reps member? ;
M: ##store-memory-imm insn-available? rep>> %alien-vector-reps member? ;
M: ##shuffle-vector insn-available? rep>> %shuffle-vector-reps member? ;
M: ##shuffle-vector-imm insn-available? rep>> %shuffle-vector-imm-reps member? ;
M: ##shuffle-vector-halves-imm insn-available? rep>> %shuffle-vector-halves-imm-reps member? ;
M: ##merge-vector-head insn-available? rep>> %merge-vector-reps member? ;
M: ##merge-vector-tail insn-available? rep>> %merge-vector-reps member? ;
M: ##signed-pack-vector insn-available? rep>> %signed-pack-vector-reps member? ;
@ -84,6 +85,8 @@ MACRO: v-vector-op ( trials -- )
[ 1 2 >vector-op-cond ] map '[ f f _ cond ] ;
MACRO: vl-vector-op ( trials -- )
[ 1 3 >vector-op-cond ] map '[ f f _ cond ] ;
MACRO: vvl-vector-op ( trials -- )
[ 1 4 >vector-op-cond ] map '[ f f _ cond ] ;
MACRO: vv-vector-op ( trials -- )
[ 1 3 >vector-op-cond ] map '[ f f _ cond ] ;
MACRO: vv-cc-vector-op ( trials -- )
@ -118,9 +121,10 @@ MACRO: if-literals-match ( quots -- )
] [ 2drop bad-simd-intrinsic ] if
] ;
CONSTANT: [unary] [ ds-drop ds-pop ]
CONSTANT: [unary/param] [ [ -2 inc-d ds-pop ] dip ]
CONSTANT: [binary] [ ds-drop 2inputs ]
CONSTANT: [unary] [ ds-drop ds-pop ]
CONSTANT: [unary/param] [ [ -2 inc-d ds-pop ] dip ]
CONSTANT: [binary] [ ds-drop 2inputs ]
CONSTANT: [binary/param] [ [ -2 inc-d 2inputs ] dip ]
CONSTANT: [quaternary]
[
ds-drop
@ -141,6 +145,8 @@ MACRO: emit-vl-vector-op ( trials literal-pred -- )
[ [unary/param] [ vl-vector-op ] { [ representation? ] } ] dip prefix [emit-vector-op] ;
MACRO: emit-vv-vector-op ( trials -- )
[binary] [ vv-vector-op ] { [ representation? ] } [emit-vector-op] ;
MACRO: emit-vvl-vector-op ( trials literal-pred -- )
[ [binary/param] [ vvl-vector-op ] { [ representation? ] } ] dip prefix [emit-vector-op] ;
MACRO: emit-vvvv-vector-op ( trials -- )
[quaternary] [ vvvv-vector-op ] { [ representation? ] } [emit-vector-op] ;

View File

@ -275,6 +275,26 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
[ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] bi ]
} vl-vector-op ;
: ^shuffle-2-vectors-imm ( src1 src2 shuffle rep -- dst )
[ rep-length 0 pad-tail ] keep {
{ double-2-rep [| src1 src2 shuffle rep |
shuffle first2 :> ( i j )
{
{ [ i j [ 2 < ] both? ] [
src1 shuffle rep ^shuffle-vector-imm
] }
{ [ i j [ 2 >= ] both? ] [
src2 shuffle [ 2 - ] map rep ^shuffle-vector-imm
] }
{ [ i 2 < ] [
src1 src2 i j 2 - 2array rep ^^shuffle-vector-halves-imm
] }
! [ j 2 < ]
[ src2 src1 i 2 - j 2array rep ^^shuffle-vector-halves-imm ]
} cond
] }
} vvl-vector-op ;
: ^broadcast-vector ( src n rep -- dst )
[ rep-length swap <array> ] keep
^shuffle-vector-imm ;
@ -475,6 +495,11 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
[ ^shuffle-vector-imm ]
} [ shuffle? ] emit-vl-vector-op ;
: emit-simd-vshuffle2-elements ( node -- )
{
[ ^shuffle-2-vectors-imm ]
} [ shuffle? ] emit-vvl-vector-op ;
: emit-simd-vshuffle-bytes ( node -- )
{
[ ^^shuffle-vector ]
@ -605,65 +630,66 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
: enable-simd ( -- )
{
{ (simd-v+) [ emit-simd-v+ ] }
{ (simd-v-) [ emit-simd-v- ] }
{ (simd-vneg) [ emit-simd-vneg ] }
{ (simd-v+-) [ emit-simd-v+- ] }
{ (simd-vs+) [ emit-simd-vs+ ] }
{ (simd-vs-) [ emit-simd-vs- ] }
{ (simd-vs*) [ emit-simd-vs* ] }
{ (simd-v*) [ emit-simd-v* ] }
{ (simd-v*high) [ emit-simd-v*high ] }
{ (simd-v*hs+) [ emit-simd-v*hs+ ] }
{ (simd-v/) [ emit-simd-v/ ] }
{ (simd-vmin) [ emit-simd-vmin ] }
{ (simd-vmax) [ emit-simd-vmax ] }
{ (simd-vavg) [ emit-simd-vavg ] }
{ (simd-v.) [ emit-simd-v. ] }
{ (simd-vsad) [ emit-simd-vsad ] }
{ (simd-vsqrt) [ emit-simd-vsqrt ] }
{ (simd-sum) [ emit-simd-sum ] }
{ (simd-vabs) [ emit-simd-vabs ] }
{ (simd-vbitand) [ emit-simd-vand ] }
{ (simd-vbitandn) [ emit-simd-vandn ] }
{ (simd-vbitor) [ emit-simd-vor ] }
{ (simd-vbitxor) [ emit-simd-vxor ] }
{ (simd-vbitnot) [ emit-simd-vnot ] }
{ (simd-vand) [ emit-simd-vand ] }
{ (simd-vandn) [ emit-simd-vandn ] }
{ (simd-vor) [ emit-simd-vor ] }
{ (simd-vxor) [ emit-simd-vxor ] }
{ (simd-vnot) [ emit-simd-vnot ] }
{ (simd-vlshift) [ emit-simd-vlshift ] }
{ (simd-vrshift) [ emit-simd-vrshift ] }
{ (simd-hlshift) [ emit-simd-hlshift ] }
{ (simd-hrshift) [ emit-simd-hrshift ] }
{ (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] }
{ (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] }
{ (simd-vmerge-head) [ emit-simd-vmerge-head ] }
{ (simd-vmerge-tail) [ emit-simd-vmerge-tail ] }
{ (simd-v<=) [ emit-simd-v<= ] }
{ (simd-v<) [ emit-simd-v< ] }
{ (simd-v=) [ emit-simd-v= ] }
{ (simd-v>) [ emit-simd-v> ] }
{ (simd-v>=) [ emit-simd-v>= ] }
{ (simd-vunordered?) [ emit-simd-vunordered? ] }
{ (simd-vany?) [ emit-simd-vany? ] }
{ (simd-vall?) [ emit-simd-vall? ] }
{ (simd-vnone?) [ emit-simd-vnone? ] }
{ (simd-v>float) [ emit-simd-v>float ] }
{ (simd-v>integer) [ emit-simd-v>integer ] }
{ (simd-vpack-signed) [ emit-simd-vpack-signed ] }
{ (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] }
{ (simd-vunpack-head) [ emit-simd-vunpack-head ] }
{ (simd-vunpack-tail) [ emit-simd-vunpack-tail ] }
{ (simd-with) [ emit-simd-with ] }
{ (simd-gather-2) [ emit-simd-gather-2 ] }
{ (simd-gather-4) [ emit-simd-gather-4 ] }
{ (simd-select) [ emit-simd-select ] }
{ alien-vector [ emit-alien-vector ] }
{ set-alien-vector [ emit-set-alien-vector ] }
{ assert-positive [ drop ] }
{ (simd-v+) [ emit-simd-v+ ] }
{ (simd-v-) [ emit-simd-v- ] }
{ (simd-vneg) [ emit-simd-vneg ] }
{ (simd-v+-) [ emit-simd-v+- ] }
{ (simd-vs+) [ emit-simd-vs+ ] }
{ (simd-vs-) [ emit-simd-vs- ] }
{ (simd-vs*) [ emit-simd-vs* ] }
{ (simd-v*) [ emit-simd-v* ] }
{ (simd-v*high) [ emit-simd-v*high ] }
{ (simd-v*hs+) [ emit-simd-v*hs+ ] }
{ (simd-v/) [ emit-simd-v/ ] }
{ (simd-vmin) [ emit-simd-vmin ] }
{ (simd-vmax) [ emit-simd-vmax ] }
{ (simd-vavg) [ emit-simd-vavg ] }
{ (simd-v.) [ emit-simd-v. ] }
{ (simd-vsad) [ emit-simd-vsad ] }
{ (simd-vsqrt) [ emit-simd-vsqrt ] }
{ (simd-sum) [ emit-simd-sum ] }
{ (simd-vabs) [ emit-simd-vabs ] }
{ (simd-vbitand) [ emit-simd-vand ] }
{ (simd-vbitandn) [ emit-simd-vandn ] }
{ (simd-vbitor) [ emit-simd-vor ] }
{ (simd-vbitxor) [ emit-simd-vxor ] }
{ (simd-vbitnot) [ emit-simd-vnot ] }
{ (simd-vand) [ emit-simd-vand ] }
{ (simd-vandn) [ emit-simd-vandn ] }
{ (simd-vor) [ emit-simd-vor ] }
{ (simd-vxor) [ emit-simd-vxor ] }
{ (simd-vnot) [ emit-simd-vnot ] }
{ (simd-vlshift) [ emit-simd-vlshift ] }
{ (simd-vrshift) [ emit-simd-vrshift ] }
{ (simd-hlshift) [ emit-simd-hlshift ] }
{ (simd-hrshift) [ emit-simd-hrshift ] }
{ (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] }
{ (simd-vshuffle2-elements) [ emit-simd-vshuffle2-elements ] }
{ (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] }
{ (simd-vmerge-head) [ emit-simd-vmerge-head ] }
{ (simd-vmerge-tail) [ emit-simd-vmerge-tail ] }
{ (simd-v<=) [ emit-simd-v<= ] }
{ (simd-v<) [ emit-simd-v< ] }
{ (simd-v=) [ emit-simd-v= ] }
{ (simd-v>) [ emit-simd-v> ] }
{ (simd-v>=) [ emit-simd-v>= ] }
{ (simd-vunordered?) [ emit-simd-vunordered? ] }
{ (simd-vany?) [ emit-simd-vany? ] }
{ (simd-vall?) [ emit-simd-vall? ] }
{ (simd-vnone?) [ emit-simd-vnone? ] }
{ (simd-v>float) [ emit-simd-v>float ] }
{ (simd-v>integer) [ emit-simd-v>integer ] }
{ (simd-vpack-signed) [ emit-simd-vpack-signed ] }
{ (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] }
{ (simd-vunpack-head) [ emit-simd-vunpack-head ] }
{ (simd-vunpack-tail) [ emit-simd-vunpack-tail ] }
{ (simd-with) [ emit-simd-with ] }
{ (simd-gather-2) [ emit-simd-gather-2 ] }
{ (simd-gather-4) [ emit-simd-gather-4 ] }
{ (simd-select) [ emit-simd-select ] }
{ alien-vector [ emit-alien-vector ] }
{ set-alien-vector [ emit-set-alien-vector ] }
{ assert-positive [ drop ] }
} enable-intrinsics ;
enable-simd

View File

@ -186,6 +186,8 @@ PRIVATE>
: (simd-hrshift) ( a n rep -- c )
drop tail-slice 16 0 pad-tail ;
: (simd-vshuffle-elements) ( a n rep -- c ) [ rep-length 0 pad-tail ] keep (vshuffle) ;
: (simd-vshuffle2-elements) ( a b n rep -- c )
[ cord-append ] 2dip [ rep-length 0 pad-tail ] keep (vshuffle) ;
: (simd-vshuffle-bytes) ( a b rep -- c ) drop uchar-16-rep (vshuffle) ;
:: (simd-vmerge-head) ( a b rep -- c )
a b rep 2>rep-array :> ( a' b' )
@ -252,4 +254,3 @@ PRIVATE>
"compiler.cfg.intrinsics.simd" require
"compiler.tree.propagation.simd" require
"compiler.cfg.value-numbering.simd" require