From 9f9e14581919c56d35040346cf0ee2a91068945e Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Fri, 14 May 2010 01:16:29 -0700 Subject: [PATCH] math.vectors.simd.intrinsics: (simd-vshuffle2-elements) intrinsic that creates a vector by selecting elements from two input vectors. use ##shuffle-vector-halves-imm to implement for double-2s with SSE --- .../intrinsics/simd/backend/backend.factor | 12 +- .../compiler/cfg/intrinsics/simd/simd.factor | 144 +++++++++++------- .../vectors/simd/intrinsics/intrinsics.factor | 3 +- 3 files changed, 96 insertions(+), 63 deletions(-) diff --git a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor index d9f3df000f..34e238bc81 100644 --- a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor +++ b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor @@ -22,6 +22,7 @@ M: ##gather-vector-4 insn-available? rep>> %gather-vector-4-reps member? ; M: ##store-memory-imm insn-available? rep>> %alien-vector-reps member? ; M: ##shuffle-vector insn-available? rep>> %shuffle-vector-reps member? ; M: ##shuffle-vector-imm insn-available? rep>> %shuffle-vector-imm-reps member? ; +M: ##shuffle-vector-halves-imm insn-available? rep>> %shuffle-vector-halves-imm-reps member? ; M: ##merge-vector-head insn-available? rep>> %merge-vector-reps member? ; M: ##merge-vector-tail insn-available? rep>> %merge-vector-reps member? ; M: ##signed-pack-vector insn-available? rep>> %signed-pack-vector-reps member? ; @@ -84,6 +85,8 @@ MACRO: v-vector-op ( trials -- ) [ 1 2 >vector-op-cond ] map '[ f f _ cond ] ; MACRO: vl-vector-op ( trials -- ) [ 1 3 >vector-op-cond ] map '[ f f _ cond ] ; +MACRO: vvl-vector-op ( trials -- ) + [ 1 4 >vector-op-cond ] map '[ f f _ cond ] ; MACRO: vv-vector-op ( trials -- ) [ 1 3 >vector-op-cond ] map '[ f f _ cond ] ; MACRO: vv-cc-vector-op ( trials -- ) @@ -118,9 +121,10 @@ MACRO: if-literals-match ( quots -- ) ] [ 2drop bad-simd-intrinsic ] if ] ; -CONSTANT: [unary] [ ds-drop ds-pop ] -CONSTANT: [unary/param] [ [ -2 inc-d ds-pop ] dip ] -CONSTANT: [binary] [ ds-drop 2inputs ] +CONSTANT: [unary] [ ds-drop ds-pop ] +CONSTANT: [unary/param] [ [ -2 inc-d ds-pop ] dip ] +CONSTANT: [binary] [ ds-drop 2inputs ] +CONSTANT: [binary/param] [ [ -2 inc-d 2inputs ] dip ] CONSTANT: [quaternary] [ ds-drop @@ -141,6 +145,8 @@ MACRO: emit-vl-vector-op ( trials literal-pred -- ) [ [unary/param] [ vl-vector-op ] { [ representation? ] } ] dip prefix [emit-vector-op] ; MACRO: emit-vv-vector-op ( trials -- ) [binary] [ vv-vector-op ] { [ representation? ] } [emit-vector-op] ; +MACRO: emit-vvl-vector-op ( trials literal-pred -- ) + [ [binary/param] [ vvl-vector-op ] { [ representation? ] } ] dip prefix [emit-vector-op] ; MACRO: emit-vvvv-vector-op ( trials -- ) [quaternary] [ vvvv-vector-op ] { [ representation? ] } [emit-vector-op] ; diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index a64aa828d0..c4a2d41c91 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -275,6 +275,26 @@ PREDICATE: fixnum-vector-rep < int-vector-rep [ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] bi ] } vl-vector-op ; +: ^shuffle-2-vectors-imm ( src1 src2 shuffle rep -- dst ) + [ rep-length 0 pad-tail ] keep { + { double-2-rep [| src1 src2 shuffle rep | + shuffle first2 :> ( i j ) + { + { [ i j [ 2 < ] both? ] [ + src1 shuffle rep ^shuffle-vector-imm + ] } + { [ i j [ 2 >= ] both? ] [ + src2 shuffle [ 2 - ] map rep ^shuffle-vector-imm + ] } + { [ i 2 < ] [ + src1 src2 i j 2 - 2array rep ^^shuffle-vector-halves-imm + ] } + ! [ j 2 < ] + [ src2 src1 i 2 - j 2array rep ^^shuffle-vector-halves-imm ] + } cond + ] } + } vvl-vector-op ; + : ^broadcast-vector ( src n rep -- dst ) [ rep-length swap ] keep ^shuffle-vector-imm ; @@ -475,6 +495,11 @@ PREDICATE: fixnum-vector-rep < int-vector-rep [ ^shuffle-vector-imm ] } [ shuffle? ] emit-vl-vector-op ; +: emit-simd-vshuffle2-elements ( node -- ) + { + [ ^shuffle-2-vectors-imm ] + } [ shuffle? ] emit-vvl-vector-op ; + : emit-simd-vshuffle-bytes ( node -- ) { [ ^^shuffle-vector ] @@ -605,65 +630,66 @@ PREDICATE: fixnum-vector-rep < int-vector-rep : enable-simd ( -- ) { - { (simd-v+) [ emit-simd-v+ ] } - { (simd-v-) [ emit-simd-v- ] } - { (simd-vneg) [ emit-simd-vneg ] } - { (simd-v+-) [ emit-simd-v+- ] } - { (simd-vs+) [ emit-simd-vs+ ] } - { (simd-vs-) [ emit-simd-vs- ] } - { (simd-vs*) [ emit-simd-vs* ] } - { (simd-v*) [ emit-simd-v* ] } - { (simd-v*high) [ emit-simd-v*high ] } - { (simd-v*hs+) [ emit-simd-v*hs+ ] } - { (simd-v/) [ emit-simd-v/ ] } - { (simd-vmin) [ emit-simd-vmin ] } - { (simd-vmax) [ emit-simd-vmax ] } - { (simd-vavg) [ emit-simd-vavg ] } - { (simd-v.) [ emit-simd-v. ] } - { (simd-vsad) [ emit-simd-vsad ] } - { (simd-vsqrt) [ emit-simd-vsqrt ] } - { (simd-sum) [ emit-simd-sum ] } - { (simd-vabs) [ emit-simd-vabs ] } - { (simd-vbitand) [ emit-simd-vand ] } - { (simd-vbitandn) [ emit-simd-vandn ] } - { (simd-vbitor) [ emit-simd-vor ] } - { (simd-vbitxor) [ emit-simd-vxor ] } - { (simd-vbitnot) [ emit-simd-vnot ] } - { (simd-vand) [ emit-simd-vand ] } - { (simd-vandn) [ emit-simd-vandn ] } - { (simd-vor) [ emit-simd-vor ] } - { (simd-vxor) [ emit-simd-vxor ] } - { (simd-vnot) [ emit-simd-vnot ] } - { (simd-vlshift) [ emit-simd-vlshift ] } - { (simd-vrshift) [ emit-simd-vrshift ] } - { (simd-hlshift) [ emit-simd-hlshift ] } - { (simd-hrshift) [ emit-simd-hrshift ] } - { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] } - { (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] } - { (simd-vmerge-head) [ emit-simd-vmerge-head ] } - { (simd-vmerge-tail) [ emit-simd-vmerge-tail ] } - { (simd-v<=) [ emit-simd-v<= ] } - { (simd-v<) [ emit-simd-v< ] } - { (simd-v=) [ emit-simd-v= ] } - { (simd-v>) [ emit-simd-v> ] } - { (simd-v>=) [ emit-simd-v>= ] } - { (simd-vunordered?) [ emit-simd-vunordered? ] } - { (simd-vany?) [ emit-simd-vany? ] } - { (simd-vall?) [ emit-simd-vall? ] } - { (simd-vnone?) [ emit-simd-vnone? ] } - { (simd-v>float) [ emit-simd-v>float ] } - { (simd-v>integer) [ emit-simd-v>integer ] } - { (simd-vpack-signed) [ emit-simd-vpack-signed ] } - { (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] } - { (simd-vunpack-head) [ emit-simd-vunpack-head ] } - { (simd-vunpack-tail) [ emit-simd-vunpack-tail ] } - { (simd-with) [ emit-simd-with ] } - { (simd-gather-2) [ emit-simd-gather-2 ] } - { (simd-gather-4) [ emit-simd-gather-4 ] } - { (simd-select) [ emit-simd-select ] } - { alien-vector [ emit-alien-vector ] } - { set-alien-vector [ emit-set-alien-vector ] } - { assert-positive [ drop ] } + { (simd-v+) [ emit-simd-v+ ] } + { (simd-v-) [ emit-simd-v- ] } + { (simd-vneg) [ emit-simd-vneg ] } + { (simd-v+-) [ emit-simd-v+- ] } + { (simd-vs+) [ emit-simd-vs+ ] } + { (simd-vs-) [ emit-simd-vs- ] } + { (simd-vs*) [ emit-simd-vs* ] } + { (simd-v*) [ emit-simd-v* ] } + { (simd-v*high) [ emit-simd-v*high ] } + { (simd-v*hs+) [ emit-simd-v*hs+ ] } + { (simd-v/) [ emit-simd-v/ ] } + { (simd-vmin) [ emit-simd-vmin ] } + { (simd-vmax) [ emit-simd-vmax ] } + { (simd-vavg) [ emit-simd-vavg ] } + { (simd-v.) [ emit-simd-v. ] } + { (simd-vsad) [ emit-simd-vsad ] } + { (simd-vsqrt) [ emit-simd-vsqrt ] } + { (simd-sum) [ emit-simd-sum ] } + { (simd-vabs) [ emit-simd-vabs ] } + { (simd-vbitand) [ emit-simd-vand ] } + { (simd-vbitandn) [ emit-simd-vandn ] } + { (simd-vbitor) [ emit-simd-vor ] } + { (simd-vbitxor) [ emit-simd-vxor ] } + { (simd-vbitnot) [ emit-simd-vnot ] } + { (simd-vand) [ emit-simd-vand ] } + { (simd-vandn) [ emit-simd-vandn ] } + { (simd-vor) [ emit-simd-vor ] } + { (simd-vxor) [ emit-simd-vxor ] } + { (simd-vnot) [ emit-simd-vnot ] } + { (simd-vlshift) [ emit-simd-vlshift ] } + { (simd-vrshift) [ emit-simd-vrshift ] } + { (simd-hlshift) [ emit-simd-hlshift ] } + { (simd-hrshift) [ emit-simd-hrshift ] } + { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] } + { (simd-vshuffle2-elements) [ emit-simd-vshuffle2-elements ] } + { (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] } + { (simd-vmerge-head) [ emit-simd-vmerge-head ] } + { (simd-vmerge-tail) [ emit-simd-vmerge-tail ] } + { (simd-v<=) [ emit-simd-v<= ] } + { (simd-v<) [ emit-simd-v< ] } + { (simd-v=) [ emit-simd-v= ] } + { (simd-v>) [ emit-simd-v> ] } + { (simd-v>=) [ emit-simd-v>= ] } + { (simd-vunordered?) [ emit-simd-vunordered? ] } + { (simd-vany?) [ emit-simd-vany? ] } + { (simd-vall?) [ emit-simd-vall? ] } + { (simd-vnone?) [ emit-simd-vnone? ] } + { (simd-v>float) [ emit-simd-v>float ] } + { (simd-v>integer) [ emit-simd-v>integer ] } + { (simd-vpack-signed) [ emit-simd-vpack-signed ] } + { (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] } + { (simd-vunpack-head) [ emit-simd-vunpack-head ] } + { (simd-vunpack-tail) [ emit-simd-vunpack-tail ] } + { (simd-with) [ emit-simd-with ] } + { (simd-gather-2) [ emit-simd-gather-2 ] } + { (simd-gather-4) [ emit-simd-gather-4 ] } + { (simd-select) [ emit-simd-select ] } + { alien-vector [ emit-alien-vector ] } + { set-alien-vector [ emit-set-alien-vector ] } + { assert-positive [ drop ] } } enable-intrinsics ; enable-simd diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index d80755a6a5..d4a60452dd 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -186,6 +186,8 @@ PRIVATE> : (simd-hrshift) ( a n rep -- c ) drop tail-slice 16 0 pad-tail ; : (simd-vshuffle-elements) ( a n rep -- c ) [ rep-length 0 pad-tail ] keep (vshuffle) ; +: (simd-vshuffle2-elements) ( a b n rep -- c ) + [ cord-append ] 2dip [ rep-length 0 pad-tail ] keep (vshuffle) ; : (simd-vshuffle-bytes) ( a b rep -- c ) drop uchar-16-rep (vshuffle) ; :: (simd-vmerge-head) ( a b rep -- c ) a b rep 2>rep-array :> ( a' b' ) @@ -252,4 +254,3 @@ PRIVATE> "compiler.cfg.intrinsics.simd" require "compiler.tree.propagation.simd" require "compiler.cfg.value-numbering.simd" require -