simd intrinsic implementation for v*high, v*hs+, vavg, and vsad
							parent
							
								
									1845915dc6
								
							
						
					
					
						commit
						104c29aabc
					
				| 
						 | 
				
			
			@ -382,6 +382,16 @@ def: dst
 | 
			
		|||
use: src1 src2
 | 
			
		||||
literal: rep ;
 | 
			
		||||
 | 
			
		||||
! Vector "multiply high" instruction.
! dst is defined from the two vector inputs src1 and src2; rep is the
! literal vector representation. The x86 backend in this change set
! implements it with PMULHW / PMULHUW.
PURE-INSN: ##mul-high-vector
def: dst
use: src1 src2
literal: rep ;
 | 
			
		||||
 | 
			
		||||
! Vector "multiply, then add horizontally" instruction.
! dst is defined from the two vector inputs src1 and src2; rep is the
! literal vector representation. The x86 backend in this change set
! implements it with PMADDUBSW / PMADDWD.
PURE-INSN: ##mul-horizontal-add-vector
def: dst
use: src1 src2
literal: rep ;
 | 
			
		||||
 | 
			
		||||
PURE-INSN: ##saturated-mul-vector
 | 
			
		||||
def: dst
 | 
			
		||||
use: src1 src2
 | 
			
		||||
| 
						 | 
				
			
			@ -402,11 +412,21 @@ def: dst
 | 
			
		|||
use: src1 src2
 | 
			
		||||
literal: rep ;
 | 
			
		||||
 | 
			
		||||
! Vector average instruction.
! dst is defined from the two vector inputs src1 and src2; rep is the
! literal vector representation. The x86 backend in this change set
! implements it with PAVGB / PAVGW.
PURE-INSN: ##avg-vector
def: dst
use: src1 src2
literal: rep ;
 | 
			
		||||
 | 
			
		||||
! Vector dot-product instruction.
! Unlike the neighbouring vector instructions, the output is declared
! as dst/scalar-rep rather than a plain dst. The inputs are the two
! vectors src1 and src2; rep is the literal vector representation.
PURE-INSN: ##dot-vector
def: dst/scalar-rep
use: src1 src2
literal: rep ;
 | 
			
		||||
 | 
			
		||||
! Vector sum-of-absolute-differences instruction.
! dst is defined from the two vector inputs src1 and src2; rep is the
! literal vector representation. Note that dst is declared as a plain
! dst (not dst/scalar-rep as in ##dot-vector); emit-simd-vsad extracts
! the scalar afterwards. The x86 backend implements it with PSADBW.
PURE-INSN: ##sad-vector
def: dst
use: src1 src2
literal: rep ;
 | 
			
		||||
 | 
			
		||||
PURE-INSN: ##horizontal-add-vector
 | 
			
		||||
def: dst
 | 
			
		||||
use: src1 src2
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -57,6 +57,12 @@ IN: compiler.cfg.intrinsics.simd
 | 
			
		|||
        { longlong-2-rep [ longlong-array{ -1 0 } underlying>> ^^load-constant ] }
 | 
			
		||||
    } case ;
 | 
			
		||||
 | 
			
		||||
! Emit a constant load of a vector whose elements are all 0.5, chosen
! by representation. Only float-4-rep and double-2-rep have a branch;
! any other rep matches no case. Used by emit-simd-vavg to halve a sum.
: ^load-half-vector ( rep -- dst )
    {
        { float-4-rep  [ float-array{  0.5 0.5 0.5 0.5 } underlying>> ^^load-constant ] }
        { double-2-rep [ double-array{ 0.5 0.5 }         underlying>> ^^load-constant ] }
    } case ;
 | 
			
		||||
 | 
			
		||||
: >variable-shuffle ( shuffle rep -- shuffle' )
 | 
			
		||||
    rep-component-type heap-size
 | 
			
		||||
    [ dup <repetition> >byte-array ]
 | 
			
		||||
| 
						 | 
				
			
			@ -336,6 +342,16 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
 | 
			
		|||
        [ ^^mul-vector ]
 | 
			
		||||
    } emit-vv-vector-op ;
 | 
			
		||||
 | 
			
		||||
! Emit code for the (simd-v*high) intrinsic.
! The only strategy offered to emit-vv-vector-op is a single
! ##mul-high-vector instruction.
: emit-simd-v*high ( node -- )
    {
        [ ^^mul-high-vector ]
    } emit-vv-vector-op ;
 | 
			
		||||
 | 
			
		||||
! Emit code for the (simd-v*hs+) intrinsic.
! The only strategy offered to emit-vv-vector-op is a single
! ##mul-horizontal-add-vector instruction.
: emit-simd-v*hs+ ( node -- )
    {
        [ ^^mul-horizontal-add-vector ]
    } emit-vv-vector-op ;
 | 
			
		||||
 | 
			
		||||
: emit-simd-v/ ( node -- )
 | 
			
		||||
    {
 | 
			
		||||
        [ ^^div-vector ]
 | 
			
		||||
| 
						 | 
				
			
			@ -359,12 +375,26 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
 | 
			
		|||
        ]
 | 
			
		||||
    } emit-vv-vector-op ;
 | 
			
		||||
 | 
			
		||||
! Emit code for the (simd-vavg) intrinsic.
! Two strategies are handed to emit-vv-vector-op:
!   1. a single ##avg-vector instruction;
!   2. for float-vector-rep, add the two inputs and multiply the sum
!      by a constant vector of 0.5 (see ^load-half-vector).
: emit-simd-vavg ( node -- )
    {
        [ ^^avg-vector ]
        { float-vector-rep [| src1 src2 rep |
            ! (src1 + src2) * 0.5
            src1 src2 rep ^^add-vector
            rep ^load-half-vector rep ^^mul-vector
        ] }
    } emit-vv-vector-op ;
 | 
			
		||||
 | 
			
		||||
! Emit code for the (simd-v.) intrinsic (dot product).
! Two strategies are handed to emit-vv-vector-op:
!   1. a single ##dot-vector instruction;
!   2. for float-vector-rep, an element-wise multiply followed by
!      ^sum-vector on the product.
: emit-simd-v. ( node -- )
    {
        [ ^^dot-vector ]
        { float-vector-rep [ [ ^^mul-vector ] [ ^sum-vector ] bi ] }
    } emit-vv-vector-op ;
 | 
			
		||||
 | 
			
		||||
! Emit code for the (simd-vsad) intrinsic (sum of absolute differences).
! The single strategy emits ##sad-vector and then applies
! ^^vector>scalar to the result, using the widened representation of
! the input rep.
!
! NOTE(review): the x86 backend lowers ##sad-vector to PSADBW, which
! leaves one partial sum in EACH 64-bit half of the register. If
! ^^vector>scalar reads only the first element, the upper half's sum
! is dropped and the result covers just 8 of the 16 bytes. Confirm,
! and if so add the two halves together before extracting the scalar.
: emit-simd-vsad ( node -- )
    {
        [ [ ^^sad-vector ] [ widen-vector-rep ^^vector>scalar ] bi ]
    } emit-vv-vector-op ;
 | 
			
		||||
 | 
			
		||||
: emit-simd-vsqrt ( node -- )
 | 
			
		||||
    {
 | 
			
		||||
        [ ^^sqrt-vector ]
 | 
			
		||||
| 
						 | 
				
			
			@ -580,10 +610,14 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
 | 
			
		|||
        { (simd-vs-)               [ emit-simd-vs-                 ] }
 | 
			
		||||
        { (simd-vs*)               [ emit-simd-vs*                 ] }
 | 
			
		||||
        { (simd-v*)                [ emit-simd-v*                  ] }
 | 
			
		||||
        { (simd-v*high)            [ emit-simd-v*high              ] }
 | 
			
		||||
        { (simd-v*hs+)             [ emit-simd-v*hs+               ] }
 | 
			
		||||
        { (simd-v/)                [ emit-simd-v/                  ] }
 | 
			
		||||
        { (simd-vmin)              [ emit-simd-vmin                ] }
 | 
			
		||||
        { (simd-vmax)              [ emit-simd-vmax                ] }
 | 
			
		||||
        { (simd-vavg)              [ emit-simd-vavg                ] }
 | 
			
		||||
        { (simd-v.)                [ emit-simd-v.                  ] }
 | 
			
		||||
        { (simd-vsad)              [ emit-simd-vsad                ] }
 | 
			
		||||
        { (simd-vsqrt)             [ emit-simd-vsqrt               ] }
 | 
			
		||||
        { (simd-sum)               [ emit-simd-sum                 ] }
 | 
			
		||||
        { (simd-vabs)              [ emit-simd-vabs                ] }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -173,11 +173,15 @@ CODEGEN: ##add-sub-vector %add-sub-vector
 | 
			
		|||
! Each CODEGEN: line pairs a ##-prefixed vector instruction with the
! %-prefixed word of the same name.
CODEGEN: ##sub-vector %sub-vector
CODEGEN: ##saturated-sub-vector %saturated-sub-vector
CODEGEN: ##mul-vector %mul-vector
CODEGEN: ##mul-high-vector %mul-high-vector
CODEGEN: ##mul-horizontal-add-vector %mul-horizontal-add-vector
CODEGEN: ##saturated-mul-vector %saturated-mul-vector
CODEGEN: ##div-vector %div-vector
CODEGEN: ##min-vector %min-vector
CODEGEN: ##max-vector %max-vector
CODEGEN: ##avg-vector %avg-vector
CODEGEN: ##dot-vector %dot-vector
CODEGEN: ##sad-vector %sad-vector
CODEGEN: ##sqrt-vector %sqrt-vector
CODEGEN: ##horizontal-add-vector %horizontal-add-vector
CODEGEN: ##horizontal-sub-vector %horizontal-sub-vector
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -16,9 +16,12 @@ CONSTANT: vector>vector-intrinsics
 | 
			
		|||
        (simd-vs-)
 | 
			
		||||
        (simd-vs*)
 | 
			
		||||
        (simd-v*)
 | 
			
		||||
        (simd-v*high)
 | 
			
		||||
        (simd-v*hs+)
 | 
			
		||||
        (simd-v/)
 | 
			
		||||
        (simd-vmin)
 | 
			
		||||
        (simd-vmax)
 | 
			
		||||
        (simd-vavg)
 | 
			
		||||
        (simd-vsqrt)
 | 
			
		||||
        (simd-vabs)
 | 
			
		||||
        (simd-vbitand)
 | 
			
		||||
| 
						 | 
				
			
			@ -60,6 +63,7 @@ CONSTANT: vector>vector-intrinsics
 | 
			
		|||
CONSTANT: vector-other-intrinsics
 | 
			
		||||
    {
 | 
			
		||||
        (simd-v.)
 | 
			
		||||
        (simd-vsad)
 | 
			
		||||
        (simd-sum)
 | 
			
		||||
        (simd-vany?)
 | 
			
		||||
        (simd-vall?)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -283,11 +283,15 @@ HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
 | 
			
		|||
! Vector operation hooks, dispatched on the cpu variable.
! Binary operations take ( dst src1 src2 rep -- ); the unary
! %sqrt-vector takes ( dst src rep -- ).
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %saturated-sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
HOOK: %mul-high-vector cpu ( dst src1 src2 rep -- )
HOOK: %mul-horizontal-add-vector cpu ( dst src1 src2 rep -- )
HOOK: %saturated-mul-vector cpu ( dst src1 src2 rep -- )
HOOK: %div-vector cpu ( dst src1 src2 rep -- )
HOOK: %min-vector cpu ( dst src1 src2 rep -- )
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
HOOK: %avg-vector cpu ( dst src1 src2 rep -- )
HOOK: %dot-vector cpu ( dst src1 src2 rep -- )
HOOK: %sad-vector cpu ( dst src1 src2 rep -- )
HOOK: %sqrt-vector cpu ( dst src rep -- )
HOOK: %horizontal-add-vector cpu ( dst src1 src2 rep -- )
HOOK: %horizontal-sub-vector cpu ( dst src1 src2 rep -- )
 | 
			
		||||
| 
						 | 
				
			
			@ -332,11 +336,15 @@ HOOK: %add-sub-vector-reps cpu ( -- reps )
 | 
			
		|||
! Companion hooks to the vector operation hooks, dispatched on the cpu
! variable. Each takes no inputs and returns the reps for the
! operation it is named after.
HOOK: %sub-vector-reps cpu ( -- reps )
HOOK: %saturated-sub-vector-reps cpu ( -- reps )
HOOK: %mul-vector-reps cpu ( -- reps )
HOOK: %mul-high-vector-reps cpu ( -- reps )
HOOK: %mul-horizontal-add-vector-reps cpu ( -- reps )
HOOK: %saturated-mul-vector-reps cpu ( -- reps )
HOOK: %div-vector-reps cpu ( -- reps )
HOOK: %min-vector-reps cpu ( -- reps )
HOOK: %max-vector-reps cpu ( -- reps )
HOOK: %avg-vector-reps cpu ( -- reps )
HOOK: %dot-vector-reps cpu ( -- reps )
HOOK: %sad-vector-reps cpu ( -- reps )
HOOK: %sqrt-vector-reps cpu ( -- reps )
HOOK: %horizontal-add-vector-reps cpu ( -- reps )
HOOK: %horizontal-sub-vector-reps cpu ( -- reps )
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1106,6 +1106,33 @@ M: x86 %mul-vector-reps
 | 
			
		|||
        { sse4.1? { int-4-rep uint-4-rep } }
 | 
			
		||||
    } available-reps ;
 | 
			
		||||
 | 
			
		||||
! x86 multiply-high: after two-operand has prepared the operands,
! pick the instruction by rep. PMULHW is the signed form (short-8-rep)
! and PMULHUW the unsigned form (ushort-8-rep).
M: x86 %mul-high-vector ( dst src1 src2 rep -- )
    [ two-operand ] keep
    {
        { short-8-rep  [ PMULHW ] }
        { ushort-8-rep [ PMULHUW ] }
    } case ;
 | 
			
		||||
 | 
			
		||||
! Reps for which x86 offers %mul-high-vector: the two 16-bit integer
! reps, gated on sse2?. These match the branches of the case table in
! %mul-high-vector.
M: x86 %mul-high-vector-reps
    {
        { sse2? { short-8-rep ushort-8-rep } }
    } available-reps ;
 | 
			
		||||
 | 
			
		||||
! x86 multiply-and-horizontally-add: after two-operand has prepared
! the operands, pick the instruction by element width. Byte reps use
! PMADDUBSW; 16-bit reps use PMADDWD.
!
! NOTE(review): per the Intel manual, PMADDUBSW multiplies UNSIGNED
! bytes of the destination by SIGNED bytes of the source (with signed
! saturation of the sums), and PMADDWD multiplies SIGNED words. The
! same instruction is selected here for both the signed and unsigned
! rep of each width, so results for large unsigned elements (and for
! char-16 inputs with a negative first operand) may differ from the
! generic v*hs+ -- confirm the intended semantics.
M: x86 %mul-horizontal-add-vector ( dst src1 src2 rep -- )
    [ two-operand ] keep
    {
        { char-16-rep  [ PMADDUBSW ] }
        { uchar-16-rep [ PMADDUBSW ] }
        { short-8-rep  [ PMADDWD ] }
        { ushort-8-rep [ PMADDWD ] }
    } case ;
 | 
			
		||||
 | 
			
		||||
! Reps for which x86 offers %mul-horizontal-add-vector.
! The 16-bit reps are gated on sse2? (PMADDWD); the byte reps are
! gated on ssse3? (PMADDUBSW).
M: x86 %mul-horizontal-add-vector-reps
    {
        { sse2?  { short-8-rep ushort-8-rep } }
        { ssse3? { char-16-rep uchar-16-rep } }
    } available-reps ;
 | 
			
		||||
 | 
			
		||||
M: x86 %div-vector ( dst src1 src2 rep -- )
 | 
			
		||||
    [ two-operand ] keep
 | 
			
		||||
    {
 | 
			
		||||
| 
						 | 
				
			
			@ -1159,6 +1186,18 @@ M: x86 %max-vector-reps
 | 
			
		|||
        { sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
 | 
			
		||||
    } available-reps ;
 | 
			
		||||
 | 
			
		||||
! x86 integer average: after two-operand has prepared the operands,
! pick PAVGB for uchar-16-rep or PAVGW for ushort-8-rep. Both
! instructions average unsigned elements, so only unsigned reps have
! a branch here.
M: x86 %avg-vector ( dst src1 src2 rep -- )
    [ two-operand ] keep
    {
        { uchar-16-rep [ PAVGB ] }
        { ushort-8-rep [ PAVGW ] }
    } case ;
 | 
			
		||||
 | 
			
		||||
! Reps for which x86 offers %avg-vector: the unsigned 8- and 16-bit
! reps, gated on sse2?. These match the branches of the case table in
! %avg-vector.
M: x86 %avg-vector-reps
    {
        { sse2? { uchar-16-rep ushort-8-rep } }
    } available-reps ;
 | 
			
		||||
 | 
			
		||||
M: x86 %dot-vector
 | 
			
		||||
    [ two-operand ] keep
 | 
			
		||||
    {
 | 
			
		||||
| 
						 | 
				
			
			@ -1171,6 +1210,18 @@ M: x86 %dot-vector-reps
 | 
			
		|||
        { sse4.1? { float-4-rep double-2-rep } }
 | 
			
		||||
    } available-reps ;
 | 
			
		||||
 | 
			
		||||
! x86 sum of absolute differences.
! PSADBW interprets every byte as UNSIGNED, so it is only correct for
! uchar-16-rep. For signed bytes it would compute e.g. |255 - 1| = 254
! for the inputs -1 and 1 instead of 2, so char-16-rep is deliberately
! not handled here and not advertised in %sad-vector-reps below.
M: x86 %sad-vector
    [ two-operand ] keep
    {
        { uchar-16-rep [ PSADBW ] }
    } case ;

! Reps for which x86 offers %sad-vector, gated on sse2?. This must stay
! in sync with the case table in %sad-vector: a rep listed here without
! a matching branch there would hit no case.
M: x86 %sad-vector-reps
    {
        { sse2? { uchar-16-rep } }
    } available-reps ;
 | 
			
		||||
 | 
			
		||||
M: x86 %horizontal-add-vector ( dst src1 src2 rep -- )
 | 
			
		||||
    [ two-operand ] keep
 | 
			
		||||
    signed-rep {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -168,7 +168,6 @@ M: A vs-               \ A-rep [ (simd-vs-)               ] [ call-next-method ]
 | 
			
		|||
! Binary vector methods for the vector class A.
! Each one hands vv->v-op the class's rep (\ A-rep), a quotation
! calling the matching (simd-...) intrinsic, and a second quotation
! that calls call-next-method.
M: A vs*               \ A-rep [ (simd-vs*)               ] [ call-next-method ] vv->v-op ; inline
M: A v*                \ A-rep [ (simd-v*)                ] [ call-next-method ] vv->v-op ; inline
M: A v*high            \ A-rep [ (simd-v*high)            ] [ call-next-method ] vv->v-op ; inline
M: A v*hs+             \ A-rep [ (simd-v*hs+)             ] [ call-next-method ] vv->v-op ; inline
M: A v/                \ A-rep [ (simd-v/)                ] [ call-next-method ] vv->v-op ; inline
M: A vavg              \ A-rep [ (simd-vavg)              ] [ call-next-method ] vv->v-op ; inline
M: A vmin              \ A-rep [ (simd-vmin)              ] [ call-next-method ] vv->v-op ; inline
 | 
			
		||||
| 
						 | 
				
			
			@ -273,6 +272,19 @@ SIMD-128: double-2
 | 
			
		|||
! vshuffle on any simd-128 vector delegates directly to vshuffle-bytes.
M: simd-128 vshuffle ( u perm -- v )
    vshuffle-bytes ; inline
 | 
			
		||||
 | 
			
		||||
! v*hs+ specialisations for the integer vector types.
! Each runs vv->v-op with the type's own rep, the (simd-v*hs+)
! intrinsic and a call-next-method quotation, then casts the result to
! the vector type with half as many elements of the same signedness
! (uchar-16 -> ushort-8, ushort-8 -> uint-4, uint-4 -> ulonglong-2,
! and likewise for the signed types).
M: uchar-16 v*hs+
    uchar-16-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op ushort-8-cast ; inline
M: ushort-8 v*hs+
    ushort-8-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op uint-4-cast ; inline
M: uint-4 v*hs+
    uint-4-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op ulonglong-2-cast ; inline
M: char-16 v*hs+
    char-16-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op short-8-cast ; inline
M: short-8 v*hs+
    short-8-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op int-4-cast ; inline
M: int-4 v*hs+
    int-4-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op longlong-2-cast ; inline
 | 
			
		||||
 | 
			
		||||
! Require math.vectors.simd.mirrors only when looking up the "mirrors"
! vocabulary yields a true value.
"mirrors" vocab [
    "math.vectors.simd.mirrors" require
] when
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue