Merge branch 'more-simd' of git://factorcode.org/git/factor into more-simd
commit
7065dd09df
|
@ -51,9 +51,6 @@ insn-classes get [
|
|||
! Emits ##unbox-c-ptr for src/class. Two fresh vregs come from next-vreg:
! the first is duplicated so one copy stays on the stack as the returned dst,
! the second is passed as the last operand of ##unbox-c-ptr.
: ^^unbox-c-ptr ( src class -- dst )
|
||||
[ next-vreg dup ] 2dip next-vreg ##unbox-c-ptr ;
|
||||
|
||||
! Builds a negation out of existing instructions: a literal 0 is loaded
! first, then ^^sub is emitted with that zero as its first operand and
! src as its second.
: ^^neg ( src -- dst )
|
||||
0 ^^load-literal swap ^^sub ;
|
||||
|
||||
! Allots space for a tuple: n + 2 is converted with `cells` and handed to
! ^^allot together with the class word `tuple`.
! NOTE(review): the extra 2 presumably covers header/layout cells - confirm.
: ^^allot-tuple ( n -- dst )
|
||||
2 + cells tuple ^^allot ;
|
||||
|
||||
|
|
|
@ -186,6 +186,10 @@ PURE-INSN: ##not
|
|||
def: dst/int-rep
|
||||
use: src/int-rep ;
|
||||
|
||||
! Pure instruction ##neg: one output (dst) and one input (src), both int-rep.
PURE-INSN: ##neg
|
||||
def: dst/int-rep
|
||||
use: src/int-rep ;
|
||||
|
||||
! Pure instruction ##log2: one output (dst) and one input (src), both int-rep.
PURE-INSN: ##log2
|
||||
def: dst/int-rep
|
||||
use: src/int-rep ;
|
||||
|
@ -270,6 +274,10 @@ def: dst
|
|||
use: src/int-rep
|
||||
literal: rep ;
|
||||
|
||||
! Pure instruction ##zero-vector: defines dst and takes no input vregs;
! its only other slot is the literal rep.
PURE-INSN: ##zero-vector
|
||||
def: dst
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##broadcast-vector
|
||||
def: dst
|
||||
use: src/scalar-rep
|
||||
|
@ -285,6 +293,16 @@ def: dst
|
|||
use: src1/scalar-rep src2/scalar-rep src3/scalar-rep src4/scalar-rep
|
||||
literal: rep ;
|
||||
|
||||
! Pure instruction ##shuffle-vector: defines dst from a single input src,
! with two literal slots, shuffle and rep.
PURE-INSN: ##shuffle-vector
|
||||
def: dst
|
||||
use: src
|
||||
literal: shuffle rep ;
|
||||
|
||||
! Pure instruction ##select-vector: defines dst from a single input src,
! with two literal slots, n and rep.
! NOTE(review): n is presumably the element index to extract - confirm.
PURE-INSN: ##select-vector
|
||||
def: dst
|
||||
use: src
|
||||
literal: n rep ;
|
||||
|
||||
PURE-INSN: ##add-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
|
@ -335,6 +353,11 @@ def: dst
|
|||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
! Pure instruction ##dot-vector: two inputs (src1, src2) and a literal rep.
! Unlike the other vector instructions here, dst is declared scalar-rep.
PURE-INSN: ##dot-vector
|
||||
def: dst/scalar-rep
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##horizontal-add-vector
|
||||
def: dst/scalar-rep
|
||||
use: src
|
||||
|
|
|
@ -164,6 +164,7 @@ IN: compiler.cfg.intrinsics
|
|||
{ math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v.) [ [ ^^dot-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vabs) [ [ ^^abs-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] }
|
||||
|
@ -177,6 +178,8 @@ IN: compiler.cfg.intrinsics
|
|||
{ math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vselect) [ emit-select-vector ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
|
||||
{ math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] }
|
||||
|
|
|
@ -45,6 +45,13 @@ IN: compiler.cfg.intrinsics.simd
|
|||
ds-push
|
||||
] emit-vector-op ;
|
||||
|
||||
! Placeholder for the (simd-vshuffle) intrinsic emitter; the body is empty.
! NOTE(review): the declared effect ( node -- ) consumes an input but the
! empty body leaves it on the stack, so the declaration and the body
! disagree. Confirm the intended behaviour before this word is enabled.
: emit-shuffle-vector ( node -- )
|
||||
;
|
||||
|
||||
! Placeholder for the SIMD select intrinsic emitter; the body is empty.
! NOTE(review): the declared effect ( node -- ) consumes an input but the
! empty body leaves it on the stack, so the declaration and the body
! disagree. Confirm the intended behaviour before this word is enabled.
: emit-select-vector ( node -- )
|
||||
|
||||
;
|
||||
|
||||
: emit-alien-vector ( node -- )
|
||||
dup [
|
||||
'[
|
||||
|
|
|
@ -142,6 +142,7 @@ CODEGEN: ##sar-imm %sar-imm
|
|||
! Each CODEGEN: line pairs an instruction class (##name) with the
! like-named %name word. ##neg is paired with %neg alongside ##not/##log2.
CODEGEN: ##min %min
|
||||
CODEGEN: ##max %max
|
||||
CODEGEN: ##not %not
|
||||
CODEGEN: ##neg %neg
|
||||
CODEGEN: ##log2 %log2
|
||||
CODEGEN: ##copy %copy
|
||||
CODEGEN: ##unbox-float %unbox-float
|
||||
|
@ -160,9 +161,12 @@ CODEGEN: ##double>single-float %double>single-float
|
|||
! CODEGEN: pairings for conversions and vector construction/boxing.
! Each ##name instruction class is paired with the like-named %name word,
! including ##zero-vector, ##shuffle-vector and ##select-vector.
CODEGEN: ##integer>float %integer>float
|
||||
CODEGEN: ##float>integer %float>integer
|
||||
CODEGEN: ##unbox-vector %unbox-vector
|
||||
CODEGEN: ##zero-vector %zero-vector
|
||||
CODEGEN: ##broadcast-vector %broadcast-vector
|
||||
CODEGEN: ##gather-vector-2 %gather-vector-2
|
||||
CODEGEN: ##gather-vector-4 %gather-vector-4
|
||||
CODEGEN: ##shuffle-vector %shuffle-vector
|
||||
CODEGEN: ##select-vector %select-vector
|
||||
CODEGEN: ##box-vector %box-vector
|
||||
CODEGEN: ##add-vector %add-vector
|
||||
CODEGEN: ##saturated-add-vector %saturated-add-vector
|
||||
|
@ -174,6 +178,7 @@ CODEGEN: ##saturated-mul-vector %saturated-mul-vector
|
|||
! CODEGEN: pairings for vector arithmetic; ##dot-vector is paired with
! %dot-vector between the min/max and sqrt entries.
CODEGEN: ##div-vector %div-vector
|
||||
CODEGEN: ##min-vector %min-vector
|
||||
CODEGEN: ##max-vector %max-vector
|
||||
CODEGEN: ##dot-vector %dot-vector
|
||||
CODEGEN: ##sqrt-vector %sqrt-vector
|
||||
CODEGEN: ##horizontal-add-vector %horizontal-add-vector
|
||||
CODEGEN: ##horizontal-sub-vector %horizontal-sub-vector
|
||||
|
|
|
@ -180,6 +180,7 @@ HOOK: %sar-imm cpu ( dst src1 src2 -- )
|
|||
! Hooks on the `cpu` variable for integer operations. The binary ones take
! ( dst src1 src2 -- ); the unary ones (%not, %neg, %log2) take ( dst src -- ).
HOOK: %min cpu ( dst src1 src2 -- )
|
||||
HOOK: %max cpu ( dst src1 src2 -- )
|
||||
HOOK: %not cpu ( dst src -- )
|
||||
HOOK: %neg cpu ( dst src -- )
|
||||
HOOK: %log2 cpu ( dst src -- )
|
||||
|
||||
! %copy additionally receives the representation being copied.
HOOK: %copy cpu ( dst src rep -- )
|
||||
|
@ -210,9 +211,12 @@ HOOK: %float>integer cpu ( dst src -- )
|
|||
! Hooks on the `cpu` variable for boxing/unboxing and building vectors.
! Every hook takes the representation (rep) as its last input.
HOOK: %box-vector cpu ( dst src temp rep -- )
|
||||
HOOK: %unbox-vector cpu ( dst src rep -- )
|
||||
|
||||
! %zero-vector has no source operand; %shuffle-vector and %select-vector
! carry an extra literal (shuffle / n) before rep.
HOOK: %zero-vector cpu ( dst rep -- )
|
||||
HOOK: %broadcast-vector cpu ( dst src rep -- )
|
||||
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
|
||||
HOOK: %shuffle-vector cpu ( dst src shuffle rep -- )
|
||||
HOOK: %select-vector cpu ( dst src n rep -- )
|
||||
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %saturated-add-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
|
||||
|
@ -223,6 +227,7 @@ HOOK: %saturated-mul-vector cpu ( dst src1 src2 rep -- )
|
|||
! Hooks on the `cpu` variable for vector arithmetic. Binary operations
! (including %dot-vector) take ( dst src1 src2 rep -- ); unary ones take
! ( dst src rep -- ).
HOOK: %div-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %min-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %dot-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %sqrt-vector cpu ( dst src rep -- )
|
||||
HOOK: %horizontal-add-vector cpu ( dst src rep -- )
|
||||
HOOK: %horizontal-sub-vector cpu ( dst src rep -- )
|
||||
|
@ -239,9 +244,12 @@ HOOK: %horizontal-shr-vector cpu ( dst src1 src2 rep -- )
|
|||
! Scalar/integer conversion hooks on the `cpu` variable.
HOOK: %integer>scalar cpu ( dst src rep -- )
|
||||
HOOK: %scalar>integer cpu ( dst src rep -- )
|
||||
|
||||
! The %...-reps hooks take no inputs and return `reps`: the representations
! for which the like-named vector operation is available on the current cpu.
HOOK: %zero-vector-reps cpu ( -- reps )
|
||||
HOOK: %broadcast-vector-reps cpu ( -- reps )
|
||||
HOOK: %gather-vector-2-reps cpu ( -- reps )
|
||||
HOOK: %gather-vector-4-reps cpu ( -- reps )
|
||||
HOOK: %shuffle-vector-reps cpu ( -- reps )
|
||||
HOOK: %select-vector-reps cpu ( -- reps )
|
||||
HOOK: %add-vector-reps cpu ( -- reps )
|
||||
HOOK: %saturated-add-vector-reps cpu ( -- reps )
|
||||
HOOK: %add-sub-vector-reps cpu ( -- reps )
|
||||
|
@ -252,6 +260,7 @@ HOOK: %saturated-mul-vector-reps cpu ( -- reps )
|
|||
! %...-reps hooks for vector arithmetic: each takes no inputs and returns
! `reps` for the like-named operation (%dot-vector-reps included).
HOOK: %div-vector-reps cpu ( -- reps )
|
||||
HOOK: %min-vector-reps cpu ( -- reps )
|
||||
HOOK: %max-vector-reps cpu ( -- reps )
|
||||
HOOK: %dot-vector-reps cpu ( -- reps )
|
||||
HOOK: %sqrt-vector-reps cpu ( -- reps )
|
||||
HOOK: %horizontal-add-vector-reps cpu ( -- reps )
|
||||
HOOK: %horizontal-sub-vector-reps cpu ( -- reps )
|
||||
|
|
|
@ -129,6 +129,7 @@ M: x86 %min int-rep two-operand [ CMP ] [ CMOVG ] 2bi ;
|
|||
! x86 %max: after `int-rep two-operand`, emits CMP followed by CMOVL on the
! same operand pair (2bi applies both quotations to the same two values).
M: x86 %max int-rep two-operand [ CMP ] [ CMOVL ] 2bi ;
|
||||
|
||||
! x86 %not / %neg: `int-rep one-operand` followed by a single NOT / NEG.
! NOTE(review): one-operand presumably copies src into dst first - confirm.
M: x86 %not int-rep one-operand NOT ;
|
||||
M: x86 %neg int-rep one-operand NEG ;
|
||||
! x86 %log2: passes ( dst src ) straight to BSR.
M: x86 %log2 BSR ;
|
||||
|
||||
! Generic word taking ( dst src rep ); the methods are not in this hunk.
GENERIC: copy-register* ( dst src rep -- )
|
||||
|
@ -578,6 +579,19 @@ MACRO: available-reps ( alist -- )
|
|||
reverse [ { } ] suffix
|
||||
'[ _ cond ] ;
|
||||
|
||||
! x86 %zero-vector ( dst rep -- ): dispatches on rep and XORs dst with
! itself (dst is dup'ed to supply both operands). double-2-rep uses XORPD,
! float-4-rep uses XORPS, and every other rep reaches the default branch,
! which drops the unmatched rep and uses PXOR.
M: x86 %zero-vector
|
||||
{
|
||||
{ double-2-rep [ dup XORPD ] }
|
||||
{ float-4-rep [ dup XORPS ] }
|
||||
[ drop dup PXOR ]
|
||||
} case ;
|
||||
|
||||
! x86 %zero-vector-reps: feeds a feature-test -> reps table to
! available-reps. float-4-rep is listed under sse?; double-2-rep and all the
! integer vector reps are listed under sse2?.
M: x86 %zero-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
: unsign-rep ( rep -- rep' )
|
||||
{
|
||||
{ uint-4-rep int-4-rep }
|
||||
|
@ -663,6 +677,10 @@ M: x86 %gather-vector-2-reps
|
|||
{ sse2? { double-2-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
! x86 reports no supported representations for shuffle or select: both
! methods return an empty array.
M: x86 %shuffle-vector-reps { } ;
|
||||
|
||||
M: x86 %select-vector-reps { } ;
|
||||
|
||||
M: x86 %add-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
|
@ -820,6 +838,28 @@ M: x86 %max-vector-reps
|
|||
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
! x86 %dot-vector ( dst src1 src2 rep -- ).
! `[ two-operand ] keep` runs two-operand on the operands while keeping rep
! on top of the stack for the `case` dispatch below.
! NOTE(review): two-operand presumably leaves ( dst src ) - confirm.
! For each supported rep there are two code paths chosen by sse4.1?:
!   true  - a single DPPS / DPPD with immediate HEX: ff
!   false - MULPS / MULPD into dst, then %horizontal-add-vector of dst
!           into itself
! Only float-4-rep and double-2-rep have branches; there is no default.
M: x86 %dot-vector
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ float-4-rep [
|
||||
sse4.1?
|
||||
[ HEX: ff DPPS ]
|
||||
! Fallback: multiply, then `drop dup` replaces src with dst so the
! horizontal add reads and writes dst.
[ [ MULPS ] [ drop dup float-4-rep %horizontal-add-vector ] 2bi ]
|
||||
if
|
||||
] }
|
||||
{ double-2-rep [
|
||||
sse4.1?
|
||||
[ HEX: ff DPPD ]
|
||||
[ [ MULPD ] [ drop dup double-2-rep %horizontal-add-vector ] 2bi ]
|
||||
if
|
||||
] }
|
||||
} case ;
|
||||
|
||||
! x86 %dot-vector-reps: float-4-rep and double-2-rep are both listed under
! the sse3? feature test and passed to available-reps.
M: x86 %dot-vector-reps
|
||||
{
|
||||
{ sse3? { float-4-rep double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %horizontal-add-vector ( dst src rep -- )
|
||||
{
|
||||
{ float-4-rep [ [ float-4-rep %copy ] [ HADDPS ] [ HADDPS ] 2tri ] }
|
||||
|
|
|
@ -55,7 +55,7 @@ ERROR: bad-schema schema ;
|
|||
:: high-level-ops ( ctor elt-class -- assoc )
|
||||
! Some SIMD operations are defined in terms of others.
|
||||
{
|
||||
{ vneg [ [ dup v- ] keep v- ] }
|
||||
{ vneg [ [ dup vbitxor ] keep v- ] }
|
||||
{ n+v [ [ ctor execute ] dip v+ ] }
|
||||
{ v+n [ ctor execute v+ ] }
|
||||
{ n-v [ [ ctor execute ] dip v- ] }
|
||||
|
@ -71,12 +71,7 @@ ERROR: bad-schema schema ;
|
|||
! To compute dot product and distance with integer vectors, we
|
||||
! have to do things less efficiently, with integer overflow checks,
|
||||
! in the general case.
|
||||
elt-class m:float = [
|
||||
{
|
||||
{ distance [ v- norm ] }
|
||||
{ v. [ v* sum ] }
|
||||
} append
|
||||
] when ;
|
||||
elt-class m:float = [ { distance [ v- norm ] } suffix ] when ;
|
||||
|
||||
:: simd-vector-words ( class ctor rep vv->v vn->v v->v v->n -- )
|
||||
rep rep-component-type c-type-boxed-class :> elt-class
|
||||
|
|
|
@ -36,6 +36,7 @@ SIMD-OP: v*
|
|||
! SIMD-OP: declarations for arithmetic operations, now including v. (dot).
! NOTE(review): each line presumably defines the matching (simd-NAME) word,
! e.g. (simd-v.) - confirm against the SIMD-OP: definition.
SIMD-OP: v/
|
||||
SIMD-OP: vmin
|
||||
SIMD-OP: vmax
|
||||
SIMD-OP: v.
|
||||
SIMD-OP: vsqrt
|
||||
SIMD-OP: sum
|
||||
SIMD-OP: vabs
|
||||
|
@ -47,10 +48,12 @@ SIMD-OP: vlshift
|
|||
! SIMD-OP: declarations for shifts, followed by vshuffle.
SIMD-OP: vrshift
|
||||
SIMD-OP: hlshift
|
||||
SIMD-OP: hrshift
|
||||
SIMD-OP: vshuffle
|
||||
|
||||
! SIMD primitives whose Factor-level body is just bad-simd-call.
! NOTE(review): presumably these are only meant to be reached through
! compiler intrinsics - confirm.
: (simd-broadcast) ( x rep -- v ) bad-simd-call ;
|
||||
: (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
|
||||
: (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ;
|
||||
: (simd-select) ( v n rep -- x ) bad-simd-call ;
|
||||
|
||||
! Empty body: the input is returned unchanged.
: assert-positive ( x -- y ) ;
|
||||
|
||||
|
@ -110,6 +113,7 @@ M: vector-rep supported-simd-op?
|
|||
{ \ (simd-v/) [ %div-vector-reps ] }
|
||||
{ \ (simd-vmin) [ %min-vector-reps ] }
|
||||
{ \ (simd-vmax) [ %max-vector-reps ] }
|
||||
{ \ (simd-v.) [ %dot-vector-reps ] }
|
||||
{ \ (simd-vsqrt) [ %sqrt-vector-reps ] }
|
||||
{ \ (simd-sum) [ %horizontal-add-vector-reps ] }
|
||||
{ \ (simd-vabs) [ %abs-vector-reps ] }
|
||||
|
@ -121,7 +125,9 @@ M: vector-rep supported-simd-op?
|
|||
{ \ (simd-vrshift) [ %shr-vector-reps ] }
|
||||
{ \ (simd-hlshift) [ %horizontal-shl-vector-reps ] }
|
||||
{ \ (simd-hrshift) [ %horizontal-shr-vector-reps ] }
|
||||
{ \ (simd-vshuffle) [ %shuffle-vector-reps ] }
|
||||
{ \ (simd-broadcast) [ %broadcast-vector-reps ] }
|
||||
{ \ (simd-gather-2) [ %gather-vector-2-reps ] }
|
||||
{ \ (simd-gather-4) [ %gather-vector-4-reps ] }
|
||||
{ \ (simd-select) [ %select-vector-reps ] }
|
||||
} case member? ;
|
||||
|
|
|
@ -66,6 +66,9 @@ PRIVATE>
|
|||
|
||||
! Generic word taking ( underlying seq ) and producing seq'.
GENERIC: new-underlying ( underlying seq -- seq' )
|
||||
|
||||
! Applies quot to seq's underlying>> slot, then calls new-underlying with
! the result and the original seq (`keep` preserves seq across the call).
: change-underlying ( seq quot -- seq' )
|
||||
'[ underlying>> @ ] keep new-underlying ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
! Elementwise bitand over u and v via 2map; u itself is captured in the
! quotation and passed, with [ bitand ], to fp-bitwise-op for each pair.
: vbitand ( u v -- w ) over '[ _ [ bitand ] fp-bitwise-op ] 2map ;
|
||||
|
@ -74,6 +77,14 @@ PRIVATE>
|
|||
! Elementwise bitxor over u and v via 2map; u is captured and passed, with
! [ bitxor ], to fp-bitwise-op for each pair.
: vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ;
|
||||
! Elementwise bitnot over u via map; u is captured and passed, with
! [ bitnot ], to fp-bitwise-unary for each element.
: vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ;
|
||||
|
||||
! Reorders u by perm: swaps the arguments and calls nths, so perm supplies
! the indices looked up in u.
: vshuffle ( u perm -- v ) swap nths ;
|
||||
|
||||
! Shifts every element of u by n using `shift`; n is curried onto the
! mapped quotation.
: vlshift ( u n -- w ) [ shift ] curry map ;
|
||||
! Same as vlshift, but n is negated first so `shift` moves the other way.
: vrshift ( u n -- w ) neg [ shift ] curry map ;
|
||||
|
||||
! Shifts the underlying bytes of u via change-underlying: a fresh
! n-element byte-array is prepended and the first 16 elements are kept.
! NOTE(review): assumes <byte-array> is zero-filled and the underlying
! storage is 16 bytes - confirm.
: hlshift ( u n -- w ) '[ _ <byte-array> prepend 16 head ] change-underlying ;
|
||||
! Counterpart of hlshift: a fresh n-element byte-array is appended and the
! last 16 elements are kept.
: hrshift ( u n -- w ) '[ _ <byte-array> append 16 tail* ] change-underlying ;
|
||||
|
||||
! Elementwise boolean combinators over two sequences, each built on 2map.
: vand ( u v -- w ) [ and ] 2map ;
|
||||
: vor ( u v -- w ) [ or ] 2map ;
|
||||
: vxor ( u v -- w ) [ xor ] 2map ;
|
||||
|
@ -88,15 +99,6 @@ PRIVATE>
|
|||
|
||||
! Elementwise `?` over the three input sequences; `pick` copies u to serve
! as the exemplar argument of 3map-as.
: v? ( ? u v -- w ) [ ? ] pick 3map-as ;
|
||||
|
||||
! Shifts every element of u by n using `shift`; n is curried onto the
! mapped quotation.
: vlshift ( u n -- w ) [ shift ] curry map ;
|
||||
! Same as vlshift, but n is negated first so `shift` moves the other way.
: vrshift ( u n -- w ) neg [ shift ] curry map ;
|
||||
|
||||
! Shifts the underlying bytes of u: 2bi runs the first quotation (take
! underlying>>, prepend a fresh n-element byte-array, keep the first 16
! elements) and the second (drop n, leaving u), then calls new-underlying.
: hlshift ( u n -- w )
|
||||
[ [ underlying>> ] dip <byte-array> prepend 16 head ] [ drop ] 2bi new-underlying ;
|
||||
|
||||
! Counterpart of hlshift: appends the fresh byte-array and keeps the last
! 16 elements.
: hrshift ( u n -- w )
|
||||
[ [ underlying>> ] dip <byte-array> append 16 tail* ] [ drop ] 2bi new-underlying ;
|
||||
|
||||
! Elementwise rounding words: each maps the named scalar word over u.
: vfloor ( u -- v ) [ floor ] map ;
|
||||
: vceiling ( u -- v ) [ ceiling ] map ;
|
||||
: vtruncate ( u -- v ) [ truncate ] map ;
|
||||
|
|
Loading…
Reference in New Issue