math.vectors.simd: new operations: vabs vsqrt vbitand vbitor vbitxor
parent
863ccb61d6
commit
abac963882
|
|
@ -350,14 +350,34 @@ def: dst
|
||||||
use: src1 src2
|
use: src1 src2
|
||||||
literal: rep ;
|
literal: rep ;
|
||||||
|
|
||||||
|
PURE-INSN: ##horizontal-add-vector
|
||||||
|
def: dst/scalar-rep
|
||||||
|
use: src
|
||||||
|
literal: rep ;
|
||||||
|
|
||||||
|
PURE-INSN: ##abs-vector
|
||||||
|
def: dst
|
||||||
|
use: src
|
||||||
|
literal: rep ;
|
||||||
|
|
||||||
PURE-INSN: ##sqrt-vector
|
PURE-INSN: ##sqrt-vector
|
||||||
def: dst
|
def: dst
|
||||||
use: src
|
use: src
|
||||||
literal: rep ;
|
literal: rep ;
|
||||||
|
|
||||||
PURE-INSN: ##horizontal-add-vector
|
PURE-INSN: ##and-vector
|
||||||
def: dst/scalar-rep
|
def: dst
|
||||||
use: src
|
use: src1 src2
|
||||||
|
literal: rep ;
|
||||||
|
|
||||||
|
PURE-INSN: ##or-vector
|
||||||
|
def: dst
|
||||||
|
use: src1 src2
|
||||||
|
literal: rep ;
|
||||||
|
|
||||||
|
PURE-INSN: ##xor-vector
|
||||||
|
def: dst
|
||||||
|
use: src1 src2
|
||||||
literal: rep ;
|
literal: rep ;
|
||||||
|
|
||||||
! Boxing and unboxing aliens
|
! Boxing and unboxing aliens
|
||||||
|
|
|
||||||
|
|
@ -164,7 +164,11 @@ IN: compiler.cfg.intrinsics
|
||||||
{ math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vabs) [ [ ^^abs-vector ] emit-unary-vector-op ] }
|
||||||
|
{ math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] }
|
||||||
|
{ math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] }
|
||||||
|
{ math.vectors.simd.intrinsics:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] }
|
||||||
|
{ math.vectors.simd.intrinsics:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
|
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
|
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,10 @@ UNION: two-operand-insn
|
||||||
##saturated-mul-vector
|
##saturated-mul-vector
|
||||||
##div-vector
|
##div-vector
|
||||||
##min-vector
|
##min-vector
|
||||||
##max-vector ;
|
##max-vector
|
||||||
|
##and-vector
|
||||||
|
##or-vector
|
||||||
|
##xor-vector ;
|
||||||
|
|
||||||
GENERIC: convert-two-operand* ( insn -- )
|
GENERIC: convert-two-operand* ( insn -- )
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -180,6 +180,10 @@ CODEGEN: ##min-vector %min-vector
|
||||||
CODEGEN: ##max-vector %max-vector
|
CODEGEN: ##max-vector %max-vector
|
||||||
CODEGEN: ##sqrt-vector %sqrt-vector
|
CODEGEN: ##sqrt-vector %sqrt-vector
|
||||||
CODEGEN: ##horizontal-add-vector %horizontal-add-vector
|
CODEGEN: ##horizontal-add-vector %horizontal-add-vector
|
||||||
|
CODEGEN: ##abs-vector %abs-vector
|
||||||
|
CODEGEN: ##and-vector %and-vector
|
||||||
|
CODEGEN: ##or-vector %or-vector
|
||||||
|
CODEGEN: ##xor-vector %xor-vector
|
||||||
CODEGEN: ##box-alien %box-alien
|
CODEGEN: ##box-alien %box-alien
|
||||||
CODEGEN: ##box-displaced-alien %box-displaced-alien
|
CODEGEN: ##box-displaced-alien %box-displaced-alien
|
||||||
CODEGEN: ##unbox-alien %unbox-alien
|
CODEGEN: ##unbox-alien %unbox-alien
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,12 @@ IN: compiler.tree.propagation.simd
|
||||||
(simd-v/)
|
(simd-v/)
|
||||||
(simd-vmin)
|
(simd-vmin)
|
||||||
(simd-vmax)
|
(simd-vmax)
|
||||||
|
(simd-sum)
|
||||||
|
(simd-vabs)
|
||||||
(simd-vsqrt)
|
(simd-vsqrt)
|
||||||
|
(simd-vbitand)
|
||||||
|
(simd-vbitor)
|
||||||
|
(simd-vbitxor)
|
||||||
(simd-broadcast)
|
(simd-broadcast)
|
||||||
(simd-gather-2)
|
(simd-gather-2)
|
||||||
(simd-gather-4)
|
(simd-gather-4)
|
||||||
|
|
|
||||||
|
|
@ -192,6 +192,10 @@ HOOK: %min-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %sqrt-vector cpu ( dst src rep -- )
|
HOOK: %sqrt-vector cpu ( dst src rep -- )
|
||||||
HOOK: %horizontal-add-vector cpu ( dst src rep -- )
|
HOOK: %horizontal-add-vector cpu ( dst src rep -- )
|
||||||
|
HOOK: %abs-vector cpu ( dst src rep -- )
|
||||||
|
HOOK: %and-vector cpu ( dst src1 src2 rep -- )
|
||||||
|
HOOK: %or-vector cpu ( dst src1 src2 rep -- )
|
||||||
|
HOOK: %xor-vector cpu ( dst src1 src2 rep -- )
|
||||||
|
|
||||||
HOOK: %broadcast-vector-reps cpu ( -- reps )
|
HOOK: %broadcast-vector-reps cpu ( -- reps )
|
||||||
HOOK: %gather-vector-2-reps cpu ( -- reps )
|
HOOK: %gather-vector-2-reps cpu ( -- reps )
|
||||||
|
|
@ -208,6 +212,10 @@ HOOK: %min-vector-reps cpu ( -- reps )
|
||||||
HOOK: %max-vector-reps cpu ( -- reps )
|
HOOK: %max-vector-reps cpu ( -- reps )
|
||||||
HOOK: %sqrt-vector-reps cpu ( -- reps )
|
HOOK: %sqrt-vector-reps cpu ( -- reps )
|
||||||
HOOK: %horizontal-add-vector-reps cpu ( -- reps )
|
HOOK: %horizontal-add-vector-reps cpu ( -- reps )
|
||||||
|
HOOK: %abs-vector-reps cpu ( -- reps )
|
||||||
|
HOOK: %and-vector-reps cpu ( -- reps )
|
||||||
|
HOOK: %or-vector-reps cpu ( -- reps )
|
||||||
|
HOOK: %xor-vector-reps cpu ( -- reps )
|
||||||
|
|
||||||
HOOK: %unbox-alien cpu ( dst src -- )
|
HOOK: %unbox-alien cpu ( dst src -- )
|
||||||
HOOK: %unbox-any-c-ptr cpu ( dst src temp -- )
|
HOOK: %unbox-any-c-ptr cpu ( dst src temp -- )
|
||||||
|
|
|
||||||
|
|
@ -410,30 +410,63 @@ M: x86 %div-vector-reps
|
||||||
|
|
||||||
M: x86 %min-vector ( dst src1 src2 rep -- )
|
M: x86 %min-vector ( dst src1 src2 rep -- )
|
||||||
{
|
{
|
||||||
{ float-4-rep [ MINPS ] }
|
{ char-16-rep [ PMINSB ] }
|
||||||
{ double-2-rep [ MINPD ] }
|
|
||||||
{ uchar-16-rep [ PMINUB ] }
|
{ uchar-16-rep [ PMINUB ] }
|
||||||
{ short-8-rep [ PMINSW ] }
|
{ short-8-rep [ PMINSW ] }
|
||||||
|
{ ushort-8-rep [ PMINUW ] }
|
||||||
|
{ int-4-rep [ PMINSD ] }
|
||||||
|
{ uint-4-rep [ PMINUD ] }
|
||||||
|
{ float-4-rep [ MINPS ] }
|
||||||
|
{ double-2-rep [ MINPD ] }
|
||||||
} case drop ;
|
} case drop ;
|
||||||
|
|
||||||
M: x86 %min-vector-reps
|
M: x86 %min-vector-reps
|
||||||
{
|
{
|
||||||
{ sse? { float-4-rep } }
|
{ sse? { float-4-rep } }
|
||||||
{ sse2? { double-2-rep short-8-rep uchar-16-rep } }
|
{ sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
|
||||||
|
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %max-vector ( dst src1 src2 rep -- )
|
M: x86 %max-vector ( dst src1 src2 rep -- )
|
||||||
{
|
{
|
||||||
{ float-4-rep [ MAXPS ] }
|
{ char-16-rep [ PMAXSB ] }
|
||||||
{ double-2-rep [ MAXPD ] }
|
|
||||||
{ uchar-16-rep [ PMAXUB ] }
|
{ uchar-16-rep [ PMAXUB ] }
|
||||||
{ short-8-rep [ PMAXSW ] }
|
{ short-8-rep [ PMAXSW ] }
|
||||||
|
{ ushort-8-rep [ PMAXUW ] }
|
||||||
|
{ int-4-rep [ PMAXSD ] }
|
||||||
|
{ uint-4-rep [ PMAXUD ] }
|
||||||
|
{ float-4-rep [ MAXPS ] }
|
||||||
|
{ double-2-rep [ MAXPD ] }
|
||||||
} case drop ;
|
} case drop ;
|
||||||
|
|
||||||
M: x86 %max-vector-reps
|
M: x86 %max-vector-reps
|
||||||
{
|
{
|
||||||
{ sse? { float-4-rep } }
|
{ sse? { float-4-rep } }
|
||||||
{ sse2? { double-2-rep short-8-rep uchar-16-rep } }
|
{ sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
|
||||||
|
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
|
M: x86 %horizontal-add-vector ( dst src rep -- )
|
||||||
|
{
|
||||||
|
{ float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
|
||||||
|
{ double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
M: x86 %horizontal-add-vector-reps
|
||||||
|
{
|
||||||
|
{ sse3? { float-4-rep double-2-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
|
M: x86 %abs-vector ( dst src rep -- )
|
||||||
|
{
|
||||||
|
{ char-16-rep [ PABSB ] }
|
||||||
|
{ short-8-rep [ PABSW ] }
|
||||||
|
{ int-4-rep [ PABSD ] }
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
M: x86 %abs-vector-reps
|
||||||
|
{
|
||||||
|
{ ssse3? { char-16-rep short-8-rep int-4-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %sqrt-vector ( dst src rep -- )
|
M: x86 %sqrt-vector ( dst src rep -- )
|
||||||
|
|
@ -448,15 +481,58 @@ M: x86 %sqrt-vector-reps
|
||||||
{ sse2? { double-2-rep } }
|
{ sse2? { double-2-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %horizontal-add-vector ( dst src rep -- )
|
M: x86 %and-vector ( dst src1 src2 rep -- )
|
||||||
{
|
{
|
||||||
{ float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
|
{ float-4-rep [ ANDPS ] }
|
||||||
{ double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
|
{ double-2-rep [ ANDPD ] }
|
||||||
} case ;
|
{ char-16-rep [ PAND ] }
|
||||||
|
{ uchar-16-rep [ PAND ] }
|
||||||
|
{ short-8-rep [ PAND ] }
|
||||||
|
{ ushort-8-rep [ PAND ] }
|
||||||
|
{ int-4-rep [ PAND ] }
|
||||||
|
{ uint-4-rep [ PAND ] }
|
||||||
|
} case drop ;
|
||||||
|
|
||||||
M: x86 %horizontal-add-vector-reps
|
M: x86 %and-vector-reps
|
||||||
{
|
{
|
||||||
{ sse3? { float-4-rep double-2-rep } }
|
{ sse? { float-4-rep } }
|
||||||
|
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
|
M: x86 %or-vector ( dst src1 src2 rep -- )
|
||||||
|
{
|
||||||
|
{ float-4-rep [ ORPS ] }
|
||||||
|
{ double-2-rep [ ORPD ] }
|
||||||
|
{ char-16-rep [ POR ] }
|
||||||
|
{ uchar-16-rep [ POR ] }
|
||||||
|
{ short-8-rep [ POR ] }
|
||||||
|
{ ushort-8-rep [ POR ] }
|
||||||
|
{ int-4-rep [ POR ] }
|
||||||
|
{ uint-4-rep [ POR ] }
|
||||||
|
} case drop ;
|
||||||
|
|
||||||
|
M: x86 %or-vector-reps
|
||||||
|
{
|
||||||
|
{ sse? { float-4-rep } }
|
||||||
|
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
|
M: x86 %xor-vector ( dst src1 src2 rep -- )
|
||||||
|
{
|
||||||
|
{ float-4-rep [ XORPS ] }
|
||||||
|
{ double-2-rep [ XORPD ] }
|
||||||
|
{ char-16-rep [ PXOR ] }
|
||||||
|
{ uchar-16-rep [ PXOR ] }
|
||||||
|
{ short-8-rep [ PXOR ] }
|
||||||
|
{ ushort-8-rep [ PXOR ] }
|
||||||
|
{ int-4-rep [ PXOR ] }
|
||||||
|
{ uint-4-rep [ PXOR ] }
|
||||||
|
} case drop ;
|
||||||
|
|
||||||
|
M: x86 %xor-vector-reps
|
||||||
|
{
|
||||||
|
{ sse? { float-4-rep } }
|
||||||
|
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %unbox-alien ( dst src -- )
|
M: x86 %unbox-alien ( dst src -- )
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,8 @@ USING: accessors alien.c-types assocs byte-arrays classes
|
||||||
effects fry functors generalizations kernel literals locals
|
effects fry functors generalizations kernel literals locals
|
||||||
math math.functions math.vectors math.vectors.simd.intrinsics
|
math math.functions math.vectors math.vectors.simd.intrinsics
|
||||||
math.vectors.specialization parser prettyprint.custom sequences
|
math.vectors.specialization parser prettyprint.custom sequences
|
||||||
sequences.private strings words definitions macros cpu.architecture ;
|
sequences.private strings words definitions macros cpu.architecture
|
||||||
|
namespaces arrays quotations ;
|
||||||
QUALIFIED-WITH: math m
|
QUALIFIED-WITH: math m
|
||||||
IN: math.vectors.simd.functor
|
IN: math.vectors.simd.functor
|
||||||
|
|
||||||
|
|
@ -38,24 +39,19 @@ MACRO: simd-boa ( rep class -- simd-array )
|
||||||
{ "simd-vector" } <effect> ;
|
{ "simd-vector" } <effect> ;
|
||||||
|
|
||||||
: supported-simd-ops ( assoc rep -- assoc' )
|
: supported-simd-ops ( assoc rep -- assoc' )
|
||||||
[
|
[ simd-ops get ] dip
|
||||||
{
|
|
||||||
{ v+ (simd-v+) }
|
|
||||||
{ vs+ (simd-vs+) }
|
|
||||||
{ v+- (simd-v+-) }
|
|
||||||
{ v- (simd-v-) }
|
|
||||||
{ vs- (simd-vs-) }
|
|
||||||
{ v* (simd-v*) }
|
|
||||||
{ vs* (simd-vs*) }
|
|
||||||
{ v/ (simd-v/) }
|
|
||||||
{ vmin (simd-vmin) }
|
|
||||||
{ vmax (simd-vmax) }
|
|
||||||
{ sum (simd-sum) }
|
|
||||||
}
|
|
||||||
] dip
|
|
||||||
'[ nip _ swap supported-simd-op? ] assoc-filter
|
'[ nip _ swap supported-simd-op? ] assoc-filter
|
||||||
'[ drop _ key? ] assoc-filter ;
|
'[ drop _ key? ] assoc-filter ;
|
||||||
|
|
||||||
|
ERROR: bad-schema schema ;
|
||||||
|
|
||||||
|
: low-level-ops ( box-quot: ( inputs... simd-op -- outputs... ) -- alist )
|
||||||
|
[ simd-ops get ] dip '[
|
||||||
|
1quotation
|
||||||
|
over word-schema _ ?at [ bad-schema ] unless
|
||||||
|
[ ] 2sequence
|
||||||
|
] assoc-map ;
|
||||||
|
|
||||||
:: high-level-ops ( ctor elt-class -- assoc )
|
:: high-level-ops ( ctor elt-class -- assoc )
|
||||||
! Some SIMD operations are defined in terms of others.
|
! Some SIMD operations are defined in terms of others.
|
||||||
{
|
{
|
||||||
|
|
@ -82,11 +78,17 @@ MACRO: simd-boa ( rep class -- simd-array )
|
||||||
} append
|
} append
|
||||||
] when ;
|
] when ;
|
||||||
|
|
||||||
:: simd-vector-words ( class ctor rep assoc -- )
|
:: simd-vector-words ( class ctor rep vv->v v->v v->n -- )
|
||||||
rep rep-component-type c-type-boxed-class :> elt-class
|
rep rep-component-type c-type-boxed-class :> elt-class
|
||||||
class
|
class
|
||||||
elt-class
|
elt-class
|
||||||
assoc rep supported-simd-ops
|
{
|
||||||
|
{ { +vector+ +vector+ -> +vector+ } vv->v }
|
||||||
|
{ { +vector+ -> +vector+ } v->v }
|
||||||
|
{ { +vector+ -> +scalar+ } v->n }
|
||||||
|
{ { +vector+ -> +nonnegative+ } v->n }
|
||||||
|
} low-level-ops
|
||||||
|
rep supported-simd-ops
|
||||||
ctor elt-class high-level-ops assoc-union
|
ctor elt-class high-level-ops assoc-union
|
||||||
specialize-vector-words ;
|
specialize-vector-words ;
|
||||||
|
|
||||||
|
|
@ -116,6 +118,7 @@ SET-NTH [ T dup c-setter array-accessor ]
|
||||||
|
|
||||||
A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
|
A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
|
||||||
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
||||||
|
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
|
||||||
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
|
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
|
||||||
|
|
||||||
WHERE
|
WHERE
|
||||||
|
|
@ -172,23 +175,13 @@ INSTANCE: A sequence
|
||||||
: A-vv->v-op ( v1 v2 quot -- v3 )
|
: A-vv->v-op ( v1 v2 quot -- v3 )
|
||||||
[ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline
|
[ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline
|
||||||
|
|
||||||
|
: A-v->v-op ( v1 quot -- v2 )
|
||||||
|
[ underlying>> A-rep ] dip call \ A boa ; inline
|
||||||
|
|
||||||
: A-v->n-op ( v quot -- n )
|
: A-v->n-op ( v quot -- n )
|
||||||
[ underlying>> A-rep ] dip call ; inline
|
[ underlying>> A-rep ] dip call ; inline
|
||||||
|
|
||||||
\ A \ A-with \ A-rep H{
|
\ A \ A-with \ A-rep \ A-vv->v-op \ A-v->v-op \ A-v->n-op simd-vector-words
|
||||||
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
|
|
||||||
{ vs+ [ [ (simd-vs+) ] \ A-vv->v-op execute ] }
|
|
||||||
{ v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
|
|
||||||
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
|
|
||||||
{ vs- [ [ (simd-vs-) ] \ A-vv->v-op execute ] }
|
|
||||||
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
|
|
||||||
{ vs* [ [ (simd-vs*) ] \ A-vv->v-op execute ] }
|
|
||||||
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
|
|
||||||
{ vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] }
|
|
||||||
{ vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] }
|
|
||||||
{ sum [ [ (simd-sum) ] \ A-v->n-op execute ] }
|
|
||||||
} simd-vector-words
|
|
||||||
|
|
||||||
\ A \ A-rep define-simd-128-type
|
\ A \ A-rep define-simd-128-type
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
@ -237,6 +230,7 @@ A-deref DEFINES-PRIVATE ${A}-deref
|
||||||
|
|
||||||
A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
|
A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
|
||||||
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
||||||
|
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
|
||||||
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
|
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
|
||||||
|
|
||||||
WHERE
|
WHERE
|
||||||
|
|
@ -302,24 +296,15 @@ INSTANCE: A sequence
|
||||||
[ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
|
[ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
|
||||||
\ A boa ; inline
|
\ A boa ; inline
|
||||||
|
|
||||||
: A-v->n-op ( v1 combine-quot reduce-quot -- v2 )
|
: A-v->v-op ( v1 combine-quot -- v2 )
|
||||||
[ [ [ underlying1>> ] [ underlying2>> ] bi A-rep ] dip call A-rep ]
|
[ [ underlying1>> A-rep ] dip call ]
|
||||||
dip call ; inline
|
[ [ underlying2>> A-rep ] dip call ] 2bi
|
||||||
|
\ A boa ; inline
|
||||||
|
|
||||||
\ A \ A-with \ A-rep H{
|
: A-v->n-op ( v1 combine-quot -- v2 )
|
||||||
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
|
[ [ underlying1>> ] [ underlying2>> ] bi A-rep (simd-v+) A-rep ] dip call ; inline
|
||||||
{ vs+ [ [ (simd-vs+) ] \ A-vv->v-op execute ] }
|
|
||||||
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
|
|
||||||
{ vs- [ [ (simd-vs-) ] \ A-vv->v-op execute ] }
|
|
||||||
{ v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
|
|
||||||
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
|
|
||||||
{ vs* [ [ (simd-vs*) ] \ A-vv->v-op execute ] }
|
|
||||||
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
|
|
||||||
{ vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] }
|
|
||||||
{ vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] }
|
|
||||||
{ sum [ [ (simd-v+) ] [ (simd-sum) ] \ A-v->n-op execute ] }
|
|
||||||
} simd-vector-words
|
|
||||||
|
|
||||||
|
\ A \ A-with \ A-rep \ A-vv->v-op \ A-v->v-op \ A-v->n-op simd-vector-words
|
||||||
\ A \ A-rep define-simd-256-type
|
\ A \ A-rep define-simd-256-type
|
||||||
|
|
||||||
;FUNCTOR
|
;FUNCTOR
|
||||||
|
|
|
||||||
|
|
@ -2,23 +2,47 @@
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: alien alien.c-types alien.data assocs combinators
|
USING: alien alien.c-types alien.data assocs combinators
|
||||||
cpu.architecture fry generalizations kernel libc macros math
|
cpu.architecture fry generalizations kernel libc macros math
|
||||||
sequences ;
|
sequences effects accessors namespaces lexer parser vocabs.parser
|
||||||
|
words arrays math.vectors ;
|
||||||
IN: math.vectors.simd.intrinsics
|
IN: math.vectors.simd.intrinsics
|
||||||
|
|
||||||
ERROR: bad-simd-call ;
|
ERROR: bad-simd-call ;
|
||||||
|
|
||||||
: (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
<<
|
||||||
: (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
|
||||||
: (simd-v+-) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
: simd-effect ( word -- effect )
|
||||||
: (simd-vs+) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
stack-effect [ in>> "rep" suffix ] [ out>> ] bi <effect> ;
|
||||||
: (simd-vs-) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
|
||||||
: (simd-vs*) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
SYMBOL: simd-ops
|
||||||
: (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
|
||||||
: (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
V{ } clone simd-ops set-global
|
||||||
: (simd-vmin) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
|
||||||
: (simd-vmax) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
SYNTAX: SIMD-OP:
|
||||||
: (simd-vsqrt) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
scan-word dup name>> "(simd-" ")" surround create-in
|
||||||
: (simd-sum) ( v1 rep -- v2 ) bad-simd-call ;
|
[ nip [ bad-simd-call ] define ]
|
||||||
|
[ [ simd-effect ] dip set-stack-effect ]
|
||||||
|
[ 2array simd-ops get push ]
|
||||||
|
2tri ;
|
||||||
|
|
||||||
|
>>
|
||||||
|
|
||||||
|
SIMD-OP: v+
|
||||||
|
SIMD-OP: v-
|
||||||
|
SIMD-OP: v+-
|
||||||
|
SIMD-OP: vs+
|
||||||
|
SIMD-OP: vs-
|
||||||
|
SIMD-OP: vs*
|
||||||
|
SIMD-OP: v*
|
||||||
|
SIMD-OP: v/
|
||||||
|
SIMD-OP: vmin
|
||||||
|
SIMD-OP: vmax
|
||||||
|
SIMD-OP: vsqrt
|
||||||
|
SIMD-OP: sum
|
||||||
|
SIMD-OP: vabs
|
||||||
|
SIMD-OP: vbitand
|
||||||
|
SIMD-OP: vbitor
|
||||||
|
SIMD-OP: vbitxor
|
||||||
|
|
||||||
: (simd-broadcast) ( x rep -- v ) bad-simd-call ;
|
: (simd-broadcast) ( x rep -- v ) bad-simd-call ;
|
||||||
: (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
|
: (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
|
||||||
: (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ;
|
: (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ;
|
||||||
|
|
@ -82,6 +106,10 @@ M: vector-rep supported-simd-op?
|
||||||
{ \ (simd-vmax) [ %max-vector-reps ] }
|
{ \ (simd-vmax) [ %max-vector-reps ] }
|
||||||
{ \ (simd-vsqrt) [ %sqrt-vector-reps ] }
|
{ \ (simd-vsqrt) [ %sqrt-vector-reps ] }
|
||||||
{ \ (simd-sum) [ %horizontal-add-vector-reps ] }
|
{ \ (simd-sum) [ %horizontal-add-vector-reps ] }
|
||||||
|
{ \ (simd-vabs) [ %abs-vector-reps ] }
|
||||||
|
{ \ (simd-vbitand) [ %and-vector-reps ] }
|
||||||
|
{ \ (simd-vbitor) [ %or-vector-reps ] }
|
||||||
|
{ \ (simd-vbitxor) [ %xor-vector-reps ] }
|
||||||
{ \ (simd-broadcast) [ %broadcast-vector-reps ] }
|
{ \ (simd-broadcast) [ %broadcast-vector-reps ] }
|
||||||
{ \ (simd-gather-2) [ %gather-vector-2-reps ] }
|
{ \ (simd-gather-2) [ %gather-vector-2-reps ] }
|
||||||
{ \ (simd-gather-4) [ %gather-vector-4-reps ] }
|
{ \ (simd-gather-4) [ %gather-vector-4-reps ] }
|
||||||
|
|
|
||||||
|
|
@ -135,7 +135,7 @@ CONSTANT: simd-classes
|
||||||
word '[ _ declare _ execute ] ;
|
word '[ _ declare _ execute ] ;
|
||||||
|
|
||||||
: remove-float-words ( alist -- alist' )
|
: remove-float-words ( alist -- alist' )
|
||||||
[ drop { n/v v/n v/ normalize } member? not ] assoc-filter ;
|
[ drop { vsqrt n/v v/n v/ normalize } member? not ] assoc-filter ;
|
||||||
|
|
||||||
: ops-to-check ( elt-class -- alist )
|
: ops-to-check ( elt-class -- alist )
|
||||||
[ vector-words >alist ] dip
|
[ vector-words >alist ] dip
|
||||||
|
|
@ -242,3 +242,5 @@ STRUCT: simd-struct
|
||||||
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
|
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
|
||||||
] compile-call
|
] compile-call
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
|
[ ] [ char-16 new 1array stack. ] unit-test
|
||||||
|
|
|
||||||
|
|
@ -72,6 +72,11 @@ H{
|
||||||
{ vneg { +vector+ -> +vector+ } }
|
{ vneg { +vector+ -> +vector+ } }
|
||||||
{ vtruncate { +vector+ -> +vector+ } }
|
{ vtruncate { +vector+ -> +vector+ } }
|
||||||
{ sum { +vector+ -> +scalar+ } }
|
{ sum { +vector+ -> +scalar+ } }
|
||||||
|
{ vabs { +vector+ -> +vector+ } }
|
||||||
|
{ vsqrt { +vector+ -> +vector+ } }
|
||||||
|
{ vbitand { +vector+ +vector+ -> +vector+ } }
|
||||||
|
{ vbitor { +vector+ +vector+ -> +vector+ } }
|
||||||
|
{ vbitxor { +vector+ +vector+ -> +vector+ } }
|
||||||
}
|
}
|
||||||
|
|
||||||
PREDICATE: vector-word < word vector-words key? ;
|
PREDICATE: vector-word < word vector-words key? ;
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,10 @@ $nl
|
||||||
"Saturated arithmetic may be performed on " { $link "specialized-arrays" } "; the results are clamped to the minimum and maximum bounds of the array element type, instead of wrapping around:"
|
"Saturated arithmetic may be performed on " { $link "specialized-arrays" } "; the results are clamped to the minimum and maximum bounds of the array element type, instead of wrapping around:"
|
||||||
{ $subsection vs+ }
|
{ $subsection vs+ }
|
||||||
{ $subsection vs- }
|
{ $subsection vs- }
|
||||||
{ $subsection vs* } ;
|
{ $subsection vs* }
|
||||||
|
"Comparing vectors:"
|
||||||
|
{ $subsection distance }
|
||||||
|
{ $subsection v~ } ;
|
||||||
|
|
||||||
ABOUT: "math-vectors"
|
ABOUT: "math-vectors"
|
||||||
|
|
||||||
|
|
@ -144,6 +147,10 @@ HELP: normalize
|
||||||
{ $values { "u" "a sequence of numbers, not all zero" } { "v" "a sequence of numbers" } }
|
{ $values { "u" "a sequence of numbers, not all zero" } { "v" "a sequence of numbers" } }
|
||||||
{ $description "Outputs a vector with the same direction as " { $snippet "u" } " but length 1." } ;
|
{ $description "Outputs a vector with the same direction as " { $snippet "u" } " but length 1." } ;
|
||||||
|
|
||||||
|
HELP: distance
|
||||||
|
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "x" "a non-negative real number" } }
|
||||||
|
{ $description "Outputs the Euclidean distance between two vectors." } ;
|
||||||
|
|
||||||
HELP: set-axis
|
HELP: set-axis
|
||||||
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "axis" "a sequence of 0/1" } { "w" "a sequence of numbers" } }
|
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "axis" "a sequence of 0/1" } { "w" "a sequence of numbers" } }
|
||||||
{ $description "Using " { $snippet "w" } " as a template, creates a new sequence containing corresponding elements from " { $snippet "u" } " in place of 0, and corresponding elements from " { $snippet "v" } " in place of 1." }
|
{ $description "Using " { $snippet "w" } " as a template, creates a new sequence containing corresponding elements from " { $snippet "u" } " in place of 0, and corresponding elements from " { $snippet "v" } " in place of 1." }
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,8 @@
|
||||||
! Copyright (C) 2005, 2008 Slava Pestov.
|
! Copyright (C) 2005, 2009 Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: arrays alien.c-types kernel sequences math math.functions
|
USING: arrays alien.c-types kernel sequences math math.functions
|
||||||
hints math.order fry ;
|
hints math.order fry combinators ;
|
||||||
|
QUALIFIED-WITH: alien.c-types c
|
||||||
IN: math.vectors
|
IN: math.vectors
|
||||||
|
|
||||||
GENERIC: element-type ( obj -- c-type )
|
GENERIC: element-type ( obj -- c-type )
|
||||||
|
|
@ -31,13 +32,35 @@ GENERIC: element-type ( obj -- c-type )
|
||||||
[ [ not ] 2dip pick [ + ] [ - ] if ] 2map
|
[ [ not ] 2dip pick [ + ] [ - ] if ] 2map
|
||||||
nip ;
|
nip ;
|
||||||
|
|
||||||
|
<PRIVATE
|
||||||
|
|
||||||
: 2saturate-map ( u v quot -- w )
|
: 2saturate-map ( u v quot -- w )
|
||||||
pick element-type '[ @ _ c-type-clamp ] 2map ; inline
|
pick element-type '[ @ _ c-type-clamp ] 2map ; inline
|
||||||
|
|
||||||
|
PRIVATE>
|
||||||
|
|
||||||
: vs+ ( u v -- w ) [ + ] 2saturate-map ;
|
: vs+ ( u v -- w ) [ + ] 2saturate-map ;
|
||||||
: vs- ( u v -- w ) [ - ] 2saturate-map ;
|
: vs- ( u v -- w ) [ - ] 2saturate-map ;
|
||||||
: vs* ( u v -- w ) [ * ] 2saturate-map ;
|
: vs* ( u v -- w ) [ * ] 2saturate-map ;
|
||||||
|
|
||||||
|
: vabs ( u -- v ) [ abs ] map ;
|
||||||
|
: vsqrt ( u -- v ) [ sqrt ] map ;
|
||||||
|
|
||||||
|
<PRIVATE
|
||||||
|
|
||||||
|
: fp-bitwise-op ( x y seq quot -- z )
|
||||||
|
swap element-type {
|
||||||
|
{ c:double [ [ [ double>bits ] bi@ ] dip call bits>double ] }
|
||||||
|
{ c:float [ [ [ float>bits ] bi@ ] dip call bits>float ] }
|
||||||
|
[ drop call ]
|
||||||
|
} case ; inline
|
||||||
|
|
||||||
|
PRIVATE>
|
||||||
|
|
||||||
|
: vbitand ( u v -- w ) over '[ _ [ bitand ] fp-bitwise-op ] 2map ;
|
||||||
|
: vbitor ( u v -- w ) over '[ _ [ bitor ] fp-bitwise-op ] 2map ;
|
||||||
|
: vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ;
|
||||||
|
|
||||||
: vfloor ( v -- _v_ ) [ floor ] map ;
|
: vfloor ( v -- _v_ ) [ floor ] map ;
|
||||||
: vceiling ( v -- ^v^ ) [ ceiling ] map ;
|
: vceiling ( v -- ^v^ ) [ ceiling ] map ;
|
||||||
: vtruncate ( v -- -v- ) [ truncate ] map ;
|
: vtruncate ( v -- -v- ) [ truncate ] map ;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue