math.vectors.simd: new operations: vabs vsqrt vbitand vbitor vbitxor
parent
863ccb61d6
commit
abac963882
|
|
@ -350,14 +350,34 @@ def: dst
|
|||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##horizontal-add-vector
|
||||
def: dst/scalar-rep
|
||||
use: src
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##abs-vector
|
||||
def: dst
|
||||
use: src
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##sqrt-vector
|
||||
def: dst
|
||||
use: src
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##horizontal-add-vector
|
||||
def: dst/scalar-rep
|
||||
use: src
|
||||
PURE-INSN: ##and-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##or-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##xor-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
! Boxing and unboxing aliens
|
||||
|
|
|
|||
|
|
@ -164,7 +164,11 @@ IN: compiler.cfg.intrinsics
|
|||
{ math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vabs) [ [ ^^abs-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
|
||||
|
|
|
|||
|
|
@ -55,7 +55,10 @@ UNION: two-operand-insn
|
|||
##saturated-mul-vector
|
||||
##div-vector
|
||||
##min-vector
|
||||
##max-vector ;
|
||||
##max-vector
|
||||
##and-vector
|
||||
##or-vector
|
||||
##xor-vector ;
|
||||
|
||||
GENERIC: convert-two-operand* ( insn -- )
|
||||
|
||||
|
|
|
|||
|
|
@ -180,6 +180,10 @@ CODEGEN: ##min-vector %min-vector
|
|||
CODEGEN: ##max-vector %max-vector
|
||||
CODEGEN: ##sqrt-vector %sqrt-vector
|
||||
CODEGEN: ##horizontal-add-vector %horizontal-add-vector
|
||||
CODEGEN: ##abs-vector %abs-vector
|
||||
CODEGEN: ##and-vector %and-vector
|
||||
CODEGEN: ##or-vector %or-vector
|
||||
CODEGEN: ##xor-vector %xor-vector
|
||||
CODEGEN: ##box-alien %box-alien
|
||||
CODEGEN: ##box-displaced-alien %box-displaced-alien
|
||||
CODEGEN: ##unbox-alien %unbox-alien
|
||||
|
|
|
|||
|
|
@ -13,7 +13,12 @@ IN: compiler.tree.propagation.simd
|
|||
(simd-v/)
|
||||
(simd-vmin)
|
||||
(simd-vmax)
|
||||
(simd-sum)
|
||||
(simd-vabs)
|
||||
(simd-vsqrt)
|
||||
(simd-vbitand)
|
||||
(simd-vbitor)
|
||||
(simd-vbitxor)
|
||||
(simd-broadcast)
|
||||
(simd-gather-2)
|
||||
(simd-gather-4)
|
||||
|
|
|
|||
|
|
@ -192,6 +192,10 @@ HOOK: %min-vector cpu ( dst src1 src2 rep -- )
|
|||
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %sqrt-vector cpu ( dst src rep -- )
|
||||
HOOK: %horizontal-add-vector cpu ( dst src rep -- )
|
||||
HOOK: %abs-vector cpu ( dst src rep -- )
|
||||
HOOK: %and-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %or-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %xor-vector cpu ( dst src1 src2 rep -- )
|
||||
|
||||
HOOK: %broadcast-vector-reps cpu ( -- reps )
|
||||
HOOK: %gather-vector-2-reps cpu ( -- reps )
|
||||
|
|
@ -208,6 +212,10 @@ HOOK: %min-vector-reps cpu ( -- reps )
|
|||
HOOK: %max-vector-reps cpu ( -- reps )
|
||||
HOOK: %sqrt-vector-reps cpu ( -- reps )
|
||||
HOOK: %horizontal-add-vector-reps cpu ( -- reps )
|
||||
HOOK: %abs-vector-reps cpu ( -- reps )
|
||||
HOOK: %and-vector-reps cpu ( -- reps )
|
||||
HOOK: %or-vector-reps cpu ( -- reps )
|
||||
HOOK: %xor-vector-reps cpu ( -- reps )
|
||||
|
||||
HOOK: %unbox-alien cpu ( dst src -- )
|
||||
HOOK: %unbox-any-c-ptr cpu ( dst src temp -- )
|
||||
|
|
|
|||
|
|
@ -410,30 +410,63 @@ M: x86 %div-vector-reps
|
|||
|
||||
M: x86 %min-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ float-4-rep [ MINPS ] }
|
||||
{ double-2-rep [ MINPD ] }
|
||||
{ char-16-rep [ PMINSB ] }
|
||||
{ uchar-16-rep [ PMINUB ] }
|
||||
{ short-8-rep [ PMINSW ] }
|
||||
{ ushort-8-rep [ PMINUW ] }
|
||||
{ int-4-rep [ PMINSD ] }
|
||||
{ uint-4-rep [ PMINUD ] }
|
||||
{ float-4-rep [ MINPS ] }
|
||||
{ double-2-rep [ MINPD ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %min-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep short-8-rep uchar-16-rep } }
|
||||
{ sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
|
||||
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %max-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ float-4-rep [ MAXPS ] }
|
||||
{ double-2-rep [ MAXPD ] }
|
||||
{ char-16-rep [ PMAXSB ] }
|
||||
{ uchar-16-rep [ PMAXUB ] }
|
||||
{ short-8-rep [ PMAXSW ] }
|
||||
{ ushort-8-rep [ PMAXUW ] }
|
||||
{ int-4-rep [ PMAXSD ] }
|
||||
{ uint-4-rep [ PMAXUD ] }
|
||||
{ float-4-rep [ MAXPS ] }
|
||||
{ double-2-rep [ MAXPD ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %max-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep short-8-rep uchar-16-rep } }
|
||||
{ sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
|
||||
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %horizontal-add-vector ( dst src rep -- )
|
||||
{
|
||||
{ float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
|
||||
{ double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %horizontal-add-vector-reps
|
||||
{
|
||||
{ sse3? { float-4-rep double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %abs-vector ( dst src rep -- )
|
||||
{
|
||||
{ char-16-rep [ PABSB ] }
|
||||
{ short-8-rep [ PABSW ] }
|
||||
{ int-4-rep [ PABSD ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %abs-vector-reps
|
||||
{
|
||||
{ ssse3? { char-16-rep short-8-rep int-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %sqrt-vector ( dst src rep -- )
|
||||
|
|
@ -448,15 +481,58 @@ M: x86 %sqrt-vector-reps
|
|||
{ sse2? { double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %horizontal-add-vector ( dst src rep -- )
|
||||
M: x86 %and-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
|
||||
{ double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
|
||||
} case ;
|
||||
{ float-4-rep [ ANDPS ] }
|
||||
{ double-2-rep [ ANDPD ] }
|
||||
{ char-16-rep [ PAND ] }
|
||||
{ uchar-16-rep [ PAND ] }
|
||||
{ short-8-rep [ PAND ] }
|
||||
{ ushort-8-rep [ PAND ] }
|
||||
{ int-4-rep [ PAND ] }
|
||||
{ uint-4-rep [ PAND ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %horizontal-add-vector-reps
|
||||
M: x86 %and-vector-reps
|
||||
{
|
||||
{ sse3? { float-4-rep double-2-rep } }
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %or-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ float-4-rep [ ORPS ] }
|
||||
{ double-2-rep [ ORPD ] }
|
||||
{ char-16-rep [ POR ] }
|
||||
{ uchar-16-rep [ POR ] }
|
||||
{ short-8-rep [ POR ] }
|
||||
{ ushort-8-rep [ POR ] }
|
||||
{ int-4-rep [ POR ] }
|
||||
{ uint-4-rep [ POR ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %or-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %xor-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ float-4-rep [ XORPS ] }
|
||||
{ double-2-rep [ XORPD ] }
|
||||
{ char-16-rep [ PXOR ] }
|
||||
{ uchar-16-rep [ PXOR ] }
|
||||
{ short-8-rep [ PXOR ] }
|
||||
{ ushort-8-rep [ PXOR ] }
|
||||
{ int-4-rep [ PXOR ] }
|
||||
{ uint-4-rep [ PXOR ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %xor-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %unbox-alien ( dst src -- )
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@ USING: accessors alien.c-types assocs byte-arrays classes
|
|||
effects fry functors generalizations kernel literals locals
|
||||
math math.functions math.vectors math.vectors.simd.intrinsics
|
||||
math.vectors.specialization parser prettyprint.custom sequences
|
||||
sequences.private strings words definitions macros cpu.architecture ;
|
||||
sequences.private strings words definitions macros cpu.architecture
|
||||
namespaces arrays quotations ;
|
||||
QUALIFIED-WITH: math m
|
||||
IN: math.vectors.simd.functor
|
||||
|
||||
|
|
@ -38,24 +39,19 @@ MACRO: simd-boa ( rep class -- simd-array )
|
|||
{ "simd-vector" } <effect> ;
|
||||
|
||||
: supported-simd-ops ( assoc rep -- assoc' )
|
||||
[
|
||||
{
|
||||
{ v+ (simd-v+) }
|
||||
{ vs+ (simd-vs+) }
|
||||
{ v+- (simd-v+-) }
|
||||
{ v- (simd-v-) }
|
||||
{ vs- (simd-vs-) }
|
||||
{ v* (simd-v*) }
|
||||
{ vs* (simd-vs*) }
|
||||
{ v/ (simd-v/) }
|
||||
{ vmin (simd-vmin) }
|
||||
{ vmax (simd-vmax) }
|
||||
{ sum (simd-sum) }
|
||||
}
|
||||
] dip
|
||||
[ simd-ops get ] dip
|
||||
'[ nip _ swap supported-simd-op? ] assoc-filter
|
||||
'[ drop _ key? ] assoc-filter ;
|
||||
|
||||
ERROR: bad-schema schema ;
|
||||
|
||||
: low-level-ops ( box-quot: ( inputs... simd-op -- outputs... ) -- alist )
|
||||
[ simd-ops get ] dip '[
|
||||
1quotation
|
||||
over word-schema _ ?at [ bad-schema ] unless
|
||||
[ ] 2sequence
|
||||
] assoc-map ;
|
||||
|
||||
:: high-level-ops ( ctor elt-class -- assoc )
|
||||
! Some SIMD operations are defined in terms of others.
|
||||
{
|
||||
|
|
@ -82,11 +78,17 @@ MACRO: simd-boa ( rep class -- simd-array )
|
|||
} append
|
||||
] when ;
|
||||
|
||||
:: simd-vector-words ( class ctor rep assoc -- )
|
||||
:: simd-vector-words ( class ctor rep vv->v v->v v->n -- )
|
||||
rep rep-component-type c-type-boxed-class :> elt-class
|
||||
class
|
||||
elt-class
|
||||
assoc rep supported-simd-ops
|
||||
{
|
||||
{ { +vector+ +vector+ -> +vector+ } vv->v }
|
||||
{ { +vector+ -> +vector+ } v->v }
|
||||
{ { +vector+ -> +scalar+ } v->n }
|
||||
{ { +vector+ -> +nonnegative+ } v->n }
|
||||
} low-level-ops
|
||||
rep supported-simd-ops
|
||||
ctor elt-class high-level-ops assoc-union
|
||||
specialize-vector-words ;
|
||||
|
||||
|
|
@ -116,6 +118,7 @@ SET-NTH [ T dup c-setter array-accessor ]
|
|||
|
||||
A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
|
||||
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
||||
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
|
||||
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
|
||||
|
||||
WHERE
|
||||
|
|
@ -172,23 +175,13 @@ INSTANCE: A sequence
|
|||
: A-vv->v-op ( v1 v2 quot -- v3 )
|
||||
[ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline
|
||||
|
||||
: A-v->v-op ( v1 quot -- v2 )
|
||||
[ underlying>> A-rep ] dip call \ A boa ; inline
|
||||
|
||||
: A-v->n-op ( v quot -- n )
|
||||
[ underlying>> A-rep ] dip call ; inline
|
||||
|
||||
\ A \ A-with \ A-rep H{
|
||||
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
|
||||
{ vs+ [ [ (simd-vs+) ] \ A-vv->v-op execute ] }
|
||||
{ v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
|
||||
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
|
||||
{ vs- [ [ (simd-vs-) ] \ A-vv->v-op execute ] }
|
||||
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
|
||||
{ vs* [ [ (simd-vs*) ] \ A-vv->v-op execute ] }
|
||||
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
|
||||
{ vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] }
|
||||
{ vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] }
|
||||
{ sum [ [ (simd-sum) ] \ A-v->n-op execute ] }
|
||||
} simd-vector-words
|
||||
|
||||
\ A \ A-with \ A-rep \ A-vv->v-op \ A-v->v-op \ A-v->n-op simd-vector-words
|
||||
\ A \ A-rep define-simd-128-type
|
||||
|
||||
PRIVATE>
|
||||
|
|
@ -237,6 +230,7 @@ A-deref DEFINES-PRIVATE ${A}-deref
|
|||
|
||||
A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
|
||||
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
||||
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
|
||||
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
|
||||
|
||||
WHERE
|
||||
|
|
@ -302,24 +296,15 @@ INSTANCE: A sequence
|
|||
[ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
|
||||
\ A boa ; inline
|
||||
|
||||
: A-v->n-op ( v1 combine-quot reduce-quot -- v2 )
|
||||
[ [ [ underlying1>> ] [ underlying2>> ] bi A-rep ] dip call A-rep ]
|
||||
dip call ; inline
|
||||
: A-v->v-op ( v1 combine-quot -- v2 )
|
||||
[ [ underlying1>> A-rep ] dip call ]
|
||||
[ [ underlying2>> A-rep ] dip call ] 2bi
|
||||
\ A boa ; inline
|
||||
|
||||
\ A \ A-with \ A-rep H{
|
||||
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
|
||||
{ vs+ [ [ (simd-vs+) ] \ A-vv->v-op execute ] }
|
||||
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
|
||||
{ vs- [ [ (simd-vs-) ] \ A-vv->v-op execute ] }
|
||||
{ v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
|
||||
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
|
||||
{ vs* [ [ (simd-vs*) ] \ A-vv->v-op execute ] }
|
||||
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
|
||||
{ vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] }
|
||||
{ vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] }
|
||||
{ sum [ [ (simd-v+) ] [ (simd-sum) ] \ A-v->n-op execute ] }
|
||||
} simd-vector-words
|
||||
: A-v->n-op ( v1 combine-quot -- v2 )
|
||||
[ [ underlying1>> ] [ underlying2>> ] bi A-rep (simd-v+) A-rep ] dip call ; inline
|
||||
|
||||
\ A \ A-with \ A-rep \ A-vv->v-op \ A-v->v-op \ A-v->n-op simd-vector-words
|
||||
\ A \ A-rep define-simd-256-type
|
||||
|
||||
;FUNCTOR
|
||||
|
|
|
|||
|
|
@ -2,23 +2,47 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: alien alien.c-types alien.data assocs combinators
|
||||
cpu.architecture fry generalizations kernel libc macros math
|
||||
sequences ;
|
||||
sequences effects accessors namespaces lexer parser vocabs.parser
|
||||
words arrays math.vectors ;
|
||||
IN: math.vectors.simd.intrinsics
|
||||
|
||||
ERROR: bad-simd-call ;
|
||||
|
||||
: (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-v+-) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-vs+) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-vs-) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-vs*) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-vmin) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-vmax) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-vsqrt) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-sum) ( v1 rep -- v2 ) bad-simd-call ;
|
||||
<<
|
||||
|
||||
: simd-effect ( word -- effect )
|
||||
stack-effect [ in>> "rep" suffix ] [ out>> ] bi <effect> ;
|
||||
|
||||
SYMBOL: simd-ops
|
||||
|
||||
V{ } clone simd-ops set-global
|
||||
|
||||
SYNTAX: SIMD-OP:
|
||||
scan-word dup name>> "(simd-" ")" surround create-in
|
||||
[ nip [ bad-simd-call ] define ]
|
||||
[ [ simd-effect ] dip set-stack-effect ]
|
||||
[ 2array simd-ops get push ]
|
||||
2tri ;
|
||||
|
||||
>>
|
||||
|
||||
SIMD-OP: v+
|
||||
SIMD-OP: v-
|
||||
SIMD-OP: v+-
|
||||
SIMD-OP: vs+
|
||||
SIMD-OP: vs-
|
||||
SIMD-OP: vs*
|
||||
SIMD-OP: v*
|
||||
SIMD-OP: v/
|
||||
SIMD-OP: vmin
|
||||
SIMD-OP: vmax
|
||||
SIMD-OP: vsqrt
|
||||
SIMD-OP: sum
|
||||
SIMD-OP: vabs
|
||||
SIMD-OP: vbitand
|
||||
SIMD-OP: vbitor
|
||||
SIMD-OP: vbitxor
|
||||
|
||||
: (simd-broadcast) ( x rep -- v ) bad-simd-call ;
|
||||
: (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
|
||||
: (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ;
|
||||
|
|
@ -82,6 +106,10 @@ M: vector-rep supported-simd-op?
|
|||
{ \ (simd-vmax) [ %max-vector-reps ] }
|
||||
{ \ (simd-vsqrt) [ %sqrt-vector-reps ] }
|
||||
{ \ (simd-sum) [ %horizontal-add-vector-reps ] }
|
||||
{ \ (simd-vabs) [ %abs-vector-reps ] }
|
||||
{ \ (simd-vbitand) [ %and-vector-reps ] }
|
||||
{ \ (simd-vbitor) [ %or-vector-reps ] }
|
||||
{ \ (simd-vbitxor) [ %xor-vector-reps ] }
|
||||
{ \ (simd-broadcast) [ %broadcast-vector-reps ] }
|
||||
{ \ (simd-gather-2) [ %gather-vector-2-reps ] }
|
||||
{ \ (simd-gather-4) [ %gather-vector-4-reps ] }
|
||||
|
|
|
|||
|
|
@ -135,7 +135,7 @@ CONSTANT: simd-classes
|
|||
word '[ _ declare _ execute ] ;
|
||||
|
||||
: remove-float-words ( alist -- alist' )
|
||||
[ drop { n/v v/n v/ normalize } member? not ] assoc-filter ;
|
||||
[ drop { vsqrt n/v v/n v/ normalize } member? not ] assoc-filter ;
|
||||
|
||||
: ops-to-check ( elt-class -- alist )
|
||||
[ vector-words >alist ] dip
|
||||
|
|
@ -242,3 +242,5 @@ STRUCT: simd-struct
|
|||
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
|
||||
] compile-call
|
||||
] unit-test
|
||||
|
||||
[ ] [ char-16 new 1array stack. ] unit-test
|
||||
|
|
|
|||
|
|
@ -72,6 +72,11 @@ H{
|
|||
{ vneg { +vector+ -> +vector+ } }
|
||||
{ vtruncate { +vector+ -> +vector+ } }
|
||||
{ sum { +vector+ -> +scalar+ } }
|
||||
{ vabs { +vector+ -> +vector+ } }
|
||||
{ vsqrt { +vector+ -> +vector+ } }
|
||||
{ vbitand { +vector+ +vector+ -> +vector+ } }
|
||||
{ vbitor { +vector+ +vector+ -> +vector+ } }
|
||||
{ vbitxor { +vector+ +vector+ -> +vector+ } }
|
||||
}
|
||||
|
||||
PREDICATE: vector-word < word vector-words key? ;
|
||||
|
|
|
|||
|
|
@ -30,7 +30,10 @@ $nl
|
|||
"Saturated arithmetic may be performed on " { $link "specialized-arrays" } "; the results are clamped to the minimum and maximum bounds of the array element type, instead of wrapping around:"
|
||||
{ $subsection vs+ }
|
||||
{ $subsection vs- }
|
||||
{ $subsection vs* } ;
|
||||
{ $subsection vs* }
|
||||
"Comparing vectors:"
|
||||
{ $subsection distance }
|
||||
{ $subsection v~ } ;
|
||||
|
||||
ABOUT: "math-vectors"
|
||||
|
||||
|
|
@ -144,6 +147,10 @@ HELP: normalize
|
|||
{ $values { "u" "a sequence of numbers, not all zero" } { "v" "a sequence of numbers" } }
|
||||
{ $description "Outputs a vector with the same direction as " { $snippet "u" } " but length 1." } ;
|
||||
|
||||
HELP: distance
|
||||
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "x" "a non-negative real number" } }
|
||||
{ $description "Outputs the Euclidean distance between two vectors." } ;
|
||||
|
||||
HELP: set-axis
|
||||
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "axis" "a sequence of 0/1" } { "w" "a sequence of numbers" } }
|
||||
{ $description "Using " { $snippet "w" } " as a template, creates a new sequence containing corresponding elements from " { $snippet "u" } " in place of 0, and corresponding elements from " { $snippet "v" } " in place of 1." }
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
! Copyright (C) 2005, 2008 Slava Pestov.
|
||||
! Copyright (C) 2005, 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: arrays alien.c-types kernel sequences math math.functions
|
||||
hints math.order fry ;
|
||||
hints math.order fry combinators ;
|
||||
QUALIFIED-WITH: alien.c-types c
|
||||
IN: math.vectors
|
||||
|
||||
GENERIC: element-type ( obj -- c-type )
|
||||
|
|
@ -31,13 +32,35 @@ GENERIC: element-type ( obj -- c-type )
|
|||
[ [ not ] 2dip pick [ + ] [ - ] if ] 2map
|
||||
nip ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: 2saturate-map ( u v quot -- w )
|
||||
pick element-type '[ @ _ c-type-clamp ] 2map ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: vs+ ( u v -- w ) [ + ] 2saturate-map ;
|
||||
: vs- ( u v -- w ) [ - ] 2saturate-map ;
|
||||
: vs* ( u v -- w ) [ * ] 2saturate-map ;
|
||||
|
||||
: vabs ( u -- v ) [ abs ] map ;
|
||||
: vsqrt ( u -- v ) [ sqrt ] map ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: fp-bitwise-op ( x y seq quot -- z )
|
||||
swap element-type {
|
||||
{ c:double [ [ [ double>bits ] bi@ ] dip call bits>double ] }
|
||||
{ c:float [ [ [ float>bits ] bi@ ] dip call bits>float ] }
|
||||
[ drop call ]
|
||||
} case ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: vbitand ( u v -- w ) over '[ _ [ bitand ] fp-bitwise-op ] 2map ;
|
||||
: vbitor ( u v -- w ) over '[ _ [ bitor ] fp-bitwise-op ] 2map ;
|
||||
: vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ;
|
||||
|
||||
: vfloor ( v -- _v_ ) [ floor ] map ;
|
||||
: vceiling ( v -- ^v^ ) [ ceiling ] map ;
|
||||
: vtruncate ( v -- -v- ) [ truncate ] map ;
|
||||
|
|
|
|||
Loading…
Reference in New Issue