math.vectors.simd: new operations: vabs vsqrt vbitand vbitor vbitxor

Slava Pestov 2009-09-23 02:46:54 -05:00
parent 863ccb61d6
commit abac963882
13 changed files with 252 additions and 82 deletions

View File

@ -350,14 +350,34 @@ def: dst
use: src1 src2 use: src1 src2
literal: rep ; literal: rep ;
PURE-INSN: ##horizontal-add-vector
def: dst/scalar-rep
use: src
literal: rep ;
PURE-INSN: ##abs-vector
def: dst
use: src
literal: rep ;
PURE-INSN: ##sqrt-vector PURE-INSN: ##sqrt-vector
def: dst def: dst
use: src use: src
literal: rep ; literal: rep ;
PURE-INSN: ##horizontal-add-vector PURE-INSN: ##and-vector
def: dst/scalar-rep def: dst
use: src use: src1 src2
literal: rep ;
PURE-INSN: ##or-vector
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##xor-vector
def: dst
use: src1 src2
literal: rep ; literal: rep ;
! Boxing and unboxing aliens ! Boxing and unboxing aliens

View File

@ -164,7 +164,11 @@ IN: compiler.cfg.intrinsics
{ math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vabs) [ [ ^^abs-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] } { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] } { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }

View File

@ -55,7 +55,10 @@ UNION: two-operand-insn
##saturated-mul-vector ##saturated-mul-vector
##div-vector ##div-vector
##min-vector ##min-vector
##max-vector ; ##max-vector
##and-vector
##or-vector
##xor-vector ;
GENERIC: convert-two-operand* ( insn -- ) GENERIC: convert-two-operand* ( insn -- )

View File

@ -180,6 +180,10 @@ CODEGEN: ##min-vector %min-vector
CODEGEN: ##max-vector %max-vector CODEGEN: ##max-vector %max-vector
CODEGEN: ##sqrt-vector %sqrt-vector CODEGEN: ##sqrt-vector %sqrt-vector
CODEGEN: ##horizontal-add-vector %horizontal-add-vector CODEGEN: ##horizontal-add-vector %horizontal-add-vector
CODEGEN: ##abs-vector %abs-vector
CODEGEN: ##and-vector %and-vector
CODEGEN: ##or-vector %or-vector
CODEGEN: ##xor-vector %xor-vector
CODEGEN: ##box-alien %box-alien CODEGEN: ##box-alien %box-alien
CODEGEN: ##box-displaced-alien %box-displaced-alien CODEGEN: ##box-displaced-alien %box-displaced-alien
CODEGEN: ##unbox-alien %unbox-alien CODEGEN: ##unbox-alien %unbox-alien

View File

@ -13,7 +13,12 @@ IN: compiler.tree.propagation.simd
(simd-v/) (simd-v/)
(simd-vmin) (simd-vmin)
(simd-vmax) (simd-vmax)
(simd-sum)
(simd-vabs)
(simd-vsqrt) (simd-vsqrt)
(simd-vbitand)
(simd-vbitor)
(simd-vbitxor)
(simd-broadcast) (simd-broadcast)
(simd-gather-2) (simd-gather-2)
(simd-gather-4) (simd-gather-4)

View File

@ -192,6 +192,10 @@ HOOK: %min-vector cpu ( dst src1 src2 rep -- )
HOOK: %max-vector cpu ( dst src1 src2 rep -- ) HOOK: %max-vector cpu ( dst src1 src2 rep -- )
HOOK: %sqrt-vector cpu ( dst src rep -- ) HOOK: %sqrt-vector cpu ( dst src rep -- )
HOOK: %horizontal-add-vector cpu ( dst src rep -- ) HOOK: %horizontal-add-vector cpu ( dst src rep -- )
HOOK: %abs-vector cpu ( dst src rep -- )
HOOK: %and-vector cpu ( dst src1 src2 rep -- )
HOOK: %or-vector cpu ( dst src1 src2 rep -- )
HOOK: %xor-vector cpu ( dst src1 src2 rep -- )
HOOK: %broadcast-vector-reps cpu ( -- reps ) HOOK: %broadcast-vector-reps cpu ( -- reps )
HOOK: %gather-vector-2-reps cpu ( -- reps ) HOOK: %gather-vector-2-reps cpu ( -- reps )
@ -208,6 +212,10 @@ HOOK: %min-vector-reps cpu ( -- reps )
HOOK: %max-vector-reps cpu ( -- reps ) HOOK: %max-vector-reps cpu ( -- reps )
HOOK: %sqrt-vector-reps cpu ( -- reps ) HOOK: %sqrt-vector-reps cpu ( -- reps )
HOOK: %horizontal-add-vector-reps cpu ( -- reps ) HOOK: %horizontal-add-vector-reps cpu ( -- reps )
HOOK: %abs-vector-reps cpu ( -- reps )
HOOK: %and-vector-reps cpu ( -- reps )
HOOK: %or-vector-reps cpu ( -- reps )
HOOK: %xor-vector-reps cpu ( -- reps )
HOOK: %unbox-alien cpu ( dst src -- ) HOOK: %unbox-alien cpu ( dst src -- )
HOOK: %unbox-any-c-ptr cpu ( dst src temp -- ) HOOK: %unbox-any-c-ptr cpu ( dst src temp -- )

View File

@ -410,30 +410,63 @@ M: x86 %div-vector-reps
M: x86 %min-vector ( dst src1 src2 rep -- ) M: x86 %min-vector ( dst src1 src2 rep -- )
{ {
{ float-4-rep [ MINPS ] } { char-16-rep [ PMINSB ] }
{ double-2-rep [ MINPD ] }
{ uchar-16-rep [ PMINUB ] } { uchar-16-rep [ PMINUB ] }
{ short-8-rep [ PMINSW ] } { short-8-rep [ PMINSW ] }
{ ushort-8-rep [ PMINUW ] }
{ int-4-rep [ PMINSD ] }
{ uint-4-rep [ PMINUD ] }
{ float-4-rep [ MINPS ] }
{ double-2-rep [ MINPD ] }
} case drop ; } case drop ;
M: x86 %min-vector-reps M: x86 %min-vector-reps
{ {
{ sse? { float-4-rep } } { sse? { float-4-rep } }
{ sse2? { double-2-rep short-8-rep uchar-16-rep } } { sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ; } available-reps ;
M: x86 %max-vector ( dst src1 src2 rep -- ) M: x86 %max-vector ( dst src1 src2 rep -- )
{ {
{ float-4-rep [ MAXPS ] } { char-16-rep [ PMAXSB ] }
{ double-2-rep [ MAXPD ] }
{ uchar-16-rep [ PMAXUB ] } { uchar-16-rep [ PMAXUB ] }
{ short-8-rep [ PMAXSW ] } { short-8-rep [ PMAXSW ] }
{ ushort-8-rep [ PMAXUW ] }
{ int-4-rep [ PMAXSD ] }
{ uint-4-rep [ PMAXUD ] }
{ float-4-rep [ MAXPS ] }
{ double-2-rep [ MAXPD ] }
} case drop ; } case drop ;
M: x86 %max-vector-reps M: x86 %max-vector-reps
{ {
{ sse? { float-4-rep } } { sse? { float-4-rep } }
{ sse2? { double-2-rep short-8-rep uchar-16-rep } } { sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %horizontal-add-vector ( dst src rep -- )
{
{ float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
{ double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
} case ;
M: x86 %horizontal-add-vector-reps
{
{ sse3? { float-4-rep double-2-rep } }
} available-reps ;
M: x86 %abs-vector ( dst src rep -- )
{
{ char-16-rep [ PABSB ] }
{ short-8-rep [ PABSW ] }
{ int-4-rep [ PABSD ] }
} case ;
M: x86 %abs-vector-reps
{
{ ssse3? { char-16-rep short-8-rep int-4-rep } }
} available-reps ; } available-reps ;
M: x86 %sqrt-vector ( dst src rep -- ) M: x86 %sqrt-vector ( dst src rep -- )
@ -448,15 +481,58 @@ M: x86 %sqrt-vector-reps
{ sse2? { double-2-rep } } { sse2? { double-2-rep } }
} available-reps ; } available-reps ;
M: x86 %horizontal-add-vector ( dst src rep -- ) M: x86 %and-vector ( dst src1 src2 rep -- )
{ {
{ float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] } { float-4-rep [ ANDPS ] }
{ double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] } { double-2-rep [ ANDPD ] }
} case ; { char-16-rep [ PAND ] }
{ uchar-16-rep [ PAND ] }
{ short-8-rep [ PAND ] }
{ ushort-8-rep [ PAND ] }
{ int-4-rep [ PAND ] }
{ uint-4-rep [ PAND ] }
} case drop ;
M: x86 %horizontal-add-vector-reps M: x86 %and-vector-reps
{ {
{ sse3? { float-4-rep double-2-rep } } { sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %or-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ ORPS ] }
{ double-2-rep [ ORPD ] }
{ char-16-rep [ POR ] }
{ uchar-16-rep [ POR ] }
{ short-8-rep [ POR ] }
{ ushort-8-rep [ POR ] }
{ int-4-rep [ POR ] }
{ uint-4-rep [ POR ] }
} case drop ;
M: x86 %or-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %xor-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ XORPS ] }
{ double-2-rep [ XORPD ] }
{ char-16-rep [ PXOR ] }
{ uchar-16-rep [ PXOR ] }
{ short-8-rep [ PXOR ] }
{ ushort-8-rep [ PXOR ] }
{ int-4-rep [ PXOR ] }
{ uint-4-rep [ PXOR ] }
} case drop ;
M: x86 %xor-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ; } available-reps ;
M: x86 %unbox-alien ( dst src -- ) M: x86 %unbox-alien ( dst src -- )

View File

@ -4,7 +4,8 @@ USING: accessors alien.c-types assocs byte-arrays classes
effects fry functors generalizations kernel literals locals effects fry functors generalizations kernel literals locals
math math.functions math.vectors math.vectors.simd.intrinsics math math.functions math.vectors math.vectors.simd.intrinsics
math.vectors.specialization parser prettyprint.custom sequences math.vectors.specialization parser prettyprint.custom sequences
sequences.private strings words definitions macros cpu.architecture ; sequences.private strings words definitions macros cpu.architecture
namespaces arrays quotations ;
QUALIFIED-WITH: math m QUALIFIED-WITH: math m
IN: math.vectors.simd.functor IN: math.vectors.simd.functor
@ -38,24 +39,19 @@ MACRO: simd-boa ( rep class -- simd-array )
{ "simd-vector" } <effect> ; { "simd-vector" } <effect> ;
: supported-simd-ops ( assoc rep -- assoc' ) : supported-simd-ops ( assoc rep -- assoc' )
[ [ simd-ops get ] dip
{
{ v+ (simd-v+) }
{ vs+ (simd-vs+) }
{ v+- (simd-v+-) }
{ v- (simd-v-) }
{ vs- (simd-vs-) }
{ v* (simd-v*) }
{ vs* (simd-vs*) }
{ v/ (simd-v/) }
{ vmin (simd-vmin) }
{ vmax (simd-vmax) }
{ sum (simd-sum) }
}
] dip
'[ nip _ swap supported-simd-op? ] assoc-filter '[ nip _ swap supported-simd-op? ] assoc-filter
'[ drop _ key? ] assoc-filter ; '[ drop _ key? ] assoc-filter ;
ERROR: bad-schema schema ;
: low-level-ops ( box-quot: ( inputs... simd-op -- outputs... ) -- alist )
[ simd-ops get ] dip '[
1quotation
over word-schema _ ?at [ bad-schema ] unless
[ ] 2sequence
] assoc-map ;
:: high-level-ops ( ctor elt-class -- assoc ) :: high-level-ops ( ctor elt-class -- assoc )
! Some SIMD operations are defined in terms of others. ! Some SIMD operations are defined in terms of others.
{ {
@ -82,11 +78,17 @@ MACRO: simd-boa ( rep class -- simd-array )
} append } append
] when ; ] when ;
:: simd-vector-words ( class ctor rep assoc -- ) :: simd-vector-words ( class ctor rep vv->v v->v v->n -- )
rep rep-component-type c-type-boxed-class :> elt-class rep rep-component-type c-type-boxed-class :> elt-class
class class
elt-class elt-class
assoc rep supported-simd-ops {
{ { +vector+ +vector+ -> +vector+ } vv->v }
{ { +vector+ -> +vector+ } v->v }
{ { +vector+ -> +scalar+ } v->n }
{ { +vector+ -> +nonnegative+ } v->n }
} low-level-ops
rep supported-simd-ops
ctor elt-class high-level-ops assoc-union ctor elt-class high-level-ops assoc-union
specialize-vector-words ; specialize-vector-words ;
@ -116,6 +118,7 @@ SET-NTH [ T dup c-setter array-accessor ]
A-rep [ A name>> "-rep" append "cpu.architecture" lookup ] A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
WHERE WHERE
@ -172,23 +175,13 @@ INSTANCE: A sequence
: A-vv->v-op ( v1 v2 quot -- v3 ) : A-vv->v-op ( v1 v2 quot -- v3 )
[ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline [ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline
: A-v->v-op ( v1 quot -- v2 )
[ underlying>> A-rep ] dip call \ A boa ; inline
: A-v->n-op ( v quot -- n ) : A-v->n-op ( v quot -- n )
[ underlying>> A-rep ] dip call ; inline [ underlying>> A-rep ] dip call ; inline
\ A \ A-with \ A-rep H{ \ A \ A-with \ A-rep \ A-vv->v-op \ A-v->v-op \ A-v->n-op simd-vector-words
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
{ vs+ [ [ (simd-vs+) ] \ A-vv->v-op execute ] }
{ v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
{ vs- [ [ (simd-vs-) ] \ A-vv->v-op execute ] }
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
{ vs* [ [ (simd-vs*) ] \ A-vv->v-op execute ] }
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
{ vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] }
{ vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] }
{ sum [ [ (simd-sum) ] \ A-v->n-op execute ] }
} simd-vector-words
\ A \ A-rep define-simd-128-type \ A \ A-rep define-simd-128-type
PRIVATE> PRIVATE>
@ -237,6 +230,7 @@ A-deref DEFINES-PRIVATE ${A}-deref
A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ] A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
WHERE WHERE
@ -302,24 +296,15 @@ INSTANCE: A sequence
[ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi [ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
\ A boa ; inline \ A boa ; inline
: A-v->n-op ( v1 combine-quot reduce-quot -- v2 ) : A-v->v-op ( v1 combine-quot -- v2 )
[ [ [ underlying1>> ] [ underlying2>> ] bi A-rep ] dip call A-rep ] [ [ underlying1>> A-rep ] dip call ]
dip call ; inline [ [ underlying2>> A-rep ] dip call ] 2bi
\ A boa ; inline
\ A \ A-with \ A-rep H{ : A-v->n-op ( v1 combine-quot -- v2 )
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] } [ [ underlying1>> ] [ underlying2>> ] bi A-rep (simd-v+) A-rep ] dip call ; inline
{ vs+ [ [ (simd-vs+) ] \ A-vv->v-op execute ] }
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
{ vs- [ [ (simd-vs-) ] \ A-vv->v-op execute ] }
{ v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
{ vs* [ [ (simd-vs*) ] \ A-vv->v-op execute ] }
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
{ vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] }
{ vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] }
{ sum [ [ (simd-v+) ] [ (simd-sum) ] \ A-v->n-op execute ] }
} simd-vector-words
\ A \ A-with \ A-rep \ A-vv->v-op \ A-v->v-op \ A-v->n-op simd-vector-words
\ A \ A-rep define-simd-256-type \ A \ A-rep define-simd-256-type
;FUNCTOR ;FUNCTOR

View File

@ -2,23 +2,47 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: alien alien.c-types alien.data assocs combinators USING: alien alien.c-types alien.data assocs combinators
cpu.architecture fry generalizations kernel libc macros math cpu.architecture fry generalizations kernel libc macros math
sequences ; sequences effects accessors namespaces lexer parser vocabs.parser
words arrays math.vectors ;
IN: math.vectors.simd.intrinsics IN: math.vectors.simd.intrinsics
ERROR: bad-simd-call ; ERROR: bad-simd-call ;
: (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ; <<
: (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v+-) ( v1 v2 rep -- v3 ) bad-simd-call ; : simd-effect ( word -- effect )
: (simd-vs+) ( v1 v2 rep -- v3 ) bad-simd-call ; stack-effect [ in>> "rep" suffix ] [ out>> ] bi <effect> ;
: (simd-vs-) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-vs*) ( v1 v2 rep -- v3 ) bad-simd-call ; SYMBOL: simd-ops
: (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ; V{ } clone simd-ops set-global
: (simd-vmin) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-vmax) ( v1 v2 rep -- v3 ) bad-simd-call ; SYNTAX: SIMD-OP:
: (simd-vsqrt) ( v1 v2 rep -- v3 ) bad-simd-call ; scan-word dup name>> "(simd-" ")" surround create-in
: (simd-sum) ( v1 rep -- v2 ) bad-simd-call ; [ nip [ bad-simd-call ] define ]
[ [ simd-effect ] dip set-stack-effect ]
[ 2array simd-ops get push ]
2tri ;
>>
SIMD-OP: v+
SIMD-OP: v-
SIMD-OP: v+-
SIMD-OP: vs+
SIMD-OP: vs-
SIMD-OP: vs*
SIMD-OP: v*
SIMD-OP: v/
SIMD-OP: vmin
SIMD-OP: vmax
SIMD-OP: vsqrt
SIMD-OP: sum
SIMD-OP: vabs
SIMD-OP: vbitand
SIMD-OP: vbitor
SIMD-OP: vbitxor
: (simd-broadcast) ( x rep -- v ) bad-simd-call ; : (simd-broadcast) ( x rep -- v ) bad-simd-call ;
: (simd-gather-2) ( a b rep -- v ) bad-simd-call ; : (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
: (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ; : (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ;
@ -82,6 +106,10 @@ M: vector-rep supported-simd-op?
{ \ (simd-vmax) [ %max-vector-reps ] } { \ (simd-vmax) [ %max-vector-reps ] }
{ \ (simd-vsqrt) [ %sqrt-vector-reps ] } { \ (simd-vsqrt) [ %sqrt-vector-reps ] }
{ \ (simd-sum) [ %horizontal-add-vector-reps ] } { \ (simd-sum) [ %horizontal-add-vector-reps ] }
{ \ (simd-vabs) [ %abs-vector-reps ] }
{ \ (simd-vbitand) [ %and-vector-reps ] }
{ \ (simd-vbitor) [ %or-vector-reps ] }
{ \ (simd-vbitxor) [ %xor-vector-reps ] }
{ \ (simd-broadcast) [ %broadcast-vector-reps ] } { \ (simd-broadcast) [ %broadcast-vector-reps ] }
{ \ (simd-gather-2) [ %gather-vector-2-reps ] } { \ (simd-gather-2) [ %gather-vector-2-reps ] }
{ \ (simd-gather-4) [ %gather-vector-4-reps ] } { \ (simd-gather-4) [ %gather-vector-4-reps ] }

View File

@ -135,7 +135,7 @@ CONSTANT: simd-classes
word '[ _ declare _ execute ] ; word '[ _ declare _ execute ] ;
: remove-float-words ( alist -- alist' ) : remove-float-words ( alist -- alist' )
[ drop { n/v v/n v/ normalize } member? not ] assoc-filter ; [ drop { vsqrt n/v v/n v/ normalize } member? not ] assoc-filter ;
: ops-to-check ( elt-class -- alist ) : ops-to-check ( elt-class -- alist )
[ vector-words >alist ] dip [ vector-words >alist ] dip
@ -242,3 +242,5 @@ STRUCT: simd-struct
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
] compile-call ] compile-call
] unit-test ] unit-test
[ ] [ char-16 new 1array stack. ] unit-test

View File

@ -72,6 +72,11 @@ H{
{ vneg { +vector+ -> +vector+ } } { vneg { +vector+ -> +vector+ } }
{ vtruncate { +vector+ -> +vector+ } } { vtruncate { +vector+ -> +vector+ } }
{ sum { +vector+ -> +scalar+ } } { sum { +vector+ -> +scalar+ } }
{ vabs { +vector+ -> +vector+ } }
{ vsqrt { +vector+ -> +vector+ } }
{ vbitand { +vector+ +vector+ -> +vector+ } }
{ vbitor { +vector+ +vector+ -> +vector+ } }
{ vbitxor { +vector+ +vector+ -> +vector+ } }
} }
PREDICATE: vector-word < word vector-words key? ; PREDICATE: vector-word < word vector-words key? ;

View File

@ -30,7 +30,10 @@ $nl
"Saturated arithmetic may be performed on " { $link "specialized-arrays" } "; the results are clamped to the minimum and maximum bounds of the array element type, instead of wrapping around:" "Saturated arithmetic may be performed on " { $link "specialized-arrays" } "; the results are clamped to the minimum and maximum bounds of the array element type, instead of wrapping around:"
{ $subsection vs+ } { $subsection vs+ }
{ $subsection vs- } { $subsection vs- }
{ $subsection vs* } ; { $subsection vs* }
"Comparing vectors:"
{ $subsection distance }
{ $subsection v~ } ;
ABOUT: "math-vectors" ABOUT: "math-vectors"
@ -144,6 +147,10 @@ HELP: normalize
{ $values { "u" "a sequence of numbers, not all zero" } { "v" "a sequence of numbers" } } { $values { "u" "a sequence of numbers, not all zero" } { "v" "a sequence of numbers" } }
{ $description "Outputs a vector with the same direction as " { $snippet "u" } " but length 1." } ; { $description "Outputs a vector with the same direction as " { $snippet "u" } " but length 1." } ;
HELP: distance
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "x" "a non-negative real number" } }
{ $description "Outputs the Euclidean distance between two vectors." } ;
HELP: set-axis HELP: set-axis
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "axis" "a sequence of 0/1" } { "w" "a sequence of numbers" } } { $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "axis" "a sequence of 0/1" } { "w" "a sequence of numbers" } }
{ $description "Using " { $snippet "w" } " as a template, creates a new sequence containing corresponding elements from " { $snippet "u" } " in place of 0, and corresponding elements from " { $snippet "v" } " in place of 1." } { $description "Using " { $snippet "w" } " as a template, creates a new sequence containing corresponding elements from " { $snippet "u" } " in place of 0, and corresponding elements from " { $snippet "v" } " in place of 1." }

View File

@ -1,7 +1,8 @@
! Copyright (C) 2005, 2008 Slava Pestov. ! Copyright (C) 2005, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: arrays alien.c-types kernel sequences math math.functions USING: arrays alien.c-types kernel sequences math math.functions
hints math.order fry ; hints math.order fry combinators ;
QUALIFIED-WITH: alien.c-types c
IN: math.vectors IN: math.vectors
GENERIC: element-type ( obj -- c-type ) GENERIC: element-type ( obj -- c-type )
@ -31,13 +32,35 @@ GENERIC: element-type ( obj -- c-type )
[ [ not ] 2dip pick [ + ] [ - ] if ] 2map [ [ not ] 2dip pick [ + ] [ - ] if ] 2map
nip ; nip ;
<PRIVATE
: 2saturate-map ( u v quot -- w ) : 2saturate-map ( u v quot -- w )
pick element-type '[ @ _ c-type-clamp ] 2map ; inline pick element-type '[ @ _ c-type-clamp ] 2map ; inline
PRIVATE>
: vs+ ( u v -- w ) [ + ] 2saturate-map ; : vs+ ( u v -- w ) [ + ] 2saturate-map ;
: vs- ( u v -- w ) [ - ] 2saturate-map ; : vs- ( u v -- w ) [ - ] 2saturate-map ;
: vs* ( u v -- w ) [ * ] 2saturate-map ; : vs* ( u v -- w ) [ * ] 2saturate-map ;
: vabs ( u -- v ) [ abs ] map ;
: vsqrt ( u -- v ) [ sqrt ] map ;
<PRIVATE
: fp-bitwise-op ( x y seq quot -- z )
swap element-type {
{ c:double [ [ [ double>bits ] bi@ ] dip call bits>double ] }
{ c:float [ [ [ float>bits ] bi@ ] dip call bits>float ] }
[ drop call ]
} case ; inline
PRIVATE>
: vbitand ( u v -- w ) over '[ _ [ bitand ] fp-bitwise-op ] 2map ;
: vbitor ( u v -- w ) over '[ _ [ bitor ] fp-bitwise-op ] 2map ;
: vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ;
: vfloor ( v -- _v_ ) [ floor ] map ; : vfloor ( v -- _v_ ) [ floor ] map ;
: vceiling ( v -- ^v^ ) [ ceiling ] map ; : vceiling ( v -- ^v^ ) [ ceiling ] map ;
: vtruncate ( v -- -v- ) [ truncate ] map ; : vtruncate ( v -- -v- ) [ truncate ] map ;