math.vectors: add v+- word which is accelerated by SSE3
parent
47d8763340
commit
acea55c692
|
@ -310,6 +310,11 @@ def: dst
|
|||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##add-sub-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##mul-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
|
|
|
@ -155,6 +155,7 @@ IN: compiler.cfg.intrinsics
|
|||
{
|
||||
{ math.vectors.simd.intrinsics:assert-positive [ drop ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
|
||||
|
|
|
@ -48,6 +48,7 @@ UNION: two-operand-insn
|
|||
##max-float
|
||||
##add-vector
|
||||
##sub-vector
|
||||
##add-sub-vector
|
||||
##mul-vector
|
||||
##div-vector
|
||||
##min-vector
|
||||
|
|
|
@ -170,6 +170,7 @@ CODEGEN: ##gather-vector-4 %gather-vector-4
|
|||
CODEGEN: ##box-vector %box-vector
|
||||
CODEGEN: ##add-vector %add-vector
|
||||
CODEGEN: ##sub-vector %sub-vector
|
||||
CODEGEN: ##add-sub-vector %add-sub-vector
|
||||
CODEGEN: ##mul-vector %mul-vector
|
||||
CODEGEN: ##div-vector %div-vector
|
||||
CODEGEN: ##min-vector %min-vector
|
||||
|
|
|
@ -1,23 +1,24 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors byte-arrays combinators fry
|
||||
USING: accessors byte-arrays combinators fry sequences
|
||||
compiler.tree.propagation.info cpu.architecture kernel words math
|
||||
math.intervals math.vectors.simd.intrinsics ;
|
||||
IN: compiler.tree.propagation.simd
|
||||
|
||||
\ (simd-v+) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-v-) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-v*) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-v/) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-vmin) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-vmax) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-vsqrt) { byte-array } "default-output-classes" set-word-prop
|
||||
{
|
||||
(simd-v+)
|
||||
(simd-v-)
|
||||
(simd-v+-)
|
||||
(simd-v*)
|
||||
(simd-v/)
|
||||
(simd-vmin)
|
||||
(simd-vmax)
|
||||
(simd-vsqrt)
|
||||
(simd-broadcast)
|
||||
(simd-gather-2)
|
||||
(simd-gather-4)
|
||||
alien-vector
|
||||
} [ { byte-array } "default-output-classes" set-word-prop ] each
|
||||
|
||||
\ (simd-sum) [
|
||||
nip dup literal?>> [
|
||||
|
@ -30,18 +31,10 @@ IN: compiler.tree.propagation.simd
|
|||
<class-info>
|
||||
] "outputs" set-word-prop
|
||||
|
||||
\ (simd-broadcast) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-gather-2) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-gather-4) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ assert-positive [
|
||||
real [0,inf] <class/interval-info> value-info-intersect
|
||||
] "outputs" set-word-prop
|
||||
|
||||
\ alien-vector { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
! If SIMD is not available, inline alien-vector and set-alien-vector
|
||||
! to get a speedup
|
||||
: inline-unless-intrinsic ( word -- )
|
||||
|
|
|
@ -182,6 +182,7 @@ HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
|
|||
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
|
||||
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %div-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %min-vector cpu ( dst src1 src2 rep -- )
|
||||
|
@ -194,6 +195,7 @@ HOOK: %gather-vector-2-reps cpu ( -- reps )
|
|||
HOOK: %gather-vector-4-reps cpu ( -- reps )
|
||||
HOOK: %add-vector-reps cpu ( -- reps )
|
||||
HOOK: %sub-vector-reps cpu ( -- reps )
|
||||
HOOK: %add-sub-vector-reps cpu ( -- reps )
|
||||
HOOK: %mul-vector-reps cpu ( -- reps )
|
||||
HOOK: %div-vector-reps cpu ( -- reps )
|
||||
HOOK: %min-vector-reps cpu ( -- reps )
|
||||
|
|
|
@ -295,4 +295,4 @@ os windows? [
|
|||
4 "double" c-type (>>align)
|
||||
] unless
|
||||
|
||||
"cpu.x86.features" require
|
||||
check-sse
|
||||
|
|
|
@ -228,4 +228,4 @@ USE: vocabs.loader
|
|||
{ [ os winnt? ] [ "cpu.x86.64.winnt" require ] }
|
||||
} cond
|
||||
|
||||
"cpu.x86.features" require
|
||||
check-sse
|
||||
|
|
|
@ -341,6 +341,17 @@ M: x86 %sub-vector-reps
|
|||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %add-sub-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ float-4-rep [ ADDSUBPS ] }
|
||||
{ double-2-rep [ ADDSUBPD ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %add-sub-vector-reps
|
||||
{
|
||||
{ sse3? { float-4-rep double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %mul-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ float-4-rep [ MULPS ] }
|
||||
|
@ -879,9 +890,10 @@ enable-min/max
|
|||
{ 42 [ enable-sse3 ] }
|
||||
} case ;
|
||||
|
||||
[ { sse_version } compile ] with-optimizer
|
||||
: check-sse ( -- )
|
||||
[ { sse_version } compile ] with-optimizer
|
||||
|
||||
"Checking for multimedia extensions: " write sse-version 30 min
|
||||
[ sse-string write " detected" print ]
|
||||
[ install-sse-check ]
|
||||
[ enable-sse ] tri
|
||||
"Checking for multimedia extensions: " write sse-version 30 min
|
||||
[ sse-string write " detected" print ]
|
||||
[ install-sse-check ]
|
||||
[ enable-sse ] tri ;
|
||||
|
|
|
@ -42,6 +42,7 @@ MACRO: simd-boa ( rep class -- simd-array )
|
|||
{
|
||||
{ v+ (simd-v+) }
|
||||
{ v- (simd-v-) }
|
||||
{ v+- (simd-v+-) }
|
||||
{ v* (simd-v*) }
|
||||
{ v/ (simd-v/) }
|
||||
{ vmin (simd-vmin) }
|
||||
|
@ -171,6 +172,7 @@ INSTANCE: A sequence
|
|||
|
||||
\ A \ A-with \ A-rep H{
|
||||
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
|
||||
{ v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
|
||||
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
|
||||
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
|
||||
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
|
||||
|
@ -297,6 +299,7 @@ INSTANCE: A sequence
|
|||
\ A \ A-with \ A-rep H{
|
||||
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
|
||||
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
|
||||
{ v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
|
||||
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
|
||||
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
|
||||
{ vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] }
|
||||
|
|
|
@ -8,6 +8,7 @@ IN: math.vectors.simd.intrinsics
|
|||
ERROR: bad-simd-call ;
|
||||
|
||||
: (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-v+-) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
|
@ -67,6 +68,7 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
|
|||
M: vector-rep supported-simd-op?
|
||||
{
|
||||
{ \ (simd-v+) [ %add-vector-reps ] }
|
||||
{ \ (simd-v+-) [ %add-sub-vector-reps ] }
|
||||
{ \ (simd-v-) [ %sub-vector-reps ] }
|
||||
{ \ (simd-v*) [ %mul-vector-reps ] }
|
||||
{ \ (simd-v/) [ %div-vector-reps ] }
|
||||
|
|
|
@ -162,6 +162,7 @@ $nl
|
|||
"It is best to avoid calling these primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "."
|
||||
{ $subsection (simd-v+) }
|
||||
{ $subsection (simd-v-) }
|
||||
{ $subsection (simd-v+-) }
|
||||
{ $subsection (simd-v/) }
|
||||
{ $subsection (simd-vmin) }
|
||||
{ $subsection (simd-vmax) }
|
||||
|
|
|
@ -55,6 +55,7 @@ H{
|
|||
{ v* { +vector+ +vector+ -> +vector+ } }
|
||||
{ v*n { +vector+ +scalar+ -> +vector+ } }
|
||||
{ v+ { +vector+ +vector+ -> +vector+ } }
|
||||
{ v+- { +vector+ +vector+ -> +vector+ } }
|
||||
{ v+n { +vector+ +scalar+ -> +vector+ } }
|
||||
{ v- { +vector+ +vector+ -> +vector+ } }
|
||||
{ v-n { +vector+ +scalar+ -> +vector+ } }
|
||||
|
|
|
@ -17,6 +17,7 @@ $nl
|
|||
"Combining two vectors to form another vector with " { $link 2map } ":"
|
||||
{ $subsection v+ }
|
||||
{ $subsection v- }
|
||||
{ $subsection v+- }
|
||||
{ $subsection v* }
|
||||
{ $subsection v/ }
|
||||
{ $subsection vmax }
|
||||
|
@ -57,6 +58,17 @@ HELP: v-
|
|||
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
|
||||
{ $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise." } ;
|
||||
|
||||
HELP: v+-
|
||||
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
|
||||
{ $description "Adds and subtracts alternate elements of " { $snippet "v" } " and " { $snippet "u" } " component-wise." }
|
||||
{ $examples
|
||||
{ $example
|
||||
"USING: math.vectors prettyprint ;"
|
||||
"{ 1 2 3 } { 2 3 2 } v+- ."
|
||||
"{ -1 5 1 }"
|
||||
}
|
||||
} ;
|
||||
|
||||
HELP: [v-]
|
||||
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } }
|
||||
{ $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise; any components which become negative are set to zero." } ;
|
||||
|
|
|
@ -17,4 +17,6 @@ USING: math.vectors tools.test ;
|
|||
|
||||
[ 1.125 ] [ 0.0 1.0 2.0 4.0 { 0.5 0.25 } bilerp ] unit-test
|
||||
|
||||
[ 17 ] [ 0 1 2 3 4 5 6 7 { 1 2 3 } trilerp ] unit-test
|
||||
[ 17 ] [ 0 1 2 3 4 5 6 7 { 1 2 3 } trilerp ] unit-test
|
||||
|
||||
[ { 0 3 2 5 4 } ] [ { 1 2 3 4 5 } { 1 1 1 1 1 } v+- ] unit-test
|
|
@ -24,6 +24,11 @@ IN: math.vectors
|
|||
: vmax ( u v -- w ) [ max ] 2map ;
|
||||
: vmin ( u v -- w ) [ min ] 2map ;
|
||||
|
||||
: v+- ( u v -- w )
|
||||
[ t ] 2dip
|
||||
[ [ not ] 2dip pick [ + ] [ - ] if ] 2map
|
||||
nip ;
|
||||
|
||||
: vfloor ( v -- _v_ ) [ floor ] map ;
|
||||
: vceiling ( v -- ^v^ ) [ ceiling ] map ;
|
||||
: vtruncate ( v -- -v- ) [ truncate ] map ;
|
||||
|
|
Loading…
Reference in New Issue