math.vectors: add v+- word which is accelerated by SSE3

db4
Slava Pestov 2009-09-20 17:43:16 -05:00
parent 47d8763340
commit acea55c692
16 changed files with 71 additions and 30 deletions

View File

@ -310,6 +310,11 @@ def: dst
use: src1 src2
literal: rep ;
! ##add-sub-vector: pure instruction with one output (dst), two inputs
! (src1, src2) and a literal vector representation (rep). It is declared
! with the same def/use/literal layout as the ##mul-vector declaration
! that follows it.
PURE-INSN: ##add-sub-vector
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##mul-vector
def: dst
use: src1 src2

View File

@ -155,6 +155,7 @@ IN: compiler.cfg.intrinsics
{
{ math.vectors.simd.intrinsics:assert-positive [ drop ] }
{ math.vectors.simd.intrinsics:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }

View File

@ -48,6 +48,7 @@ UNION: two-operand-insn
##max-float
##add-vector
##sub-vector
##add-sub-vector
##mul-vector
##div-vector
##min-vector

View File

@ -170,6 +170,7 @@ CODEGEN: ##gather-vector-4 %gather-vector-4
CODEGEN: ##box-vector %box-vector
CODEGEN: ##add-vector %add-vector
CODEGEN: ##sub-vector %sub-vector
CODEGEN: ##add-sub-vector %add-sub-vector
CODEGEN: ##mul-vector %mul-vector
CODEGEN: ##div-vector %div-vector
CODEGEN: ##min-vector %min-vector

View File

@ -1,23 +1,24 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors byte-arrays combinators fry
USING: accessors byte-arrays combinators fry sequences
compiler.tree.propagation.info cpu.architecture kernel words math
math.intervals math.vectors.simd.intrinsics ;
IN: compiler.tree.propagation.simd
\ (simd-v+) { byte-array } "default-output-classes" set-word-prop
\ (simd-v-) { byte-array } "default-output-classes" set-word-prop
\ (simd-v*) { byte-array } "default-output-classes" set-word-prop
\ (simd-v/) { byte-array } "default-output-classes" set-word-prop
\ (simd-vmin) { byte-array } "default-output-classes" set-word-prop
\ (simd-vmax) { byte-array } "default-output-classes" set-word-prop
\ (simd-vsqrt) { byte-array } "default-output-classes" set-word-prop
! Table-driven declaration: every word listed in the array below gets
! { byte-array } stored under its "default-output-classes" word property.
! To give another word the same property, add it to this list.
{
(simd-v+)
(simd-v-)
(simd-v+-)
(simd-v*)
(simd-v/)
(simd-vmin)
(simd-vmax)
(simd-vsqrt)
(simd-broadcast)
(simd-gather-2)
(simd-gather-4)
alien-vector
} [ { byte-array } "default-output-classes" set-word-prop ] each
\ (simd-sum) [
nip dup literal?>> [
@ -30,18 +31,10 @@ IN: compiler.tree.propagation.simd
<class-info>
] "outputs" set-word-prop
\ (simd-broadcast) { byte-array } "default-output-classes" set-word-prop
\ (simd-gather-2) { byte-array } "default-output-classes" set-word-prop
\ (simd-gather-4) { byte-array } "default-output-classes" set-word-prop
! Stores a quotation under the "outputs" word property of assert-positive.
! The quotation intersects the value info it receives with the info built
! from class real and the interval [0,inf].
\ assert-positive [
real [0,inf] <class/interval-info> value-info-intersect
] "outputs" set-word-prop
\ alien-vector { byte-array } "default-output-classes" set-word-prop
! If SIMD is not available, inline alien-vector and set-alien-vector
! to get a speedup
: inline-unless-intrinsic ( word -- )

View File

@ -182,6 +182,7 @@ HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
HOOK: %div-vector cpu ( dst src1 src2 rep -- )
HOOK: %min-vector cpu ( dst src1 src2 rep -- )
@ -194,6 +195,7 @@ HOOK: %gather-vector-2-reps cpu ( -- reps )
HOOK: %gather-vector-4-reps cpu ( -- reps )
HOOK: %add-vector-reps cpu ( -- reps )
HOOK: %sub-vector-reps cpu ( -- reps )
HOOK: %add-sub-vector-reps cpu ( -- reps )
HOOK: %mul-vector-reps cpu ( -- reps )
HOOK: %div-vector-reps cpu ( -- reps )
HOOK: %min-vector-reps cpu ( -- reps )

View File

@ -295,4 +295,4 @@ os windows? [
4 "double" c-type (>>align)
] unless
"cpu.x86.features" require
check-sse

View File

@ -228,4 +228,4 @@ USE: vocabs.loader
{ [ os winnt? ] [ "cpu.x86.64.winnt" require ] }
} cond
"cpu.x86.features" require
check-sse

View File

@ -341,6 +341,17 @@ M: x86 %sub-vector-reps
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
! x86 implementation of %add-sub-vector: dispatches on rep and calls the
! ADDSUBPS assembler word for float-4-rep or ADDSUBPD for double-2-rep.
! The case has no default clause, so no other representation is handled.
! NOTE(review): the trailing drop discards the one operand the assembler
! word leaves on the stack -- presumably dst; confirm against the
! neighbouring vector methods.
M: x86 %add-sub-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ ADDSUBPS ] }
{ double-2-rep [ ADDSUBPD ] }
} case drop ;
! Representations offered for add-sub on x86: float-4-rep and
! double-2-rep, both listed under the sse3? feature test.
! NOTE(review): available-reps is defined outside this view; presumably it
! keeps only the entries whose feature test succeeds -- confirm.
M: x86 %add-sub-vector-reps
{
{ sse3? { float-4-rep double-2-rep } }
} available-reps ;
M: x86 %mul-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ MULPS ] }
@ -879,9 +890,10 @@ enable-min/max
{ 42 [ enable-sse3 ] }
} case ;
[ { sse_version } compile ] with-optimizer
! check-sse: compiles sse_version inside with-optimizer, writes the
! "Checking for multimedia extensions: " banner, then hands the value of
! sse-version (limited to at most 30 by "30 min") to three quotations via
! tri: print the detected level, install-sse-check, and enable-sse.
! NOTE(review): the four-line banner/tri sequence appears twice below; this
! view seems to interleave the pre-change top-level form with the new word
! body, and only the copy ending in ";" closes the definition -- confirm
! against the actual file.
: check-sse ( -- )
[ { sse_version } compile ] with-optimizer
"Checking for multimedia extensions: " write sse-version 30 min
[ sse-string write " detected" print ]
[ install-sse-check ]
[ enable-sse ] tri
"Checking for multimedia extensions: " write sse-version 30 min
[ sse-string write " detected" print ]
[ install-sse-check ]
[ enable-sse ] tri ;

View File

@ -42,6 +42,7 @@ MACRO: simd-boa ( rep class -- simd-array )
{
{ v+ (simd-v+) }
{ v- (simd-v-) }
{ v+- (simd-v+-) }
{ v* (simd-v*) }
{ v/ (simd-v/) }
{ vmin (simd-vmin) }
@ -171,6 +172,7 @@ INSTANCE: A sequence
\ A \ A-with \ A-rep H{
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
{ v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
@ -297,6 +299,7 @@ INSTANCE: A sequence
\ A \ A-with \ A-rep H{
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
{ v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
{ vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] }

View File

@ -8,6 +8,7 @@ IN: math.vectors.simd.intrinsics
ERROR: bad-simd-call ;
: (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ;
! Stub body for the add-sub primitive: calling it directly just invokes
! bad-simd-call, like the other (simd-...) stubs in this list.
: (simd-v+-) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ;
@ -67,6 +68,7 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
M: vector-rep supported-simd-op?
{
{ \ (simd-v+) [ %add-vector-reps ] }
{ \ (simd-v+-) [ %add-sub-vector-reps ] }
{ \ (simd-v-) [ %sub-vector-reps ] }
{ \ (simd-v*) [ %mul-vector-reps ] }
{ \ (simd-v/) [ %div-vector-reps ] }

View File

@ -162,6 +162,7 @@ $nl
"It is best to avoid calling these primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "."
{ $subsection (simd-v+) }
{ $subsection (simd-v-) }
{ $subsection (simd-v+-) }
{ $subsection (simd-v/) }
{ $subsection (simd-vmin) }
{ $subsection (simd-vmax) }

View File

@ -55,6 +55,7 @@ H{
{ v* { +vector+ +vector+ -> +vector+ } }
{ v*n { +vector+ +scalar+ -> +vector+ } }
{ v+ { +vector+ +vector+ -> +vector+ } }
{ v+- { +vector+ +vector+ -> +vector+ } }
{ v+n { +vector+ +scalar+ -> +vector+ } }
{ v- { +vector+ +vector+ -> +vector+ } }
{ v-n { +vector+ +scalar+ -> +vector+ } }

View File

@ -17,6 +17,7 @@ $nl
"Combining two vectors to form another vector with " { $link 2map } ":"
{ $subsection v+ }
{ $subsection v- }
{ $subsection v+- }
{ $subsection v* }
{ $subsection v/ }
{ $subsection vmax }
@ -57,6 +58,17 @@ HELP: v-
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
{ $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise." } ;
! The description states explicitly which positions are subtracted and
! which are added; the example below (1-2, 2+3, 3-2) illustrates the rule.
HELP: v+-
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
{ $description "Combines " { $snippet "u" } " and " { $snippet "v" } " component-wise, alternating between subtraction and addition: at even indices (0, 2, ...) the element of " { $snippet "v" } " is subtracted from the element of " { $snippet "u" } ", and at odd indices (1, 3, ...) the two elements are added." }
{ $examples
    { $example
        "USING: math.vectors prettyprint ;"
        "{ 1 2 3 } { 2 3 2 } v+- ."
        "{ -1 5 1 }"
    }
} ;
HELP: [v-]
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } }
{ $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise; any components which become negative are set to zero." } ;

View File

@ -17,4 +17,6 @@ USING: math.vectors tools.test ;
[ 1.125 ] [ 0.0 1.0 2.0 4.0 { 0.5 0.25 } bilerp ] unit-test
[ 17 ] [ 0 1 2 3 4 5 6 7 { 1 2 3 } trilerp ] unit-test
[ 17 ] [ 0 1 2 3 4 5 6 7 { 1 2 3 } trilerp ] unit-test
[ { 0 3 2 5 4 } ] [ { 1 2 3 4 5 } { 1 1 1 1 1 } v+- ] unit-test

View File

@ -24,6 +24,11 @@ IN: math.vectors
: vmax ( u v -- w ) [ max ] 2map ;
: vmin ( u v -- w ) [ min ] 2map ;
! Alternating subtract/add of two sequences: the result holds
! u[i] - v[i] at even indices and u[i] + v[i] at odd indices.
! A boolean flag sits on the stack beneath the sequences; it starts as t
! and is negated before each pair, so the first pair sees f (subtract).
: v+- ( u v -- w )
    t -rot
    [
        ! ( flag a b -- flag' r ): flip the flag, pick the operation from
        ! it, then put the flag back underneath the result.
        rot not [ [ + ] [ - ] if ] keep swap
    ] 2map
    nip ;
: vfloor ( v -- _v_ ) [ floor ] map ;
: vceiling ( v -- ^v^ ) [ ceiling ] map ;
: vtruncate ( v -- -v- ) [ truncate ] map ;