From 78c949b9b7e4794fab831e8e9e4931783e3fcdb4 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Sun, 20 Sep 2009 17:43:16 -0500 Subject: [PATCH] math.vectors: add v+- word which is accelerated by SSE3 --- .../cfg/instructions/instructions.factor | 5 +++ .../compiler/cfg/intrinsics/intrinsics.factor | 1 + .../cfg/two-operand/two-operand.factor | 1 + basis/compiler/codegen/codegen.factor | 1 + .../tree/propagation/simd/simd.factor | 37 ++++++++----------- basis/cpu/architecture/architecture.factor | 2 + basis/cpu/x86/32/32.factor | 2 +- basis/cpu/x86/64/64.factor | 2 +- basis/cpu/x86/x86.factor | 22 ++++++++--- .../math/vectors/simd/functor/functor.factor | 3 ++ .../vectors/simd/intrinsics/intrinsics.factor | 2 + basis/math/vectors/simd/simd-docs.factor | 1 + .../specialization/specialization.factor | 1 + basis/math/vectors/vectors-docs.factor | 12 ++++++ basis/math/vectors/vectors-tests.factor | 4 +- basis/math/vectors/vectors.factor | 5 +++ 16 files changed, 71 insertions(+), 30 deletions(-) diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index 32e5d46c61..63297b9bdf 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -310,6 +310,11 @@ def: dst use: src1 src2 literal: rep ; +PURE-INSN: ##add-sub-vector +def: dst +use: src1 src2 +literal: rep ; + PURE-INSN: ##mul-vector def: dst use: src1 src2 diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index 0daab82395..5b3fd1b324 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -155,6 +155,7 @@ IN: compiler.cfg.intrinsics { { math.vectors.simd.intrinsics:assert-positive [ drop ] } { math.vectors.simd.intrinsics:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] 
} { math.vectors.simd.intrinsics:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] } diff --git a/basis/compiler/cfg/two-operand/two-operand.factor b/basis/compiler/cfg/two-operand/two-operand.factor index 20fa1d0b18..c275756046 100644 --- a/basis/compiler/cfg/two-operand/two-operand.factor +++ b/basis/compiler/cfg/two-operand/two-operand.factor @@ -48,6 +48,7 @@ UNION: two-operand-insn ##max-float ##add-vector ##sub-vector + ##add-sub-vector ##mul-vector ##div-vector ##min-vector diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index ddf5aa0e02..14246a3fbf 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -170,6 +170,7 @@ CODEGEN: ##gather-vector-4 %gather-vector-4 CODEGEN: ##box-vector %box-vector CODEGEN: ##add-vector %add-vector CODEGEN: ##sub-vector %sub-vector +CODEGEN: ##add-sub-vector %add-sub-vector CODEGEN: ##mul-vector %mul-vector CODEGEN: ##div-vector %div-vector CODEGEN: ##min-vector %min-vector diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index 42c1f35617..db39985c94 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -1,23 +1,24 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. 
-USING: accessors byte-arrays combinators fry +USING: accessors byte-arrays combinators fry sequences compiler.tree.propagation.info cpu.architecture kernel words math math.intervals math.vectors.simd.intrinsics ; IN: compiler.tree.propagation.simd -\ (simd-v+) { byte-array } "default-output-classes" set-word-prop - -\ (simd-v-) { byte-array } "default-output-classes" set-word-prop - -\ (simd-v*) { byte-array } "default-output-classes" set-word-prop - -\ (simd-v/) { byte-array } "default-output-classes" set-word-prop - -\ (simd-vmin) { byte-array } "default-output-classes" set-word-prop - -\ (simd-vmax) { byte-array } "default-output-classes" set-word-prop - -\ (simd-vsqrt) { byte-array } "default-output-classes" set-word-prop +{ + (simd-v+) + (simd-v-) + (simd-v+-) + (simd-v*) + (simd-v/) + (simd-vmin) + (simd-vmax) + (simd-vsqrt) + (simd-broadcast) + (simd-gather-2) + (simd-gather-4) + alien-vector +} [ { byte-array } "default-output-classes" set-word-prop ] each \ (simd-sum) [ nip dup literal?>> [ @@ -30,18 +31,10 @@ IN: compiler.tree.propagation.simd ] "outputs" set-word-prop -\ (simd-broadcast) { byte-array } "default-output-classes" set-word-prop - -\ (simd-gather-2) { byte-array } "default-output-classes" set-word-prop - -\ (simd-gather-4) { byte-array } "default-output-classes" set-word-prop - \ assert-positive [ real [0,inf] value-info-intersect ] "outputs" set-word-prop -\ alien-vector { byte-array } "default-output-classes" set-word-prop - ! If SIMD is not available, inline alien-vector and set-alien-vector ! 
to get a speedup : inline-unless-intrinsic ( word -- ) diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 61e4e2df37..331d459adf 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -182,6 +182,7 @@ HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- ) HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) HOOK: %add-vector cpu ( dst src1 src2 rep -- ) HOOK: %sub-vector cpu ( dst src1 src2 rep -- ) +HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- ) HOOK: %mul-vector cpu ( dst src1 src2 rep -- ) HOOK: %div-vector cpu ( dst src1 src2 rep -- ) HOOK: %min-vector cpu ( dst src1 src2 rep -- ) @@ -194,6 +195,7 @@ HOOK: %gather-vector-2-reps cpu ( -- reps ) HOOK: %gather-vector-4-reps cpu ( -- reps ) HOOK: %add-vector-reps cpu ( -- reps ) HOOK: %sub-vector-reps cpu ( -- reps ) +HOOK: %add-sub-vector-reps cpu ( -- reps ) HOOK: %mul-vector-reps cpu ( -- reps ) HOOK: %div-vector-reps cpu ( -- reps ) HOOK: %min-vector-reps cpu ( -- reps ) diff --git a/basis/cpu/x86/32/32.factor b/basis/cpu/x86/32/32.factor index 9939154512..172b500cd5 100755 --- a/basis/cpu/x86/32/32.factor +++ b/basis/cpu/x86/32/32.factor @@ -295,4 +295,4 @@ os windows? [ 4 "double" c-type (>>align) ] unless -"cpu.x86.features" require +check-sse diff --git a/basis/cpu/x86/64/64.factor b/basis/cpu/x86/64/64.factor index f4018b1508..3958ba5ec8 100644 --- a/basis/cpu/x86/64/64.factor +++ b/basis/cpu/x86/64/64.factor @@ -228,4 +228,4 @@ USE: vocabs.loader { [ os winnt? ] [ "cpu.x86.64.winnt" require ] } } cond -"cpu.x86.features" require +check-sse diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 322b123d99..a132947cf1 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -341,6 +341,17 @@ M: x86 %sub-vector-reps { sse2? 
{ double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } } available-reps ; +M: x86 %add-sub-vector ( dst src1 src2 rep -- ) + { + { float-4-rep [ ADDSUBPS ] } + { double-2-rep [ ADDSUBPD ] } + } case drop ; + +M: x86 %add-sub-vector-reps + { + { sse3? { float-4-rep double-2-rep } } + } available-reps ; + M: x86 %mul-vector ( dst src1 src2 rep -- ) { { float-4-rep [ MULPS ] } @@ -879,9 +890,10 @@ enable-min/max { 42 [ enable-sse3 ] } } case ; -[ { sse_version } compile ] with-optimizer +: check-sse ( -- ) + [ { sse_version } compile ] with-optimizer -"Checking for multimedia extensions: " write sse-version 30 min -[ sse-string write " detected" print ] -[ install-sse-check ] -[ enable-sse ] tri + "Checking for multimedia extensions: " write sse-version 30 min + [ sse-string write " detected" print ] + [ install-sse-check ] + [ enable-sse ] tri ; diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index 57126f1bf8..2141914d1c 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -42,6 +42,7 @@ MACRO: simd-boa ( rep class -- simd-array ) { { v+ (simd-v+) } { v- (simd-v-) } + { v+- (simd-v+-) } { v* (simd-v*) } { v/ (simd-v/) } { vmin (simd-vmin) } @@ -171,6 +172,7 @@ INSTANCE: A sequence \ A \ A-with \ A-rep H{ { v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] } + { v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] } { v- [ [ (simd-v-) ] \ A-vv->v-op execute ] } { v* [ [ (simd-v*) ] \ A-vv->v-op execute ] } { v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] } @@ -297,6 +299,7 @@ INSTANCE: A sequence \ A \ A-with \ A-rep H{ { v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] } { v- [ [ (simd-v-) ] \ A-vv->v-op execute ] } + { v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] } { v* [ [ (simd-v*) ] \ A-vv->v-op execute ] } { v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] } { vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] } diff --git 
a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index a7d019af81..6d39b9e70a 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -8,6 +8,7 @@ IN: math.vectors.simd.intrinsics ERROR: bad-simd-call ; : (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ; +: (simd-v+-) ( v1 v2 rep -- v3 ) bad-simd-call ; : (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ; : (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ; : (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ; @@ -67,6 +68,7 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? ) M: vector-rep supported-simd-op? { { \ (simd-v+) [ %add-vector-reps ] } + { \ (simd-v+-) [ %add-sub-vector-reps ] } { \ (simd-v-) [ %sub-vector-reps ] } { \ (simd-v*) [ %mul-vector-reps ] } { \ (simd-v/) [ %div-vector-reps ] } diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor index 42512feb6f..d108d70b26 100644 --- a/basis/math/vectors/simd/simd-docs.factor +++ b/basis/math/vectors/simd/simd-docs.factor @@ -162,6 +162,7 @@ $nl "It is best to avoid calling these primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "." 
{ $subsection (simd-v+) } { $subsection (simd-v-) } +{ $subsection (simd-v+-) } { $subsection (simd-v/) } { $subsection (simd-vmin) } { $subsection (simd-vmax) } diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor index 21ec9f64f3..1a85f5ade7 100644 --- a/basis/math/vectors/specialization/specialization.factor +++ b/basis/math/vectors/specialization/specialization.factor @@ -55,6 +55,7 @@ H{ { v* { +vector+ +vector+ -> +vector+ } } { v*n { +vector+ +scalar+ -> +vector+ } } { v+ { +vector+ +vector+ -> +vector+ } } + { v+- { +vector+ +vector+ -> +vector+ } } { v+n { +vector+ +scalar+ -> +vector+ } } { v- { +vector+ +vector+ -> +vector+ } } { v-n { +vector+ +scalar+ -> +vector+ } } diff --git a/basis/math/vectors/vectors-docs.factor b/basis/math/vectors/vectors-docs.factor index 7456597278..4f2f093216 100644 --- a/basis/math/vectors/vectors-docs.factor +++ b/basis/math/vectors/vectors-docs.factor @@ -17,6 +17,7 @@ $nl "Combining two vectors to form another vector with " { $link 2map } ":" { $subsection v+ } { $subsection v- } +{ $subsection v+- } { $subsection v* } { $subsection v/ } { $subsection vmax } @@ -57,6 +58,17 @@ HELP: v- { $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } } { $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise." } ; +HELP: v+- +{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } } +{ $description "Alternately subtracts and adds corresponding elements of " { $snippet "u" } " and " { $snippet "v" } " component-wise: elements at even indices of the result are " { $snippet "u-v" } " and elements at odd indices are " { $snippet "u+v" } "." } +{ $examples + { $example + "USING: math.vectors prettyprint ;" + "{ 1 2 3 } { 2 3 2 } v+- ." 
+ "{ -1 5 1 }" + } +} ; + HELP: [v-] { $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } } { $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise; any components which become negative are set to zero." } ; diff --git a/basis/math/vectors/vectors-tests.factor b/basis/math/vectors/vectors-tests.factor index 3e56644d3e..fc482815a9 100644 --- a/basis/math/vectors/vectors-tests.factor +++ b/basis/math/vectors/vectors-tests.factor @@ -17,4 +17,6 @@ USING: math.vectors tools.test ; [ 1.125 ] [ 0.0 1.0 2.0 4.0 { 0.5 0.25 } bilerp ] unit-test -[ 17 ] [ 0 1 2 3 4 5 6 7 { 1 2 3 } trilerp ] unit-test \ No newline at end of file +[ 17 ] [ 0 1 2 3 4 5 6 7 { 1 2 3 } trilerp ] unit-test + +[ { 0 3 2 5 4 } ] [ { 1 2 3 4 5 } { 1 1 1 1 1 } v+- ] unit-test \ No newline at end of file diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index dd48525b53..deda1dc505 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -24,6 +24,11 @@ IN: math.vectors : vmax ( u v -- w ) [ max ] 2map ; : vmin ( u v -- w ) [ min ] 2map ; +: v+- ( u v -- w ) + [ t ] 2dip + [ [ not ] 2dip pick [ + ] [ - ] if ] 2map + nip ; + : vfloor ( v -- _v_ ) [ floor ] map ; : vceiling ( v -- ^v^ ) [ ceiling ] map ; : vtruncate ( v -- -v- ) [ truncate ] map ;