# Add a SIMD blend ("v?") intrinsic.
#
# What this patch does, file by file:
#   * compiler.cfg.instructions: new PURE-INSN ##blend-vector
#     (def: dst, use: mask src1 src2, temp: temp, literal: rep).
#   * compiler.cfg.intrinsics(.simd): (simd-v?) is routed to emit-blend-vector,
#     built on a new [ternary] helper that mirrors [binary] but takes 3inputs.
#   * compiler.codegen / cpu.architecture: ##blend-vector is emitted through the
#     new %blend-vector hook; %blend-vector-reps lists the supported reps.
#   * cpu.ppc: %blend-vector-reps is empty (no reps supported).
#   * cpu.x86: %blend-vector is three steps -- %and-vector of src1 and mask into
#     temp, %andn-vector of mask and src2 into dst, %or-vector of dst and temp
#     into dst. Presumably this yields (mask AND src1) OR ((NOT mask) AND src2);
#     the operand semantics of %andn-vector are not shown here -- TODO confirm.
#     NOTE(review): step two writes dst while src2 is still live; confirm that
#     dst can never alias src2 for this instruction.
#   * math.vectors.simd.functor: new element>boolean helper and an A-vvv->v-op
#     wrapper for both the single-register and the two-half (underlying1/2)
#     vector types; both op tables map the three-vector signature to it.
#   * math.vectors.simd.intrinsics / math.vectors.specialization: SIMD-OP v?
#     is registered with signature { +vector+ +vector+ +vector+ -> +vector+ }.
#   * math.vectors: vmask becomes a plain vbitand (inline).
#   * math.matrices.simd: boolean masks are now float-4{ ... } literals instead
#     of plain arrays.
#
# Review fix: the single-register op table previously mapped the three-vector
# signature to the two-vector wrapper A-vv->v-op; it now uses A-vvv->v-op, the
# same as the two-half table.
#
# Leading whitespace and blank context lines were reconstructed from the hunk
# line counts; if a hunk is rejected, retry with whitespace-insensitive
# matching (e.g. --ignore-whitespace).

diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor
index cf0f668db3..1dcfb4fd64 100644
--- a/basis/compiler/cfg/instructions/instructions.factor
+++ b/basis/compiler/cfg/instructions/instructions.factor
@@ -402,6 +402,12 @@ def: dst
 use: src1 src2
 literal: rep ;
 
+PURE-INSN: ##blend-vector
+def: dst
+use: mask src1 src2
+temp: temp
+literal: rep ;
+
 PURE-INSN: ##shl-vector
 def: dst
 use: src1 src2/scalar-rep
diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor
index 76dace1f28..e97b5f090a 100644
--- a/basis/compiler/cfg/intrinsics/intrinsics.factor
+++ b/basis/compiler/cfg/intrinsics/intrinsics.factor
@@ -181,6 +181,7 @@ IN: compiler.cfg.intrinsics
         { math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] }
         { math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] }
         { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-v?) [ emit-blend-vector ] }
         { math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
         { math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] }
     } enable-intrinsics ;
diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor
index 51eced4e35..d41d001159 100644
--- a/basis/compiler/cfg/intrinsics/simd/simd.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd.factor
@@ -38,6 +38,9 @@ MACRO: if-literals-match ( quots -- )
 : [binary] ( quot -- quot' )
     '[ [ ds-drop 2inputs ] dip @ ds-push ] ; inline
 
+: [ternary] ( quot -- quot' )
+    '[ [ ds-drop 3inputs ] dip @ ds-push ] ; inline
+
 : emit-binary-vector-op ( node quot -- )
     [binary] emit-vector-op ; inline
 
@@ -95,6 +98,10 @@ MACRO: if-literals-match ( quots -- )
     [ ^^select-vector ] [unary/param]
     { [ integer? ] [ representation? ] } if-literals-match ; inline
 
+: emit-blend-vector ( node -- )
+    [ ^^blend-vector ] [ternary]
+    { [ representation? ] } if-literals-match ; inline
+
 : emit-alien-vector ( node -- )
     dup [
         '[
diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor
index b0307f685d..b07c29e231 100755
--- a/basis/compiler/codegen/codegen.factor
+++ b/basis/compiler/codegen/codegen.factor
@@ -188,6 +188,7 @@ CODEGEN: ##and-vector %and-vector
 CODEGEN: ##andn-vector %andn-vector
 CODEGEN: ##or-vector %or-vector
 CODEGEN: ##xor-vector %xor-vector
+CODEGEN: ##blend-vector %blend-vector
 CODEGEN: ##shl-vector %shl-vector
 CODEGEN: ##shr-vector %shr-vector
 CODEGEN: ##integer>scalar %integer>scalar
diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor
index e2c2b15f2d..805f5ec158 100644
--- a/basis/compiler/tree/propagation/simd/simd.factor
+++ b/basis/compiler/tree/propagation/simd/simd.factor
@@ -28,6 +28,7 @@ IN: compiler.tree.propagation.simd
     (simd-with)
     (simd-gather-2)
     (simd-gather-4)
+    (simd-v?)
     alien-vector
 } [ { byte-array } "default-output-classes" set-word-prop ] each
 
diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor
index 3b1f57d08e..9e2e1f5ac3 100644
--- a/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@@ -242,6 +242,7 @@ HOOK: %and-vector cpu ( dst src1 src2 rep -- )
 HOOK: %andn-vector cpu ( dst src1 src2 rep -- )
 HOOK: %or-vector cpu ( dst src1 src2 rep -- )
 HOOK: %xor-vector cpu ( dst src1 src2 rep -- )
+HOOK: %blend-vector cpu ( dst mask src1 src2 temp rep -- )
 HOOK: %shl-vector cpu ( dst src1 src2 rep -- )
 HOOK: %shr-vector cpu ( dst src1 src2 rep -- )
 HOOK: %horizontal-shl-vector cpu ( dst src1 src2 rep -- )
@@ -275,6 +276,7 @@ HOOK: %and-vector-reps cpu ( -- reps )
 HOOK: %andn-vector-reps cpu ( -- reps )
 HOOK: %or-vector-reps cpu ( -- reps )
 HOOK: %xor-vector-reps cpu ( -- reps )
+HOOK: %blend-vector-reps cpu ( -- reps )
 HOOK: %shl-vector-reps cpu ( -- reps )
 HOOK: %shr-vector-reps cpu ( -- reps )
 HOOK: %horizontal-shl-vector-reps cpu ( -- reps )
diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor
index 006d38f384..8503ac83ea 100644
--- a/basis/cpu/ppc/ppc.factor
+++ b/basis/cpu/ppc/ppc.factor
@@ -286,6 +286,7 @@ M: ppc %and-vector-reps { } ;
 M: ppc %andn-vector-reps { } ;
 M: ppc %or-vector-reps { } ;
 M: ppc %xor-vector-reps { } ;
+M: ppc %blend-vector-reps { } ;
 M: ppc %shl-vector-reps { } ;
 M: ppc %shr-vector-reps { } ;
 M: ppc %horizontal-shl-vector-reps { } ;
diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor
index eaaab19662..414ba2b6de 100644
--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@@ -1011,6 +1011,17 @@ M: x86 %xor-vector-reps
         { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
     } available-reps ;
 
+M:: x86 %blend-vector ( dst mask src1 src2 temp rep -- )
+    temp src1 mask rep %and-vector
+    dst mask src2 rep %andn-vector
+    dst dst temp rep %or-vector ;
+
+M: x86 %blend-vector-reps
+    {
+        { sse? { float-4-rep } }
+        { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+    } available-reps ;
+
 M: x86 %shl-vector ( dst src1 src2 rep -- )
     [ two-operand ] keep
     {
diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor
index 6a7771c2c3..aea415a27c 100644
--- a/basis/math/vectors/simd/functor/functor.factor
+++ b/basis/math/vectors/simd/functor/functor.factor
@@ -30,6 +30,9 @@ ERROR: bad-length got expected ;
         [ nip ]
     } case ; inline
 
+: element>boolean ( elt class -- bool )
+    vector-false-value = not ; inline
+
 MACRO: simd-boa ( rep class -- simd-array )
     [ rep-components ] [ new ] bi* '[ _ _ nsequence ] ;
 
@@ -169,6 +172,7 @@ A{ DEFINES ${A}{
 SET-NTH [ T dup c:c-setter c:array-accessor ]
 
 A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
+A-vvv->v-op DEFINES-PRIVATE ${A}-vvv->v-op
 A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
 A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op
 A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op
@@ -235,6 +239,9 @@ INSTANCE: A sequence
 
 <PRIVATE
 
+: A-vvv->v-op ( v1 v2 v3 quot -- v4 )
+    [ [ underlying>> ] tri@ A-rep ] dip call \ A boa ; inline
+
 : A-vv->v-op ( v1 v2 quot -- v3 )
     [ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline
 
@@ -255,6 +262,7 @@ simd new
     \ A-with >>ctor
     \ A-rep >>rep
     {
+        { { +vector+ +vector+ +vector+ -> +vector+ } A-vvv->v-op }
         { { +vector+ +vector+ -> +vector+ } A-vv->v-op }
         { { +vector+ +scalar+ -> +vector+ } A-vn->v-op }
         { { +vector+ +literal+ -> +vector+ } A-vn->v-op }
@@ -316,6 +324,7 @@ A{ DEFINES ${A}{
 A-deref DEFINES-PRIVATE ${A}-deref
 
 A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
+A-vvv->v-op DEFINES-PRIVATE ${A}-vvv->v-op
 A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
 A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op
 A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op
@@ -383,6 +392,11 @@ M: A pprint* pprint-object ;
 
 INSTANCE: A sequence
 
+: A-vvv->v-op ( v1 v2 v3 quot -- v4 )
+    [ [ [ underlying1>> ] tri@ A-rep ] dip call ]
+    [ [ [ underlying2>> ] tri@ A-rep ] dip call ] 3bi
+    \ A boa ; inline
+
 : A-vv->v-op ( v1 v2 quot -- v3 )
     [ [ [ underlying1>> ] bi@ A-rep ] dip call ]
     [ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
@@ -411,6 +425,7 @@ simd new
     \ A-with >>ctor
     \ A-rep >>rep
     {
+        { { +vector+ +vector+ +vector+ -> +vector+ } A-vvv->v-op }
         { { +vector+ +vector+ -> +vector+ } A-vv->v-op }
         { { +vector+ +scalar+ -> +vector+ } A-vn->v-op }
         { { +vector+ +literal+ -> +vector+ } A-vn->v-op }
diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor
index 6008a20844..2f6d61bd53 100644
--- a/basis/math/vectors/simd/intrinsics/intrinsics.factor
+++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor
@@ -49,6 +49,7 @@ SIMD-OP: vrshift
 SIMD-OP: hlshift
 SIMD-OP: hrshift
 SIMD-OP: vshuffle
+SIMD-OP: v?
 
 : (simd-with) ( x rep -- v ) bad-simd-call ;
 : (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
@@ -121,6 +122,7 @@ M: vector-rep supported-simd-op?
         { \ (simd-vbitandn) [ %andn-vector-reps ] }
         { \ (simd-vbitor) [ %or-vector-reps ] }
         { \ (simd-vbitxor) [ %xor-vector-reps ] }
+        { \ (simd-v?) [ %blend-vector-reps ] }
         { \ (simd-vlshift) [ %shl-vector-reps ] }
         { \ (simd-vrshift) [ %shr-vector-reps ] }
         { \ (simd-hlshift) [ %horizontal-shl-vector-reps ] }
diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor
index ea9947a0c5..21d024a50e 100644
--- a/basis/math/vectors/specialization/specialization.factor
+++ b/basis/math/vectors/specialization/specialization.factor
@@ -92,6 +92,7 @@ H{
     { hrshift { +vector+ +literal+ -> +vector+ } }
     { vshuffle { +vector+ +literal+ -> +vector+ } }
     { vbroadcast { +vector+ +literal+ -> +vector+ } }
+    { v? { +vector+ +vector+ +vector+ -> +vector+ } }
 }
 
 PREDICATE: vector-word < word vector-words key? ;
@@ -159,4 +160,4 @@ ERROR: bad-vector-word word ;
 vector-words keys [
     [ vector-word-custom-inlining ]
     "custom-inlining" set-word-prop
-] each
\ No newline at end of file
+] each
diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor
index a3d51752bd..dee849cb7a 100644
--- a/basis/math/vectors/vectors.factor
+++ b/basis/math/vectors/vectors.factor
@@ -101,7 +101,7 @@ PRIVATE>
 : v? ( ? true false -- w )
     [ ? ] pick 3map-as ;
 
-: vmask ( u ? -- u' ) swap dup dup vbitxor v? ;
+: vmask ( u ? -- u' ) vbitand ; inline
 
 : vfloor ( u -- v ) [ floor ] map ;
 : vceiling ( u -- v ) [ ceiling ] map ;
diff --git a/extra/math/matrices/simd/simd.factor b/extra/math/matrices/simd/simd.factor
index 0c4c3e1866..bc213fec3a 100644
--- a/extra/math/matrices/simd/simd.factor
+++ b/extra/math/matrices/simd/simd.factor
@@ -121,7 +121,7 @@ TYPED:: m4^n ( m: matrix4 n: fixnum -- m^n: matrix4 )
 TYPED:: scale-matrix4 ( factors: float-4 -- matrix: matrix4 )
     matrix4 (struct) :> c
 
-    factors { t t t f } vmask :> factors'
+    factors float-4{ t t t f } vmask :> factors'
 
     factors' { 0 3 3 3 } vshuffle
     factors' { 3 1 3 3 } vshuffle
@@ -137,11 +137,11 @@ TYPED:: translation-matrix4 ( offset: float-4 -- matrix: matrix4 )
     matrix4 (struct) :> c
 
     float-4{ 0.0 0.0 0.0 1.0 } :> c4
-    { t t t f } offset c4 v? :> offset'
+    float-4{ t t t f } offset c4 v? :> offset'
 
-    offset' { 3 3 3 0 } vshuffle { t f f t } vmask
-    offset' { 3 3 3 1 } vshuffle { f t f t } vmask
-    offset' { 3 3 3 2 } vshuffle { f f t t } vmask
+    offset' { 3 3 3 0 } vshuffle float-4{ t f f t } vmask
+    offset' { 3 3 3 1 } vshuffle float-4{ f t f t } vmask
+    offset' { 3 3 3 2 } vshuffle float-4{ f f t t } vmask
     c4
 
     c set-rows ;
@@ -166,7 +166,7 @@ TYPED:: rotation-matrix4 ( axis: float-4 theta: float -- matrix: matrix4 )
     axis2 cc ones axis2 v- v* v+ :> diagonal
 
     axis { 0 0 1 3 } vshuffle axis { 1 2 2 3 } vshuffle v* 1-c v*
-    { t t t f } vmask :> triangle-a
+    float-4{ t t t f } vmask :> triangle-a
     ss { 2 1 0 3 } vshuffle triangle-sign v* :> triangle-b
     triangle-a triangle-b v+ :> triangle-lo
     triangle-a triangle-b v- :> triangle-hi
@@ -186,12 +186,12 @@ TYPED:: frustum-matrix4 ( xy: float-4 near: float far: float -- matrix: matrix4
     matrix4 (struct) :> c
 
     near near near far + 2 near far * * float-4-boa :> num
-    { t t f f } xy near far - float-4-with v? :> denom
+    float-4{ t t f f } xy near far - float-4-with v? :> denom
     num denom v/ :> fov
 
-    fov { 0 0 0 0 } vshuffle { t f f f } vmask
-    fov { 1 1 1 1 } vshuffle { f t f f } vmask
-    fov { 2 2 2 3 } vshuffle { f f t t } vmask
+    fov { 0 0 0 0 } vshuffle float-4{ t f f f } vmask
+    fov { 1 1 1 1 } vshuffle float-4{ f t f f } vmask
+    fov { 2 2 2 3 } vshuffle float-4{ f f t t } vmask
     float-4{ 0.0 0.0 -1.0 0.0 }
 
     c set-rows ;