add a %blend-vector intrinsic for v?
parent
e56cd5cc12
commit
7db7b63552
|
@ -402,6 +402,12 @@ def: dst
|
||||||
use: src1 src2
|
use: src1 src2
|
||||||
literal: rep ;
|
literal: rep ;
|
||||||
|
|
||||||
|
! Pure CFG instruction for a masked vector blend. It defines dst, reads
! mask, src1 and src2, needs one scratch register (temp), and carries the
! vector representation as a literal. Lowered by the %blend-vector hook.
PURE-INSN: ##blend-vector
|
||||||
|
def: dst
|
||||||
|
use: mask src1 src2
|
||||||
|
temp: temp
|
||||||
|
literal: rep ;
|
||||||
|
|
||||||
PURE-INSN: ##shl-vector
|
PURE-INSN: ##shl-vector
|
||||||
def: dst
|
def: dst
|
||||||
use: src1 src2/scalar-rep
|
use: src1 src2/scalar-rep
|
||||||
|
|
|
@ -181,6 +181,7 @@ IN: compiler.cfg.intrinsics
|
||||||
{ math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] }
|
{ math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] }
|
{ math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
|
||||||
|
{ math.vectors.simd.intrinsics:(simd-v?) [ emit-blend-vector ] }
|
||||||
{ math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
|
{ math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
|
||||||
{ math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] }
|
{ math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] }
|
||||||
} enable-intrinsics ;
|
} enable-intrinsics ;
|
||||||
|
|
|
@ -38,6 +38,9 @@ MACRO: if-literals-match ( quots -- )
|
||||||
: [binary] ( quot -- quot' )
|
: [binary] ( quot -- quot' )
|
||||||
'[ [ ds-drop 2inputs ] dip @ ds-push ] ; inline
|
'[ [ ds-drop 2inputs ] dip @ ds-push ] ; inline
|
||||||
|
|
||||||
|
! Three-input counterpart of [binary]. Builds a quotation that, with the top
! stack item held aside by dip, runs ds-drop followed by 3inputs, then calls
! quot and hands its result to ds-push.
! NOTE(review): presumably the item held aside is the literal rep and ds-drop
! discards its data-stack copy -- confirm against emit-vector-op.
: [ternary] ( quot -- quot' )
|
||||||
|
'[ [ ds-drop 3inputs ] dip @ ds-push ] ; inline
|
||||||
|
|
||||||
: emit-binary-vector-op ( node quot -- )
|
: emit-binary-vector-op ( node quot -- )
|
||||||
[binary] emit-vector-op ; inline
|
[binary] emit-vector-op ; inline
|
||||||
|
|
||||||
|
@ -95,6 +98,10 @@ MACRO: if-literals-match ( quots -- )
|
||||||
[ ^^select-vector ] [unary/param]
|
[ ^^select-vector ] [unary/param]
|
||||||
{ [ integer? ] [ representation? ] } if-literals-match ; inline
|
{ [ integer? ] [ representation? ] } if-literals-match ; inline
|
||||||
|
|
||||||
|
! Intrinsic expansion registered for (simd-v?): emits ^^blend-vector through
! the [ternary] wrapper, guarded by if-literals-match with a single
! representation? predicate.
! NOTE(review): presumably the node is compiled as an ordinary call when the
! rep input is not a literal representation -- confirm in if-literals-match.
: emit-blend-vector ( node -- )
|
||||||
|
[ ^^blend-vector ] [ternary]
|
||||||
|
{ [ representation? ] } if-literals-match ; inline
|
||||||
|
|
||||||
: emit-alien-vector ( node -- )
|
: emit-alien-vector ( node -- )
|
||||||
dup [
|
dup [
|
||||||
'[
|
'[
|
||||||
|
|
|
@ -188,6 +188,7 @@ CODEGEN: ##and-vector %and-vector
|
||||||
CODEGEN: ##andn-vector %andn-vector
|
CODEGEN: ##andn-vector %andn-vector
|
||||||
CODEGEN: ##or-vector %or-vector
|
CODEGEN: ##or-vector %or-vector
|
||||||
CODEGEN: ##xor-vector %xor-vector
|
CODEGEN: ##xor-vector %xor-vector
|
||||||
|
CODEGEN: ##blend-vector %blend-vector
|
||||||
CODEGEN: ##shl-vector %shl-vector
|
CODEGEN: ##shl-vector %shl-vector
|
||||||
CODEGEN: ##shr-vector %shr-vector
|
CODEGEN: ##shr-vector %shr-vector
|
||||||
CODEGEN: ##integer>scalar %integer>scalar
|
CODEGEN: ##integer>scalar %integer>scalar
|
||||||
|
|
|
@ -28,6 +28,7 @@ IN: compiler.tree.propagation.simd
|
||||||
(simd-with)
|
(simd-with)
|
||||||
(simd-gather-2)
|
(simd-gather-2)
|
||||||
(simd-gather-4)
|
(simd-gather-4)
|
||||||
|
(simd-v?)
|
||||||
alien-vector
|
alien-vector
|
||||||
} [ { byte-array } "default-output-classes" set-word-prop ] each
|
} [ { byte-array } "default-output-classes" set-word-prop ] each
|
||||||
|
|
||||||
|
|
|
@ -242,6 +242,7 @@ HOOK: %and-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %andn-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %andn-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %or-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %or-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %xor-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %xor-vector cpu ( dst src1 src2 rep -- )
|
||||||
|
HOOK: %blend-vector cpu ( dst mask src1 src2 temp rep -- )
|
||||||
HOOK: %shl-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %shl-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %shr-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %shr-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %horizontal-shl-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %horizontal-shl-vector cpu ( dst src1 src2 rep -- )
|
||||||
|
@ -275,6 +276,7 @@ HOOK: %and-vector-reps cpu ( -- reps )
|
||||||
HOOK: %andn-vector-reps cpu ( -- reps )
|
HOOK: %andn-vector-reps cpu ( -- reps )
|
||||||
HOOK: %or-vector-reps cpu ( -- reps )
|
HOOK: %or-vector-reps cpu ( -- reps )
|
||||||
HOOK: %xor-vector-reps cpu ( -- reps )
|
HOOK: %xor-vector-reps cpu ( -- reps )
|
||||||
|
HOOK: %blend-vector-reps cpu ( -- reps )
|
||||||
HOOK: %shl-vector-reps cpu ( -- reps )
|
HOOK: %shl-vector-reps cpu ( -- reps )
|
||||||
HOOK: %shr-vector-reps cpu ( -- reps )
|
HOOK: %shr-vector-reps cpu ( -- reps )
|
||||||
HOOK: %horizontal-shl-vector-reps cpu ( -- reps )
|
HOOK: %horizontal-shl-vector-reps cpu ( -- reps )
|
||||||
|
|
|
@ -286,6 +286,7 @@ M: ppc %and-vector-reps { } ;
|
||||||
M: ppc %andn-vector-reps { } ;
|
M: ppc %andn-vector-reps { } ;
|
||||||
M: ppc %or-vector-reps { } ;
|
M: ppc %or-vector-reps { } ;
|
||||||
M: ppc %xor-vector-reps { } ;
|
M: ppc %xor-vector-reps { } ;
|
||||||
|
M: ppc %blend-vector-reps { } ;
|
||||||
M: ppc %shl-vector-reps { } ;
|
M: ppc %shl-vector-reps { } ;
|
||||||
M: ppc %shr-vector-reps { } ;
|
M: ppc %shr-vector-reps { } ;
|
||||||
M: ppc %horizontal-shl-vector-reps { } ;
|
M: ppc %horizontal-shl-vector-reps { } ;
|
||||||
|
|
|
@ -1011,6 +1011,17 @@ M: x86 %xor-vector-reps
|
||||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
|
! x86 lowering of ##blend-vector, composed from three existing vector hooks:
!   temp <- %and-vector  of src1 and mask
!   dst  <- %andn-vector of mask and src2
!   dst  <- %or-vector   of dst and temp
! The first result is parked in temp so it survives while dst is computed.
! NOTE(review): presumably %andn-vector yields (NOT mask) AND src2, so each
! set mask bit selects src1 and each clear bit selects src2 -- confirm
! against the %andn-vector implementation.
M:: x86 %blend-vector ( dst mask src1 src2 temp rep -- )
|
||||||
|
temp src1 mask rep %and-vector
|
||||||
|
dst mask src2 rep %andn-vector
|
||||||
|
dst dst temp rep %or-vector ;
|
||||||
|
|
||||||
|
! Representations for which %blend-vector is offered on x86. The table pairs
! a CPU feature test with a list of reps: float-4-rep under sse?, the double
! and integer reps under sse2?. The table is handed to available-reps.
! NOTE(review): presumably available-reps keeps only the lists whose feature
! test succeeds -- confirm against its definition.
M: x86 %blend-vector-reps
|
||||||
|
{
|
||||||
|
{ sse? { float-4-rep } }
|
||||||
|
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %shl-vector ( dst src1 src2 rep -- )
|
M: x86 %shl-vector ( dst src1 src2 rep -- )
|
||||||
[ two-operand ] keep
|
[ two-operand ] keep
|
||||||
{
|
{
|
||||||
|
|
|
@ -30,6 +30,9 @@ ERROR: bad-length got expected ;
|
||||||
[ nip ]
|
[ nip ]
|
||||||
} case ; inline
|
} case ; inline
|
||||||
|
|
||||||
|
! Converts a vector element to a boolean: the result is true unless elt is
! equal (=) to the value vector-false-value returns for class.
: element>boolean ( elt class -- bool )
|
||||||
|
vector-false-value = not ; inline
|
||||||
|
|
||||||
MACRO: simd-boa ( rep class -- simd-array )
|
MACRO: simd-boa ( rep class -- simd-array )
|
||||||
[ rep-components ] [ new ] bi* '[ _ _ nsequence ] ;
|
[ rep-components ] [ new ] bi* '[ _ _ nsequence ] ;
|
||||||
|
|
||||||
|
@ -169,6 +172,7 @@ A{ DEFINES ${A}{
|
||||||
SET-NTH [ T dup c:c-setter c:array-accessor ]
|
SET-NTH [ T dup c:c-setter c:array-accessor ]
|
||||||
|
|
||||||
A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
|
A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
|
||||||
|
A-vvv->v-op DEFINES-PRIVATE ${A}-vvv->v-op
|
||||||
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
||||||
A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op
|
A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op
|
||||||
A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op
|
A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op
|
||||||
|
@ -235,6 +239,9 @@ INSTANCE: A sequence
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
|
||||||
|
! Three-input analogue of A-vv->v-op: replaces v1 v2 v3 with their
! underlying>> slots, pushes A-rep, calls quot on those four values, and
! wraps the result in a new A with boa.
: A-vvv->v-op ( v1 v2 v3 quot -- v4 )
|
||||||
|
[ [ underlying>> ] tri@ A-rep ] dip call \ A boa ; inline
|
||||||
|
|
||||||
: A-vv->v-op ( v1 v2 quot -- v3 )
|
: A-vv->v-op ( v1 v2 quot -- v3 )
|
||||||
[ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline
|
[ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline
|
||||||
|
|
||||||
|
@ -255,6 +262,7 @@ simd new
|
||||||
\ A-with >>ctor
|
\ A-with >>ctor
|
||||||
\ A-rep >>rep
|
\ A-rep >>rep
|
||||||
{
|
{
|
||||||
|
! A three-vector signature needs the three-vector wrapper (A-vvv->v-op); the
! binary wrapper would unwrap only two of the three inputs.
{ { +vector+ +vector+ +vector+ -> +vector+ } A-vvv->v-op }
|
||||||
{ { +vector+ +vector+ -> +vector+ } A-vv->v-op }
|
{ { +vector+ +vector+ -> +vector+ } A-vv->v-op }
|
||||||
{ { +vector+ +scalar+ -> +vector+ } A-vn->v-op }
|
{ { +vector+ +scalar+ -> +vector+ } A-vn->v-op }
|
||||||
{ { +vector+ +literal+ -> +vector+ } A-vn->v-op }
|
{ { +vector+ +literal+ -> +vector+ } A-vn->v-op }
|
||||||
|
@ -316,6 +324,7 @@ A{ DEFINES ${A}{
|
||||||
A-deref DEFINES-PRIVATE ${A}-deref
|
A-deref DEFINES-PRIVATE ${A}-deref
|
||||||
|
|
||||||
A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
|
A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
|
||||||
|
A-vvv->v-op DEFINES-PRIVATE ${A}-vvv->v-op
|
||||||
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
||||||
A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op
|
A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op
|
||||||
A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op
|
A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op
|
||||||
|
@ -383,6 +392,11 @@ M: A pprint* pprint-object ;
|
||||||
|
|
||||||
INSTANCE: A sequence
|
INSTANCE: A sequence
|
||||||
|
|
||||||
|
! Three-input wrapper for the vector type stored as two halves: calls quot
! once on the underlying1>> slots of v1 v2 v3 and once on their
! underlying2>> slots (each time followed by A-rep), then builds the result
! A from the two outputs with boa.
: A-vvv->v-op ( v1 v2 v3 quot -- v4 )
|
||||||
|
[ [ [ underlying1>> ] tri@ A-rep ] dip call ]
|
||||||
|
[ [ [ underlying2>> ] tri@ A-rep ] dip call ] 3bi
|
||||||
|
\ A boa ; inline
|
||||||
|
|
||||||
: A-vv->v-op ( v1 v2 quot -- v3 )
|
: A-vv->v-op ( v1 v2 quot -- v3 )
|
||||||
[ [ [ underlying1>> ] bi@ A-rep ] dip call ]
|
[ [ [ underlying1>> ] bi@ A-rep ] dip call ]
|
||||||
[ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
|
[ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
|
||||||
|
@ -411,6 +425,7 @@ simd new
|
||||||
\ A-with >>ctor
|
\ A-with >>ctor
|
||||||
\ A-rep >>rep
|
\ A-rep >>rep
|
||||||
{
|
{
|
||||||
|
{ { +vector+ +vector+ +vector+ -> +vector+ } A-vvv->v-op }
|
||||||
{ { +vector+ +vector+ -> +vector+ } A-vv->v-op }
|
{ { +vector+ +vector+ -> +vector+ } A-vv->v-op }
|
||||||
{ { +vector+ +scalar+ -> +vector+ } A-vn->v-op }
|
{ { +vector+ +scalar+ -> +vector+ } A-vn->v-op }
|
||||||
{ { +vector+ +literal+ -> +vector+ } A-vn->v-op }
|
{ { +vector+ +literal+ -> +vector+ } A-vn->v-op }
|
||||||
|
|
|
@ -49,6 +49,7 @@ SIMD-OP: vrshift
|
||||||
SIMD-OP: hlshift
|
SIMD-OP: hlshift
|
||||||
SIMD-OP: hrshift
|
SIMD-OP: hrshift
|
||||||
SIMD-OP: vshuffle
|
SIMD-OP: vshuffle
|
||||||
|
SIMD-OP: v?
|
||||||
|
|
||||||
: (simd-with) ( x rep -- v ) bad-simd-call ;
|
: (simd-with) ( x rep -- v ) bad-simd-call ;
|
||||||
: (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
|
: (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
|
||||||
|
@ -121,6 +122,7 @@ M: vector-rep supported-simd-op?
|
||||||
{ \ (simd-vbitandn) [ %andn-vector-reps ] }
|
{ \ (simd-vbitandn) [ %andn-vector-reps ] }
|
||||||
{ \ (simd-vbitor) [ %or-vector-reps ] }
|
{ \ (simd-vbitor) [ %or-vector-reps ] }
|
||||||
{ \ (simd-vbitxor) [ %xor-vector-reps ] }
|
{ \ (simd-vbitxor) [ %xor-vector-reps ] }
|
||||||
|
{ \ (simd-v?) [ %blend-vector-reps ] }
|
||||||
{ \ (simd-vlshift) [ %shl-vector-reps ] }
|
{ \ (simd-vlshift) [ %shl-vector-reps ] }
|
||||||
{ \ (simd-vrshift) [ %shr-vector-reps ] }
|
{ \ (simd-vrshift) [ %shr-vector-reps ] }
|
||||||
{ \ (simd-hlshift) [ %horizontal-shl-vector-reps ] }
|
{ \ (simd-hlshift) [ %horizontal-shl-vector-reps ] }
|
||||||
|
|
|
@ -92,6 +92,7 @@ H{
|
||||||
{ hrshift { +vector+ +literal+ -> +vector+ } }
|
{ hrshift { +vector+ +literal+ -> +vector+ } }
|
||||||
{ vshuffle { +vector+ +literal+ -> +vector+ } }
|
{ vshuffle { +vector+ +literal+ -> +vector+ } }
|
||||||
{ vbroadcast { +vector+ +literal+ -> +vector+ } }
|
{ vbroadcast { +vector+ +literal+ -> +vector+ } }
|
||||||
|
{ v? { +vector+ +vector+ +vector+ -> +vector+ } }
|
||||||
}
|
}
|
||||||
|
|
||||||
PREDICATE: vector-word < word vector-words key? ;
|
PREDICATE: vector-word < word vector-words key? ;
|
||||||
|
|
|
@ -101,7 +101,7 @@ PRIVATE>
|
||||||
|
|
||||||
: v? ( ? true false -- w ) [ ? ] pick 3map-as ;
|
: v? ( ? true false -- w ) [ ? ] pick 3map-as ;
|
||||||
|
|
||||||
: vmask ( u ? -- u' ) swap dup dup vbitxor v? ;
|
! Masks u with ? by taking their vbitand.
! NOTE(review): this relies on ? being a bit-mask vector rather than a
! sequence of t/f values -- confirm all callers pass such masks.
: vmask ( u ? -- u' ) vbitand ; inline
|
||||||
|
|
||||||
: vfloor ( u -- v ) [ floor ] map ;
|
: vfloor ( u -- v ) [ floor ] map ;
|
||||||
: vceiling ( u -- v ) [ ceiling ] map ;
|
: vceiling ( u -- v ) [ ceiling ] map ;
|
||||||
|
|
|
@ -121,7 +121,7 @@ TYPED:: m4^n ( m: matrix4 n: fixnum -- m^n: matrix4 )
|
||||||
TYPED:: scale-matrix4 ( factors: float-4 -- matrix: matrix4 )
|
TYPED:: scale-matrix4 ( factors: float-4 -- matrix: matrix4 )
|
||||||
matrix4 (struct) :> c
|
matrix4 (struct) :> c
|
||||||
|
|
||||||
factors { t t t f } vmask :> factors'
|
factors float-4{ t t t f } vmask :> factors'
|
||||||
|
|
||||||
factors' { 0 3 3 3 } vshuffle
|
factors' { 0 3 3 3 } vshuffle
|
||||||
factors' { 3 1 3 3 } vshuffle
|
factors' { 3 1 3 3 } vshuffle
|
||||||
|
@ -137,11 +137,11 @@ TYPED:: translation-matrix4 ( offset: float-4 -- matrix: matrix4 )
|
||||||
matrix4 (struct) :> c
|
matrix4 (struct) :> c
|
||||||
|
|
||||||
float-4{ 0.0 0.0 0.0 1.0 } :> c4
|
float-4{ 0.0 0.0 0.0 1.0 } :> c4
|
||||||
{ t t t f } offset c4 v? :> offset'
|
float-4{ t t t f } offset c4 v? :> offset'
|
||||||
|
|
||||||
offset' { 3 3 3 0 } vshuffle { t f f t } vmask
|
offset' { 3 3 3 0 } vshuffle float-4{ t f f t } vmask
|
||||||
offset' { 3 3 3 1 } vshuffle { f t f t } vmask
|
offset' { 3 3 3 1 } vshuffle float-4{ f t f t } vmask
|
||||||
offset' { 3 3 3 2 } vshuffle { f f t t } vmask
|
offset' { 3 3 3 2 } vshuffle float-4{ f f t t } vmask
|
||||||
c4
|
c4
|
||||||
|
|
||||||
c set-rows ;
|
c set-rows ;
|
||||||
|
@ -166,7 +166,7 @@ TYPED:: rotation-matrix4 ( axis: float-4 theta: float -- matrix: matrix4 )
|
||||||
axis2 cc ones axis2 v- v* v+ :> diagonal
|
axis2 cc ones axis2 v- v* v+ :> diagonal
|
||||||
|
|
||||||
axis { 0 0 1 3 } vshuffle axis { 1 2 2 3 } vshuffle v* 1-c v*
|
axis { 0 0 1 3 } vshuffle axis { 1 2 2 3 } vshuffle v* 1-c v*
|
||||||
{ t t t f } vmask :> triangle-a
|
float-4{ t t t f } vmask :> triangle-a
|
||||||
ss { 2 1 0 3 } vshuffle triangle-sign v* :> triangle-b
|
ss { 2 1 0 3 } vshuffle triangle-sign v* :> triangle-b
|
||||||
triangle-a triangle-b v+ :> triangle-lo
|
triangle-a triangle-b v+ :> triangle-lo
|
||||||
triangle-a triangle-b v- :> triangle-hi
|
triangle-a triangle-b v- :> triangle-hi
|
||||||
|
@ -186,12 +186,12 @@ TYPED:: frustum-matrix4 ( xy: float-4 near: float far: float -- matrix: matrix4
|
||||||
matrix4 (struct) :> c
|
matrix4 (struct) :> c
|
||||||
|
|
||||||
near near near far + 2 near far * * float-4-boa :> num
|
near near near far + 2 near far * * float-4-boa :> num
|
||||||
{ t t f f } xy near far - float-4-with v? :> denom
|
float-4{ t t f f } xy near far - float-4-with v? :> denom
|
||||||
num denom v/ :> fov
|
num denom v/ :> fov
|
||||||
|
|
||||||
fov { 0 0 0 0 } vshuffle { t f f f } vmask
|
fov { 0 0 0 0 } vshuffle float-4{ t f f f } vmask
|
||||||
fov { 1 1 1 1 } vshuffle { f t f f } vmask
|
fov { 1 1 1 1 } vshuffle float-4{ f t f f } vmask
|
||||||
fov { 2 2 2 3 } vshuffle { f f t t } vmask
|
fov { 2 2 2 3 } vshuffle float-4{ f f t t } vmask
|
||||||
float-4{ 0.0 0.0 -1.0 0.0 }
|
float-4{ 0.0 0.0 -1.0 0.0 }
|
||||||
|
|
||||||
c set-rows ;
|
c set-rows ;
|
||||||
|
|
Loading…
Reference in New Issue