add a %blend-vector intrinsic for v?
							parent
							
								
									e56cd5cc12
								
							
						
					
					
						commit
						7db7b63552
					
				| 
						 | 
				
			
			@ -402,6 +402,12 @@ def: dst
 | 
			
		|||
use: src1 src2
 | 
			
		||||
literal: rep ;
 | 
			
		||||
 | 
			
		||||
! Vector blend instruction: defines dst, uses mask src1 src2, needs one temp, and carries a literal rep.
PURE-INSN: ##blend-vector
 | 
			
		||||
def: dst
 | 
			
		||||
use: mask src1 src2
 | 
			
		||||
temp: temp
 | 
			
		||||
literal: rep ;
 | 
			
		||||
 | 
			
		||||
PURE-INSN: ##shl-vector
 | 
			
		||||
def: dst
 | 
			
		||||
use: src1 src2/scalar-rep
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -181,6 +181,7 @@ IN: compiler.cfg.intrinsics
 | 
			
		|||
        { math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] }
 | 
			
		||||
        { math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] }
 | 
			
		||||
        { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
 | 
			
		||||
        { math.vectors.simd.intrinsics:(simd-v?) [ emit-blend-vector ] }
 | 
			
		||||
        { math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
 | 
			
		||||
        { math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] }
 | 
			
		||||
    } enable-intrinsics ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -38,6 +38,9 @@ MACRO: if-literals-match ( quots -- )
 | 
			
		|||
: [binary] ( quot -- quot' )
 | 
			
		||||
    '[ [ ds-drop 2inputs ] dip @ ds-push ] ; inline
 | 
			
		||||
 | 
			
		||||
! Three-input counterpart of [binary]: the wrapped quot runs after ds-drop 3inputs and its result is pushed with ds-push.
: [ternary] ( quot -- quot' )
 | 
			
		||||
    '[ [ ds-drop 3inputs ] dip @ ds-push ] ; inline
 | 
			
		||||
 | 
			
		||||
: emit-binary-vector-op ( node quot -- )
 | 
			
		||||
    [binary] emit-vector-op ; inline
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -95,6 +98,10 @@ MACRO: if-literals-match ( quots -- )
 | 
			
		|||
    [ ^^select-vector ] [unary/param]
 | 
			
		||||
    { [ integer? ] [ representation? ] } if-literals-match ; inline
 | 
			
		||||
 | 
			
		||||
! Emits ^^blend-vector through the [ternary] wrapper, guarded by if-literals-match with a representation? check.
: emit-blend-vector ( node -- )
 | 
			
		||||
    [ ^^blend-vector ] [ternary]
 | 
			
		||||
    { [ representation? ] } if-literals-match ; inline
 | 
			
		||||
 | 
			
		||||
: emit-alien-vector ( node -- )
 | 
			
		||||
    dup [
 | 
			
		||||
        '[
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -188,6 +188,7 @@ CODEGEN: ##and-vector %and-vector
 | 
			
		|||
CODEGEN: ##andn-vector %andn-vector
 | 
			
		||||
CODEGEN: ##or-vector %or-vector
 | 
			
		||||
CODEGEN: ##xor-vector %xor-vector
 | 
			
		||||
CODEGEN: ##blend-vector %blend-vector
 | 
			
		||||
CODEGEN: ##shl-vector %shl-vector
 | 
			
		||||
CODEGEN: ##shr-vector %shr-vector
 | 
			
		||||
CODEGEN: ##integer>scalar %integer>scalar
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -28,6 +28,7 @@ IN: compiler.tree.propagation.simd
 | 
			
		|||
    (simd-with)
 | 
			
		||||
    (simd-gather-2)
 | 
			
		||||
    (simd-gather-4)
 | 
			
		||||
    (simd-v?)
 | 
			
		||||
    alien-vector
 | 
			
		||||
} [ { byte-array } "default-output-classes" set-word-prop ] each
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -242,6 +242,7 @@ HOOK: %and-vector cpu ( dst src1 src2 rep -- )
 | 
			
		|||
HOOK: %andn-vector cpu ( dst src1 src2 rep -- )
 | 
			
		||||
HOOK: %or-vector cpu ( dst src1 src2 rep -- )
 | 
			
		||||
HOOK: %xor-vector cpu ( dst src1 src2 rep -- )
 | 
			
		||||
! CPU hook backing ##blend-vector (see its CODEGEN: mapping); unlike the neighbouring binary hooks it also takes mask and temp.
HOOK: %blend-vector cpu ( dst mask src1 src2 temp rep -- )
 | 
			
		||||
HOOK: %shl-vector cpu ( dst src1 src2 rep -- )
 | 
			
		||||
HOOK: %shr-vector cpu ( dst src1 src2 rep -- )
 | 
			
		||||
HOOK: %horizontal-shl-vector cpu ( dst src1 src2 rep -- )
 | 
			
		||||
| 
						 | 
				
			
			@ -275,6 +276,7 @@ HOOK: %and-vector-reps cpu ( -- reps )
 | 
			
		|||
HOOK: %andn-vector-reps cpu ( -- reps )
 | 
			
		||||
HOOK: %or-vector-reps cpu ( -- reps )
 | 
			
		||||
HOOK: %xor-vector-reps cpu ( -- reps )
 | 
			
		||||
! CPU hook returning reps; it is the entry looked up for (simd-v?) in supported-simd-op?.
HOOK: %blend-vector-reps cpu ( -- reps )
 | 
			
		||||
HOOK: %shl-vector-reps cpu ( -- reps )
 | 
			
		||||
HOOK: %shr-vector-reps cpu ( -- reps )
 | 
			
		||||
HOOK: %horizontal-shl-vector-reps cpu ( -- reps )
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -286,6 +286,7 @@ M: ppc %and-vector-reps { } ;
 | 
			
		|||
M: ppc %andn-vector-reps { } ;
 | 
			
		||||
M: ppc %or-vector-reps { } ;
 | 
			
		||||
M: ppc %xor-vector-reps { } ;
 | 
			
		||||
! ppc reports an empty rep list for the blend.
M: ppc %blend-vector-reps { } ;
 | 
			
		||||
M: ppc %shl-vector-reps { } ;
 | 
			
		||||
M: ppc %shr-vector-reps { } ;
 | 
			
		||||
M: ppc %horizontal-shl-vector-reps { } ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1011,6 +1011,17 @@ M: x86 %xor-vector-reps
 | 
			
		|||
        { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
 | 
			
		||||
    } available-reps ;
 | 
			
		||||
 | 
			
		||||
! Blend composed from existing vector ops, in this order:
!   temp = src1 and mask; dst = mask andn src2; dst = dst or temp.
! NOTE(review): assumes %andn-vector yields (not mask) and src2, making this a per-bit select — confirm.
M:: x86 %blend-vector ( dst mask src1 src2 temp rep -- )
 | 
			
		||||
    temp src1 mask rep %and-vector
 | 
			
		||||
    dst  mask src2 rep %andn-vector
 | 
			
		||||
    dst  dst  temp rep %or-vector ;
 | 
			
		||||
 | 
			
		||||
! Reps offered for the blend on x86: float-4-rep is listed under sse?, the remaining reps under sse2?.
M: x86 %blend-vector-reps
 | 
			
		||||
    {
 | 
			
		||||
        { sse? { float-4-rep } }
 | 
			
		||||
        { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
 | 
			
		||||
    } available-reps ;
 | 
			
		||||
 | 
			
		||||
M: x86 %shl-vector ( dst src1 src2 rep -- )
 | 
			
		||||
    [ two-operand ] keep
 | 
			
		||||
    {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -30,6 +30,9 @@ ERROR: bad-length got expected ;
 | 
			
		|||
        [ nip ]
 | 
			
		||||
    } case ; inline
 | 
			
		||||
 | 
			
		||||
! True unless elt equals the value that vector-false-value produces for class.
! NOTE(review): assumes vector-false-value is ( class -- value ) — confirm.
: element>boolean ( elt class -- bool )
 | 
			
		||||
    vector-false-value = not ; inline
 | 
			
		||||
 | 
			
		||||
MACRO: simd-boa ( rep class -- simd-array )
 | 
			
		||||
    [ rep-components ] [ new ] bi* '[ _ _ nsequence ] ;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -169,6 +172,7 @@ A{           DEFINES ${A}{
 | 
			
		|||
SET-NTH      [ T dup c:c-setter c:array-accessor ]
 | 
			
		||||
 | 
			
		||||
A-rep        [ A name>> "-rep" append "cpu.architecture" lookup ]
 | 
			
		||||
A-vvv->v-op  DEFINES-PRIVATE ${A}-vvv->v-op
 | 
			
		||||
A-vv->v-op   DEFINES-PRIVATE ${A}-vv->v-op
 | 
			
		||||
A-vn->v-op   DEFINES-PRIVATE ${A}-vn->v-op
 | 
			
		||||
A-vv->n-op   DEFINES-PRIVATE ${A}-vv->n-op
 | 
			
		||||
| 
						 | 
				
			
			@ -235,6 +239,9 @@ INSTANCE: A sequence
 | 
			
		|||
 | 
			
		||||
<PRIVATE
 | 
			
		||||
 | 
			
		||||
! Single-underlying variant: quot receives the three underlying>> values plus A-rep, and its result is wrapped with \ A boa.
: A-vvv->v-op ( v1 v2 v3 quot -- v4 )
 | 
			
		||||
    [ [ underlying>> ] tri@ A-rep ] dip call \ A boa ; inline
 | 
			
		||||
 | 
			
		||||
: A-vv->v-op ( v1 v2 quot -- v3 )
 | 
			
		||||
    [ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -255,6 +262,7 @@ simd new
 | 
			
		|||
    \ A-with >>ctor
 | 
			
		||||
    \ A-rep >>rep
 | 
			
		||||
    {
 | 
			
		||||
        ! Three-vector signatures must dispatch to the three-input wrapper so that all
        ! three operands are unwrapped; A-vv->v-op only unwraps the top two.
        { { +vector+ +vector+ +vector+ -> +vector+ } A-vvv->v-op }
 | 
			
		||||
        { { +vector+ +vector+ -> +vector+ } A-vv->v-op }
 | 
			
		||||
        { { +vector+ +scalar+ -> +vector+ } A-vn->v-op }
 | 
			
		||||
        { { +vector+ +literal+ -> +vector+ } A-vn->v-op }
 | 
			
		||||
| 
						 | 
				
			
			@ -316,6 +324,7 @@ A{           DEFINES ${A}{
 | 
			
		|||
A-deref      DEFINES-PRIVATE ${A}-deref
 | 
			
		||||
 | 
			
		||||
A-rep        [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
 | 
			
		||||
A-vvv->v-op  DEFINES-PRIVATE ${A}-vvv->v-op
 | 
			
		||||
A-vv->v-op   DEFINES-PRIVATE ${A}-vv->v-op
 | 
			
		||||
A-vn->v-op   DEFINES-PRIVATE ${A}-vn->v-op
 | 
			
		||||
A-vv->n-op   DEFINES-PRIVATE ${A}-vv->n-op
 | 
			
		||||
| 
						 | 
				
			
			@ -383,6 +392,11 @@ M: A pprint* pprint-object ;
 | 
			
		|||
 | 
			
		||||
INSTANCE: A sequence
 | 
			
		||||
 | 
			
		||||
! Three-vector op on the two-half representation: quot is applied once to the
! underlying1>> slots and once to the underlying2>> slots of v1 v2 v3, each time
! with A-rep pushed after them, and the two results are combined with \ A boa.
! All four inputs (v1 v2 v3 quot) are needed again for the second pass, so the
! first pass runs under 4 nkeep; 3bi would preserve only v2 v3 quot and feed the
! first pass's result to underlying2>> in place of v1.
: A-vvv->v-op ( v1 v2 v3 quot -- v4 )
    [ [ [ underlying1>> ] tri@ A-rep ] dip call ] 4 nkeep
    [ [ underlying2>> ] tri@ A-rep ] dip call
    \ A boa ; inline
 | 
			
		||||
 | 
			
		||||
: A-vv->v-op ( v1 v2 quot -- v3 )
 | 
			
		||||
    [ [ [ underlying1>> ] bi@ A-rep ] dip call ]
 | 
			
		||||
    [ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
 | 
			
		||||
| 
						 | 
				
			
			@ -411,6 +425,7 @@ simd new
 | 
			
		|||
    \ A-with >>ctor
 | 
			
		||||
    \ A-rep >>rep
 | 
			
		||||
    {
 | 
			
		||||
        { { +vector+ +vector+ +vector+ -> +vector+ } A-vvv->v-op }
 | 
			
		||||
        { { +vector+ +vector+ -> +vector+ } A-vv->v-op }
 | 
			
		||||
        { { +vector+ +scalar+ -> +vector+ } A-vn->v-op }
 | 
			
		||||
        { { +vector+ +literal+ -> +vector+ } A-vn->v-op }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -49,6 +49,7 @@ SIMD-OP: vrshift
 | 
			
		|||
SIMD-OP: hlshift
 | 
			
		||||
SIMD-OP: hrshift
 | 
			
		||||
SIMD-OP: vshuffle
 | 
			
		||||
SIMD-OP: v?
 | 
			
		||||
 | 
			
		||||
: (simd-with) ( x rep -- v ) bad-simd-call ;
 | 
			
		||||
: (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
 | 
			
		||||
| 
						 | 
				
			
			@ -121,6 +122,7 @@ M: vector-rep supported-simd-op?
 | 
			
		|||
        { \ (simd-vbitandn) [ %andn-vector-reps           ] }
 | 
			
		||||
        { \ (simd-vbitor)   [ %or-vector-reps             ] }
 | 
			
		||||
        { \ (simd-vbitxor)  [ %xor-vector-reps            ] }
 | 
			
		||||
        { \ (simd-v?)       [ %blend-vector-reps          ] }
 | 
			
		||||
        { \ (simd-vlshift)  [ %shl-vector-reps            ] }
 | 
			
		||||
        { \ (simd-vrshift)  [ %shr-vector-reps            ] }
 | 
			
		||||
        { \ (simd-hlshift)  [ %horizontal-shl-vector-reps ] }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -92,6 +92,7 @@ H{
 | 
			
		|||
    { hrshift { +vector+ +literal+ -> +vector+ } }
 | 
			
		||||
    { vshuffle { +vector+ +literal+ -> +vector+ } }
 | 
			
		||||
    { vbroadcast { +vector+ +literal+ -> +vector+ } }
 | 
			
		||||
    { v? { +vector+ +vector+ +vector+ -> +vector+ } }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
PREDICATE: vector-word < word vector-words key? ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -101,7 +101,7 @@ PRIVATE>
 | 
			
		|||
 | 
			
		||||
! Elementwise choice: maps ? over the three sequences, passing `true` to 3map-as as the exemplar.
: v?   ( ? true false -- w ) over [ ? ] swap 3map-as ;
 | 
			
		||||
 | 
			
		||||
: vmask ( u ? -- u' ) swap dup dup vbitxor v? ;
 | 
			
		||||
! Masks u with ? by applying vbitand to the pair.
: vmask ( u ? -- u' ) vbitand ; inline
 | 
			
		||||
 | 
			
		||||
: vfloor    ( u -- v ) [ floor ] map ;
 | 
			
		||||
: vceiling  ( u -- v ) [ ceiling ] map ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -121,7 +121,7 @@ TYPED:: m4^n ( m: matrix4 n: fixnum -- m^n: matrix4 )
 | 
			
		|||
TYPED:: scale-matrix4 ( factors: float-4 -- matrix: matrix4 )
 | 
			
		||||
    matrix4 (struct) :> c
 | 
			
		||||
 | 
			
		||||
    factors { t t t f } vmask :> factors'
 | 
			
		||||
    factors float-4{ t t t f } vmask :> factors'
 | 
			
		||||
 | 
			
		||||
    factors' { 0 3 3 3 } vshuffle
 | 
			
		||||
    factors' { 3 1 3 3 } vshuffle
 | 
			
		||||
| 
						 | 
				
			
			@ -137,11 +137,11 @@ TYPED:: translation-matrix4 ( offset: float-4 -- matrix: matrix4 )
 | 
			
		|||
    matrix4 (struct) :> c
 | 
			
		||||
 | 
			
		||||
    float-4{ 0.0 0.0 0.0 1.0 } :> c4
 | 
			
		||||
    { t t t f } offset c4 v? :> offset'
 | 
			
		||||
    float-4{ t t t f } offset c4 v? :> offset'
 | 
			
		||||
 | 
			
		||||
    offset' { 3 3 3 0 } vshuffle { t f f t } vmask
 | 
			
		||||
    offset' { 3 3 3 1 } vshuffle { f t f t } vmask
 | 
			
		||||
    offset' { 3 3 3 2 } vshuffle { f f t t } vmask
 | 
			
		||||
    offset' { 3 3 3 0 } vshuffle float-4{ t f f t } vmask
 | 
			
		||||
    offset' { 3 3 3 1 } vshuffle float-4{ f t f t } vmask
 | 
			
		||||
    offset' { 3 3 3 2 } vshuffle float-4{ f f t t } vmask
 | 
			
		||||
    c4
 | 
			
		||||
 | 
			
		||||
    c set-rows ;
 | 
			
		||||
| 
						 | 
				
			
			@ -166,7 +166,7 @@ TYPED:: rotation-matrix4 ( axis: float-4 theta: float -- matrix: matrix4 )
 | 
			
		|||
    axis2 cc ones axis2 v- v* v+ :> diagonal
 | 
			
		||||
 | 
			
		||||
    axis { 0 0 1 3 } vshuffle axis { 1 2 2 3 } vshuffle v* 1-c v*
 | 
			
		||||
    { t t t f } vmask :> triangle-a
 | 
			
		||||
    float-4{ t t t f } vmask :> triangle-a
 | 
			
		||||
    ss { 2 1 0 3 } vshuffle triangle-sign v* :> triangle-b
 | 
			
		||||
    triangle-a triangle-b v+ :> triangle-lo
 | 
			
		||||
    triangle-a triangle-b v- :> triangle-hi
 | 
			
		||||
| 
						 | 
				
			
			@ -186,12 +186,12 @@ TYPED:: frustum-matrix4 ( xy: float-4 near: float far: float -- matrix: matrix4
 | 
			
		|||
    matrix4 (struct) :> c
 | 
			
		||||
 | 
			
		||||
    near near near far + 2 near far * * float-4-boa :> num
 | 
			
		||||
    { t t f f } xy near far - float-4-with v? :> denom
 | 
			
		||||
    float-4{ t t f f } xy near far - float-4-with v? :> denom
 | 
			
		||||
    num denom v/ :> fov
 | 
			
		||||
 | 
			
		||||
    fov { 0 0 0 0 } vshuffle { t f f f } vmask
 | 
			
		||||
    fov { 1 1 1 1 } vshuffle { f t f f } vmask
 | 
			
		||||
    fov { 2 2 2 3 } vshuffle { f f t t } vmask
 | 
			
		||||
    fov { 0 0 0 0 } vshuffle float-4{ t f f f } vmask
 | 
			
		||||
    fov { 1 1 1 1 } vshuffle float-4{ f t f f } vmask
 | 
			
		||||
    fov { 2 2 2 3 } vshuffle float-4{ f f t t } vmask
 | 
			
		||||
    float-4{ 0.0 0.0 -1.0 0.0 }
 | 
			
		||||
 | 
			
		||||
    c set-rows ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue