Merge branch 'master' of git://factorcode.org/git/factor
commit
c5dd8d0b20
|
@ -163,8 +163,8 @@ IN: compiler.cfg.intrinsics
|
||||||
{ math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vs*) [ [ ^^saturated-mul-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vs*) [ [ ^^saturated-mul-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vmin) [ [ generate-min-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vmax) [ [ generate-max-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-v.) [ [ ^^dot-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-v.) [ [ ^^dot-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vabs) [ [ generate-abs-vector ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vabs) [ [ generate-abs-vector ] emit-unary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] }
|
||||||
|
|
|
@ -10,8 +10,8 @@ compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
|
||||||
compiler.cfg.instructions compiler.cfg.registers
|
compiler.cfg.instructions compiler.cfg.registers
|
||||||
compiler.cfg.intrinsics.alien
|
compiler.cfg.intrinsics.alien
|
||||||
specialized-arrays ;
|
specialized-arrays ;
|
||||||
FROM: alien.c-types => heap-size char uchar float double ;
|
FROM: alien.c-types => heap-size uchar ushort uint ulonglong float double ;
|
||||||
SPECIALIZED-ARRAYS: float double ;
|
SPECIALIZED-ARRAYS: uchar ushort uint ulonglong float double ;
|
||||||
IN: compiler.cfg.intrinsics.simd
|
IN: compiler.cfg.intrinsics.simd
|
||||||
|
|
||||||
MACRO: check-elements ( quots -- )
|
MACRO: check-elements ( quots -- )
|
||||||
|
@ -155,28 +155,79 @@ MACRO: if-literals-match ( quots -- )
|
||||||
[ ^^not-vector ]
|
[ ^^not-vector ]
|
||||||
[ [ ^^fill-vector ] [ ^^xor-vector ] bi ] if ;
|
[ [ ^^fill-vector ] [ ^^xor-vector ] bi ] if ;
|
||||||
|
|
||||||
:: (generate-compare-vector) ( src1 src2 rep {cc,swap} -- dst )
|
:: ((generate-compare-vector)) ( src1 src2 rep {cc,swap} -- dst )
|
||||||
{cc,swap} first2 :> swap? :> cc
|
{cc,swap} first2 :> swap? :> cc
|
||||||
swap?
|
swap?
|
||||||
[ src2 src1 rep cc ^^compare-vector ]
|
[ src2 src1 rep cc ^^compare-vector ]
|
||||||
[ src1 src2 rep cc ^^compare-vector ] if ;
|
[ src1 src2 rep cc ^^compare-vector ] if ;
|
||||||
|
|
||||||
:: generate-compare-vector ( src1 src2 rep orig-cc -- dst )
|
:: (generate-compare-vector) ( src1 src2 rep orig-cc -- dst )
|
||||||
rep orig-cc %compare-vector-ccs :> not? :> ccs
|
rep orig-cc %compare-vector-ccs :> not? :> ccs
|
||||||
|
|
||||||
ccs empty?
|
ccs empty?
|
||||||
[ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ]
|
[ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ]
|
||||||
[
|
[
|
||||||
ccs unclip :> first-cc :> rest-ccs
|
ccs unclip :> first-cc :> rest-ccs
|
||||||
src1 src2 rep first-cc (generate-compare-vector) :> first-dst
|
src1 src2 rep first-cc ((generate-compare-vector)) :> first-dst
|
||||||
|
|
||||||
rest-ccs first-dst
|
rest-ccs first-dst
|
||||||
[ [ src1 src2 rep ] dip (generate-compare-vector) rep ^^or-vector ]
|
[ [ src1 src2 rep ] dip ((generate-compare-vector)) rep ^^or-vector ]
|
||||||
reduce
|
reduce
|
||||||
|
|
||||||
not? [ rep generate-not-vector ] when
|
not? [ rep generate-not-vector ] when
|
||||||
] if ;
|
] if ;
|
||||||
|
|
||||||
|
: sign-bit-mask ( rep -- byte-array )
|
||||||
|
unsign-rep {
|
||||||
|
{ char-16-rep [ uchar-array{
|
||||||
|
HEX: 80 HEX: 80 HEX: 80 HEX: 80
|
||||||
|
HEX: 80 HEX: 80 HEX: 80 HEX: 80
|
||||||
|
HEX: 80 HEX: 80 HEX: 80 HEX: 80
|
||||||
|
HEX: 80 HEX: 80 HEX: 80 HEX: 80
|
||||||
|
} underlying>> ] }
|
||||||
|
{ short-8-rep [ ushort-array{
|
||||||
|
HEX: 8000 HEX: 8000 HEX: 8000 HEX: 8000
|
||||||
|
HEX: 8000 HEX: 8000 HEX: 8000 HEX: 8000
|
||||||
|
} underlying>> ] }
|
||||||
|
{ int-4-rep [ uint-array{
|
||||||
|
HEX: 8000,0000 HEX: 8000,0000
|
||||||
|
HEX: 8000,0000 HEX: 8000,0000
|
||||||
|
} underlying>> ] }
|
||||||
|
{ longlong-2-rep [ ulonglong-array{
|
||||||
|
HEX: 8000,0000,0000,0000
|
||||||
|
HEX: 8000,0000,0000,0000
|
||||||
|
} underlying>> ] }
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
:: (generate-minmax-compare-vector) ( src1 src2 rep orig-cc -- dst )
|
||||||
|
orig-cc order-cc {
|
||||||
|
{ cc< [ src1 src2 rep ^^max-vector src1 rep cc/= (generate-compare-vector) ] }
|
||||||
|
{ cc<= [ src1 src2 rep ^^min-vector src1 rep cc= (generate-compare-vector) ] }
|
||||||
|
{ cc> [ src1 src2 rep ^^min-vector src1 rep cc/= (generate-compare-vector) ] }
|
||||||
|
{ cc>= [ src1 src2 rep ^^max-vector src1 rep cc= (generate-compare-vector) ] }
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
:: generate-compare-vector ( src1 src2 rep orig-cc -- dst )
|
||||||
|
{
|
||||||
|
{
|
||||||
|
[ rep orig-cc %compare-vector-reps member? ]
|
||||||
|
[ src1 src2 rep orig-cc (generate-compare-vector) ]
|
||||||
|
}
|
||||||
|
{
|
||||||
|
[ rep %min-vector-reps member? ]
|
||||||
|
[ src1 src2 rep orig-cc (generate-minmax-compare-vector) ]
|
||||||
|
}
|
||||||
|
{
|
||||||
|
[ rep unsign-rep orig-cc %compare-vector-reps member? ]
|
||||||
|
[
|
||||||
|
rep sign-bit-mask ^^load-constant :> sign-bits
|
||||||
|
src1 sign-bits rep ^^xor-vector
|
||||||
|
src2 sign-bits rep ^^xor-vector
|
||||||
|
rep unsign-rep orig-cc (generate-compare-vector)
|
||||||
|
]
|
||||||
|
}
|
||||||
|
} cond ;
|
||||||
|
|
||||||
:: generate-unpack-vector-head ( src rep -- dst )
|
:: generate-unpack-vector-head ( src rep -- dst )
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
|
@ -265,3 +316,17 @@ MACRO: if-literals-match ( quots -- )
|
||||||
]
|
]
|
||||||
} cond ;
|
} cond ;
|
||||||
|
|
||||||
|
: generate-min-vector ( src1 src2 rep -- dst )
|
||||||
|
dup %min-vector-reps member?
|
||||||
|
[ ^^min-vector ] [
|
||||||
|
[ cc< generate-compare-vector ]
|
||||||
|
[ generate-blend-vector ] 3bi
|
||||||
|
] if ;
|
||||||
|
|
||||||
|
: generate-max-vector ( src1 src2 rep -- dst )
|
||||||
|
dup %max-vector-reps member?
|
||||||
|
[ ^^max-vector ] [
|
||||||
|
[ cc> generate-compare-vector ]
|
||||||
|
[ generate-blend-vector ] 3bi
|
||||||
|
] if ;
|
||||||
|
|
||||||
|
|
|
@ -515,3 +515,48 @@ M: ##scalar>vector rewrite
|
||||||
M: ##xor-vector rewrite
|
M: ##xor-vector rewrite
|
||||||
dup [ src1>> vreg>vn ] [ src2>> vreg>vn ] bi eq?
|
dup [ src1>> vreg>vn ] [ src2>> vreg>vn ] bi eq?
|
||||||
[ [ dst>> ] [ rep>> ] bi \ ##zero-vector new-insn ] [ drop f ] if ;
|
[ [ dst>> ] [ rep>> ] bi \ ##zero-vector new-insn ] [ drop f ] if ;
|
||||||
|
|
||||||
|
: vector-not? ( expr -- ? )
|
||||||
|
{
|
||||||
|
[ not-vector-expr? ]
|
||||||
|
[ {
|
||||||
|
[ xor-vector-expr? ]
|
||||||
|
[ [ src1>> ] [ src2>> ] bi [ vn>expr fill-vector-expr? ] either? ]
|
||||||
|
} 1&& ]
|
||||||
|
} 1|| ;
|
||||||
|
|
||||||
|
GENERIC: vector-not-src ( expr -- vreg )
|
||||||
|
M: not-vector-expr vector-not-src src>> vn>vreg ;
|
||||||
|
M: xor-vector-expr vector-not-src
|
||||||
|
dup src1>> vn>expr fill-vector-expr? [ src2>> ] [ src1>> ] if vn>vreg ;
|
||||||
|
|
||||||
|
M: ##and-vector rewrite
|
||||||
|
{
|
||||||
|
{ [ dup src1>> vreg>expr vector-not? ] [
|
||||||
|
{
|
||||||
|
[ dst>> ]
|
||||||
|
[ src1>> vreg>expr vector-not-src ]
|
||||||
|
[ src2>> ]
|
||||||
|
[ rep>> ]
|
||||||
|
} cleave \ ##andn-vector new-insn
|
||||||
|
] }
|
||||||
|
{ [ dup src2>> vreg>expr vector-not? ] [
|
||||||
|
{
|
||||||
|
[ dst>> ]
|
||||||
|
[ src2>> vreg>expr vector-not-src ]
|
||||||
|
[ src1>> ]
|
||||||
|
[ rep>> ]
|
||||||
|
} cleave \ ##andn-vector new-insn
|
||||||
|
] }
|
||||||
|
[ drop f ]
|
||||||
|
} cond ;
|
||||||
|
|
||||||
|
M: ##andn-vector rewrite
|
||||||
|
dup src1>> vreg>expr vector-not? [
|
||||||
|
{
|
||||||
|
[ dst>> ]
|
||||||
|
[ src1>> vreg>expr vector-not-src ]
|
||||||
|
[ src2>> ]
|
||||||
|
[ rep>> ]
|
||||||
|
} cleave \ ##and-vector new-insn
|
||||||
|
] [ drop f ] if ;
|
||||||
|
|
|
@ -1281,6 +1281,128 @@ cell 8 = [
|
||||||
} value-numbering-step
|
} value-numbering-step
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
|
! NOT x AND y => x ANDN y
|
||||||
|
|
||||||
|
[
|
||||||
|
{
|
||||||
|
T{ ##fill-vector f 3 float-4-rep }
|
||||||
|
T{ ##xor-vector f 4 0 3 float-4-rep }
|
||||||
|
T{ ##andn-vector f 5 0 1 float-4-rep }
|
||||||
|
}
|
||||||
|
] [
|
||||||
|
{
|
||||||
|
T{ ##fill-vector f 3 float-4-rep }
|
||||||
|
T{ ##xor-vector f 4 0 3 float-4-rep }
|
||||||
|
T{ ##and-vector f 5 4 1 float-4-rep }
|
||||||
|
} value-numbering-step
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
[
|
||||||
|
{
|
||||||
|
T{ ##not-vector f 4 0 float-4-rep }
|
||||||
|
T{ ##andn-vector f 5 0 1 float-4-rep }
|
||||||
|
}
|
||||||
|
] [
|
||||||
|
{
|
||||||
|
T{ ##not-vector f 4 0 float-4-rep }
|
||||||
|
T{ ##and-vector f 5 4 1 float-4-rep }
|
||||||
|
} value-numbering-step
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
! x AND NOT y => y ANDN x
|
||||||
|
|
||||||
|
[
|
||||||
|
{
|
||||||
|
T{ ##fill-vector f 3 float-4-rep }
|
||||||
|
T{ ##xor-vector f 4 0 3 float-4-rep }
|
||||||
|
T{ ##andn-vector f 5 0 1 float-4-rep }
|
||||||
|
}
|
||||||
|
] [
|
||||||
|
{
|
||||||
|
T{ ##fill-vector f 3 float-4-rep }
|
||||||
|
T{ ##xor-vector f 4 0 3 float-4-rep }
|
||||||
|
T{ ##and-vector f 5 1 4 float-4-rep }
|
||||||
|
} value-numbering-step
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
[
|
||||||
|
{
|
||||||
|
T{ ##not-vector f 4 0 float-4-rep }
|
||||||
|
T{ ##andn-vector f 5 0 1 float-4-rep }
|
||||||
|
}
|
||||||
|
] [
|
||||||
|
{
|
||||||
|
T{ ##not-vector f 4 0 float-4-rep }
|
||||||
|
T{ ##and-vector f 5 1 4 float-4-rep }
|
||||||
|
} value-numbering-step
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
! NOT x ANDN y => x AND y
|
||||||
|
|
||||||
|
[
|
||||||
|
{
|
||||||
|
T{ ##fill-vector f 3 float-4-rep }
|
||||||
|
T{ ##xor-vector f 4 0 3 float-4-rep }
|
||||||
|
T{ ##and-vector f 5 0 1 float-4-rep }
|
||||||
|
}
|
||||||
|
] [
|
||||||
|
{
|
||||||
|
T{ ##fill-vector f 3 float-4-rep }
|
||||||
|
T{ ##xor-vector f 4 0 3 float-4-rep }
|
||||||
|
T{ ##andn-vector f 5 4 1 float-4-rep }
|
||||||
|
} value-numbering-step
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
[
|
||||||
|
{
|
||||||
|
T{ ##not-vector f 4 0 float-4-rep }
|
||||||
|
T{ ##and-vector f 5 0 1 float-4-rep }
|
||||||
|
}
|
||||||
|
] [
|
||||||
|
{
|
||||||
|
T{ ##not-vector f 4 0 float-4-rep }
|
||||||
|
T{ ##andn-vector f 5 4 1 float-4-rep }
|
||||||
|
} value-numbering-step
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
! AND <=> ANDN
|
||||||
|
|
||||||
|
[
|
||||||
|
{
|
||||||
|
T{ ##fill-vector f 3 float-4-rep }
|
||||||
|
T{ ##xor-vector f 4 0 3 float-4-rep }
|
||||||
|
T{ ##andn-vector f 5 0 1 float-4-rep }
|
||||||
|
T{ ##and-vector f 6 0 2 float-4-rep }
|
||||||
|
T{ ##or-vector f 7 5 6 float-4-rep }
|
||||||
|
}
|
||||||
|
] [
|
||||||
|
{
|
||||||
|
T{ ##fill-vector f 3 float-4-rep }
|
||||||
|
T{ ##xor-vector f 4 0 3 float-4-rep }
|
||||||
|
T{ ##and-vector f 5 4 1 float-4-rep }
|
||||||
|
T{ ##andn-vector f 6 4 2 float-4-rep }
|
||||||
|
T{ ##or-vector f 7 5 6 float-4-rep }
|
||||||
|
} value-numbering-step
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
[
|
||||||
|
{
|
||||||
|
T{ ##not-vector f 4 0 float-4-rep }
|
||||||
|
T{ ##andn-vector f 5 0 1 float-4-rep }
|
||||||
|
T{ ##and-vector f 6 0 2 float-4-rep }
|
||||||
|
T{ ##or-vector f 7 5 6 float-4-rep }
|
||||||
|
}
|
||||||
|
] [
|
||||||
|
{
|
||||||
|
T{ ##not-vector f 4 0 float-4-rep }
|
||||||
|
T{ ##and-vector f 5 4 1 float-4-rep }
|
||||||
|
T{ ##andn-vector f 6 4 2 float-4-rep }
|
||||||
|
T{ ##or-vector f 7 5 6 float-4-rep }
|
||||||
|
} value-numbering-step
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
! branch folding
|
||||||
|
|
||||||
: test-branch-folding ( insns -- insns' n )
|
: test-branch-folding ( insns -- insns' n )
|
||||||
<basic-block>
|
<basic-block>
|
||||||
[ V{ 0 1 } clone >>successors basic-block set value-numbering-step ] keep
|
[ V{ 0 1 } clone >>successors basic-block set value-numbering-step ] keep
|
||||||
|
|
|
@ -893,7 +893,7 @@ M: x86 %compare-vector ( dst src1 src2 rep cc -- )
|
||||||
|
|
||||||
M: x86 %compare-vector-reps
|
M: x86 %compare-vector-reps
|
||||||
{
|
{
|
||||||
{ [ dup { cc= cc/= } memq? ] [ drop %compare-vector-eq-reps ] }
|
{ [ dup { cc= cc/= cc/<>= cc<>= } memq? ] [ drop %compare-vector-eq-reps ] }
|
||||||
[ drop %compare-vector-ord-reps ]
|
[ drop %compare-vector-ord-reps ]
|
||||||
} cond ;
|
} cond ;
|
||||||
|
|
||||||
|
@ -1098,7 +1098,7 @@ M: x86 %min-vector ( dst src1 src2 rep -- )
|
||||||
M: x86 %min-vector-reps
|
M: x86 %min-vector-reps
|
||||||
{
|
{
|
||||||
{ sse? { float-4-rep } }
|
{ sse? { float-4-rep } }
|
||||||
{ sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
|
{ sse2? { uchar-16-rep short-8-rep double-2-rep } }
|
||||||
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
|
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
|
@ -1118,7 +1118,7 @@ M: x86 %max-vector ( dst src1 src2 rep -- )
|
||||||
M: x86 %max-vector-reps
|
M: x86 %max-vector-reps
|
||||||
{
|
{
|
||||||
{ sse? { float-4-rep } }
|
{ sse? { float-4-rep } }
|
||||||
{ sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
|
{ sse2? { uchar-16-rep short-8-rep double-2-rep } }
|
||||||
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
|
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
|
|
|
@ -163,8 +163,8 @@ M: vector-rep supported-simd-op?
|
||||||
{ \ (simd-v*) [ %mul-vector-reps ] }
|
{ \ (simd-v*) [ %mul-vector-reps ] }
|
||||||
{ \ (simd-vs*) [ %saturated-mul-vector-reps ] }
|
{ \ (simd-vs*) [ %saturated-mul-vector-reps ] }
|
||||||
{ \ (simd-v/) [ %div-vector-reps ] }
|
{ \ (simd-v/) [ %div-vector-reps ] }
|
||||||
{ \ (simd-vmin) [ %min-vector-reps ] }
|
{ \ (simd-vmin) [ %min-vector-reps cc< %compare-vector-reps union ] }
|
||||||
{ \ (simd-vmax) [ %max-vector-reps ] }
|
{ \ (simd-vmax) [ %max-vector-reps cc> %compare-vector-reps union ] }
|
||||||
{ \ (simd-v.) [ %dot-vector-reps ] }
|
{ \ (simd-v.) [ %dot-vector-reps ] }
|
||||||
{ \ (simd-vsqrt) [ %sqrt-vector-reps ] }
|
{ \ (simd-vsqrt) [ %sqrt-vector-reps ] }
|
||||||
{ \ (simd-sum) [ %horizontal-add-vector-reps ] }
|
{ \ (simd-sum) [ %horizontal-add-vector-reps ] }
|
||||||
|
@ -193,12 +193,12 @@ M: vector-rep supported-simd-op?
|
||||||
{ \ (simd-(vpack-unsigned)) [ %unsigned-pack-vector-reps ] }
|
{ \ (simd-(vpack-unsigned)) [ %unsigned-pack-vector-reps ] }
|
||||||
{ \ (simd-(vunpack-head)) [ (%unpack-reps) ] }
|
{ \ (simd-(vunpack-head)) [ (%unpack-reps) ] }
|
||||||
{ \ (simd-(vunpack-tail)) [ (%unpack-reps) ] }
|
{ \ (simd-(vunpack-tail)) [ (%unpack-reps) ] }
|
||||||
{ \ (simd-v<=) [ cc<= %compare-vector-reps ] }
|
{ \ (simd-v<=) [ unsign-rep cc<= %compare-vector-reps ] }
|
||||||
{ \ (simd-v<) [ cc< %compare-vector-reps ] }
|
{ \ (simd-v<) [ unsign-rep cc< %compare-vector-reps ] }
|
||||||
{ \ (simd-v=) [ cc= %compare-vector-reps ] }
|
{ \ (simd-v=) [ unsign-rep cc= %compare-vector-reps ] }
|
||||||
{ \ (simd-v>) [ cc> %compare-vector-reps ] }
|
{ \ (simd-v>) [ unsign-rep cc> %compare-vector-reps ] }
|
||||||
{ \ (simd-v>=) [ cc>= %compare-vector-reps ] }
|
{ \ (simd-v>=) [ unsign-rep cc>= %compare-vector-reps ] }
|
||||||
{ \ (simd-vunordered?) [ cc/<>= %compare-vector-reps ] }
|
{ \ (simd-vunordered?) [ unsign-rep cc/<>= %compare-vector-reps ] }
|
||||||
{ \ (simd-gather-2) [ %gather-vector-2-reps ] }
|
{ \ (simd-gather-2) [ %gather-vector-2-reps ] }
|
||||||
{ \ (simd-gather-4) [ %gather-vector-4-reps ] }
|
{ \ (simd-gather-4) [ %gather-vector-4-reps ] }
|
||||||
{ \ (simd-vany?) [ %test-vector-reps ] }
|
{ \ (simd-vany?) [ %test-vector-reps ] }
|
||||||
|
|
|
@ -101,6 +101,7 @@ $nl
|
||||||
vxor
|
vxor
|
||||||
vnot
|
vnot
|
||||||
v?
|
v?
|
||||||
|
vif
|
||||||
}
|
}
|
||||||
"Entire vector tests:"
|
"Entire vector tests:"
|
||||||
{ $subsections
|
{ $subsections
|
||||||
|
@ -534,10 +535,19 @@ HELP: vnot
|
||||||
{ $notes "See " { $link "math-vectors-simd-logic" } " for notes on dealing with vector boolean inputs and results when using SIMD types." } ;
|
{ $notes "See " { $link "math-vectors-simd-logic" } " for notes on dealing with vector boolean inputs and results when using SIMD types." } ;
|
||||||
|
|
||||||
HELP: v?
|
HELP: v?
|
||||||
{ $values { "mask" "a sequence of booleans" } { "true" "a sequence of numbers" } { "false" "a sequence of numbers" } { "w" "a sequence of numbers" } }
|
{ $values { "mask" "a sequence of booleans" } { "true" "a sequence of numbers" } { "false" "a sequence of numbers" } { "result" "a sequence of numbers" } }
|
||||||
{ $description "Creates a new sequence by selecting elements from the " { $snippet "true" } " and " { $snippet "false" } " sequences based on whether the corresponding bits of the " { $snippet "mask" } " sequence are set or not." }
|
{ $description "Creates a new sequence by selecting elements from the " { $snippet "true" } " and " { $snippet "false" } " sequences based on whether the corresponding bits of the " { $snippet "mask" } " sequence are set or not." }
|
||||||
{ $notes "See " { $link "math-vectors-simd-logic" } " for notes on dealing with vector boolean inputs and results when using SIMD types." } ;
|
{ $notes "See " { $link "math-vectors-simd-logic" } " for notes on dealing with vector boolean inputs and results when using SIMD types." } ;
|
||||||
|
|
||||||
|
HELP: vif
|
||||||
|
{ $values { "mask" "a sequence of booleans" } { "true-quot" { $quotation "( -- vector )" } } { "false-quot" { $quotation "( -- vector )" } } { "result" "a sequence" } }
|
||||||
|
{ $description "If all of the elements of " { $snippet "mask" } " are true, " { $snippet "true-quot" } " is called and its output value returned. If all of the elements of " { $snippet "mask" } " are false, " { $snippet "false-quot" } " is called and its output value returned. Otherwise, both quotations are called and " { $snippet "mask" } " is used to select elements from each output as with " { $link v? } "." }
|
||||||
|
{ $notes "See " { $link "math-vectors-simd-logic" } " for notes on dealing with vector boolean inputs and results when using SIMD types."
|
||||||
|
$nl
|
||||||
|
"For most conditional SIMD code, unless a case is exceptionally expensive to compute, it is usually most efficient to just compute all cases and blend them with " { $link v? } " instead of using " { $snippet "vif" } "." } ;
|
||||||
|
|
||||||
|
{ v? vif } related-words
|
||||||
|
|
||||||
HELP: vany?
|
HELP: vany?
|
||||||
{ $values { "v" "a sequence of booleans" } { "?" "a boolean" } }
|
{ $values { "v" "a sequence of booleans" } { "?" "a boolean" } }
|
||||||
{ $description "Returns true if any element of " { $snippet "v" } " is true." }
|
{ $description "Returns true if any element of " { $snippet "v" } " is true." }
|
||||||
|
|
|
@ -142,9 +142,16 @@ M: simd-128 vshuffle ( u perm -- v )
|
||||||
: vunordered? ( u v -- w ) [ unordered? ] 2map ;
|
: vunordered? ( u v -- w ) [ unordered? ] 2map ;
|
||||||
: v= ( u v -- w ) [ = ] 2map ;
|
: v= ( u v -- w ) [ = ] 2map ;
|
||||||
|
|
||||||
: v? ( mask true false -- w )
|
: v? ( mask true false -- result )
|
||||||
[ vand ] [ vandn ] bi-curry* bi vor ; inline
|
[ vand ] [ vandn ] bi-curry* bi vor ; inline
|
||||||
|
|
||||||
|
:: vif ( mask true-quot false-quot -- result )
|
||||||
|
{
|
||||||
|
{ [ mask vall? ] [ true-quot call ] }
|
||||||
|
{ [ mask vnone? ] [ false-quot call ] }
|
||||||
|
[ mask true-quot call false-quot call v? ]
|
||||||
|
} cond ; inline
|
||||||
|
|
||||||
: vfloor ( u -- v ) [ floor ] map ;
|
: vfloor ( u -- v ) [ floor ] map ;
|
||||||
: vceiling ( u -- v ) [ ceiling ] map ;
|
: vceiling ( u -- v ) [ ceiling ] map ;
|
||||||
: vtruncate ( u -- v ) [ truncate ] map ;
|
: vtruncate ( u -- v ) [ truncate ] map ;
|
||||||
|
@ -175,20 +182,20 @@ PRIVATE>
|
||||||
|
|
||||||
: bilerp ( aa ba ab bb {t,u} -- a_tu )
|
: bilerp ( aa ba ab bb {t,u} -- a_tu )
|
||||||
[ first lerp ] [ second lerp ] bi-curry
|
[ first lerp ] [ second lerp ] bi-curry
|
||||||
[ 2bi@ ] [ call ] bi* ;
|
[ 2bi@ ] [ call ] bi* ; inline
|
||||||
|
|
||||||
: vlerp ( a b t -- a_t )
|
: vlerp ( a b t -- a_t )
|
||||||
[ lerp ] 3map ;
|
[ over v- ] dip v* v+ ; inline
|
||||||
|
|
||||||
: vnlerp ( a b t -- a_t )
|
: vnlerp ( a b t -- a_t )
|
||||||
[ lerp ] curry 2map ;
|
[ over v- ] dip v*n v+ ; inline
|
||||||
|
|
||||||
: vbilerp ( aa ba ab bb {t,u} -- a_tu )
|
: vbilerp ( aa ba ab bb {t,u} -- a_tu )
|
||||||
[ first vnlerp ] [ second vnlerp ] bi-curry
|
[ first vnlerp ] [ second vnlerp ] bi-curry
|
||||||
[ 2bi@ ] [ call ] bi* ;
|
[ 2bi@ ] [ call ] bi* ; inline
|
||||||
|
|
||||||
: v~ ( a b epsilon -- ? )
|
: v~ ( a b epsilon -- ? )
|
||||||
[ ~ ] curry 2all? ;
|
[ ~ ] curry 2all? ; inline
|
||||||
|
|
||||||
HINTS: vneg { array } ;
|
HINTS: vneg { array } ;
|
||||||
HINTS: norm-sq { array } ;
|
HINTS: norm-sq { array } ;
|
||||||
|
|
Loading…
Reference in New Issue