From a2976083be3d83f53a3dff4f79de1a002e3cd5ba Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Tue, 20 Oct 2009 22:30:57 -0500 Subject: [PATCH] generate unsigned vector comparison fallbacks using min/max or xor/signed compare --- .../compiler/cfg/intrinsics/simd/simd.factor | 64 +++++++++++++++++-- basis/cpu/x86/x86.factor | 6 +- .../vectors/simd/intrinsics/intrinsics.factor | 12 ++-- 3 files changed, 67 insertions(+), 15 deletions(-) diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index 0e1beae5e0..9d17ddd0f8 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -10,8 +10,8 @@ compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats compiler.cfg.instructions compiler.cfg.registers compiler.cfg.intrinsics.alien specialized-arrays ; -FROM: alien.c-types => heap-size char uchar float double ; -SPECIALIZED-ARRAYS: float double ; +FROM: alien.c-types => heap-size uchar ushort uint ulonglong float double ; +SPECIALIZED-ARRAYS: uchar ushort uint ulonglong float double ; IN: compiler.cfg.intrinsics.simd MACRO: check-elements ( quots -- ) @@ -155,28 +155,79 @@ MACRO: if-literals-match ( quots -- ) [ ^^not-vector ] [ [ ^^fill-vector ] [ ^^xor-vector ] bi ] if ; -:: (generate-compare-vector) ( src1 src2 rep {cc,swap} -- dst ) +:: ((generate-compare-vector)) ( src1 src2 rep {cc,swap} -- dst ) {cc,swap} first2 :> swap? :> cc swap? [ src2 src1 rep cc ^^compare-vector ] [ src1 src2 rep cc ^^compare-vector ] if ; -:: generate-compare-vector ( src1 src2 rep orig-cc -- dst ) +:: (generate-compare-vector) ( src1 src2 rep orig-cc -- dst ) rep orig-cc %compare-vector-ccs :> not? :> ccs ccs empty? [ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ] [ ccs unclip :> first-cc :> rest-ccs - src1 src2 rep first-cc (generate-compare-vector) :> first-dst + src1 src2 rep first-cc ((generate-compare-vector)) :> first-dst rest-ccs first-dst - [ [ src1 src2 rep ] dip (generate-compare-vector) rep ^^or-vector ] + [ [ src1 src2 rep ] dip ((generate-compare-vector)) rep ^^or-vector ] reduce not? [ rep generate-not-vector ] when ] if ; +: sign-bit-mask ( rep -- byte-array ) + unsign-rep { + { char-16-rep [ uchar-array{ + HEX: 80 HEX: 80 HEX: 80 HEX: 80 + HEX: 80 HEX: 80 HEX: 80 HEX: 80 + HEX: 80 HEX: 80 HEX: 80 HEX: 80 + HEX: 80 HEX: 80 HEX: 80 HEX: 80 + } underlying>> ] } + { short-8-rep [ ushort-array{ + HEX: 8000 HEX: 8000 HEX: 8000 HEX: 8000 + HEX: 8000 HEX: 8000 HEX: 8000 HEX: 8000 + } underlying>> ] } + { int-4-rep [ uint-array{ + HEX: 8000,0000 HEX: 8000,0000 + HEX: 8000,0000 HEX: 8000,0000 + } underlying>> ] } + { longlong-2-rep [ ulonglong-array{ + HEX: 8000,0000,0000,0000 + HEX: 8000,0000,0000,0000 + } underlying>> ] } + } case ; + +:: (generate-minmax-compare-vector) ( src1 src2 rep orig-cc -- dst ) + orig-cc order-cc { + { cc< [ src1 src2 rep ^^max-vector src1 rep cc/= (generate-compare-vector) ] } + { cc<= [ src1 src2 rep ^^min-vector src1 rep cc= (generate-compare-vector) ] } + { cc> [ src1 src2 rep ^^min-vector src1 rep cc/= (generate-compare-vector) ] } + { cc>= [ src1 src2 rep ^^max-vector src1 rep cc= (generate-compare-vector) ] } + } case ; + +:: generate-compare-vector ( src1 src2 rep orig-cc -- dst ) + { + { + [ rep orig-cc %compare-vector-reps member? ] + [ src1 src2 rep orig-cc (generate-compare-vector) ] + } + { + [ rep %min-vector-reps member? ] + [ src1 src2 rep orig-cc (generate-minmax-compare-vector) ] + } + { + [ rep unsign-rep orig-cc %compare-vector-reps member? ] + [ + rep sign-bit-mask ^^load-constant :> sign-bits + src1 sign-bits rep ^^xor-vector + src2 sign-bits rep ^^xor-vector + rep unsign-rep orig-cc (generate-compare-vector) + ] + } + } cond ; + :: generate-unpack-vector-head ( src rep -- dst ) { { @@ -278,3 +329,4 @@ MACRO: if-literals-match ( quots -- ) [ cc> generate-compare-vector ] [ generate-blend-vector ] 3bi ] if ; + diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 4576956335..d99512f0f7 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -893,7 +893,7 @@ M: x86 %compare-vector ( dst src1 src2 rep cc -- ) M: x86 %compare-vector-reps { - { [ dup { cc= cc/= } memq? ] [ drop %compare-vector-eq-reps ] } + { [ dup { cc= cc/= cc/<>= cc<>= } memq? ] [ drop %compare-vector-eq-reps ] } [ drop %compare-vector-ord-reps ] } cond ; @@ -1098,7 +1098,7 @@ M: x86 %min-vector ( dst src1 src2 rep -- ) M: x86 %min-vector-reps { { sse? { float-4-rep } } - { sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } } + { sse2? { uchar-16-rep short-8-rep double-2-rep } } { sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } } } available-reps ; @@ -1118,7 +1118,7 @@ M: x86 %max-vector ( dst src1 src2 rep -- ) M: x86 %max-vector-reps { { sse? { float-4-rep } } - { sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } } + { sse2? { uchar-16-rep short-8-rep double-2-rep } } { sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } } } available-reps ; diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index 761ca30375..649e444915 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -193,12 +193,12 @@ M: vector-rep supported-simd-op? { \ (simd-(vpack-unsigned)) [ %unsigned-pack-vector-reps ] } { \ (simd-(vunpack-head)) [ (%unpack-reps) ] } { \ (simd-(vunpack-tail)) [ (%unpack-reps) ] } - { \ (simd-v<=) [ cc<= %compare-vector-reps ] } - { \ (simd-v<) [ cc< %compare-vector-reps ] } - { \ (simd-v=) [ cc= %compare-vector-reps ] } - { \ (simd-v>) [ cc> %compare-vector-reps ] } - { \ (simd-v>=) [ cc>= %compare-vector-reps ] } - { \ (simd-vunordered?) [ cc/<>= %compare-vector-reps ] } + { \ (simd-v<=) [ unsign-rep cc<= %compare-vector-reps ] } + { \ (simd-v<) [ unsign-rep cc< %compare-vector-reps ] } + { \ (simd-v=) [ unsign-rep cc= %compare-vector-reps ] } + { \ (simd-v>) [ unsign-rep cc> %compare-vector-reps ] } + { \ (simd-v>=) [ unsign-rep cc>= %compare-vector-reps ] } + { \ (simd-vunordered?) [ unsign-rep cc/<>= %compare-vector-reps ] } { \ (simd-gather-2) [ %gather-vector-2-reps ] } { \ (simd-gather-4) [ %gather-vector-4-reps ] } { \ (simd-vany?) [ %test-vector-reps ] }