From fd6f370119b336a046b4d974206f603d178a768c Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 7 Oct 2009 12:53:10 -0500 Subject: [PATCH 1/3] typo in convert-to-fill-vector? --- basis/compiler/cfg/representations/representations.factor | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/basis/compiler/cfg/representations/representations.factor b/basis/compiler/cfg/representations/representations.factor index 2ff293ce5c..42059f4152 100644 --- a/basis/compiler/cfg/representations/representations.factor +++ b/basis/compiler/cfg/representations/representations.factor @@ -236,7 +236,7 @@ M: ##phi conversions-for-insn : convert-to-fill-vector? ( insn -- ? ) { [ dst>> rep-of vector-rep? ] - [ obj>> B{ 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 } = ] + [ obj>> B{ 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 } = ] } 1&& ; : (convert-to-zero/fill-vector) ( insn -- dst rep ) From f2c9eb79e237fde7222871319db854c9cc80a8c6 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 7 Oct 2009 14:09:46 -0500 Subject: [PATCH 2/3] decompose %unpack-vector-head/tail into %compare-vector/%merge-vector-head/tail or %tail>head-vector/%unpack-vector-head insns when there isn't an actual unpack insn; get rid of fake x86 implementations --- .../cfg/instructions/instructions.factor | 7 ++- .../compiler/cfg/intrinsics/intrinsics.factor | 4 +- .../compiler/cfg/intrinsics/simd/simd.factor | 36 +++++++++++- basis/compiler/codegen/codegen.factor | 1 + basis/cpu/architecture/architecture.factor | 8 ++- basis/cpu/x86/x86.factor | 55 +++++++------------ .../vectors/simd/intrinsics/intrinsics.factor | 10 +++- 7 files changed, 76 insertions(+), 45 deletions(-) diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index 02e70e7bc9..1f1310b34e 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -280,6 +280,11 @@ def: dst 
use: src literal: shuffle rep ; +PURE-INSN: ##tail>head-vector +def: dst +use: src +literal: rep ; + PURE-INSN: ##merge-vector-head def: dst use: src1 src2 @@ -303,13 +308,11 @@ literal: rep ; PURE-INSN: ##unpack-vector-head def: dst use: src -temp: temp literal: rep ; PURE-INSN: ##unpack-vector-tail def: dst use: src -temp: temp literal: rep ; PURE-INSN: ##integer>float-vector diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index f7dc950980..a870492701 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -200,8 +200,8 @@ IN: compiler.cfg.intrinsics { math.vectors.simd.intrinsics:(simd-(v>integer)) [ [ ^^float>integer-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-(vpack-signed)) [ [ ^^signed-pack-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-(vpack-unsigned)) [ [ ^^unsigned-pack-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-(vunpack-head)) [ [ ^^unpack-vector-head ] emit-unary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-(vunpack-tail)) [ [ ^^unpack-vector-tail ] emit-unary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-(vunpack-head)) [ [ generate-unpack-vector-head ] emit-unary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-(vunpack-tail)) [ [ generate-unpack-vector-tail ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] } { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] } diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index faec6cceb4..ec45762c3c 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -2,8 +2,9 @@ ! 
See http://factorcode.org/license.txt for BSD license. USING: accessors byte-arrays fry cpu.architecture kernel math sequences math.vectors.simd.intrinsics macros generalizations -combinators combinators.short-circuit arrays +combinators combinators.short-circuit arrays locals compiler.tree.propagation.info compiler.cfg.builder.blocks +compiler.cfg.comparisons compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats compiler.cfg.instructions compiler.cfg.registers compiler.cfg.intrinsics.alien ; @@ -121,3 +122,36 @@ MACRO: if-literals-match ( quots -- ) [ ^^not-vector ] [ [ ^^fill-vector ] [ ^^xor-vector ] bi ] if ; +:: generate-unpack-vector-head ( src rep -- dst ) + { + { + [ rep %unpack-vector-head-reps member? ] + [ src rep ^^unpack-vector-head ] + } + [ + rep ^^zero-vector :> zero + rep signed-int-vector-rep? [ zero src rep cc> ^^compare-vector ] [ zero ] if :> sign + src sign rep ^^merge-vector-head + ] + } cond ; + +:: generate-unpack-vector-tail ( src rep -- dst ) + { + { + [ rep %unpack-vector-tail-reps member? ] + [ src rep ^^unpack-vector-tail ] + } + { + [ rep %unpack-vector-head-reps member?
] + [ + src rep ^^tail>head-vector :> tail + tail rep ^^unpack-vector-head + ] + } + [ + rep ^^zero-vector :> zero + rep signed-int-vector-rep? [ zero src rep cc> ^^compare-vector ] [ zero ] if :> sign + src sign rep ^^merge-vector-tail + ] + } cond ; + diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index 59ad85d28f..7c7f9a696c 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -164,6 +164,7 @@ CODEGEN: ##fill-vector %fill-vector CODEGEN: ##gather-vector-2 %gather-vector-2 CODEGEN: ##gather-vector-4 %gather-vector-4 CODEGEN: ##shuffle-vector %shuffle-vector +CODEGEN: ##tail>head-vector %tail>head-vector CODEGEN: ##merge-vector-head %merge-vector-head CODEGEN: ##merge-vector-tail %merge-vector-tail CODEGEN: ##signed-pack-vector %signed-pack-vector diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index b1417a590e..7ae2187533 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -230,12 +230,13 @@ HOOK: %fill-vector cpu ( dst rep -- ) HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- ) HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) HOOK: %shuffle-vector cpu ( dst src shuffle rep -- ) +HOOK: %tail>head-vector cpu ( dst src rep -- ) HOOK: %merge-vector-head cpu ( dst src1 src2 rep -- ) HOOK: %merge-vector-tail cpu ( dst src1 src2 rep -- ) HOOK: %signed-pack-vector cpu ( dst src1 src2 rep -- ) HOOK: %unsigned-pack-vector cpu ( dst src1 src2 rep -- ) -HOOK: %unpack-vector-head cpu ( dst src temp rep -- ) -HOOK: %unpack-vector-tail cpu ( dst src temp rep -- ) +HOOK: %unpack-vector-head cpu ( dst src rep -- ) +HOOK: %unpack-vector-tail cpu ( dst src rep -- ) HOOK: %integer>float-vector cpu ( dst src rep -- ) HOOK: %float>integer-vector cpu ( dst src rep -- ) HOOK: %compare-vector cpu ( dst src1 src2 temp rep cc -- ) @@ -279,7 +280,8 @@ HOOK: %shuffle-vector-reps cpu ( -- reps ) HOOK: %merge-vector-reps cpu (
-- reps ) HOOK: %signed-pack-vector-reps cpu ( -- reps ) HOOK: %unsigned-pack-vector-reps cpu ( -- reps ) -HOOK: %unpack-vector-reps cpu ( -- reps ) +HOOK: %unpack-vector-head-reps cpu ( -- reps ) +HOOK: %unpack-vector-tail-reps cpu ( -- reps ) HOOK: %integer>float-vector-reps cpu ( -- reps ) HOOK: %float>integer-vector-reps cpu ( -- reps ) HOOK: %compare-vector-reps cpu ( cc -- reps ) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 2c2f4189d4..81ed333f1c 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -774,45 +774,32 @@ M: x86 %unsigned-pack-vector-reps { sse4.1? { int-4-rep } } } available-reps ; -:: %sign-extension-vector ( dst src rep -- ) - dst rep %zero-vector - dst src rep { - { char-16-rep [ PCMPGTB ] } - { short-8-rep [ PCMPGTW ] } - { int-4-rep [ PCMPGTD ] } - { longlong-2-rep [ PCMPGTQ ] } +M: x86 %tail>head-vector ( dst src rep -- ) + dup { + { float-4-rep [ drop MOVHLPS ] } + { double-2-rep [ [ %copy ] [ drop UNPCKHPD ] 3bi ] } + [ drop [ %copy ] [ drop PUNPCKHQDQ ] 3bi ] } case ; -:: (%unpack-vector-signs) ( dst src rep -- ) - dst rep signed-int-vector-rep? - [ src rep %sign-extension-vector ] - [ rep %zero-vector ] if ; - -M:: x86 %unpack-vector-head ( dst src temp rep -- ) - temp src rep (%unpack-vector-signs) - dst src rep %copy - dst temp rep unsign-rep { - { char-16-rep [ PUNPCKLBW ] } - { short-8-rep [ PUNPCKLWD ] } - { int-4-rep [ PUNPCKLDQ ] } - { longlong-2-rep [ PUNPCKLQDQ ] } - } case ; - -M:: x86 %unpack-vector-tail ( dst src temp rep -- ) - temp src rep (%unpack-vector-signs) - dst src rep %copy - dst temp rep unsign-rep { - { char-16-rep [ PUNPCKHBW ] } - { short-8-rep [ PUNPCKHWD ] } - { int-4-rep [ PUNPCKHDQ ] } - { longlong-2-rep [ PUNPCKHQDQ ] } - } case ; - -M: x86 %unpack-vector-reps ( -- reps ) +M: x86 %unpack-vector-head ( dst src rep -- ) { - { sse2? 
{ char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } + { char-16-rep [ PMOVSXBW ] } + { uchar-16-rep [ PMOVZXBW ] } + { short-8-rep [ PMOVSXWD ] } + { ushort-8-rep [ PMOVZXWD ] } + { int-4-rep [ PMOVSXDQ ] } + { uint-4-rep [ PMOVZXDQ ] } + { float-4-rep [ CVTPS2PD ] } + } case ; + +M: x86 %unpack-vector-head-reps ( -- reps ) + { + { sse2? { float-4-rep } } + { sse4.1? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } } available-reps ; +M: x86 %unpack-vector-tail-reps ( -- reps ) { } ; + M: x86 %integer>float-vector ( dst src rep -- ) { { int-4-rep [ CVTDQ2PS ] } diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index 753d5a88e9..0efb0c2417 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -4,7 +4,7 @@ USING: alien alien.c-types alien.data assocs combinators cpu.architecture compiler.cfg.comparisons fry generalizations kernel libc macros math math.vectors.conversion.backend -sequences effects accessors namespaces +sequences sets effects accessors namespaces lexer parser vocabs.parser words arrays math.vectors ; IN: math.vectors.simd.intrinsics @@ -137,6 +137,10 @@ MACRO: (simd-boa) ( rep -- quot ) GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? ) +: (%unpack-reps) ( -- reps ) + %merge-vector-reps [ int-vector-rep? ] filter + %unpack-vector-head-reps union ; + M: vector-rep supported-simd-op? { { \ (simd-v+) [ %add-vector-reps ] } @@ -174,8 +178,8 @@ M: vector-rep supported-simd-op? 
{ \ (simd-(v>integer)) [ %float>integer-vector-reps ] } { \ (simd-(vpack-signed)) [ %signed-pack-vector-reps ] } { \ (simd-(vpack-unsigned)) [ %unsigned-pack-vector-reps ] } - { \ (simd-(vunpack-head)) [ %unpack-vector-reps ] } - { \ (simd-(vunpack-tail)) [ %unpack-vector-reps ] } + { \ (simd-(vunpack-head)) [ (%unpack-reps) ] } + { \ (simd-(vunpack-tail)) [ (%unpack-reps) ] } { \ (simd-v<=) [ cc<= %compare-vector-reps ] } { \ (simd-v<) [ cc< %compare-vector-reps ] } { \ (simd-v=) [ cc= %compare-vector-reps ] } From dd691a61e84d63e532d3e1e7e110481f56c56a4a Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 7 Oct 2009 15:27:03 -0500 Subject: [PATCH 3/3] break vector compare intrinsics into %compare, %or, and %not instructions that map directly to cpu instructions --- .../cfg/instructions/instructions.factor | 2 - .../compiler/cfg/intrinsics/intrinsics.factor | 12 +-- .../compiler/cfg/intrinsics/simd/simd.factor | 22 +++++ basis/cpu/architecture/architecture.factor | 19 ++++- basis/cpu/x86/x86.factor | 81 ++++++++++--------- 5 files changed, 86 insertions(+), 50 deletions(-) diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index 1f1310b34e..57d88a2d86 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -328,7 +328,6 @@ literal: rep ; PURE-INSN: ##compare-vector def: dst use: src1 src2 -temp: temp literal: rep cc ; PURE-INSN: ##test-vector @@ -816,7 +815,6 @@ UNION: kill-vreg-insn UNION: def-is-use-insn ##box-alien ##box-displaced-alien -##compare-vector ##string-nth ##unbox-any-c-ptr ; diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index a870492701..2dcd6d4b45 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -177,12 +177,12 @@ IN: compiler.cfg.intrinsics { 
math.vectors.simd.intrinsics:(simd-vor) [ [ ^^or-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vxor) [ [ ^^xor-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vnot) [ [ generate-not-vector ] emit-unary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-v<=) [ [ cc<= ^^compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-v<) [ [ cc< ^^compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-v=) [ [ cc= ^^compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-v>) [ [ cc> ^^compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-v>=) [ [ cc>= ^^compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vunordered?) [ [ cc/<>= ^^compare-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-v<=) [ [ cc<= generate-compare-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-v<) [ [ cc< generate-compare-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-v=) [ [ cc= generate-compare-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-v>) [ [ cc> generate-compare-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-v>=) [ [ cc>= generate-compare-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vunordered?) [ [ cc/<>= generate-compare-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vany?) [ [ vcc-any ^^test-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vall?) [ [ vcc-all ^^test-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vnone?) 
[ [ vcc-none ^^test-vector ] emit-unary-vector-op ] } diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index ec45762c3c..9986588e3e 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -122,6 +122,28 @@ MACRO: if-literals-match ( quots -- ) [ ^^not-vector ] [ [ ^^fill-vector ] [ ^^xor-vector ] bi ] if ; +:: (generate-compare-vector) ( src1 src2 rep {cc,swap} -- dst ) + {cc,swap} first2 :> swap? :> cc + swap? + [ src2 src1 rep cc ^^compare-vector ] + [ src1 src2 rep cc ^^compare-vector ] if ; + +:: generate-compare-vector ( src1 src2 rep orig-cc -- dst ) + rep orig-cc %compare-vector-ccs :> not? :> ccs + + ccs empty? + [ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ] + [ + ccs unclip :> first-cc :> rest-ccs + src1 src2 rep first-cc (generate-compare-vector) :> first-dst + + rest-ccs first-dst + [ [ src1 src2 rep ] dip (generate-compare-vector) rep ^^or-vector ] + reduce + + not? [ rep generate-not-vector ] when + ] if ; + :: generate-unpack-vector-head ( src rep -- dst ) { { diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 7ae2187533..c7a7f0c5ef 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2006, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. 
-USING: accessors arrays generic kernel kernel.private math -memory namespaces make sequences layouts system hashtables +USING: accessors arrays assocs generic kernel kernel.private +math memory namespaces make sequences layouts system hashtables classes alien byte-arrays combinators words sets fry ; IN: cpu.architecture @@ -95,6 +95,18 @@ double-rep vector-rep scalar-rep ; +: unsign-rep ( rep -- rep' ) + { + { uint-4-rep int-4-rep } + { ulonglong-2-rep longlong-2-rep } + { ushort-8-rep short-8-rep } + { uchar-16-rep char-16-rep } + { uchar-scalar-rep char-scalar-rep } + { ushort-scalar-rep short-scalar-rep } + { uint-scalar-rep int-scalar-rep } + { ulonglong-scalar-rep longlong-scalar-rep } + } ?at drop ; + ! Register classes SINGLETONS: int-regs float-regs ; @@ -239,7 +251,7 @@ HOOK: %unpack-vector-head cpu ( dst src rep -- ) HOOK: %unpack-vector-tail cpu ( dst src rep -- ) HOOK: %integer>float-vector cpu ( dst src rep -- ) HOOK: %float>integer-vector cpu ( dst src rep -- ) -HOOK: %compare-vector cpu ( dst src1 src2 temp rep cc -- ) +HOOK: %compare-vector cpu ( dst src1 src2 rep cc -- ) HOOK: %test-vector cpu ( dst src1 temp rep vcc -- ) HOOK: %test-vector-branch cpu ( label src1 temp rep vcc -- ) HOOK: %add-vector cpu ( dst src1 src2 rep -- ) @@ -285,6 +297,7 @@ HOOK: %unpack-vector-tail-reps cpu ( -- reps ) HOOK: %integer>float-vector-reps cpu ( -- reps ) HOOK: %float>integer-vector-reps cpu ( -- reps ) HOOK: %compare-vector-reps cpu ( cc -- reps ) +HOOK: %compare-vector-ccs cpu ( rep cc -- {cc,swap?}s not? ) HOOK: %test-vector-reps cpu ( -- reps ) HOOK: %add-vector-reps cpu ( -- reps ) HOOK: %saturated-add-vector-reps cpu ( -- reps ) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 81ed333f1c..13727bdc61 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -588,14 +588,6 @@ M: x86 %fill-vector-reps { sse2? 
{ double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; -: unsign-rep ( rep -- rep' ) - { - { uint-4-rep int-4-rep } - { ulonglong-2-rep longlong-2-rep } - { ushort-8-rep short-8-rep } - { uchar-16-rep char-16-rep } - } ?at drop ; - ! M:: x86 %broadcast-vector ( dst src rep -- ) ! rep unsign-rep { ! { float-4-rep [ @@ -820,14 +812,10 @@ M: x86 %float>integer-vector-reps { sse2? { float-4-rep } } } available-reps ; -:: compare-float-v-operands ( dst src1 src2 temp rep cc -- dst' src' rep cc' ) - cc { cc> cc>= cc/> cc/>= } member? - [ dst src2 src1 rep two-operand rep cc swap-cc ] - [ dst src1 src2 rep two-operand rep cc ] if ; : (%compare-float-vector) ( dst src rep double single -- ) [ double-2-rep eq? ] 2dip if ; inline -: %compare-float-vector ( dst src1 src2 temp rep cc -- ) - compare-float-v-operands { +: %compare-float-vector ( dst src rep cc -- ) + { { cc< [ [ CMPLTPD ] [ CMPLTPS ] (%compare-float-vector) ] } { cc<= [ [ CMPLEPD ] [ CMPLEPS ] (%compare-float-vector) ] } { cc= [ [ CMPEQPD ] [ CMPEQPS ] (%compare-float-vector) ] } @@ -838,16 +826,6 @@ M: x86 %float>integer-vector-reps { cc/<>= [ [ CMPUNORDPD ] [ CMPUNORDPS ] (%compare-float-vector) ] } } case ; -:: compare-int-v-operands ( dst src1 src2 temp rep cc -- not-dst/f cmp-dst src' rep cc' ) - cc order-cc :> occ - occ { - { cc= [ f dst src1 src2 rep two-operand rep cc= ] } - { cc/= [ dst temp src1 src2 rep two-operand rep cc= ] } - { cc<= [ dst temp src1 src2 rep two-operand rep cc> ] } - { cc< [ f dst src2 src1 rep two-operand rep cc> ] } - { cc> [ f dst src1 src2 rep two-operand rep cc> ] } - { cc>= [ dst temp src2 src1 rep two-operand rep cc> ] } - } case ; :: (%compare-int-vector) ( dst src rep int64 int32 int16 int8 -- ) rep unsign-rep :> rep' dst src rep' { @@ -856,18 +834,14 @@ M: x86 %float>integer-vector-reps { short-8-rep [ int16 call ] } { char-16-rep [ int8 call ] } } case ; inline -:: (%not-vector) ( dst src 
rep -- ) - dst rep %fill-vector - dst dst src rep %xor-vector ; -:: %compare-int-vector ( dst src1 src2 temp rep cc -- ) - dst src1 src2 temp rep cc compare-int-v-operands :> cc' :> rep :> src' :> cmp-dst :> not-dst - cmp-dst src' rep cc' { +: %compare-int-vector ( dst src rep cc -- ) + { { cc= [ [ PCMPEQQ ] [ PCMPEQD ] [ PCMPEQW ] [ PCMPEQB ] (%compare-int-vector) ] } { cc> [ [ PCMPGTQ ] [ PCMPGTD ] [ PCMPGTW ] [ PCMPGTB ] (%compare-int-vector) ] } - } case - not-dst [ cmp-dst rep (%not-vector) ] when* ; + } case ; -M: x86 %compare-vector ( dst src1 src2 temp rep cc -- ) +M: x86 %compare-vector ( dst src1 src2 rep cc -- ) + [ [ two-operand ] keep ] dip over float-vector-rep? [ %compare-float-vector ] [ %compare-int-vector ] if ; @@ -878,11 +852,6 @@ M: x86 %compare-vector ( dst src1 src2 temp rep cc -- ) { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } { sse4.1? { longlong-2-rep ulonglong-2-rep } } } available-reps ; -: %compare-vector-unord-reps ( -- reps ) - { - { sse? { float-4-rep } } - { sse2? { double-2-rep } } - } available-reps ; : %compare-vector-ord-reps ( -- reps ) { { sse? { float-4-rep } } @@ -893,10 +862,44 @@ M: x86 %compare-vector ( dst src1 src2 temp rep cc -- ) M: x86 %compare-vector-reps { { [ dup { cc= cc/= } memq? ] [ drop %compare-vector-eq-reps ] } - { [ dup { cc<>= cc/<>= } memq? ] [ drop %compare-vector-unord-reps ] } [ drop %compare-vector-ord-reps ] } cond ; +: %compare-float-vector-ccs ( cc -- ccs not? 
) + { + { cc< [ { { cc< f } } f ] } + { cc<= [ { { cc<= f } } f ] } + { cc> [ { { cc< t } } f ] } + { cc>= [ { { cc<= t } } f ] } + { cc= [ { { cc= f } } f ] } + { cc<> [ { { cc< f } { cc< t } } f ] } + { cc<>= [ { { cc<>= f } } f ] } + { cc/< [ { { cc/< f } } f ] } + { cc/<= [ { { cc/<= f } } f ] } + { cc/> [ { { cc/< t } } f ] } + { cc/>= [ { { cc/<= t } } f ] } + { cc/= [ { { cc/= f } } f ] } + { cc/<> [ { { cc= f } { cc/<>= f } } f ] } + { cc/<>= [ { { cc/<>= f } } f ] } + } case ; + +: %compare-int-vector-ccs ( cc -- ccs not? ) + order-cc { + { cc< [ { { cc> t } } f ] } + { cc<= [ { { cc> f } } t ] } + { cc> [ { { cc> f } } f ] } + { cc>= [ { { cc> t } } t ] } + { cc= [ { { cc= f } } f ] } + { cc/= [ { { cc= f } } t ] } + { t [ { } t ] } + { f [ { } f ] } + } case ; + +M: x86 %compare-vector-ccs + swap float-vector-rep? + [ %compare-float-vector-ccs ] + [ %compare-int-vector-ccs ] if ; + :: %test-vector-mask ( dst temp mask vcc -- ) vcc { { vcc-any [ dst dst TEST dst temp \ CMOVNE %boolean ] }