From e74b0b2a7b21996bfc9026fa0fbb82909d4403ee Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sat, 15 May 2010 23:48:22 -0700 Subject: [PATCH] compiler.cfg.instructions: ##gather-int-vector-2/4 instructions that map to SSE4 PINSR/PEXTR --- .../cfg/instructions/instructions.factor | 10 +++++++++ basis/compiler/codegen/codegen.factor | 2 ++ basis/cpu/architecture/architecture.factor | 6 +++++ basis/cpu/x86/x86.factor | 22 +++++++++++++++++++ 4 files changed, 40 insertions(+) diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index 38ca3a93b0..b469866668 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -295,11 +295,21 @@ def: dst use: src1/scalar-rep src2/scalar-rep literal: rep ; +PURE-INSN: ##gather-int-vector-2 +def: dst +use: src1/int-rep src2/int-rep +literal: rep ; + PURE-INSN: ##gather-vector-4 def: dst use: src1/scalar-rep src2/scalar-rep src3/scalar-rep src4/scalar-rep literal: rep ; +PURE-INSN: ##gather-int-vector-4 +def: dst +use: src1/int-rep src2/int-rep src3/int-rep src4/int-rep +literal: rep ; + PURE-INSN: ##shuffle-vector def: dst use: src shuffle diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index d5ab84bf45..b787220b56 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -182,6 +182,8 @@ CODEGEN: ##zero-vector %zero-vector CODEGEN: ##fill-vector %fill-vector CODEGEN: ##gather-vector-2 %gather-vector-2 CODEGEN: ##gather-vector-4 %gather-vector-4 +CODEGEN: ##gather-int-vector-2 %gather-int-vector-2 +CODEGEN: ##gather-int-vector-4 %gather-int-vector-4 CODEGEN: ##shuffle-vector-imm %shuffle-vector-imm CODEGEN: ##shuffle-vector-halves-imm %shuffle-vector-halves-imm CODEGEN: ##shuffle-vector %shuffle-vector diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index d3638dd377..b0d2747ce0 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -299,7 +299,9 @@ HOOK: %float>integer cpu ( dst src -- ) HOOK: %zero-vector cpu ( dst rep -- ) HOOK: %fill-vector cpu ( dst rep -- ) HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- ) +HOOK: %gather-int-vector-2 cpu ( dst src1 src2 rep -- ) HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) +HOOK: %gather-int-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) HOOK: %shuffle-vector cpu ( dst src shuffle rep -- ) HOOK: %shuffle-vector-imm cpu ( dst src shuffle rep -- ) HOOK: %shuffle-vector-halves-imm cpu ( dst src1 src2 shuffle rep -- ) @@ -354,7 +356,9 @@ HOOK: %scalar>vector cpu ( dst src rep -- ) HOOK: %zero-vector-reps cpu ( -- reps ) HOOK: %fill-vector-reps cpu ( -- reps ) HOOK: %gather-vector-2-reps cpu ( -- reps ) +HOOK: %gather-int-vector-2-reps cpu ( -- reps ) HOOK: %gather-vector-4-reps cpu ( -- reps ) +HOOK: %gather-int-vector-4-reps cpu ( -- reps ) HOOK: %alien-vector-reps cpu ( -- reps ) HOOK: %shuffle-vector-reps cpu ( -- reps ) HOOK: %shuffle-vector-imm-reps cpu ( -- reps ) @@ -403,7 +407,9 @@ HOOK: %horizontal-shr-vector-imm-reps cpu ( -- reps ) M: object %zero-vector-reps { } ; M: object %fill-vector-reps { } ; M: object %gather-vector-2-reps { } ; +M: object %gather-int-vector-2-reps { } ; M: object %gather-vector-4-reps { } ; +M: object %gather-int-vector-4-reps { } ; M: object %alien-vector-reps { } ; M: object %shuffle-vector-reps { } ; M: object %shuffle-vector-imm-reps { } ; diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 78c80264ed..9790f6e7dd 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -744,6 +744,18 @@ M: x86 %gather-vector-4-reps { sse2? { float-4-rep int-4-rep uint-4-rep } } } available-reps ; +M:: x86 %gather-int-vector-4 ( dst src1 src2 src3 src4 rep -- ) + dst rep %zero-vector + dst src1 32-bit-version-of 0 PINSRD + dst src2 32-bit-version-of 1 PINSRD + dst src3 32-bit-version-of 2 PINSRD + dst src4 32-bit-version-of 3 PINSRD ; + +M: x86 %gather-int-vector-4-reps + { + { sse4.1? { int-4-rep uint-4-rep } } + } available-reps ; + M:: x86 %gather-vector-2 ( dst src1 src2 rep -- ) rep signed-rep { { double-2-rep [ @@ -761,6 +773,16 @@ M: x86 %gather-vector-2-reps { sse2? { double-2-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; +M:: x86.64 %gather-int-vector-2 ( dst src1 src2 rep -- ) + dst rep %zero-vector + dst src1 0 PINSRQ + dst src2 1 PINSRQ ; + +M: x86.64 %gather-int-vector-2-reps + { + { sse4.1? { longlong-2-rep ulonglong-2-rep } } + } available-reps ; + : sse1-float-4-shuffle ( dst shuffle -- ) { { { 0 1 2 3 } [ drop ] }