diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor
index 6b61cb53cb..8e668c8f11 100644
--- a/basis/compiler/cfg/instructions/instructions.factor
+++ b/basis/compiler/cfg/instructions/instructions.factor
@@ -290,6 +290,36 @@ def: dst
 use: src1 src2
 literal: rep ;
 
+PURE-INSN: ##signed-pack-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
+PURE-INSN: ##unsigned-pack-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
+PURE-INSN: ##unpack-vector-head
+def: dst
+use: src
+literal: rep ;
+
+PURE-INSN: ##unpack-vector-tail
+def: dst
+use: src
+literal: rep ;
+
+PURE-INSN: ##integer>float-vector
+def: dst
+use: src
+literal: rep ;
+
+PURE-INSN: ##float>integer-vector
+def: dst
+use: src
+literal: rep ;
+
 PURE-INSN: ##compare-vector
 def: dst
 use: src1 src2
@@ -783,6 +813,8 @@ UNION: def-is-use-insn
 ##box-displaced-alien
 ##compare-vector
 ##not-vector
+##unpack-vector-head
+##unpack-vector-tail
 ##string-nth
 ##unbox-any-c-ptr ;
 
diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor
index 121f09a5a8..7366555fff 100755
--- a/basis/compiler/codegen/codegen.factor
+++ b/basis/compiler/codegen/codegen.factor
@@ -165,6 +165,12 @@ CODEGEN: ##gather-vector-4 %gather-vector-4
 CODEGEN: ##shuffle-vector %shuffle-vector
 CODEGEN: ##merge-vector-head %merge-vector-head
 CODEGEN: ##merge-vector-tail %merge-vector-tail
+CODEGEN: ##signed-pack-vector %signed-pack-vector
+CODEGEN: ##unsigned-pack-vector %unsigned-pack-vector
+CODEGEN: ##unpack-vector-head %unpack-vector-head
+CODEGEN: ##unpack-vector-tail %unpack-vector-tail
+CODEGEN: ##integer>float-vector %integer>float-vector
+CODEGEN: ##float>integer-vector %float>integer-vector
 CODEGEN: ##compare-vector %compare-vector
 CODEGEN: ##test-vector %test-vector
 CODEGEN: ##add-vector %add-vector
diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor
index 5ce16ad731..67f5018f58 100644
--- a/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@@ -56,6 +56,18 @@ uint-4-rep
 longlong-2-rep
 ulonglong-2-rep ;
 
+UNION: signed-int-vector-rep
+char-16-rep
+short-8-rep
+int-4-rep
+longlong-2-rep ;
+
+UNION: unsigned-int-vector-rep
+uchar-16-rep
+ushort-8-rep
+uint-4-rep
+ulonglong-2-rep ;
+
 UNION: scalar-rep
 char-scalar-rep
 uchar-scalar-rep
@@ -220,6 +232,12 @@ HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
 HOOK: %shuffle-vector cpu ( dst src shuffle rep -- )
 HOOK: %merge-vector-head cpu ( dst src1 src2 rep -- )
 HOOK: %merge-vector-tail cpu ( dst src1 src2 rep -- )
+HOOK: %signed-pack-vector cpu ( dst src1 src2 rep -- )
+HOOK: %unsigned-pack-vector cpu ( dst src1 src2 rep -- )
+HOOK: %unpack-vector-head cpu ( dst src rep -- )
+HOOK: %unpack-vector-tail cpu ( dst src rep -- )
+HOOK: %integer>float-vector cpu ( dst src rep -- )
+HOOK: %float>integer-vector cpu ( dst src rep -- )
 HOOK: %compare-vector cpu ( dst src1 src2 temp rep cc -- )
 HOOK: %test-vector cpu ( dst src1 temp rep vcc -- )
 HOOK: %test-vector-branch cpu ( label src1 temp rep vcc -- )
@@ -259,6 +277,11 @@ HOOK: %gather-vector-2-reps cpu ( -- reps )
 HOOK: %gather-vector-4-reps cpu ( -- reps )
 HOOK: %shuffle-vector-reps cpu ( -- reps )
 HOOK: %merge-vector-reps cpu ( -- reps )
+HOOK: %signed-pack-vector-reps cpu ( -- reps )
+HOOK: %unsigned-pack-vector-reps cpu ( -- reps )
+HOOK: %unpack-vector-reps cpu ( -- reps )
+HOOK: %integer>float-vector-reps cpu ( -- reps )
+HOOK: %float>integer-vector-reps cpu ( -- reps )
 HOOK: %compare-vector-reps cpu ( cc -- reps )
 HOOK: %test-vector-reps cpu ( -- reps )
 HOOK: %add-vector-reps cpu ( -- reps )
diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor
index 9394e864f0..32c92a8da0 100644
--- a/basis/cpu/ppc/ppc.factor
+++ b/basis/cpu/ppc/ppc.factor
@@ -263,6 +263,11 @@ M: ppc %gather-vector-2-reps { } ;
 M: ppc %gather-vector-4-reps { } ;
 M: ppc %shuffle-vector-reps { } ;
 M: ppc %merge-vector-reps { } ;
+M: ppc %signed-pack-vector-reps { } ;
+M: ppc %unsigned-pack-vector-reps { } ;
+M: ppc %unpack-vector-reps { } ;
+M: ppc %integer>float-vector-reps { } ;
+M: ppc %float>integer-vector-reps { } ;
 M: ppc %compare-vector-reps drop { } ;
 M: ppc %test-vector-reps { } ;
 M: ppc %add-vector-reps { } ;
diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor
index fa5d99101b..0b1d4aa74f 100644
--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@@ -749,6 +749,117 @@ M: x86 %merge-vector-reps
         { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
     } available-reps ;
 
+! Packs the lanes of src1 and src2 into dst at half their width using the
+! signed-saturating pack instructions (PACKSSDW for int-4-rep, PACKSSWB for
+! short-8-rep). The rep is kept across two-operand so it can select the opcode.
+M: x86 %signed-pack-vector
+    [ two-operand ] keep
+    {
+        { int-4-rep [ PACKSSDW ] }
+        { short-8-rep [ PACKSSWB ] }
+    } case ;
+
+! PACKSSDW and PACKSSWB are both part of SSE2.
+M: x86 %signed-pack-vector-reps
+    {
+        { sse2? { short-8-rep int-4-rep } }
+    } available-reps ;
+
+! Same as %signed-pack-vector but with the unsigned-saturating pack
+! instructions. unsign-rep is applied first so one case table serves both the
+! signed and unsigned input reps (presumably it maps each unsigned rep to its
+! signed counterpart -- it is defined outside this hunk).
+M: x86 %unsigned-pack-vector
+    [ two-operand ] keep
+    unsign-rep {
+        { int-4-rep [ PACKUSDW ] }
+        { short-8-rep [ PACKUSWB ] }
+    } case ;
+
+! PACKUSWB is SSE2, but PACKUSDW was only introduced with SSE4.1, so the
+! 32-bit reps must not be advertised on CPUs that stop at SSE2.
+M: x86 %unsigned-pack-vector-reps
+    {
+        { sse2? { ushort-8-rep short-8-rep } }
+        { sse4.1? { uint-4-rep int-4-rep } }
+    } available-reps ;
+
+! Fills dst with a per-lane sign mask of src: dst is zeroed and then compared
+! "greater than" against src, so a lane ends up all ones when 0 > src.
+! Only the signed integer reps are handled.
+:: %sign-extension-vector ( dst src rep -- )
+    dst rep %zero-vector
+    dst src rep {
+        { char-16-rep [ PCMPGTB ] }
+        { short-8-rep [ PCMPGTW ] }
+        { int-4-rep [ PCMPGTD ] }
+        { longlong-2-rep [ PCMPGTQ ] }
+    } case ;
+
+! Prepares dst with the bits that will widen src: a sign mask for signed
+! integer reps, zeros for everything else.
+:: (%unpack-vector-dest) ( dst src rep -- )
+    dst rep signed-int-vector-rep?
+    [ src rep %sign-extension-vector ]
+    [ rep %zero-vector ] if ;
+
+! Widens the low half of src by interleaving it with the extension bits
+! prepared in dst.
+! NOTE(review): PUNPCKL* keeps the destination operand's element in the low
+! part of each interleaved pair, so with dst holding the extension bits the
+! data from src lands in the high part -- confirm this operand order is intended.
+M: x86 %unpack-vector-head ( dst src rep -- )
+    [ (%unpack-vector-dest) ] 3keep
+    unsign-rep {
+        { char-16-rep [ PUNPCKLBW ] }
+        { short-8-rep [ PUNPCKLWD ] }
+        { int-4-rep [ PUNPCKLDQ ] }
+        { longlong-2-rep [ PUNPCKLQDQ ] }
+    } case ;
+
+! High-half counterpart of %unpack-vector-head (PUNPCKH* instead of PUNPCKL*).
+M: x86 %unpack-vector-tail ( dst src rep -- )
+    [ (%unpack-vector-dest) ] 3keep
+    unsign-rep {
+        { char-16-rep [ PUNPCKHBW ] }
+        { short-8-rep [ PUNPCKHWD ] }
+        { int-4-rep [ PUNPCKHDQ ] }
+        { longlong-2-rep [ PUNPCKHQDQ ] }
+    } case ;
+
+! The PUNPCK* forms and PCMPGTB/W/D are SSE2. Signed 64-bit lanes go through
+! PCMPGTQ in %sign-extension-vector, which requires SSE4.2, so longlong-2-rep
+! is only advertised there.
+M: x86 %unpack-vector-reps ( -- reps )
+    {
+        { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep ulonglong-2-rep } }
+        { sse4.2? { longlong-2-rep } }
+    } available-reps ;
+
+! Converts four packed 32-bit integers to single-precision floats.
+M: x86 %integer>float-vector ( dst src rep -- )
+    {
+        { int-4-rep [ CVTDQ2PS ] }
+    } case ;
+
+M: x86 %integer>float-vector-reps
+    {
+        { sse2? { int-4-rep } }
+    } available-reps ;
+
+! Converts four packed single-precision floats to 32-bit integers.
+! NOTE(review): CVTPS2DQ rounds according to MXCSR; CVTTPS2DQ is the
+! truncating form -- confirm which one float>integer is meant to use.
+M: x86 %float>integer-vector ( dst src rep -- )
+    {
+        { float-4-rep [ CVTPS2DQ ] }
+    } case ;
+
+M: x86 %float>integer-vector-reps
+    {
+        { sse2? { float-4-rep } }
+    } available-reps ;
+
 :: compare-float-v-operands ( dst src1 src2 temp rep cc -- dst' src' rep cc' )
     cc { cc> cc>= cc/> cc/>= } member?
     [ dst src2 src1 rep two-operand rep cc swap-cc ]