math.vectors.simd: add fast intrinsic for 'nth', replace broadcast primitive with shuffles
parent
6382aaabd5
commit
f395d83379
|
@ -278,11 +278,6 @@ PURE-INSN: ##zero-vector
|
||||||
def: dst
|
def: dst
|
||||||
literal: rep ;
|
literal: rep ;
|
||||||
|
|
||||||
PURE-INSN: ##broadcast-vector
|
|
||||||
def: dst
|
|
||||||
use: src/scalar-rep
|
|
||||||
literal: rep ;
|
|
||||||
|
|
||||||
PURE-INSN: ##gather-vector-2
|
PURE-INSN: ##gather-vector-2
|
||||||
def: dst
|
def: dst
|
||||||
use: src1/scalar-rep src2/scalar-rep
|
use: src1/scalar-rep src2/scalar-rep
|
||||||
|
@ -298,11 +293,6 @@ def: dst
|
||||||
use: src
|
use: src
|
||||||
literal: shuffle rep ;
|
literal: shuffle rep ;
|
||||||
|
|
||||||
PURE-INSN: ##select-vector
|
|
||||||
def: dst
|
|
||||||
use: src
|
|
||||||
literal: n rep ;
|
|
||||||
|
|
||||||
PURE-INSN: ##add-vector
|
PURE-INSN: ##add-vector
|
||||||
def: dst
|
def: dst
|
||||||
use: src1 src2
|
use: src1 src2
|
||||||
|
@ -418,7 +408,7 @@ def: dst
|
||||||
use: src1 src2/scalar-rep
|
use: src1 src2/scalar-rep
|
||||||
literal: rep ;
|
literal: rep ;
|
||||||
|
|
||||||
! Scalar/integer conversion
|
! Scalar/integer and scalar/vector conversion
|
||||||
PURE-INSN: ##scalar>integer
|
PURE-INSN: ##scalar>integer
|
||||||
def: dst/int-rep
|
def: dst/int-rep
|
||||||
use: src
|
use: src
|
||||||
|
@ -429,6 +419,16 @@ def: dst
|
||||||
use: src/int-rep
|
use: src/int-rep
|
||||||
literal: rep ;
|
literal: rep ;
|
||||||
|
|
||||||
|
! Pure instruction with one output (dst, declared in scalar-rep), one
! input (src) and a literal rep.
! NOTE(review): judging by the name, src is a vector register and dst
! receives a scalar taken from it -- confirm against the backend.
PURE-INSN: ##vector>scalar
|
||||||
|
def: dst/scalar-rep
|
||||||
|
use: src
|
||||||
|
literal: rep ;
|
||||||
|
|
||||||
|
! Pure instruction with one output (dst), one input (src, declared in
! scalar-rep) and a literal rep.
! NOTE(review): judging by the name, this is the inverse of
! ##vector>scalar, placing a scalar into a vector register -- confirm.
PURE-INSN: ##scalar>vector
|
||||||
|
def: dst
|
||||||
|
use: src/scalar-rep
|
||||||
|
literal: rep ;
|
||||||
|
|
||||||
! Boxing and unboxing aliens
|
! Boxing and unboxing aliens
|
||||||
PURE-INSN: ##box-alien
|
PURE-INSN: ##box-alien
|
||||||
def: dst/int-rep
|
def: dst/int-rep
|
||||||
|
|
|
@ -175,7 +175,7 @@ IN: compiler.cfg.intrinsics
|
||||||
{ math.vectors.simd.intrinsics:(simd-vrshift) [ [ ^^shr-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vrshift) [ [ ^^shr-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-hlshift) [ [ ^^horizontal-shl-vector ] emit-horizontal-shift ] }
|
{ math.vectors.simd.intrinsics:(simd-hlshift) [ [ ^^horizontal-shl-vector ] emit-horizontal-shift ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-hrshift) [ [ ^^horizontal-shr-vector ] emit-horizontal-shift ] }
|
{ math.vectors.simd.intrinsics:(simd-hrshift) [ [ ^^horizontal-shr-vector ] emit-horizontal-shift ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-broadcast) [ emit-broadcast-vector ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
|
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
|
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] }
|
{ math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] }
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
! Copyright (C) 2009 Slava Pestov.
|
! Copyright (C) 2009 Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors byte-arrays fry cpu.architecture kernel math
|
USING: accessors byte-arrays fry cpu.architecture kernel math
|
||||||
sequences macros generalizations combinators
|
sequences math.vectors.simd.intrinsics macros generalizations
|
||||||
combinators.short-circuit arrays compiler.tree.propagation.info
|
combinators combinators.short-circuit arrays
|
||||||
compiler.cfg.builder.blocks compiler.cfg.stacks
|
compiler.tree.propagation.info compiler.cfg.builder.blocks
|
||||||
compiler.cfg.stacks.local compiler.cfg.hats
|
compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
|
||||||
compiler.cfg.instructions compiler.cfg.registers
|
compiler.cfg.instructions compiler.cfg.registers
|
||||||
compiler.cfg.intrinsics.alien ;
|
compiler.cfg.intrinsics.alien ;
|
||||||
IN: compiler.cfg.intrinsics.simd
|
IN: compiler.cfg.intrinsics.simd
|
||||||
|
@ -70,6 +70,19 @@ MACRO: if-literals-match ( quots -- )
|
||||||
[ [ -2 inc-d ds-pop ] 2dip ^^shuffle-vector ds-push ]
|
[ [ -2 inc-d ds-pop ] 2dip ^^shuffle-vector ds-push ]
|
||||||
{ [ shuffle? ] [ representation? ] } if-literals-match ; inline
|
{ [ shuffle? ] [ representation? ] } if-literals-match ; inline
|
||||||
|
|
||||||
|
! Emits a broadcast as two instructions instead of a dedicated primitive:
! src is first passed through ^^scalar>vector, then ^^shuffle-vector is
! applied with a mask of rep-components zeros, so every mask entry names
! component 0. rep is threaded through both steps with keep.
: ^^broadcast-vector ( src rep -- dst )
|
||||||
|
[ ^^scalar>vector ] keep
|
||||||
|
[ rep-components 0 <array> ] keep
|
||||||
|
^^shuffle-vector ;
|
||||||
|
|
||||||
|
! Emitter for a broadcast node: hands ^^broadcast-vector to
! emit-unary-vector-op (defined outside this hunk).
: emit-broadcast-vector ( node -- )
|
||||||
|
[ ^^broadcast-vector ] emit-unary-vector-op ;
|
||||||
|
|
||||||
|
! Selects component n of src: builds a shuffle mask holding n repeated
! rep-components times, applies ^^shuffle-vector with it, then passes the
! shuffled vector through ^^vector>scalar. The same rep is used for all
! three steps.
: ^^select-vector ( src n rep -- dst )
|
||||||
|
[ rep-components swap <array> ] keep
|
||||||
|
[ ^^shuffle-vector ] keep
|
||||||
|
^^vector>scalar ;
|
||||||
|
|
||||||
: emit-select-vector ( node -- )
|
: emit-select-vector ( node -- )
|
||||||
[ [ -2 inc-d ds-pop ] 2dip ^^select-vector ds-push ]
|
[ [ -2 inc-d ds-pop ] 2dip ^^select-vector ds-push ]
|
||||||
{ [ integer? ] [ representation? ] } if-literals-match ; inline
|
{ [ integer? ] [ representation? ] } if-literals-match ; inline
|
||||||
|
|
|
@ -162,11 +162,9 @@ CODEGEN: ##integer>float %integer>float
|
||||||
CODEGEN: ##float>integer %float>integer
|
CODEGEN: ##float>integer %float>integer
|
||||||
CODEGEN: ##unbox-vector %unbox-vector
|
CODEGEN: ##unbox-vector %unbox-vector
|
||||||
CODEGEN: ##zero-vector %zero-vector
|
CODEGEN: ##zero-vector %zero-vector
|
||||||
CODEGEN: ##broadcast-vector %broadcast-vector
|
|
||||||
CODEGEN: ##gather-vector-2 %gather-vector-2
|
CODEGEN: ##gather-vector-2 %gather-vector-2
|
||||||
CODEGEN: ##gather-vector-4 %gather-vector-4
|
CODEGEN: ##gather-vector-4 %gather-vector-4
|
||||||
CODEGEN: ##shuffle-vector %shuffle-vector
|
CODEGEN: ##shuffle-vector %shuffle-vector
|
||||||
CODEGEN: ##select-vector %select-vector
|
|
||||||
CODEGEN: ##box-vector %box-vector
|
CODEGEN: ##box-vector %box-vector
|
||||||
CODEGEN: ##add-vector %add-vector
|
CODEGEN: ##add-vector %add-vector
|
||||||
CODEGEN: ##saturated-add-vector %saturated-add-vector
|
CODEGEN: ##saturated-add-vector %saturated-add-vector
|
||||||
|
@ -193,6 +191,8 @@ CODEGEN: ##shl-vector %shl-vector
|
||||||
CODEGEN: ##shr-vector %shr-vector
|
CODEGEN: ##shr-vector %shr-vector
|
||||||
CODEGEN: ##integer>scalar %integer>scalar
|
CODEGEN: ##integer>scalar %integer>scalar
|
||||||
CODEGEN: ##scalar>integer %scalar>integer
|
CODEGEN: ##scalar>integer %scalar>integer
|
||||||
|
CODEGEN: ##vector>scalar %vector>scalar
|
||||||
|
CODEGEN: ##scalar>vector %scalar>vector
|
||||||
CODEGEN: ##box-alien %box-alien
|
CODEGEN: ##box-alien %box-alien
|
||||||
CODEGEN: ##box-displaced-alien %box-displaced-alien
|
CODEGEN: ##box-displaced-alien %box-displaced-alien
|
||||||
CODEGEN: ##unbox-alien %unbox-alien
|
CODEGEN: ##unbox-alien %unbox-alien
|
||||||
|
|
|
@ -28,7 +28,6 @@ IN: compiler.tree.propagation.simd
|
||||||
(simd-broadcast)
|
(simd-broadcast)
|
||||||
(simd-gather-2)
|
(simd-gather-2)
|
||||||
(simd-gather-4)
|
(simd-gather-4)
|
||||||
(simd-select)
|
|
||||||
alien-vector
|
alien-vector
|
||||||
} [ { byte-array } "default-output-classes" set-word-prop ] each
|
} [ { byte-array } "default-output-classes" set-word-prop ] each
|
||||||
|
|
||||||
|
@ -46,6 +45,8 @@ IN: compiler.tree.propagation.simd
|
||||||
|
|
||||||
\ (simd-v.) [ 2nip scalar-output-class ] "outputs" set-word-prop
|
\ (simd-v.) [ 2nip scalar-output-class ] "outputs" set-word-prop
|
||||||
|
|
||||||
|
\ (simd-select) [ 2nip scalar-output-class ] "outputs" set-word-prop
|
||||||
|
|
||||||
\ assert-positive [
|
\ assert-positive [
|
||||||
real [0,inf] <class/interval-info> value-info-intersect
|
real [0,inf] <class/interval-info> value-info-intersect
|
||||||
] "outputs" set-word-prop
|
] "outputs" set-word-prop
|
||||||
|
|
|
@ -212,11 +212,9 @@ HOOK: %box-vector cpu ( dst src temp rep -- )
|
||||||
HOOK: %unbox-vector cpu ( dst src rep -- )
|
HOOK: %unbox-vector cpu ( dst src rep -- )
|
||||||
|
|
||||||
HOOK: %zero-vector cpu ( dst rep -- )
|
HOOK: %zero-vector cpu ( dst rep -- )
|
||||||
HOOK: %broadcast-vector cpu ( dst src rep -- )
|
|
||||||
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
|
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
|
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
|
||||||
HOOK: %shuffle-vector cpu ( dst src shuffle rep -- )
|
HOOK: %shuffle-vector cpu ( dst src shuffle rep -- )
|
||||||
HOOK: %select-vector cpu ( dst src n rep -- )
|
|
||||||
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %saturated-add-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %saturated-add-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
|
||||||
|
@ -243,13 +241,13 @@ HOOK: %horizontal-shr-vector cpu ( dst src1 src2 rep -- )
|
||||||
|
|
||||||
HOOK: %integer>scalar cpu ( dst src rep -- )
|
HOOK: %integer>scalar cpu ( dst src rep -- )
|
||||||
HOOK: %scalar>integer cpu ( dst src rep -- )
|
HOOK: %scalar>integer cpu ( dst src rep -- )
|
||||||
|
HOOK: %vector>scalar cpu ( dst src rep -- )
|
||||||
|
HOOK: %scalar>vector cpu ( dst src rep -- )
|
||||||
|
|
||||||
HOOK: %zero-vector-reps cpu ( -- reps )
|
HOOK: %zero-vector-reps cpu ( -- reps )
|
||||||
HOOK: %broadcast-vector-reps cpu ( -- reps )
|
|
||||||
HOOK: %gather-vector-2-reps cpu ( -- reps )
|
HOOK: %gather-vector-2-reps cpu ( -- reps )
|
||||||
HOOK: %gather-vector-4-reps cpu ( -- reps )
|
HOOK: %gather-vector-4-reps cpu ( -- reps )
|
||||||
HOOK: %shuffle-vector-reps cpu ( -- reps )
|
HOOK: %shuffle-vector-reps cpu ( -- reps )
|
||||||
HOOK: %select-vector-reps cpu ( -- reps )
|
|
||||||
HOOK: %add-vector-reps cpu ( -- reps )
|
HOOK: %add-vector-reps cpu ( -- reps )
|
||||||
HOOK: %saturated-add-vector-reps cpu ( -- reps )
|
HOOK: %saturated-add-vector-reps cpu ( -- reps )
|
||||||
HOOK: %add-sub-vector-reps cpu ( -- reps )
|
HOOK: %add-sub-vector-reps cpu ( -- reps )
|
||||||
|
|
|
@ -184,6 +184,7 @@ M: ppc %shr-imm swapd SRWI ;
|
||||||
M: ppc %sar SRAW ;
|
M: ppc %sar SRAW ;
|
||||||
M: ppc %sar-imm SRAWI ;
|
M: ppc %sar-imm SRAWI ;
|
||||||
M: ppc %not NOT ;
|
M: ppc %not NOT ;
|
||||||
|
M: ppc %neg NEG ;
|
||||||
|
|
||||||
:: overflow-template ( label dst src1 src2 insn -- )
|
:: overflow-template ( label dst src1 src2 insn -- )
|
||||||
0 0 LI
|
0 0 LI
|
||||||
|
@ -262,9 +263,10 @@ M: ppc %single>double-float double-rep %copy ;
|
||||||
M: ppc %double>single-float double-rep %copy ;
|
M: ppc %double>single-float double-rep %copy ;
|
||||||
|
|
||||||
! VMX/AltiVec not supported yet
|
! VMX/AltiVec not supported yet
|
||||||
M: ppc %broadcast-vector-reps { } ;
|
M: ppc %zero-vector-reps { } ;
|
||||||
M: ppc %gather-vector-2-reps { } ;
|
M: ppc %gather-vector-2-reps { } ;
|
||||||
M: ppc %gather-vector-4-reps { } ;
|
M: ppc %gather-vector-4-reps { } ;
|
||||||
|
M: ppc %shuffle-vector-reps { } ;
|
||||||
M: ppc %add-vector-reps { } ;
|
M: ppc %add-vector-reps { } ;
|
||||||
M: ppc %saturated-add-vector-reps { } ;
|
M: ppc %saturated-add-vector-reps { } ;
|
||||||
M: ppc %add-sub-vector-reps { } ;
|
M: ppc %add-sub-vector-reps { } ;
|
||||||
|
@ -275,14 +277,19 @@ M: ppc %saturated-mul-vector-reps { } ;
|
||||||
M: ppc %div-vector-reps { } ;
|
M: ppc %div-vector-reps { } ;
|
||||||
M: ppc %min-vector-reps { } ;
|
M: ppc %min-vector-reps { } ;
|
||||||
M: ppc %max-vector-reps { } ;
|
M: ppc %max-vector-reps { } ;
|
||||||
|
M: ppc %dot-vector-reps { } ;
|
||||||
M: ppc %sqrt-vector-reps { } ;
|
M: ppc %sqrt-vector-reps { } ;
|
||||||
M: ppc %horizontal-add-vector-reps { } ;
|
M: ppc %horizontal-add-vector-reps { } ;
|
||||||
|
M: ppc %horizontal-sub-vector-reps { } ;
|
||||||
M: ppc %abs-vector-reps { } ;
|
M: ppc %abs-vector-reps { } ;
|
||||||
M: ppc %and-vector-reps { } ;
|
M: ppc %and-vector-reps { } ;
|
||||||
|
M: ppc %andn-vector-reps { } ;
|
||||||
M: ppc %or-vector-reps { } ;
|
M: ppc %or-vector-reps { } ;
|
||||||
M: ppc %xor-vector-reps { } ;
|
M: ppc %xor-vector-reps { } ;
|
||||||
M: ppc %shl-vector-reps { } ;
|
M: ppc %shl-vector-reps { } ;
|
||||||
M: ppc %shr-vector-reps { } ;
|
M: ppc %shr-vector-reps { } ;
|
||||||
|
M: ppc %horizontal-shl-vector-reps { } ;
|
||||||
|
M: ppc %horizontal-shr-vector-reps { } ;
|
||||||
|
|
||||||
M: ppc %unbox-alien ( dst src -- )
|
M: ppc %unbox-alien ( dst src -- )
|
||||||
alien-offset LWZ ;
|
alien-offset LWZ ;
|
||||||
|
|
|
@ -600,42 +600,42 @@ M: x86 %zero-vector-reps
|
||||||
{ uchar-16-rep char-16-rep }
|
{ uchar-16-rep char-16-rep }
|
||||||
} ?at drop ;
|
} ?at drop ;
|
||||||
|
|
||||||
M:: x86 %broadcast-vector ( dst src rep -- )
|
! M:: x86 %broadcast-vector ( dst src rep -- )
|
||||||
rep unsign-rep {
|
! rep unsign-rep {
|
||||||
{ float-4-rep [
|
! { float-4-rep [
|
||||||
dst src float-4-rep %copy
|
! dst src float-4-rep %copy
|
||||||
dst dst { 0 0 0 0 } SHUFPS
|
! dst dst { 0 0 0 0 } SHUFPS
|
||||||
] }
|
! ] }
|
||||||
{ double-2-rep [
|
! { double-2-rep [
|
||||||
dst src MOVDDUP
|
! dst src MOVDDUP
|
||||||
] }
|
! ] }
|
||||||
{ longlong-2-rep [
|
! { longlong-2-rep [
|
||||||
dst src =
|
! dst src =
|
||||||
[ dst dst PUNPCKLQDQ ]
|
! [ dst dst PUNPCKLQDQ ]
|
||||||
[ dst src { 0 1 0 1 } PSHUFD ]
|
! [ dst src { 0 1 0 1 } PSHUFD ]
|
||||||
if
|
! if
|
||||||
] }
|
! ] }
|
||||||
{ int-4-rep [
|
! { int-4-rep [
|
||||||
dst src { 0 0 0 0 } PSHUFD
|
! dst src { 0 0 0 0 } PSHUFD
|
||||||
] }
|
! ] }
|
||||||
{ short-8-rep [
|
! { short-8-rep [
|
||||||
dst src { 0 0 0 0 } PSHUFLW
|
! dst src { 0 0 0 0 } PSHUFLW
|
||||||
dst dst PUNPCKLQDQ
|
! dst dst PUNPCKLQDQ
|
||||||
] }
|
! ] }
|
||||||
{ char-16-rep [
|
! { char-16-rep [
|
||||||
dst src char-16-rep %copy
|
! dst src char-16-rep %copy
|
||||||
dst dst PUNPCKLBW
|
! dst dst PUNPCKLBW
|
||||||
dst dst { 0 0 0 0 } PSHUFLW
|
! dst dst { 0 0 0 0 } PSHUFLW
|
||||||
dst dst PUNPCKLQDQ
|
! dst dst PUNPCKLQDQ
|
||||||
] }
|
! ] }
|
||||||
} case ;
|
! } case ;
|
||||||
|
!
|
||||||
M: x86 %broadcast-vector-reps
|
! M: x86 %broadcast-vector-reps
|
||||||
{
|
! {
|
||||||
! Can't do this with sse1 since it will want to unbox
|
! ! Can't do this with sse1 since it will want to unbox
|
||||||
! a double-precision float and convert to single precision
|
! ! a double-precision float and convert to single precision
|
||||||
{ sse2? { float-4-rep double-2-rep longlong-2-rep ulonglong-2-rep int-4-rep uint-4-rep short-8-rep ushort-8-rep char-16-rep uchar-16-rep } }
|
! { sse2? { float-4-rep double-2-rep longlong-2-rep ulonglong-2-rep int-4-rep uint-4-rep short-8-rep ushort-8-rep char-16-rep uchar-16-rep } }
|
||||||
} available-reps ;
|
! } available-reps ;
|
||||||
|
|
||||||
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
|
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
|
||||||
rep unsign-rep {
|
rep unsign-rep {
|
||||||
|
@ -721,11 +721,10 @@ M:: x86 %shuffle-vector ( dst src shuffle rep -- )
|
||||||
|
|
||||||
M: x86 %shuffle-vector-reps
|
M: x86 %shuffle-vector-reps
|
||||||
{
|
{
|
||||||
{ sse2? { double-2-rep float-4-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
{ sse? { float-4-rep } }
|
||||||
|
{ sse2? { double-2-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %select-vector-reps { } ;
|
|
||||||
|
|
||||||
M: x86 %add-vector ( dst src1 src2 rep -- )
|
M: x86 %add-vector ( dst src1 src2 rep -- )
|
||||||
[ two-operand ] keep
|
[ two-operand ] keep
|
||||||
{
|
{
|
||||||
|
@ -1044,8 +1043,9 @@ M: x86 %shr-vector-reps
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %integer>scalar drop MOVD ;
|
M: x86 %integer>scalar drop MOVD ;
|
||||||
|
|
||||||
M: x86 %scalar>integer drop MOVD ;
|
M: x86 %scalar>integer drop MOVD ;
|
||||||
|
! On x86 both conversions are emitted as a plain %copy.
! NOTE(review): presumably this works because a scalar occupies the low
! component of the same kind of register as the vector -- confirm.
M: x86 %vector>scalar %copy ;
|
||||||
|
M: x86 %scalar>vector %copy ;
|
||||||
|
|
||||||
M:: x86 %spill ( src rep dst -- ) dst src rep %copy ;
|
M:: x86 %spill ( src rep dst -- ) dst src rep %copy ;
|
||||||
M:: x86 %reload ( dst rep src -- ) dst src rep %copy ;
|
M:: x86 %reload ( dst rep src -- ) dst src rep %copy ;
|
||||||
|
|
|
@ -5,7 +5,7 @@ functors generalizations kernel literals locals math math.functions
|
||||||
math.vectors math.vectors.private math.vectors.simd.intrinsics
|
math.vectors math.vectors.private math.vectors.simd.intrinsics
|
||||||
math.vectors.specialization parser prettyprint.custom sequences
|
math.vectors.specialization parser prettyprint.custom sequences
|
||||||
sequences.private strings words definitions macros cpu.architecture
|
sequences.private strings words definitions macros cpu.architecture
|
||||||
namespaces arrays quotations ;
|
namespaces arrays quotations combinators sets ;
|
||||||
QUALIFIED-WITH: math m
|
QUALIFIED-WITH: math m
|
||||||
IN: math.vectors.simd.functor
|
IN: math.vectors.simd.functor
|
||||||
|
|
||||||
|
@ -28,11 +28,23 @@ MACRO: simd-boa ( rep class -- simd-array )
|
||||||
:: define-with-custom-inlining ( word rep class -- )
|
:: define-with-custom-inlining ( word rep class -- )
|
||||||
word [
|
word [
|
||||||
drop
|
drop
|
||||||
rep \ (simd-broadcast) supported-simd-op? [
|
rep \ (simd-vshuffle) supported-simd-op? [
|
||||||
[ rep rep-coerce rep (simd-broadcast) class boa ]
|
[ rep rep-coerce rep (simd-broadcast) class boa ]
|
||||||
] [ word def>> ] if
|
] [ word def>> ] if
|
||||||
] "custom-inlining" set-word-prop ;
|
] "custom-inlining" set-word-prop ;
|
||||||
|
|
||||||
|
! Builds the element-access quotation that goes through (simd-select).
! map-index over the rep-components value produces one { index quot } pair
! per element, where quot is [ elt rep (simd-select) ] and elt is the
! element map-index supplies (equal to the index if rep-components is an
! integer -- TODO confirm). The resulting quotation swaps the index to the
! top of the stack, coerces it with >fixnum and dispatches on it with case.
! NOTE(review): the case table has no default branch, so an index that is
! not in the table presumably raises case's no-match error -- confirm.
: simd-nth-fast ( rep -- quot )
|
||||||
|
[ rep-components ] keep
|
||||||
|
'[ swap _ '[ _ _ (simd-select) ] 2array ] map-index
|
||||||
|
'[ swap >fixnum _ case ] ;
|
||||||
|
|
||||||
|
! Fallback element accessor: takes the rep's component type and builds the
! quotation from it with c-type-getter-boxer and array-accessor.
: simd-nth-slow ( rep -- quot )
|
||||||
|
rep-component-type dup c-type-getter-boxer array-accessor ;
|
||||||
|
|
||||||
|
! Expands to the quotation built by simd-nth-fast when rep supports
! (simd-vshuffle), and to the one built by simd-nth-slow otherwise.
MACRO: simd-nth ( rep -- x )
|
||||||
|
dup \ (simd-vshuffle) supported-simd-op?
|
||||||
|
[ simd-nth-fast ] [ simd-nth-slow ] if ;
|
||||||
|
|
||||||
: boa-effect ( rep n -- effect )
|
: boa-effect ( rep n -- effect )
|
||||||
[ rep-components ] dip *
|
[ rep-components ] dip *
|
||||||
[ CHAR: a + 1string ] map
|
[ CHAR: a + 1string ] map
|
||||||
|
@ -45,8 +57,8 @@ MACRO: simd-boa ( rep class -- simd-array )
|
||||||
|
|
||||||
ERROR: bad-schema schema ;
|
ERROR: bad-schema schema ;
|
||||||
|
|
||||||
: low-level-ops ( box-quot: ( inputs... simd-op -- outputs... ) -- alist )
|
: low-level-ops ( simd-ops alist -- alist' )
|
||||||
[ simd-ops get ] dip '[
|
'[
|
||||||
1quotation
|
1quotation
|
||||||
over word-schema _ ?at [ bad-schema ] unless
|
over word-schema _ ?at [ bad-schema ] unless
|
||||||
[ ] 2sequence
|
[ ] 2sequence
|
||||||
|
@ -73,21 +85,17 @@ ERROR: bad-schema schema ;
|
||||||
! in the general case.
|
! in the general case.
|
||||||
elt-class m:float = [ { distance [ v- norm ] } suffix ] when ;
|
elt-class m:float = [ { distance [ v- norm ] } suffix ] when ;
|
||||||
|
|
||||||
:: simd-vector-words ( class ctor rep vv->v vn->v vv->n v->v v->n -- )
|
! Descriptor consumed by define-simd. class, ctor, rep and wrappers are
! filled in by the define-simd-128 / define-simd-256 functors, ops by
! (define-simd-128) / (define-simd-256), and elt-class by define-simd.
TUPLE: simd class elt-class ops wrappers ctor rep ;
|
||||||
rep rep-component-type c-type-boxed-class :> elt-class
|
|
||||||
class
|
: define-simd ( simd -- )
|
||||||
elt-class
|
dup rep>> rep-component-type c-type-boxed-class >>elt-class
|
||||||
{
|
{
|
||||||
{ { +vector+ +vector+ -> +vector+ } vv->v }
|
[ class>> ]
|
||||||
{ { +vector+ +scalar+ -> +vector+ } vn->v }
|
[ elt-class>> ]
|
||||||
{ { +vector+ +literal+ -> +vector+ } vn->v }
|
[ [ ops>> ] [ wrappers>> ] bi low-level-ops ]
|
||||||
{ { +vector+ +vector+ -> +scalar+ } vv->n }
|
[ rep>> supported-simd-ops ]
|
||||||
{ { +vector+ -> +vector+ } v->v }
|
[ [ ctor>> ] [ elt-class>> ] bi high-level-ops assoc-union ]
|
||||||
{ { +vector+ -> +scalar+ } v->n }
|
} cleave
|
||||||
{ { +vector+ -> +nonnegative+ } v->n }
|
|
||||||
} low-level-ops
|
|
||||||
rep supported-simd-ops
|
|
||||||
ctor elt-class high-level-ops assoc-union
|
|
||||||
specialize-vector-words ;
|
specialize-vector-words ;
|
||||||
|
|
||||||
:: define-simd-128-type ( class rep -- )
|
:: define-simd-128-type ( class rep -- )
|
||||||
|
@ -101,6 +109,11 @@ ERROR: bad-schema schema ;
|
||||||
rep >>rep
|
rep >>rep
|
||||||
class typedef ;
|
class typedef ;
|
||||||
|
|
||||||
|
! Completes a SIMD-128 definition: stores the current value of simd-ops in
! the ops slot, then runs define-simd on the tuple and define-simd-128-type
! on its class and rep.
: (define-simd-128) ( simd -- )
|
||||||
|
simd-ops get >>ops
|
||||||
|
[ define-simd ]
|
||||||
|
[ [ class>> ] [ rep>> ] bi define-simd-128-type ] bi ;
|
||||||
|
|
||||||
FUNCTOR: define-simd-128 ( T -- )
|
FUNCTOR: define-simd-128 ( T -- )
|
||||||
|
|
||||||
N [ 16 T heap-size /i ]
|
N [ 16 T heap-size /i ]
|
||||||
|
@ -112,7 +125,6 @@ A-cast DEFINES ${A}-cast
|
||||||
>A DEFINES >${A}
|
>A DEFINES >${A}
|
||||||
A{ DEFINES ${A}{
|
A{ DEFINES ${A}{
|
||||||
|
|
||||||
NTH [ T dup c-type-getter-boxer array-accessor ]
|
|
||||||
SET-NTH [ T dup c-setter array-accessor ]
|
SET-NTH [ T dup c-setter array-accessor ]
|
||||||
|
|
||||||
A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
|
A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
|
||||||
|
@ -131,7 +143,7 @@ M: A clone underlying>> clone \ A boa ; inline
|
||||||
|
|
||||||
M: A length drop N ; inline
|
M: A length drop N ; inline
|
||||||
|
|
||||||
M: A nth-unsafe underlying>> NTH call ; inline
|
M: A nth-unsafe underlying>> A-rep simd-nth ; inline
|
||||||
|
|
||||||
M: A set-nth-unsafe underlying>> SET-NTH call ; inline
|
M: A set-nth-unsafe underlying>> SET-NTH call ; inline
|
||||||
|
|
||||||
|
@ -193,8 +205,20 @@ INSTANCE: A sequence
|
||||||
: A-v->n-op ( v quot -- n )
|
: A-v->n-op ( v quot -- n )
|
||||||
[ underlying>> A-rep ] dip call ; inline
|
[ underlying>> A-rep ] dip call ; inline
|
||||||
|
|
||||||
\ A \ A-with \ A-rep \ A-vv->v-op \ A-vn->v-op \ A-vv->n-op \ A-v->v-op \ A-v->n-op simd-vector-words
|
simd new
|
||||||
\ A \ A-rep define-simd-128-type
|
\ A >>class
|
||||||
|
\ A-with >>ctor
|
||||||
|
\ A-rep >>rep
|
||||||
|
{
|
||||||
|
{ { +vector+ +vector+ -> +vector+ } A-vv->v-op }
|
||||||
|
{ { +vector+ +scalar+ -> +vector+ } A-vn->v-op }
|
||||||
|
{ { +vector+ +literal+ -> +vector+ } A-vn->v-op }
|
||||||
|
{ { +vector+ +vector+ -> +scalar+ } A-vv->n-op }
|
||||||
|
{ { +vector+ -> +vector+ } A-v->v-op }
|
||||||
|
{ { +vector+ -> +scalar+ } A-v->n-op }
|
||||||
|
{ { +vector+ -> +nonnegative+ } A-v->n-op }
|
||||||
|
} >>wrappers
|
||||||
|
(define-simd-128)
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
|
@ -223,6 +247,11 @@ SLOT: underlying2
|
||||||
rep >>rep
|
rep >>rep
|
||||||
class typedef ;
|
class typedef ;
|
||||||
|
|
||||||
|
! Completes a SIMD-256 definition. The ops slot is set to simd-ops with
! the vshuffle, hlshift and hrshift entries removed (assoc-diff against
! their unique set); define-simd is then run on the tuple and
! define-simd-256-type on its class and rep.
! NOTE(review): presumably those three words are excluded because they
! have no two-register implementation -- confirm.
: (define-simd-256) ( simd -- )
|
||||||
|
simd-ops get { vshuffle hlshift hrshift } unique assoc-diff >>ops
|
||||||
|
[ define-simd ]
|
||||||
|
[ [ class>> ] [ rep>> ] bi define-simd-256-type ] bi ;
|
||||||
|
|
||||||
FUNCTOR: define-simd-256 ( T -- )
|
FUNCTOR: define-simd-256 ( T -- )
|
||||||
|
|
||||||
N [ 32 T heap-size /i ]
|
N [ 32 T heap-size /i ]
|
||||||
|
@ -332,7 +361,19 @@ INSTANCE: A sequence
|
||||||
: A-v->n-op ( v1 combine-quot -- v2 )
|
: A-v->n-op ( v1 combine-quot -- v2 )
|
||||||
[ [ underlying1>> ] [ underlying2>> ] bi A-rep (simd-v+) A-rep ] dip call ; inline
|
[ [ underlying1>> ] [ underlying2>> ] bi A-rep (simd-v+) A-rep ] dip call ; inline
|
||||||
|
|
||||||
\ A \ A-with \ A-rep \ A-vv->v-op \ A-vn->v-op \ A-vv->n-op \ A-v->v-op \ A-v->n-op simd-vector-words
|
simd new
|
||||||
\ A \ A-rep define-simd-256-type
|
\ A >>class
|
||||||
|
\ A-with >>ctor
|
||||||
|
\ A-rep >>rep
|
||||||
|
{
|
||||||
|
{ { +vector+ +vector+ -> +vector+ } A-vv->v-op }
|
||||||
|
{ { +vector+ +scalar+ -> +vector+ } A-vn->v-op }
|
||||||
|
{ { +vector+ +literal+ -> +vector+ } A-vn->v-op }
|
||||||
|
{ { +vector+ +vector+ -> +scalar+ } A-vv->n-op }
|
||||||
|
{ { +vector+ -> +vector+ } A-v->v-op }
|
||||||
|
{ { +vector+ -> +scalar+ } A-v->n-op }
|
||||||
|
{ { +vector+ -> +nonnegative+ } A-v->n-op }
|
||||||
|
} >>wrappers
|
||||||
|
(define-simd-256)
|
||||||
|
|
||||||
;FUNCTOR
|
;FUNCTOR
|
||||||
|
|
|
@ -126,8 +126,6 @@ M: vector-rep supported-simd-op?
|
||||||
{ \ (simd-hlshift) [ %horizontal-shl-vector-reps ] }
|
{ \ (simd-hlshift) [ %horizontal-shl-vector-reps ] }
|
||||||
{ \ (simd-hrshift) [ %horizontal-shr-vector-reps ] }
|
{ \ (simd-hrshift) [ %horizontal-shr-vector-reps ] }
|
||||||
{ \ (simd-vshuffle) [ %shuffle-vector-reps ] }
|
{ \ (simd-vshuffle) [ %shuffle-vector-reps ] }
|
||||||
{ \ (simd-broadcast) [ %broadcast-vector-reps ] }
|
|
||||||
{ \ (simd-gather-2) [ %gather-vector-2-reps ] }
|
{ \ (simd-gather-2) [ %gather-vector-2-reps ] }
|
||||||
{ \ (simd-gather-4) [ %gather-vector-4-reps ] }
|
{ \ (simd-gather-4) [ %gather-vector-4-reps ] }
|
||||||
{ \ (simd-select) [ %select-vector-reps ] }
|
|
||||||
} case member? ;
|
} case member? ;
|
||||||
|
|
|
@ -21,13 +21,13 @@ ARTICLE: "math.vectors.simd.support" "Supported SIMD instruction sets and operat
|
||||||
$nl
|
$nl
|
||||||
"SSE1 only supports single-precision SIMD (" { $snippet "float-4" } " and " { $snippet "float-8" } ")."
|
"SSE1 only supports single-precision SIMD (" { $snippet "float-4" } " and " { $snippet "float-8" } ")."
|
||||||
$nl
|
$nl
|
||||||
"SSE2 introduces double-precision SIMD (" { $snippet "double-2" } " and " { $snippet "double-4" } ") and integer SIMD (all types). Integer SIMD in missing a few features, in particular the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "."
|
"SSE2 introduces double-precision SIMD (" { $snippet "double-2" } " and " { $snippet "double-4" } ") and integer SIMD (all types). Integer SIMD is missing a few features, in particular the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "."
|
||||||
$nl
|
$nl
|
||||||
"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "."
|
"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "."
|
||||||
$nl
|
$nl
|
||||||
"SSSE3 introduces " { $link vabs } " for " { $snippet "char-16" } ", " { $snippet "short-8" } " and " { $snippet "int-4" } "."
|
"SSSE3 introduces " { $link vabs } " for " { $snippet "char-16" } ", " { $snippet "short-8" } " and " { $snippet "int-4" } "."
|
||||||
$nl
|
$nl
|
||||||
"SSE4.1 introduces " { $link vmin } " and " { $link vmax } " for all remaining integer types."
|
"SSE4.1 introduces " { $link vmin } " and " { $link vmax } " for all remaining integer types, a faster instruction for " { $link v. } ", and a few other things."
|
||||||
$nl
|
$nl
|
||||||
"On PowerPC, or older x86 chips without SSE, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance."
|
"On PowerPC, or older x86 chips without SSE, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance."
|
||||||
$nl
|
$nl
|
||||||
|
@ -183,7 +183,7 @@ $nl
|
||||||
ARTICLE: "math.vectors.simd.accuracy" "Numerical accuracy of SIMD primitives"
|
ARTICLE: "math.vectors.simd.accuracy" "Numerical accuracy of SIMD primitives"
|
||||||
"No guarantees are made that " { $vocab-link "math.vectors.simd" } " words will give identical results on different SSE versions, or between the hardware intrinsics and the software fallbacks."
|
"No guarantees are made that " { $vocab-link "math.vectors.simd" } " words will give identical results on different SSE versions, or between the hardware intrinsics and the software fallbacks."
|
||||||
$nl
|
$nl
|
||||||
"In particular, horizontal operations on " { $snippet "float-4" } " and " { $snippet "float-8" } " are affected by this. They are computed with lower precision in intrinsics than the software fallback. Horizontal opeartions include anything involving adding together the components of a vector, such as " { $link sum } " or " { $link normalize } "." ;
|
"In particular, horizontal operations on " { $snippet "float-4" } " and " { $snippet "float-8" } " are affected by this. They are computed with lower precision in intrinsics than the software fallback. Horizontal operations include anything involving adding together the components of a vector, such as " { $link sum } " or " { $link normalize } "." ;
|
||||||
|
|
||||||
ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
|
ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
|
||||||
"The " { $vocab-link "math.vectors.simd" } " vocabulary extends the " { $vocab-link "math.vectors" } " vocabulary to support efficient vector arithmetic on small, fixed-size vectors."
|
"The " { $vocab-link "math.vectors.simd" } " vocabulary extends the " { $vocab-link "math.vectors" } " vocabulary to support efficient vector arithmetic on small, fixed-size vectors."
|
||||||
|
|
|
@ -5,7 +5,8 @@ math.vectors.simd.private prettyprint random sequences system
|
||||||
tools.test vocabs assocs compiler.cfg.debugger words
|
tools.test vocabs assocs compiler.cfg.debugger words
|
||||||
locals math.vectors.specialization combinators cpu.architecture
|
locals math.vectors.specialization combinators cpu.architecture
|
||||||
math.vectors.simd.intrinsics namespaces byte-arrays alien
|
math.vectors.simd.intrinsics namespaces byte-arrays alien
|
||||||
specialized-arrays classes.struct eval ;
|
specialized-arrays classes.struct eval classes.algebra sets
|
||||||
|
quotations ;
|
||||||
QUALIFIED-WITH: alien.c-types c
|
QUALIFIED-WITH: alien.c-types c
|
||||||
SPECIALIZED-ARRAY: c:float
|
SPECIALIZED-ARRAY: c:float
|
||||||
SIMD: c:char
|
SIMD: c:char
|
||||||
|
@ -34,6 +35,20 @@ IN: math.vectors.simd.tests
|
||||||
|
|
||||||
[ V{ float-4 } ] [ [ { float-4 float-4 } declare v+ ] final-classes ] unit-test
|
[ V{ float-4 } ] [ [ { float-4 float-4 } declare v+ ] final-classes ] unit-test
|
||||||
|
|
||||||
|
[ V{ float } ] [ [ { float-4 } declare second ] final-classes ] unit-test
|
||||||
|
|
||||||
|
[ V{ int-4 } ] [ [ { int-4 int-4 } declare v+ ] final-classes ] unit-test
|
||||||
|
|
||||||
|
[ t ] [ [ { int-4 } declare second ] final-classes first integer class<= ] unit-test
|
||||||
|
|
||||||
|
[ V{ longlong-2 } ] [ [ { longlong-2 longlong-2 } declare v+ ] final-classes ] unit-test
|
||||||
|
|
||||||
|
[ V{ integer } ] [ [ { longlong-2 } declare second ] final-classes ] unit-test
|
||||||
|
|
||||||
|
[ V{ int-8 } ] [ [ { int-8 int-8 } declare v+ ] final-classes ] unit-test
|
||||||
|
|
||||||
|
[ t ] [ [ { int-8 } declare second ] final-classes first integer class<= ] unit-test
|
||||||
|
|
||||||
! Test puns; only on x86
|
! Test puns; only on x86
|
||||||
cpu x86? [
|
cpu x86? [
|
||||||
[ double-2{ 4 1024 } ] [
|
[ double-2{ 4 1024 } ] [
|
||||||
|
@ -78,9 +93,10 @@ CONSTANT: simd-classes
|
||||||
: boa-ctors ( -- seq )
|
: boa-ctors ( -- seq )
|
||||||
simd-classes [ [ name>> "-boa" append ] [ vocabulary>> ] bi lookup ] map ;
|
simd-classes [ [ name>> "-boa" append ] [ vocabulary>> ] bi lookup ] map ;
|
||||||
|
|
||||||
: check-optimizer ( seq inputs quot eq-quot -- )
|
: check-optimizer ( seq quot eq-quot -- failures )
|
||||||
'[
|
'[
|
||||||
@
|
@
|
||||||
|
[ dup [ class ] { } map-as ] dip '[ _ declare @ ]
|
||||||
{
|
{
|
||||||
[ "print-mr" get [ nip test-mr mr. ] [ 2drop ] if ]
|
[ "print-mr" get [ nip test-mr mr. ] [ 2drop ] if ]
|
||||||
[ "print-checks" get [ [ . ] bi@ ] [ 2drop ] if ]
|
[ "print-checks" get [ [ . ] bi@ ] [ 2drop ] if ]
|
||||||
|
@ -104,7 +120,7 @@ CONSTANT: simd-classes
|
||||||
|
|
||||||
[ { } ] [
|
[ { } ] [
|
||||||
with-ctors [
|
with-ctors [
|
||||||
[ 1000 random '[ _ ] ] dip '[ { fixnum } declare _ execute ]
|
[ 1000 random '[ _ ] ] dip '[ _ execute ]
|
||||||
] [ = ] check-optimizer
|
] [ = ] check-optimizer
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
|
@ -112,10 +128,8 @@ CONSTANT: simd-classes
|
||||||
|
|
||||||
[ { } ] [
|
[ { } ] [
|
||||||
boa-ctors [
|
boa-ctors [
|
||||||
dup stack-effect in>> length
|
[ stack-effect in>> length [ 1000 random ] [ ] replicate-as ] keep
|
||||||
[ nip [ 1000 random ] [ ] replicate-as ]
|
'[ _ execute ]
|
||||||
[ fixnum <array> swap '[ _ declare _ execute ] ]
|
|
||||||
2bi
|
|
||||||
] [ = ] check-optimizer
|
] [ = ] check-optimizer
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
|
@ -126,31 +140,22 @@ CONSTANT: simd-classes
|
||||||
|
|
||||||
:: check-vector-op ( word inputs class elt-class -- inputs quot )
|
:: check-vector-op ( word inputs class elt-class -- inputs quot )
|
||||||
inputs [
|
inputs [
|
||||||
[
|
{
|
||||||
{
|
{ +vector+ [ class random-vector ] }
|
||||||
{ +vector+ [ class random-vector ] }
|
{ +scalar+ [ 1000 random elt-class float = [ >float ] when ] }
|
||||||
{ +scalar+ [ 1000 random elt-class float = [ >float ] when ] }
|
} case
|
||||||
} case
|
] [ ] map-as
|
||||||
] [ ] map-as
|
word '[ _ execute ] ;
|
||||||
] [
|
|
||||||
[
|
|
||||||
{
|
|
||||||
{ +vector+ [ class ] }
|
|
||||||
{ +scalar+ [ elt-class ] }
|
|
||||||
} case
|
|
||||||
] map
|
|
||||||
] bi
|
|
||||||
word '[ _ declare _ execute ] ;
|
|
||||||
|
|
||||||
: remove-float-words ( alist -- alist' )
|
: remove-float-words ( alist -- alist' )
|
||||||
[ drop { vsqrt n/v v/n v/ normalize } member? not ] assoc-filter ;
|
{ vsqrt n/v v/n v/ normalize } unique assoc-diff ;
|
||||||
|
|
||||||
: remove-integer-words ( alist -- alist' )
|
: remove-integer-words ( alist -- alist' )
|
||||||
[ drop { vlshift vrshift } member? not ] assoc-filter ;
|
{ vlshift vrshift } unique assoc-diff ;
|
||||||
|
|
||||||
: remove-special-words ( alist -- alist' )
|
: remove-special-words ( alist -- alist' )
|
||||||
! These have their own tests later
|
! These have their own tests later
|
||||||
[ drop { hlshift hrshift vshuffle } member? not ] assoc-filter ;
|
{ hlshift hrshift vshuffle } unique assoc-diff ;
|
||||||
|
|
||||||
: ops-to-check ( elt-class -- alist )
|
: ops-to-check ( elt-class -- alist )
|
||||||
[ vector-words >alist ] dip
|
[ vector-words >alist ] dip
|
||||||
|
@ -189,13 +194,89 @@ simd-classes&reps [
|
||||||
[ [ { } ] ] dip first3 '[ _ _ _ check-vector-ops ] unit-test
|
[ [ { } ] ] dip first3 '[ _ _ _ check-vector-ops ] unit-test
|
||||||
] each
|
] each
|
||||||
|
|
||||||
! Other regressions
|
"== Checking shifts and permutations" print
|
||||||
[ 8000000 ] [
|
|
||||||
int-8{ 1000 1000 1000 1000 1000 1000 1000 1000 }
|
[ int-4{ 256 512 1024 2048 } ]
|
||||||
[ { int-8 } declare dup [ * ] [ + ] 2map-reduce ] compile-call
|
[ int-4{ 1 2 4 8 } 1 hlshift ] unit-test
|
||||||
] unit-test
|
|
||||||
|
[ int-4{ 256 512 1024 2048 } ]
|
||||||
|
[ int-4{ 1 2 4 8 } [ { int-4 } declare 1 hlshift ] compile-call ] unit-test
|
||||||
|
|
||||||
|
[ int-4{ 1 2 4 8 } ]
|
||||||
|
[ int-4{ 256 512 1024 2048 } 1 hrshift ] unit-test
|
||||||
|
|
||||||
|
[ int-4{ 1 2 4 8 } ]
|
||||||
|
[ int-4{ 256 512 1024 2048 } [ { int-4 } declare 1 hrshift ] compile-call ] unit-test
|
||||||
|
|
||||||
|
! Shuffles
|
||||||
|
: shuffles-for ( n -- shuffles )
|
||||||
|
{
|
||||||
|
{ 2 [
|
||||||
|
{
|
||||||
|
{ 0 1 }
|
||||||
|
{ 1 1 }
|
||||||
|
{ 1 0 }
|
||||||
|
{ 0 0 }
|
||||||
|
}
|
||||||
|
] }
|
||||||
|
{ 4 [
|
||||||
|
{
|
||||||
|
{ 1 2 3 0 }
|
||||||
|
{ 0 1 2 3 }
|
||||||
|
{ 1 1 2 2 }
|
||||||
|
{ 0 0 1 1 }
|
||||||
|
{ 2 2 3 3 }
|
||||||
|
{ 0 1 0 1 }
|
||||||
|
{ 2 3 2 3 }
|
||||||
|
{ 0 0 2 2 }
|
||||||
|
{ 1 1 3 3 }
|
||||||
|
{ 0 1 0 1 }
|
||||||
|
{ 2 2 3 3 }
|
||||||
|
}
|
||||||
|
] }
|
||||||
|
{ 8 [
|
||||||
|
4 shuffles-for
|
||||||
|
4 shuffles-for
|
||||||
|
[ [ 4 + ] map ] map
|
||||||
|
[ append ] 2map
|
||||||
|
] }
|
||||||
|
[ dup '[ _ random ] replicate 1array ]
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
simd-classes [
|
||||||
|
[ [ { } ] ] dip
|
||||||
|
[ new length shuffles-for ] keep
|
||||||
|
'[
|
||||||
|
_ [ [ _ new [ length iota ] keep like 1quotation ] dip '[ _ vshuffle ] ]
|
||||||
|
[ = ] check-optimizer
|
||||||
|
] unit-test
|
||||||
|
] each
|
||||||
|
|
||||||
|
"== Checking element access" print
|
||||||
|
|
||||||
|
! Test element access -- it should box bignums for int-4 on x86
|
||||||
|
: test-accesses ( seq -- failures )
|
||||||
|
[ length >array ] keep
|
||||||
|
'[ [ _ 1quotation ] dip '[ _ swap nth ] ] [ = ] check-optimizer ; inline
|
||||||
|
|
||||||
|
[ { } ] [ float-4{ 1.0 2.0 3.0 4.0 } test-accesses ] unit-test
|
||||||
|
[ { } ] [ int-4{ HEX: 7fffffff 3 4 -8 } test-accesses ] unit-test
|
||||||
|
[ { } ] [ uint-4{ HEX: ffffffff 2 3 4 } test-accesses ] unit-test
|
||||||
|
|
||||||
|
[ { } ] [ double-2{ 1.0 2.0 } test-accesses ] unit-test
|
||||||
|
[ { } ] [ longlong-2{ 1 2 } test-accesses ] unit-test
|
||||||
|
[ { } ] [ ulonglong-2{ 1 2 } test-accesses ] unit-test
|
||||||
|
|
||||||
|
[ { } ] [ float-8{ 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 } test-accesses ] unit-test
|
||||||
|
[ { } ] [ int-8{ 1 2 3 4 5 6 7 8 } test-accesses ] unit-test
|
||||||
|
[ { } ] [ uint-8{ 1 2 3 4 5 6 7 8 } test-accesses ] unit-test
|
||||||
|
|
||||||
|
[ { } ] [ double-4{ 1.0 2.0 3.0 4.0 } test-accesses ] unit-test
|
||||||
|
[ { } ] [ longlong-4{ 1 2 3 4 } test-accesses ] unit-test
|
||||||
|
[ { } ] [ ulonglong-4{ 1 2 3 4 } test-accesses ] unit-test
|
||||||
|
|
||||||
|
"== Checking alien operations" print
|
||||||
|
|
||||||
! Vector alien intrinsics
|
|
||||||
[ float-4{ 1 2 3 4 } ] [
|
[ float-4{ 1 2 3 4 } ] [
|
||||||
[
|
[
|
||||||
float-4{ 1 2 3 4 }
|
float-4{ 1 2 3 4 }
|
||||||
|
@ -259,60 +340,12 @@ STRUCT: simd-struct
|
||||||
] compile-call
|
] compile-call
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
|
"== Misc tests" print
|
||||||
|
|
||||||
[ ] [ char-16 new 1array stack. ] unit-test
|
[ ] [ char-16 new 1array stack. ] unit-test
|
||||||
|
|
||||||
[ int-4{ 256 512 1024 2048 } ]
|
! Other regressions
|
||||||
[ int-4{ 1 2 4 8 } 1 hlshift ] unit-test
|
[ 8000000 ] [
|
||||||
|
int-8{ 1000 1000 1000 1000 1000 1000 1000 1000 }
|
||||||
[ int-4{ 256 512 1024 2048 } ]
|
[ { int-8 } declare dup [ * ] [ + ] 2map-reduce ] compile-call
|
||||||
[ int-4{ 1 2 4 8 } [ { int-4 } declare 1 hlshift ] compile-call ] unit-test
|
] unit-test
|
||||||
|
|
||||||
[ int-4{ 1 2 4 8 } ]
|
|
||||||
[ int-4{ 256 512 1024 2048 } 1 hrshift ] unit-test
|
|
||||||
|
|
||||||
[ int-4{ 1 2 4 8 } ]
|
|
||||||
[ int-4{ 256 512 1024 2048 } [ { int-4 } declare 1 hrshift ] compile-call ] unit-test
|
|
||||||
|
|
||||||
! Shuffles
|
|
||||||
: test-shuffle ( input shuffle -- failures )
|
|
||||||
[ dup class 1array ] dip
|
|
||||||
'[ _ declare _ vshuffle ]
|
|
||||||
[ call ] [ compile-call ] 2bi = not ; inline
|
|
||||||
|
|
||||||
: shuffles-for ( seq -- shuffles )
|
|
||||||
length {
|
|
||||||
{ 2 [
|
|
||||||
{
|
|
||||||
{ 0 1 }
|
|
||||||
{ 1 1 }
|
|
||||||
{ 1 0 }
|
|
||||||
{ 0 0 }
|
|
||||||
}
|
|
||||||
] }
|
|
||||||
{ 4 [
|
|
||||||
{
|
|
||||||
{ 1 2 3 0 }
|
|
||||||
{ 0 1 2 3 }
|
|
||||||
{ 1 1 2 2 }
|
|
||||||
{ 0 0 1 1 }
|
|
||||||
{ 2 2 3 3 }
|
|
||||||
{ 0 1 0 1 }
|
|
||||||
{ 2 3 2 3 }
|
|
||||||
{ 0 0 2 2 }
|
|
||||||
{ 1 1 3 3 }
|
|
||||||
{ 0 1 0 1 }
|
|
||||||
{ 2 2 3 3 }
|
|
||||||
}
|
|
||||||
] }
|
|
||||||
} case ;
|
|
||||||
|
|
||||||
: test-shuffles ( input -- failures )
|
|
||||||
dup shuffles-for [ test-shuffle ] with filter ; inline
|
|
||||||
|
|
||||||
[ { } ] [ float-4{ 1.0 2.0 3.0 4.0 } test-shuffles ] unit-test
|
|
||||||
[ { } ] [ int-4{ 1 2 3 4 } test-shuffles ] unit-test
|
|
||||||
[ { } ] [ uint-4{ 1 2 3 4 } test-shuffles ] unit-test
|
|
||||||
|
|
||||||
[ { } ] [ double-2{ 1.0 2.0 } test-shuffles ] unit-test
|
|
||||||
[ { } ] [ longlong-2{ 1 2 } test-shuffles ] unit-test
|
|
||||||
[ { } ] [ ulonglong-2{ 1 2 } test-shuffles ] unit-test
|
|
||||||
|
|
|
@ -2,7 +2,8 @@
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: alien.c-types combinators fry kernel parser math math.parser
|
USING: alien.c-types combinators fry kernel parser math math.parser
|
||||||
math.vectors.simd.functor sequences splitting vocabs.generated
|
math.vectors.simd.functor sequences splitting vocabs.generated
|
||||||
vocabs.loader vocabs.parser words accessors ;
|
vocabs.loader vocabs.parser words accessors vocabs compiler.units
|
||||||
|
definitions ;
|
||||||
QUALIFIED-WITH: alien.c-types c
|
QUALIFIED-WITH: alien.c-types c
|
||||||
IN: math.vectors.simd
|
IN: math.vectors.simd
|
||||||
|
|
||||||
|
@ -17,6 +18,12 @@ ERROR: bad-base-type type ;
|
||||||
dup { c:char c:uchar c:short c:ushort c:int c:uint c:longlong c:ulonglong c:float c:double } memq?
|
dup { c:char c:uchar c:short c:ushort c:int c:uint c:longlong c:ulonglong c:float c:double } memq?
|
||||||
[ bad-base-type ] unless ;
|
[ bad-base-type ] unless ;
|
||||||
|
|
||||||
|
: forget-instances ( -- )
|
||||||
|
[
|
||||||
|
"math.vectors.simd.instances" child-vocabs
|
||||||
|
[ forget-vocab ] each
|
||||||
|
] with-compilation-unit ;
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
: define-simd-vocab ( type -- vocab )
|
: define-simd-vocab ( type -- vocab )
|
||||||
|
@ -29,3 +36,4 @@ PRIVATE>
|
||||||
|
|
||||||
SYNTAX: SIMD:
|
SYNTAX: SIMD:
|
||||||
scan-word define-simd-vocab use-vocab ;
|
scan-word define-simd-vocab use-vocab ;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue