Merge branch 'more-simd'
commit
6382aaabd5
|
@ -51,9 +51,6 @@ insn-classes get [
|
||||||
: ^^unbox-c-ptr ( src class -- dst )
|
: ^^unbox-c-ptr ( src class -- dst )
|
||||||
[ next-vreg dup ] 2dip next-vreg ##unbox-c-ptr ;
|
[ next-vreg dup ] 2dip next-vreg ##unbox-c-ptr ;
|
||||||
|
|
||||||
: ^^neg ( src -- dst )
|
|
||||||
[ 0 ^^load-literal ] dip ^^sub ;
|
|
||||||
|
|
||||||
: ^^allot-tuple ( n -- dst )
|
: ^^allot-tuple ( n -- dst )
|
||||||
2 + cells tuple ^^allot ;
|
2 + cells tuple ^^allot ;
|
||||||
|
|
||||||
|
|
|
@ -186,6 +186,10 @@ PURE-INSN: ##not
|
||||||
def: dst/int-rep
|
def: dst/int-rep
|
||||||
use: src/int-rep ;
|
use: src/int-rep ;
|
||||||
|
|
||||||
|
PURE-INSN: ##neg
|
||||||
|
def: dst/int-rep
|
||||||
|
use: src/int-rep ;
|
||||||
|
|
||||||
PURE-INSN: ##log2
|
PURE-INSN: ##log2
|
||||||
def: dst/int-rep
|
def: dst/int-rep
|
||||||
use: src/int-rep ;
|
use: src/int-rep ;
|
||||||
|
@ -270,6 +274,10 @@ def: dst
|
||||||
use: src/int-rep
|
use: src/int-rep
|
||||||
literal: rep ;
|
literal: rep ;
|
||||||
|
|
||||||
|
PURE-INSN: ##zero-vector
|
||||||
|
def: dst
|
||||||
|
literal: rep ;
|
||||||
|
|
||||||
PURE-INSN: ##broadcast-vector
|
PURE-INSN: ##broadcast-vector
|
||||||
def: dst
|
def: dst
|
||||||
use: src/scalar-rep
|
use: src/scalar-rep
|
||||||
|
@ -285,6 +293,16 @@ def: dst
|
||||||
use: src1/scalar-rep src2/scalar-rep src3/scalar-rep src4/scalar-rep
|
use: src1/scalar-rep src2/scalar-rep src3/scalar-rep src4/scalar-rep
|
||||||
literal: rep ;
|
literal: rep ;
|
||||||
|
|
||||||
|
PURE-INSN: ##shuffle-vector
|
||||||
|
def: dst
|
||||||
|
use: src
|
||||||
|
literal: shuffle rep ;
|
||||||
|
|
||||||
|
PURE-INSN: ##select-vector
|
||||||
|
def: dst
|
||||||
|
use: src
|
||||||
|
literal: n rep ;
|
||||||
|
|
||||||
PURE-INSN: ##add-vector
|
PURE-INSN: ##add-vector
|
||||||
def: dst
|
def: dst
|
||||||
use: src1 src2
|
use: src1 src2
|
||||||
|
@ -335,6 +353,11 @@ def: dst
|
||||||
use: src1 src2
|
use: src1 src2
|
||||||
literal: rep ;
|
literal: rep ;
|
||||||
|
|
||||||
|
PURE-INSN: ##dot-vector
|
||||||
|
def: dst/scalar-rep
|
||||||
|
use: src1 src2
|
||||||
|
literal: rep ;
|
||||||
|
|
||||||
PURE-INSN: ##horizontal-add-vector
|
PURE-INSN: ##horizontal-add-vector
|
||||||
def: dst/scalar-rep
|
def: dst/scalar-rep
|
||||||
use: src
|
use: src
|
||||||
|
|
|
@ -164,6 +164,7 @@ IN: compiler.cfg.intrinsics
|
||||||
{ math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] }
|
||||||
|
{ math.vectors.simd.intrinsics:(simd-v.) [ [ ^^dot-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vabs) [ [ ^^abs-vector ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vabs) [ [ ^^abs-vector ] emit-unary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] }
|
||||||
|
@ -177,6 +178,8 @@ IN: compiler.cfg.intrinsics
|
||||||
{ math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
|
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
|
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
|
||||||
|
{ math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] }
|
||||||
|
{ math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
|
{ math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
|
||||||
{ math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] }
|
{ math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] }
|
||||||
|
|
|
@ -1,32 +1,51 @@
|
||||||
! Copyright (C) 2009 Slava Pestov.
|
! Copyright (C) 2009 Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors byte-arrays fry cpu.architecture kernel math
|
USING: accessors byte-arrays fry cpu.architecture kernel math
|
||||||
sequences compiler.tree.propagation.info
|
sequences macros generalizations combinators
|
||||||
|
combinators.short-circuit arrays compiler.tree.propagation.info
|
||||||
compiler.cfg.builder.blocks compiler.cfg.stacks
|
compiler.cfg.builder.blocks compiler.cfg.stacks
|
||||||
compiler.cfg.stacks.local compiler.cfg.hats
|
compiler.cfg.stacks.local compiler.cfg.hats
|
||||||
compiler.cfg.instructions compiler.cfg.registers
|
compiler.cfg.instructions compiler.cfg.registers
|
||||||
compiler.cfg.intrinsics.alien ;
|
compiler.cfg.intrinsics.alien ;
|
||||||
IN: compiler.cfg.intrinsics.simd
|
IN: compiler.cfg.intrinsics.simd
|
||||||
|
|
||||||
: emit-vector-op ( node quot: ( rep -- ) -- )
|
MACRO: check-elements ( quots -- )
|
||||||
[ dup node-input-infos last literal>> dup representation? ] dip
|
[ length '[ _ firstn ] ]
|
||||||
'[ nip @ ] [ drop emit-primitive ] if ; inline
|
[ '[ _ spread ] ]
|
||||||
|
[ length 1 - \ and <repetition> [ ] like ]
|
||||||
|
tri 3append ;
|
||||||
|
|
||||||
: emit-binary-vector-op ( node quot -- )
|
MACRO: if-literals-match ( quots -- )
|
||||||
'[ [ ds-drop 2inputs ] dip @ ds-push ] emit-vector-op ; inline
|
[ length ] [ ] [ length ] tri
|
||||||
|
! n quots n n
|
||||||
: emit-unary-vector-op ( node quot -- )
|
'[
|
||||||
'[ [ ds-drop ds-pop ] dip @ ds-push ] emit-vector-op ; inline
|
! node quot
|
||||||
|
|
||||||
: emit-horizontal-shift ( node quot -- )
|
|
||||||
[
|
[
|
||||||
dup node-input-infos
|
dup node-input-infos
|
||||||
[ second literal>> ] [ third literal>> ] bi
|
_ tail-slice* [ literal>> ] map
|
||||||
2dup [ integer? ] [ representation? ] bi* and
|
dup _ check-elements
|
||||||
] dip
|
] dip
|
||||||
'[ [ drop ds-drop ds-drop ds-pop ] 2dip @ ds-push ]
|
swap [
|
||||||
[ 2drop emit-primitive ]
|
! node literals quot
|
||||||
if ; inline
|
[ _ firstn ] dip call
|
||||||
|
drop
|
||||||
|
] [ 2drop emit-primitive ] if
|
||||||
|
] ;
|
||||||
|
|
||||||
|
: emit-vector-op ( node quot: ( rep -- ) -- )
|
||||||
|
{ [ representation? ] } if-literals-match ; inline
|
||||||
|
|
||||||
|
: emit-binary-vector-op ( node quot -- )
|
||||||
|
'[ [ ds-drop 2inputs ] dip @ ds-push ]
|
||||||
|
emit-vector-op ; inline
|
||||||
|
|
||||||
|
: emit-unary-vector-op ( node quot -- )
|
||||||
|
'[ [ ds-drop ds-pop ] dip @ ds-push ]
|
||||||
|
emit-vector-op ; inline
|
||||||
|
|
||||||
|
: emit-horizontal-shift ( node quot -- )
|
||||||
|
'[ [ -2 inc-d ds-pop ] 2dip @ ds-push ]
|
||||||
|
{ [ integer? ] [ representation? ] } if-literals-match ; inline
|
||||||
|
|
||||||
: emit-gather-vector-2 ( node -- )
|
: emit-gather-vector-2 ( node -- )
|
||||||
[ ^^gather-vector-2 ] emit-binary-vector-op ;
|
[ ^^gather-vector-2 ] emit-binary-vector-op ;
|
||||||
|
@ -45,6 +64,16 @@ IN: compiler.cfg.intrinsics.simd
|
||||||
ds-push
|
ds-push
|
||||||
] emit-vector-op ;
|
] emit-vector-op ;
|
||||||
|
|
||||||
|
: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
|
||||||
|
|
||||||
|
: emit-shuffle-vector ( node -- )
|
||||||
|
[ [ -2 inc-d ds-pop ] 2dip ^^shuffle-vector ds-push ]
|
||||||
|
{ [ shuffle? ] [ representation? ] } if-literals-match ; inline
|
||||||
|
|
||||||
|
: emit-select-vector ( node -- )
|
||||||
|
[ [ -2 inc-d ds-pop ] 2dip ^^select-vector ds-push ]
|
||||||
|
{ [ integer? ] [ representation? ] } if-literals-match ; inline
|
||||||
|
|
||||||
: emit-alien-vector ( node -- )
|
: emit-alien-vector ( node -- )
|
||||||
dup [
|
dup [
|
||||||
'[
|
'[
|
||||||
|
|
|
@ -142,6 +142,7 @@ CODEGEN: ##sar-imm %sar-imm
|
||||||
CODEGEN: ##min %min
|
CODEGEN: ##min %min
|
||||||
CODEGEN: ##max %max
|
CODEGEN: ##max %max
|
||||||
CODEGEN: ##not %not
|
CODEGEN: ##not %not
|
||||||
|
CODEGEN: ##neg %neg
|
||||||
CODEGEN: ##log2 %log2
|
CODEGEN: ##log2 %log2
|
||||||
CODEGEN: ##copy %copy
|
CODEGEN: ##copy %copy
|
||||||
CODEGEN: ##unbox-float %unbox-float
|
CODEGEN: ##unbox-float %unbox-float
|
||||||
|
@ -160,9 +161,12 @@ CODEGEN: ##double>single-float %double>single-float
|
||||||
CODEGEN: ##integer>float %integer>float
|
CODEGEN: ##integer>float %integer>float
|
||||||
CODEGEN: ##float>integer %float>integer
|
CODEGEN: ##float>integer %float>integer
|
||||||
CODEGEN: ##unbox-vector %unbox-vector
|
CODEGEN: ##unbox-vector %unbox-vector
|
||||||
|
CODEGEN: ##zero-vector %zero-vector
|
||||||
CODEGEN: ##broadcast-vector %broadcast-vector
|
CODEGEN: ##broadcast-vector %broadcast-vector
|
||||||
CODEGEN: ##gather-vector-2 %gather-vector-2
|
CODEGEN: ##gather-vector-2 %gather-vector-2
|
||||||
CODEGEN: ##gather-vector-4 %gather-vector-4
|
CODEGEN: ##gather-vector-4 %gather-vector-4
|
||||||
|
CODEGEN: ##shuffle-vector %shuffle-vector
|
||||||
|
CODEGEN: ##select-vector %select-vector
|
||||||
CODEGEN: ##box-vector %box-vector
|
CODEGEN: ##box-vector %box-vector
|
||||||
CODEGEN: ##add-vector %add-vector
|
CODEGEN: ##add-vector %add-vector
|
||||||
CODEGEN: ##saturated-add-vector %saturated-add-vector
|
CODEGEN: ##saturated-add-vector %saturated-add-vector
|
||||||
|
@ -174,6 +178,7 @@ CODEGEN: ##saturated-mul-vector %saturated-mul-vector
|
||||||
CODEGEN: ##div-vector %div-vector
|
CODEGEN: ##div-vector %div-vector
|
||||||
CODEGEN: ##min-vector %min-vector
|
CODEGEN: ##min-vector %min-vector
|
||||||
CODEGEN: ##max-vector %max-vector
|
CODEGEN: ##max-vector %max-vector
|
||||||
|
CODEGEN: ##dot-vector %dot-vector
|
||||||
CODEGEN: ##sqrt-vector %sqrt-vector
|
CODEGEN: ##sqrt-vector %sqrt-vector
|
||||||
CODEGEN: ##horizontal-add-vector %horizontal-add-vector
|
CODEGEN: ##horizontal-add-vector %horizontal-add-vector
|
||||||
CODEGEN: ##horizontal-sub-vector %horizontal-sub-vector
|
CODEGEN: ##horizontal-sub-vector %horizontal-sub-vector
|
||||||
|
|
|
@ -24,22 +24,27 @@ IN: compiler.tree.propagation.simd
|
||||||
(simd-vrshift)
|
(simd-vrshift)
|
||||||
(simd-hlshift)
|
(simd-hlshift)
|
||||||
(simd-hrshift)
|
(simd-hrshift)
|
||||||
|
(simd-vshuffle)
|
||||||
(simd-broadcast)
|
(simd-broadcast)
|
||||||
(simd-gather-2)
|
(simd-gather-2)
|
||||||
(simd-gather-4)
|
(simd-gather-4)
|
||||||
|
(simd-select)
|
||||||
alien-vector
|
alien-vector
|
||||||
} [ { byte-array } "default-output-classes" set-word-prop ] each
|
} [ { byte-array } "default-output-classes" set-word-prop ] each
|
||||||
|
|
||||||
\ (simd-sum) [
|
: scalar-output-class ( rep -- class )
|
||||||
nip dup literal?>> [
|
dup literal?>> [
|
||||||
literal>> scalar-rep-of {
|
literal>> scalar-rep-of {
|
||||||
{ float-rep [ float ] }
|
{ float-rep [ float ] }
|
||||||
{ double-rep [ float ] }
|
{ double-rep [ float ] }
|
||||||
[ integer ]
|
[ drop integer ]
|
||||||
} case
|
} case
|
||||||
] [ drop real ] if
|
] [ drop real ] if
|
||||||
<class-info>
|
<class-info> ;
|
||||||
] "outputs" set-word-prop
|
|
||||||
|
\ (simd-sum) [ nip scalar-output-class ] "outputs" set-word-prop
|
||||||
|
|
||||||
|
\ (simd-v.) [ 2nip scalar-output-class ] "outputs" set-word-prop
|
||||||
|
|
||||||
\ assert-positive [
|
\ assert-positive [
|
||||||
real [0,inf] <class/interval-info> value-info-intersect
|
real [0,inf] <class/interval-info> value-info-intersect
|
||||||
|
|
|
@ -180,6 +180,7 @@ HOOK: %sar-imm cpu ( dst src1 src2 -- )
|
||||||
HOOK: %min cpu ( dst src1 src2 -- )
|
HOOK: %min cpu ( dst src1 src2 -- )
|
||||||
HOOK: %max cpu ( dst src1 src2 -- )
|
HOOK: %max cpu ( dst src1 src2 -- )
|
||||||
HOOK: %not cpu ( dst src -- )
|
HOOK: %not cpu ( dst src -- )
|
||||||
|
HOOK: %neg cpu ( dst src -- )
|
||||||
HOOK: %log2 cpu ( dst src -- )
|
HOOK: %log2 cpu ( dst src -- )
|
||||||
|
|
||||||
HOOK: %copy cpu ( dst src rep -- )
|
HOOK: %copy cpu ( dst src rep -- )
|
||||||
|
@ -210,9 +211,12 @@ HOOK: %float>integer cpu ( dst src -- )
|
||||||
HOOK: %box-vector cpu ( dst src temp rep -- )
|
HOOK: %box-vector cpu ( dst src temp rep -- )
|
||||||
HOOK: %unbox-vector cpu ( dst src rep -- )
|
HOOK: %unbox-vector cpu ( dst src rep -- )
|
||||||
|
|
||||||
|
HOOK: %zero-vector cpu ( dst rep -- )
|
||||||
HOOK: %broadcast-vector cpu ( dst src rep -- )
|
HOOK: %broadcast-vector cpu ( dst src rep -- )
|
||||||
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
|
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
|
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
|
||||||
|
HOOK: %shuffle-vector cpu ( dst src shuffle rep -- )
|
||||||
|
HOOK: %select-vector cpu ( dst src n rep -- )
|
||||||
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %saturated-add-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %saturated-add-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
|
||||||
|
@ -223,6 +227,7 @@ HOOK: %saturated-mul-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %div-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %div-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %min-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %min-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
|
||||||
|
HOOK: %dot-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %sqrt-vector cpu ( dst src rep -- )
|
HOOK: %sqrt-vector cpu ( dst src rep -- )
|
||||||
HOOK: %horizontal-add-vector cpu ( dst src rep -- )
|
HOOK: %horizontal-add-vector cpu ( dst src rep -- )
|
||||||
HOOK: %horizontal-sub-vector cpu ( dst src rep -- )
|
HOOK: %horizontal-sub-vector cpu ( dst src rep -- )
|
||||||
|
@ -239,9 +244,12 @@ HOOK: %horizontal-shr-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %integer>scalar cpu ( dst src rep -- )
|
HOOK: %integer>scalar cpu ( dst src rep -- )
|
||||||
HOOK: %scalar>integer cpu ( dst src rep -- )
|
HOOK: %scalar>integer cpu ( dst src rep -- )
|
||||||
|
|
||||||
|
HOOK: %zero-vector-reps cpu ( -- reps )
|
||||||
HOOK: %broadcast-vector-reps cpu ( -- reps )
|
HOOK: %broadcast-vector-reps cpu ( -- reps )
|
||||||
HOOK: %gather-vector-2-reps cpu ( -- reps )
|
HOOK: %gather-vector-2-reps cpu ( -- reps )
|
||||||
HOOK: %gather-vector-4-reps cpu ( -- reps )
|
HOOK: %gather-vector-4-reps cpu ( -- reps )
|
||||||
|
HOOK: %shuffle-vector-reps cpu ( -- reps )
|
||||||
|
HOOK: %select-vector-reps cpu ( -- reps )
|
||||||
HOOK: %add-vector-reps cpu ( -- reps )
|
HOOK: %add-vector-reps cpu ( -- reps )
|
||||||
HOOK: %saturated-add-vector-reps cpu ( -- reps )
|
HOOK: %saturated-add-vector-reps cpu ( -- reps )
|
||||||
HOOK: %add-sub-vector-reps cpu ( -- reps )
|
HOOK: %add-sub-vector-reps cpu ( -- reps )
|
||||||
|
@ -252,6 +260,7 @@ HOOK: %saturated-mul-vector-reps cpu ( -- reps )
|
||||||
HOOK: %div-vector-reps cpu ( -- reps )
|
HOOK: %div-vector-reps cpu ( -- reps )
|
||||||
HOOK: %min-vector-reps cpu ( -- reps )
|
HOOK: %min-vector-reps cpu ( -- reps )
|
||||||
HOOK: %max-vector-reps cpu ( -- reps )
|
HOOK: %max-vector-reps cpu ( -- reps )
|
||||||
|
HOOK: %dot-vector-reps cpu ( -- reps )
|
||||||
HOOK: %sqrt-vector-reps cpu ( -- reps )
|
HOOK: %sqrt-vector-reps cpu ( -- reps )
|
||||||
HOOK: %horizontal-add-vector-reps cpu ( -- reps )
|
HOOK: %horizontal-add-vector-reps cpu ( -- reps )
|
||||||
HOOK: %horizontal-sub-vector-reps cpu ( -- reps )
|
HOOK: %horizontal-sub-vector-reps cpu ( -- reps )
|
||||||
|
|
|
@ -129,6 +129,7 @@ M: x86 %min int-rep two-operand [ CMP ] [ CMOVG ] 2bi ;
|
||||||
M: x86 %max int-rep two-operand [ CMP ] [ CMOVL ] 2bi ;
|
M: x86 %max int-rep two-operand [ CMP ] [ CMOVL ] 2bi ;
|
||||||
|
|
||||||
M: x86 %not int-rep one-operand NOT ;
|
M: x86 %not int-rep one-operand NOT ;
|
||||||
|
M: x86 %neg int-rep one-operand NEG ;
|
||||||
M: x86 %log2 BSR ;
|
M: x86 %log2 BSR ;
|
||||||
|
|
||||||
GENERIC: copy-register* ( dst src rep -- )
|
GENERIC: copy-register* ( dst src rep -- )
|
||||||
|
@ -578,6 +579,19 @@ MACRO: available-reps ( alist -- )
|
||||||
reverse [ { } ] suffix
|
reverse [ { } ] suffix
|
||||||
'[ _ cond ] ;
|
'[ _ cond ] ;
|
||||||
|
|
||||||
|
M: x86 %zero-vector
|
||||||
|
{
|
||||||
|
{ double-2-rep [ dup XORPD ] }
|
||||||
|
{ float-4-rep [ dup XORPS ] }
|
||||||
|
[ drop dup PXOR ]
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
M: x86 %zero-vector-reps
|
||||||
|
{
|
||||||
|
{ sse? { float-4-rep } }
|
||||||
|
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
: unsign-rep ( rep -- rep' )
|
: unsign-rep ( rep -- rep' )
|
||||||
{
|
{
|
||||||
{ uint-4-rep int-4-rep }
|
{ uint-4-rep int-4-rep }
|
||||||
|
@ -663,6 +677,55 @@ M: x86 %gather-vector-2-reps
|
||||||
{ sse2? { double-2-rep longlong-2-rep ulonglong-2-rep } }
|
{ sse2? { double-2-rep longlong-2-rep ulonglong-2-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
|
: double-2-shuffle ( dst shuffle -- )
|
||||||
|
{
|
||||||
|
{ { 0 1 } [ drop ] }
|
||||||
|
{ { 0 0 } [ dup UNPCKLPD ] }
|
||||||
|
{ { 1 1 } [ dup UNPCKHPD ] }
|
||||||
|
[ dupd SHUFPD ]
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
: float-4-shuffle ( dst shuffle -- )
|
||||||
|
{
|
||||||
|
{ { 0 1 2 3 } [ drop ] }
|
||||||
|
{ { 0 0 2 2 } [ dup MOVSLDUP ] }
|
||||||
|
{ { 1 1 3 3 } [ dup MOVSHDUP ] }
|
||||||
|
{ { 0 1 0 1 } [ dup MOVLHPS ] }
|
||||||
|
{ { 2 3 2 3 } [ dup MOVHLPS ] }
|
||||||
|
{ { 0 0 1 1 } [ dup UNPCKLPS ] }
|
||||||
|
{ { 2 2 3 3 } [ dup UNPCKHPS ] }
|
||||||
|
[ dupd SHUFPS ]
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
: int-4-shuffle ( dst shuffle -- )
|
||||||
|
{
|
||||||
|
{ { 0 1 2 3 } [ drop ] }
|
||||||
|
{ { 0 0 1 1 } [ dup PUNPCKLDQ ] }
|
||||||
|
{ { 2 2 3 3 } [ dup PUNPCKHDQ ] }
|
||||||
|
{ { 0 1 0 1 } [ dup PUNPCKLQDQ ] }
|
||||||
|
{ { 2 3 2 3 } [ dup PUNPCKHQDQ ] }
|
||||||
|
[ dupd PSHUFD ]
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
: longlong-2-shuffle ( dst shuffle -- )
|
||||||
|
first2 [ 2 * dup 1 + ] bi@ 4array int-4-shuffle ;
|
||||||
|
|
||||||
|
M:: x86 %shuffle-vector ( dst src shuffle rep -- )
|
||||||
|
dst src rep %copy
|
||||||
|
dst shuffle rep unsign-rep {
|
||||||
|
{ double-2-rep [ double-2-shuffle ] }
|
||||||
|
{ float-4-rep [ float-4-shuffle ] }
|
||||||
|
{ int-4-rep [ int-4-shuffle ] }
|
||||||
|
{ longlong-2-rep [ longlong-2-shuffle ] }
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
M: x86 %shuffle-vector-reps
|
||||||
|
{
|
||||||
|
{ sse2? { double-2-rep float-4-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
|
M: x86 %select-vector-reps { } ;
|
||||||
|
|
||||||
M: x86 %add-vector ( dst src1 src2 rep -- )
|
M: x86 %add-vector ( dst src1 src2 rep -- )
|
||||||
[ two-operand ] keep
|
[ two-operand ] keep
|
||||||
{
|
{
|
||||||
|
@ -820,6 +883,28 @@ M: x86 %max-vector-reps
|
||||||
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
|
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
|
M: x86 %dot-vector
|
||||||
|
[ two-operand ] keep
|
||||||
|
{
|
||||||
|
{ float-4-rep [
|
||||||
|
sse4.1?
|
||||||
|
[ HEX: ff DPPS ]
|
||||||
|
[ [ MULPS ] [ drop dup float-4-rep %horizontal-add-vector ] 2bi ]
|
||||||
|
if
|
||||||
|
] }
|
||||||
|
{ double-2-rep [
|
||||||
|
sse4.1?
|
||||||
|
[ HEX: ff DPPD ]
|
||||||
|
[ [ MULPD ] [ drop dup double-2-rep %horizontal-add-vector ] 2bi ]
|
||||||
|
if
|
||||||
|
] }
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
M: x86 %dot-vector-reps
|
||||||
|
{
|
||||||
|
{ sse3? { float-4-rep double-2-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %horizontal-add-vector ( dst src rep -- )
|
M: x86 %horizontal-add-vector ( dst src rep -- )
|
||||||
{
|
{
|
||||||
{ float-4-rep [ [ float-4-rep %copy ] [ HADDPS ] [ HADDPS ] 2tri ] }
|
{ float-4-rep [ [ float-4-rep %copy ] [ HADDPS ] [ HADDPS ] 2tri ] }
|
||||||
|
|
|
@ -55,7 +55,7 @@ ERROR: bad-schema schema ;
|
||||||
:: high-level-ops ( ctor elt-class -- assoc )
|
:: high-level-ops ( ctor elt-class -- assoc )
|
||||||
! Some SIMD operations are defined in terms of others.
|
! Some SIMD operations are defined in terms of others.
|
||||||
{
|
{
|
||||||
{ vneg [ [ dup v- ] keep v- ] }
|
{ vneg [ [ dup vbitxor ] keep v- ] }
|
||||||
{ n+v [ [ ctor execute ] dip v+ ] }
|
{ n+v [ [ ctor execute ] dip v+ ] }
|
||||||
{ v+n [ ctor execute v+ ] }
|
{ v+n [ ctor execute v+ ] }
|
||||||
{ n-v [ [ ctor execute ] dip v- ] }
|
{ n-v [ [ ctor execute ] dip v- ] }
|
||||||
|
@ -71,20 +71,17 @@ ERROR: bad-schema schema ;
|
||||||
! To compute dot product and distance with integer vectors, we
|
! To compute dot product and distance with integer vectors, we
|
||||||
! have to do things less efficiently, with integer overflow checks,
|
! have to do things less efficiently, with integer overflow checks,
|
||||||
! in the general case.
|
! in the general case.
|
||||||
elt-class m:float = [
|
elt-class m:float = [ { distance [ v- norm ] } suffix ] when ;
|
||||||
{
|
|
||||||
{ distance [ v- norm ] }
|
|
||||||
{ v. [ v* sum ] }
|
|
||||||
} append
|
|
||||||
] when ;
|
|
||||||
|
|
||||||
:: simd-vector-words ( class ctor rep vv->v vn->v v->v v->n -- )
|
:: simd-vector-words ( class ctor rep vv->v vn->v vv->n v->v v->n -- )
|
||||||
rep rep-component-type c-type-boxed-class :> elt-class
|
rep rep-component-type c-type-boxed-class :> elt-class
|
||||||
class
|
class
|
||||||
elt-class
|
elt-class
|
||||||
{
|
{
|
||||||
{ { +vector+ +vector+ -> +vector+ } vv->v }
|
{ { +vector+ +vector+ -> +vector+ } vv->v }
|
||||||
{ { +vector+ +scalar+ -> +vector+ } vn->v }
|
{ { +vector+ +scalar+ -> +vector+ } vn->v }
|
||||||
|
{ { +vector+ +literal+ -> +vector+ } vn->v }
|
||||||
|
{ { +vector+ +vector+ -> +scalar+ } vv->n }
|
||||||
{ { +vector+ -> +vector+ } v->v }
|
{ { +vector+ -> +vector+ } v->v }
|
||||||
{ { +vector+ -> +scalar+ } v->n }
|
{ { +vector+ -> +scalar+ } v->n }
|
||||||
{ { +vector+ -> +nonnegative+ } v->n }
|
{ { +vector+ -> +nonnegative+ } v->n }
|
||||||
|
@ -121,6 +118,7 @@ SET-NTH [ T dup c-setter array-accessor ]
|
||||||
A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
|
A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
|
||||||
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
||||||
A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op
|
A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op
|
||||||
|
A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op
|
||||||
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
|
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
|
||||||
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
|
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
|
||||||
|
|
||||||
|
@ -186,13 +184,16 @@ INSTANCE: A sequence
|
||||||
: A-vn->v-op ( v1 v2 quot -- v3 )
|
: A-vn->v-op ( v1 v2 quot -- v3 )
|
||||||
[ [ underlying>> ] dip A-rep ] dip call \ A boa ; inline
|
[ [ underlying>> ] dip A-rep ] dip call \ A boa ; inline
|
||||||
|
|
||||||
|
: A-vv->n-op ( v1 v2 quot -- n )
|
||||||
|
[ [ underlying>> ] bi@ A-rep ] dip call ; inline
|
||||||
|
|
||||||
: A-v->v-op ( v1 quot -- v2 )
|
: A-v->v-op ( v1 quot -- v2 )
|
||||||
[ underlying>> A-rep ] dip call \ A boa ; inline
|
[ underlying>> A-rep ] dip call \ A boa ; inline
|
||||||
|
|
||||||
: A-v->n-op ( v quot -- n )
|
: A-v->n-op ( v quot -- n )
|
||||||
[ underlying>> A-rep ] dip call ; inline
|
[ underlying>> A-rep ] dip call ; inline
|
||||||
|
|
||||||
\ A \ A-with \ A-rep \ A-vv->v-op \ A-vn->v-op \ A-v->v-op \ A-v->n-op simd-vector-words
|
\ A \ A-with \ A-rep \ A-vv->v-op \ A-vn->v-op \ A-vv->n-op \ A-v->v-op \ A-v->n-op simd-vector-words
|
||||||
\ A \ A-rep define-simd-128-type
|
\ A \ A-rep define-simd-128-type
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
@ -243,6 +244,7 @@ A-deref DEFINES-PRIVATE ${A}-deref
|
||||||
A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
|
A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
|
||||||
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
||||||
A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op
|
A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op
|
||||||
|
A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op
|
||||||
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
|
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
|
||||||
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
|
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
|
||||||
|
|
||||||
|
@ -317,6 +319,11 @@ INSTANCE: A sequence
|
||||||
[ [ [ underlying2>> ] dip A-rep ] dip call ] 3bi
|
[ [ [ underlying2>> ] dip A-rep ] dip call ] 3bi
|
||||||
\ A boa ; inline
|
\ A boa ; inline
|
||||||
|
|
||||||
|
: A-vv->n-op ( v1 v2 quot -- v3 )
|
||||||
|
[ [ [ underlying1>> ] bi@ A-rep ] dip call ]
|
||||||
|
[ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
|
||||||
|
+ ; inline
|
||||||
|
|
||||||
: A-v->v-op ( v1 combine-quot -- v2 )
|
: A-v->v-op ( v1 combine-quot -- v2 )
|
||||||
[ [ underlying1>> A-rep ] dip call ]
|
[ [ underlying1>> A-rep ] dip call ]
|
||||||
[ [ underlying2>> A-rep ] dip call ] 2bi
|
[ [ underlying2>> A-rep ] dip call ] 2bi
|
||||||
|
@ -325,7 +332,7 @@ INSTANCE: A sequence
|
||||||
: A-v->n-op ( v1 combine-quot -- v2 )
|
: A-v->n-op ( v1 combine-quot -- v2 )
|
||||||
[ [ underlying1>> ] [ underlying2>> ] bi A-rep (simd-v+) A-rep ] dip call ; inline
|
[ [ underlying1>> ] [ underlying2>> ] bi A-rep (simd-v+) A-rep ] dip call ; inline
|
||||||
|
|
||||||
\ A \ A-with \ A-rep \ A-vv->v-op \ A-vn->v-op \ A-v->v-op \ A-v->n-op simd-vector-words
|
\ A \ A-with \ A-rep \ A-vv->v-op \ A-vn->v-op \ A-vv->n-op \ A-v->v-op \ A-v->n-op simd-vector-words
|
||||||
\ A \ A-rep define-simd-256-type
|
\ A \ A-rep define-simd-256-type
|
||||||
|
|
||||||
;FUNCTOR
|
;FUNCTOR
|
||||||
|
|
|
@ -36,6 +36,7 @@ SIMD-OP: v*
|
||||||
SIMD-OP: v/
|
SIMD-OP: v/
|
||||||
SIMD-OP: vmin
|
SIMD-OP: vmin
|
||||||
SIMD-OP: vmax
|
SIMD-OP: vmax
|
||||||
|
SIMD-OP: v.
|
||||||
SIMD-OP: vsqrt
|
SIMD-OP: vsqrt
|
||||||
SIMD-OP: sum
|
SIMD-OP: sum
|
||||||
SIMD-OP: vabs
|
SIMD-OP: vabs
|
||||||
|
@ -47,10 +48,12 @@ SIMD-OP: vlshift
|
||||||
SIMD-OP: vrshift
|
SIMD-OP: vrshift
|
||||||
SIMD-OP: hlshift
|
SIMD-OP: hlshift
|
||||||
SIMD-OP: hrshift
|
SIMD-OP: hrshift
|
||||||
|
SIMD-OP: vshuffle
|
||||||
|
|
||||||
: (simd-broadcast) ( x rep -- v ) bad-simd-call ;
|
: (simd-broadcast) ( x rep -- v ) bad-simd-call ;
|
||||||
: (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
|
: (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
|
||||||
: (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ;
|
: (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ;
|
||||||
|
: (simd-select) ( v n rep -- x ) bad-simd-call ;
|
||||||
|
|
||||||
: assert-positive ( x -- y ) ;
|
: assert-positive ( x -- y ) ;
|
||||||
|
|
||||||
|
@ -110,6 +113,7 @@ M: vector-rep supported-simd-op?
|
||||||
{ \ (simd-v/) [ %div-vector-reps ] }
|
{ \ (simd-v/) [ %div-vector-reps ] }
|
||||||
{ \ (simd-vmin) [ %min-vector-reps ] }
|
{ \ (simd-vmin) [ %min-vector-reps ] }
|
||||||
{ \ (simd-vmax) [ %max-vector-reps ] }
|
{ \ (simd-vmax) [ %max-vector-reps ] }
|
||||||
|
{ \ (simd-v.) [ %dot-vector-reps ] }
|
||||||
{ \ (simd-vsqrt) [ %sqrt-vector-reps ] }
|
{ \ (simd-vsqrt) [ %sqrt-vector-reps ] }
|
||||||
{ \ (simd-sum) [ %horizontal-add-vector-reps ] }
|
{ \ (simd-sum) [ %horizontal-add-vector-reps ] }
|
||||||
{ \ (simd-vabs) [ %abs-vector-reps ] }
|
{ \ (simd-vabs) [ %abs-vector-reps ] }
|
||||||
|
@ -121,7 +125,9 @@ M: vector-rep supported-simd-op?
|
||||||
{ \ (simd-vrshift) [ %shr-vector-reps ] }
|
{ \ (simd-vrshift) [ %shr-vector-reps ] }
|
||||||
{ \ (simd-hlshift) [ %horizontal-shl-vector-reps ] }
|
{ \ (simd-hlshift) [ %horizontal-shl-vector-reps ] }
|
||||||
{ \ (simd-hrshift) [ %horizontal-shr-vector-reps ] }
|
{ \ (simd-hrshift) [ %horizontal-shr-vector-reps ] }
|
||||||
|
{ \ (simd-vshuffle) [ %shuffle-vector-reps ] }
|
||||||
{ \ (simd-broadcast) [ %broadcast-vector-reps ] }
|
{ \ (simd-broadcast) [ %broadcast-vector-reps ] }
|
||||||
{ \ (simd-gather-2) [ %gather-vector-2-reps ] }
|
{ \ (simd-gather-2) [ %gather-vector-2-reps ] }
|
||||||
{ \ (simd-gather-4) [ %gather-vector-4-reps ] }
|
{ \ (simd-gather-4) [ %gather-vector-4-reps ] }
|
||||||
|
{ \ (simd-select) [ %select-vector-reps ] }
|
||||||
} case member? ;
|
} case member? ;
|
||||||
|
|
|
@ -148,13 +148,14 @@ CONSTANT: simd-classes
|
||||||
: remove-integer-words ( alist -- alist' )
|
: remove-integer-words ( alist -- alist' )
|
||||||
[ drop { vlshift vrshift } member? not ] assoc-filter ;
|
[ drop { vlshift vrshift } member? not ] assoc-filter ;
|
||||||
|
|
||||||
: remove-horizontal-shifts ( alist -- alist' )
|
: remove-special-words ( alist -- alist' )
|
||||||
[ drop { hlshift hrshift } member? not ] assoc-filter ;
|
! These have their own tests later
|
||||||
|
[ drop { hlshift hrshift vshuffle } member? not ] assoc-filter ;
|
||||||
|
|
||||||
: ops-to-check ( elt-class -- alist )
|
: ops-to-check ( elt-class -- alist )
|
||||||
[ vector-words >alist ] dip
|
[ vector-words >alist ] dip
|
||||||
float = [ remove-integer-words ] [ remove-float-words ] if
|
float = [ remove-integer-words ] [ remove-float-words ] if
|
||||||
remove-horizontal-shifts ;
|
remove-special-words ;
|
||||||
|
|
||||||
: check-vector-ops ( class elt-class compare-quot -- )
|
: check-vector-ops ( class elt-class compare-quot -- )
|
||||||
[
|
[
|
||||||
|
@ -271,3 +272,47 @@ STRUCT: simd-struct
|
||||||
|
|
||||||
[ int-4{ 1 2 4 8 } ]
|
[ int-4{ 1 2 4 8 } ]
|
||||||
[ int-4{ 256 512 1024 2048 } [ { int-4 } declare 1 hrshift ] compile-call ] unit-test
|
[ int-4{ 256 512 1024 2048 } [ { int-4 } declare 1 hrshift ] compile-call ] unit-test
|
||||||
|
|
||||||
|
! Shuffles
|
||||||
|
: test-shuffle ( input shuffle -- failures )
|
||||||
|
[ dup class 1array ] dip
|
||||||
|
'[ _ declare _ vshuffle ]
|
||||||
|
[ call ] [ compile-call ] 2bi = not ; inline
|
||||||
|
|
||||||
|
: shuffles-for ( seq -- shuffles )
|
||||||
|
length {
|
||||||
|
{ 2 [
|
||||||
|
{
|
||||||
|
{ 0 1 }
|
||||||
|
{ 1 1 }
|
||||||
|
{ 1 0 }
|
||||||
|
{ 0 0 }
|
||||||
|
}
|
||||||
|
] }
|
||||||
|
{ 4 [
|
||||||
|
{
|
||||||
|
{ 1 2 3 0 }
|
||||||
|
{ 0 1 2 3 }
|
||||||
|
{ 1 1 2 2 }
|
||||||
|
{ 0 0 1 1 }
|
||||||
|
{ 2 2 3 3 }
|
||||||
|
{ 0 1 0 1 }
|
||||||
|
{ 2 3 2 3 }
|
||||||
|
{ 0 0 2 2 }
|
||||||
|
{ 1 1 3 3 }
|
||||||
|
{ 0 1 0 1 }
|
||||||
|
{ 2 2 3 3 }
|
||||||
|
}
|
||||||
|
] }
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
: test-shuffles ( input -- failures )
|
||||||
|
dup shuffles-for [ test-shuffle ] with filter ; inline
|
||||||
|
|
||||||
|
[ { } ] [ float-4{ 1.0 2.0 3.0 4.0 } test-shuffles ] unit-test
|
||||||
|
[ { } ] [ int-4{ 1 2 3 4 } test-shuffles ] unit-test
|
||||||
|
[ { } ] [ uint-4{ 1 2 3 4 } test-shuffles ] unit-test
|
||||||
|
|
||||||
|
[ { } ] [ double-2{ 1.0 2.0 } test-shuffles ] unit-test
|
||||||
|
[ { } ] [ longlong-2{ 1 2 } test-shuffles ] unit-test
|
||||||
|
[ { } ] [ ulonglong-2{ 1 2 } test-shuffles ] unit-test
|
||||||
|
|
|
@ -6,7 +6,7 @@ namespaces assocs fry splitting classes.algebra generalizations
|
||||||
locals compiler.tree.propagation.info ;
|
locals compiler.tree.propagation.info ;
|
||||||
IN: math.vectors.specialization
|
IN: math.vectors.specialization
|
||||||
|
|
||||||
SYMBOLS: -> +vector+ +scalar+ +nonnegative+ ;
|
SYMBOLS: -> +vector+ +scalar+ +nonnegative+ +literal+ ;
|
||||||
|
|
||||||
: signature-for-schema ( array-type elt-type schema -- signature )
|
: signature-for-schema ( array-type elt-type schema -- signature )
|
||||||
[
|
[
|
||||||
|
@ -14,6 +14,7 @@ SYMBOLS: -> +vector+ +scalar+ +nonnegative+ ;
|
||||||
{ +vector+ [ drop ] }
|
{ +vector+ [ drop ] }
|
||||||
{ +scalar+ [ nip ] }
|
{ +scalar+ [ nip ] }
|
||||||
{ +nonnegative+ [ nip ] }
|
{ +nonnegative+ [ nip ] }
|
||||||
|
{ +literal+ [ 2drop object ] }
|
||||||
} case
|
} case
|
||||||
] with with map ;
|
] with with map ;
|
||||||
|
|
||||||
|
@ -87,8 +88,9 @@ H{
|
||||||
{ vbitxor { +vector+ +vector+ -> +vector+ } }
|
{ vbitxor { +vector+ +vector+ -> +vector+ } }
|
||||||
{ vlshift { +vector+ +scalar+ -> +vector+ } }
|
{ vlshift { +vector+ +scalar+ -> +vector+ } }
|
||||||
{ vrshift { +vector+ +scalar+ -> +vector+ } }
|
{ vrshift { +vector+ +scalar+ -> +vector+ } }
|
||||||
{ hlshift { +vector+ +scalar+ -> +vector+ } }
|
{ hlshift { +vector+ +literal+ -> +vector+ } }
|
||||||
{ hrshift { +vector+ +scalar+ -> +vector+ } }
|
{ hrshift { +vector+ +literal+ -> +vector+ } }
|
||||||
|
{ vshuffle { +vector+ +literal+ -> +vector+ } }
|
||||||
}
|
}
|
||||||
|
|
||||||
PREDICATE: vector-word < word vector-words key? ;
|
PREDICATE: vector-word < word vector-words key? ;
|
||||||
|
@ -102,7 +104,10 @@ M: vector-word subwords specializations values [ word? ] filter ;
|
||||||
: add-specialization ( new-word signature word -- )
|
: add-specialization ( new-word signature word -- )
|
||||||
specializations set-at ;
|
specializations set-at ;
|
||||||
|
|
||||||
: word-schema ( word -- schema ) vector-words at ;
|
ERROR: bad-vector-word word ;
|
||||||
|
|
||||||
|
! Looks the word up in the vector-words table and returns its schema.
! Unlike a plain at lookup, a word that is not a key of vector-words
! throws bad-vector-word instead of silently yielding f.
: word-schema ( word -- schema )
|
||||||
|
vector-words ?at [ bad-vector-word ] unless ;
|
||||||
|
|
||||||
: inputs ( schema -- seq ) { -> } split first ;
|
: inputs ( schema -- seq ) { -> } split first ;
|
||||||
|
|
||||||
|
@ -129,8 +134,8 @@ M: vector-word subwords specializations values [ word? ] filter ;
|
||||||
{ [ dup complex class<= ] [ vector-words keys { vsqrt } diff ] }
|
{ [ dup complex class<= ] [ vector-words keys { vsqrt } diff ] }
|
||||||
[ { } ]
|
[ { } ]
|
||||||
} cond
|
} cond
|
||||||
! Don't specialize horizontal shifts at all, they're only for SIMD
|
! Don't specialize horizontal shifts or shuffles at all, they're only for SIMD
|
||||||
{ hlshift hrshift } diff
|
{ hlshift hrshift vshuffle } diff
|
||||||
nip ;
|
nip ;
|
||||||
|
|
||||||
:: specialize-vector-words ( array-type elt-type simd -- )
|
:: specialize-vector-words ( array-type elt-type simd -- )
|
||||||
|
|
|
@ -41,6 +41,8 @@ $nl
|
||||||
{ $subsection vbitxor }
|
{ $subsection vbitxor }
|
||||||
{ $subsection vlshift }
|
{ $subsection vlshift }
|
||||||
{ $subsection vrshift }
|
{ $subsection vrshift }
|
||||||
|
"Shuffling:"
|
||||||
|
{ $subsection vshuffle }
|
||||||
"Inner product and norm:"
|
"Inner product and norm:"
|
||||||
{ $subsection v. }
|
{ $subsection v. }
|
||||||
{ $subsection norm }
|
{ $subsection norm }
|
||||||
|
@ -231,6 +233,18 @@ HELP: hrshift
|
||||||
{ $values { "u" "a SIMD array" } { "n" "a non-negative integer" } { "w" "a SIMD array" } }
|
{ $values { "u" "a SIMD array" } { "n" "a non-negative integer" } { "w" "a SIMD array" } }
|
||||||
{ $description "Shifts the entire SIMD array to the right by " { $snippet "n" } " bytes. This word may only be used in a context where the compiler can statically infer that the input is a SIMD array." } ;
|
{ $description "Shifts the entire SIMD array to the right by " { $snippet "n" } " bytes. This word may only be used in a context where the compiler can statically infer that the input is a SIMD array." } ;
|
||||||
|
|
||||||
|
HELP: vshuffle
|
||||||
|
{ $values { "u" "a SIMD array" } { "perm" "an array of integers" } { "v" "a SIMD array" } }
|
||||||
|
{ $description "Permutes the elements of a SIMD array. Duplicate entries are allowed in the permutation." }
|
||||||
|
{ $examples
|
||||||
|
{ $example
|
||||||
|
"USING: alien.c-types math.vectors math.vectors.simd" "prettyprint ;"
|
||||||
|
"SIMD: int"
|
||||||
|
"int-4{ 69 42 911 13 } { 1 3 2 3 } vshuffle ."
|
||||||
|
"int-4{ 42 13 911 13 }"
|
||||||
|
}
|
||||||
|
} ;
|
||||||
|
|
||||||
HELP: norm-sq
|
HELP: norm-sq
|
||||||
{ $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } }
|
{ $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } }
|
||||||
{ $description "Computes the squared length of a mathematical vector." } ;
|
{ $description "Computes the squared length of a mathematical vector." } ;
|
||||||
|
|
|
@ -66,6 +66,9 @@ PRIVATE>
|
||||||
|
|
||||||
GENERIC: new-underlying ( underlying seq -- seq' )
|
GENERIC: new-underlying ( underlying seq -- seq' )
|
||||||
|
|
||||||
|
! Applies quot to the underlying>> slot of seq and passes the result, together
! with the original seq, to new-underlying to build seq'.
: change-underlying ( seq quot -- seq' )
|
||||||
|
'[ underlying>> @ ] keep new-underlying ; inline
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
: vbitand ( u v -- w ) over '[ _ [ bitand ] fp-bitwise-op ] 2map ;
|
: vbitand ( u v -- w ) over '[ _ [ bitand ] fp-bitwise-op ] 2map ;
|
||||||
|
@ -74,6 +77,14 @@ PRIVATE>
|
||||||
: vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ;
|
: vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ;
|
||||||
: vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ;
|
: vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ;
|
||||||
|
|
||||||
|
! Picks the elements of u at the indices listed in perm (via nths) and
! converts the result to be like u.
: vshuffle ( u perm -- v ) swap [ nths ] keep like ;
|
||||||
|
|
||||||
|
! Element-wise shift: applies shift with count n to every element of u.
: vlshift ( u n -- w ) '[ _ shift ] map ;
|
||||||
|
! Element-wise shift in the opposite direction of vlshift: negates n, then
! applies shift with that count to every element of u.
: vrshift ( u n -- w ) neg '[ _ shift ] map ;
|
||||||
|
|
||||||
|
! Whole-array byte shift on the underlying storage: puts an n-element
! byte-array in front of the underlying bytes and keeps the first 16 bytes.
! NOTE(review): the width is hard-coded to 16 bytes -- presumably the size of
! one SIMD register; confirm before using with other underlying sizes.
: hlshift ( u n -- w ) '[ _ <byte-array> prepend 16 head ] change-underlying ;
|
||||||
|
! Whole-array byte shift on the underlying storage: adds an n-element
! byte-array after the underlying bytes and keeps the last 16 bytes.
! NOTE(review): the width is hard-coded to 16 bytes -- presumably the size of
! one SIMD register; confirm before using with other underlying sizes.
: hrshift ( u n -- w ) '[ _ <byte-array> append 16 tail* ] change-underlying ;
|
||||||
|
|
||||||
: vand ( u v -- w ) [ and ] 2map ;
|
: vand ( u v -- w ) [ and ] 2map ;
|
||||||
: vor ( u v -- w ) [ or ] 2map ;
|
: vor ( u v -- w ) [ or ] 2map ;
|
||||||
: vxor ( u v -- w ) [ xor ] 2map ;
|
: vxor ( u v -- w ) [ xor ] 2map ;
|
||||||
|
@ -88,15 +99,6 @@ PRIVATE>
|
||||||
|
|
||||||
: v? ( ? u v -- w ) [ ? ] pick 3map-as ;
|
: v? ( ? u v -- w ) [ ? ] pick 3map-as ;
|
||||||
|
|
||||||
: vlshift ( u n -- w ) '[ _ shift ] map ;
|
|
||||||
: vrshift ( u n -- w ) neg '[ _ shift ] map ;
|
|
||||||
|
|
||||||
: hlshift ( u n -- w )
|
|
||||||
[ [ underlying>> ] dip <byte-array> prepend 16 head ] [ drop ] 2bi new-underlying ;
|
|
||||||
|
|
||||||
: hrshift ( u n -- w )
|
|
||||||
[ [ underlying>> ] dip <byte-array> append 16 tail* ] [ drop ] 2bi new-underlying ;
|
|
||||||
|
|
||||||
: vfloor ( u -- v ) [ floor ] map ;
|
: vfloor ( u -- v ) [ floor ] map ;
|
||||||
: vceiling ( u -- v ) [ ceiling ] map ;
|
: vceiling ( u -- v ) [ ceiling ] map ;
|
||||||
: vtruncate ( u -- v ) [ truncate ] map ;
|
: vtruncate ( u -- v ) [ truncate ] map ;
|
||||||
|
|
Loading…
Reference in New Issue