more intrinsic madness
parent
f6643a1c72
commit
4d54f27cd1
|
@ -1,4 +1,4 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! Copyright (C) 2009 Slava Pestov, Joe Groff.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors alien byte-arrays fry classes.algebra
|
||||
cpu.architecture kernel math sequences math.vectors
|
||||
|
@ -16,6 +16,28 @@ IN: compiler.cfg.intrinsics.simd
|
|||
|
||||
! compound vector ops
|
||||
|
||||
: sign-bit-mask ( rep -- byte-array )
|
||||
unsign-rep {
|
||||
{ char-16-rep [ uchar-array{
|
||||
HEX: 80 HEX: 80 HEX: 80 HEX: 80
|
||||
HEX: 80 HEX: 80 HEX: 80 HEX: 80
|
||||
HEX: 80 HEX: 80 HEX: 80 HEX: 80
|
||||
HEX: 80 HEX: 80 HEX: 80 HEX: 80
|
||||
} underlying>> ] }
|
||||
{ short-8-rep [ ushort-array{
|
||||
HEX: 8000 HEX: 8000 HEX: 8000 HEX: 8000
|
||||
HEX: 8000 HEX: 8000 HEX: 8000 HEX: 8000
|
||||
} underlying>> ] }
|
||||
{ int-4-rep [ uint-array{
|
||||
HEX: 8000,0000 HEX: 8000,0000
|
||||
HEX: 8000,0000 HEX: 8000,0000
|
||||
} underlying>> ] }
|
||||
{ longlong-2-rep [ ulonglong-array{
|
||||
HEX: 8000,0000,0000,0000
|
||||
HEX: 8000,0000,0000,0000
|
||||
} underlying>> ] }
|
||||
} case ;
|
||||
|
||||
: ^load-neg-zero-vector ( rep -- dst )
|
||||
{
|
||||
{ float-4-rep [ float-array{ -0.0 -0.0 -0.0 -0.0 } underlying>> ^^load-constant ] }
|
||||
|
@ -46,11 +68,163 @@ IN: compiler.cfg.intrinsics.simd
|
|||
mask false rep ^^andn-vector
|
||||
rep ^^or-vector ;
|
||||
|
||||
: ^compare-vector ( src1 src2 rep cc -- dst )
|
||||
... ;
|
||||
: ^minmax-compare-vector ( src1 src2 rep cc -- dst )
|
||||
order-cc {
|
||||
{ cc< [ src1 src2 rep ^^max-vector src1 rep cc/= ^^compare-vector ] }
|
||||
{ cc<= [ src1 src2 rep ^^min-vector src1 rep cc= ^^compare-vector ] }
|
||||
{ cc> [ src1 src2 rep ^^min-vector src1 rep cc/= ^^compare-vector ] }
|
||||
{ cc>= [ src1 src2 rep ^^max-vector src1 rep cc= ^^compare-vector ] }
|
||||
} case ;
|
||||
|
||||
: ^widened-shr-vector-imm ( src shift rep -- dst )
|
||||
widen-vector-rep ^^shr-vector-imm ;
|
||||
: ^compare-vector ( src1 src2 rep cc -- dst )
|
||||
{
|
||||
[ ^^compare-vector ]
|
||||
[ ^minmax-compare-vector ]
|
||||
{ unsigned-int-vector-rep [| src1 src2 rep cc |
|
||||
rep sign-bit-mask ^^load-constant :> sign-bits
|
||||
src1 sign-bits rep ^^xor-vector
|
||||
src2 sign-bits rep ^^xor-vector
|
||||
rep unsign-rep cc ^^compare-vector
|
||||
] }
|
||||
} vv-cc-vector-op ;
|
||||
|
||||
: ^unpack-vector-head ( src rep -- dst )
|
||||
{
|
||||
[ ^^unpack-vector-head ]
|
||||
{ unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-head ] bi ] }
|
||||
{ signed-int-vector-rep [| src rep |
|
||||
src src rep ^^merge-vector-head :> merged
|
||||
rep rep-component-type heap-size 8 * :> bits
|
||||
merged bits rep ^widened-shr-vector-imm
|
||||
] }
|
||||
{ signed-int-vector-rep [| src rep |
|
||||
rep ^^zero-vector :> zero
|
||||
zero src rep cc> ^compare-vector :> sign
|
||||
src sign rep ^^merge-vector-head
|
||||
] }
|
||||
} v-vector-op ;
|
||||
|
||||
: ^unpack-vector-tail ( src rep -- dst )
|
||||
{
|
||||
[ ^^unpack-vector-tail ]
|
||||
[ [ ^^tail>head-vector ] [ ^^unpack-vector-head ] bi ]
|
||||
{ unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-tail ] bi ] }
|
||||
{ signed-int-vector-rep [| src rep |
|
||||
src src rep ^^merge-vector-tail :> merged
|
||||
rep rep-component-type heap-size 8 * :> bits
|
||||
merged bits rep ^widened-shr-vector-imm
|
||||
] }
|
||||
{ signed-int-vector-rep [| src rep |
|
||||
rep ^^zero-vector :> zero
|
||||
zero src rep cc> ^compare-vector :> sign
|
||||
src sign rep ^^merge-vector-tail
|
||||
] }
|
||||
} v-vector-op ;
|
||||
|
||||
: ^(sum-2) ( src rep -- dst )
|
||||
{
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
[| src rep |
|
||||
src src rep ^^merge-vector-head :> head
|
||||
src src rep ^^merge-vector-tail :> tail
|
||||
head tail rep ^^add-vector
|
||||
]
|
||||
} v-vector-op ;
|
||||
|
||||
: ^(sum-4) ( src rep -- dst )
|
||||
{
|
||||
[
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
[ dupd ^^horizontal-add-vector ] bi
|
||||
]
|
||||
[| src rep |
|
||||
src src rep ^^merge-vector-head :> head
|
||||
src src rep ^^merge-vector-tail :> tail
|
||||
head tail rep ^^add-vector :> src'
|
||||
|
||||
rep widen-rep :> rep'
|
||||
src' src' rep' ^^merge-vector-head :> head'
|
||||
src' src' rep' ^^merge-vector-tail :> tail'
|
||||
head' tail' rep ^^add-vector
|
||||
]
|
||||
} v-vector-op ;
|
||||
|
||||
: ^(sum-8) ( src rep -- dst )
|
||||
{
|
||||
[
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
[ dupd ^^horizontal-add-vector ] tri
|
||||
]
|
||||
[| src rep |
|
||||
src src rep ^^merge-vector-head :> head
|
||||
src src rep ^^merge-vector-tail :> tail
|
||||
head tail rep ^^add-vector :> src'
|
||||
|
||||
rep widen-rep :> rep'
|
||||
src' src' rep' ^^merge-vector-head :> head'
|
||||
src' src' rep' ^^merge-vector-tail :> tail'
|
||||
head' tail' rep ^^add-vector :> src''
|
||||
|
||||
rep' widen-rep :> rep''
|
||||
src'' src'' rep'' ^^merge-vector-head :> head''
|
||||
src'' src'' rep'' ^^merge-vector-tail :> tail''
|
||||
head'' tail'' rep ^^add-vector
|
||||
]
|
||||
} v-vector-op ;
|
||||
|
||||
: ^(sum-16) ( src rep -- dst )
|
||||
{
|
||||
[
|
||||
{
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
} cleave
|
||||
]
|
||||
[| src rep |
|
||||
src src rep ^^merge-vector-head :> head
|
||||
src src rep ^^merge-vector-tail :> tail
|
||||
head tail rep ^^add-vector :> src'
|
||||
|
||||
rep widen-rep :> rep'
|
||||
src' src' rep' ^^merge-vector-head :> head'
|
||||
src' src' rep' ^^merge-vector-tail :> tail'
|
||||
head' tail' rep ^^add-vector :> src''
|
||||
|
||||
rep' widen-rep :> rep''
|
||||
src'' src'' rep'' ^^merge-vector-head :> head''
|
||||
src'' src'' rep'' ^^merge-vector-tail :> tail''
|
||||
head'' tail'' rep ^^add-vector :> src'''
|
||||
|
||||
rep'' widen-rep :> rep'''
|
||||
src''' src''' rep''' ^^merge-vector-head :> head'''
|
||||
src''' src''' rep''' ^^merge-vector-tail :> tail'''
|
||||
head''' tail''' rep ^^add-vector
|
||||
]
|
||||
} v-vector-op ;
|
||||
|
||||
: ^(sum-vector) ( src rep -- dst )
|
||||
[
|
||||
rep-length {
|
||||
{ 2 [ ^(sum-2) ] }
|
||||
{ 4 [ ^(sum-4) ] }
|
||||
{ 8 [ ^(sum-8) ] }
|
||||
{ 16 [ ^(sum-16) ] }
|
||||
} case
|
||||
] [ ^^vector>scalar ] bi ;
|
||||
|
||||
: ^sum-vector ( src rep -- dst )
|
||||
unsign-rep {
|
||||
{ float-vector-rep [ ^(sum-vector) ] }
|
||||
{ int-vector-rep [| src rep |
|
||||
src rep ^unpack-vector-head :> head
|
||||
src rep ^unpack-vector-tail :> tail
|
||||
rep widen-rep :> wide-rep
|
||||
head tail wide-rep ^^add-vector wide-rep ^(sum-vector)
|
||||
] }
|
||||
} v-vector-op ;
|
||||
|
||||
! intrinsic emitters
|
||||
|
||||
|
@ -135,12 +309,7 @@ IN: compiler.cfg.intrinsics.simd
|
|||
: emit-simd-v. ( node -- )
|
||||
{
|
||||
[ ^^dot-vector ]
|
||||
{ float-vector-rep [| src1 src2 rep |
|
||||
|
||||
] }
|
||||
{ int-vector-rep [| src1 src2 rep |
|
||||
...
|
||||
] }
|
||||
[ [ ^^mul-vector ] [ ^sum-vector ] bi ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vsqrt ( node -- )
|
||||
|
@ -149,7 +318,9 @@ IN: compiler.cfg.intrinsics.simd
|
|||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-sum ( node -- )
|
||||
... ;
|
||||
{
|
||||
[ ^sum-vector ]
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-vabs ( node -- )
|
||||
{
|
||||
|
@ -195,30 +366,32 @@ IN: compiler.cfg.intrinsics.simd
|
|||
[ ^^shl-vector ]
|
||||
} {
|
||||
[ ^^shl-vector-imm ]
|
||||
} emit-vn-or-vl-vector-op ;
|
||||
} [ integer? ] emit-vv-or-vl-vector-op ;
|
||||
|
||||
: emit-simd-vrshift ( node -- )
|
||||
{
|
||||
[ ^^shr-vector ]
|
||||
} {
|
||||
[ ^^shr-vector-imm ]
|
||||
} emit-vn-or-vl-vector-op ;
|
||||
} [ integer? ] emit-vv-or-vl-vector-op ;
|
||||
|
||||
: emit-simd-hlshift ( node -- )
|
||||
{
|
||||
[ ^^horizontal-shl-vector-imm ]
|
||||
} emit-vl-vector-op ;
|
||||
} [ integer? ] emit-vl-vector-op ;
|
||||
|
||||
: emit-simd-hrshift ( node -- )
|
||||
{
|
||||
[ ^^horizontal-shr-vector-imm ]
|
||||
} emit-vl-vector-op ;
|
||||
} [ integer? ] emit-vl-vector-op ;
|
||||
|
||||
: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
|
||||
|
||||
: emit-simd-vshuffle-elements ( node -- )
|
||||
{
|
||||
[ ^^shuffle-vector-imm ]
|
||||
[ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] ]
|
||||
} emit-vl-vector-op ;
|
||||
} [ shuffle? ] emit-vl-vector-op ;
|
||||
|
||||
: emit-simd-vshuffle-bytes ( node -- )
|
||||
{
|
||||
|
@ -236,24 +409,42 @@ IN: compiler.cfg.intrinsics.simd
|
|||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-v<= ( node -- )
|
||||
[ cc<= ^compare-vector ] (emit-vv-vector-op) ;
|
||||
{
|
||||
[ cc<= ^compare-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
: emit-simd-v< ( node -- )
|
||||
[ cc< ^compare-vector ] (emit-vv-vector-op) ;
|
||||
{
|
||||
[ cc< ^compare-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
: emit-simd-v= ( node -- )
|
||||
[ cc= ^compare-vector ] (emit-vv-vector-op) ;
|
||||
{
|
||||
[ cc= ^compare-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
: emit-simd-v> ( node -- )
|
||||
[ cc> ^compare-vector ] (emit-vv-vector-op) ;
|
||||
{
|
||||
[ cc> ^compare-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
: emit-simd-v>= ( node -- )
|
||||
[ cc>= ^compare-vector ] (emit-vv-vector-op) ;
|
||||
{
|
||||
[ cc>= ^compare-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
: emit-simd-vunordered? ( node -- )
|
||||
[ cc/<>= ^compare-vector ] (emit-vv-vector-op) ;
|
||||
{
|
||||
[ cc/<>= ^compare-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vany? ( node -- )
|
||||
[ vcc-any ^test-vector ] (emit-vv-vector-op) ;
|
||||
{
|
||||
[ vcc-any ^test-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
: emit-simd-vall? ( node -- )
|
||||
[ vcc-all ^test-vector ] (emit-vv-vector-op) ;
|
||||
{
|
||||
[ vcc-all ^test-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
: emit-simd-vnone? ( node -- )
|
||||
[ vcc-none ^test-vector ] (emit-vv-vector-op) ;
|
||||
{
|
||||
[ vcc-none ^test-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-v>float ( node -- )
|
||||
{
|
||||
|
@ -277,48 +468,45 @@ IN: compiler.cfg.intrinsics.simd
|
|||
[ ^^unsigned-pack-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
! XXX shr vector rep is widened!
|
||||
: emit-simd-vunpack-head ( node -- )
|
||||
{
|
||||
[ ^^unpack-vector-head ]
|
||||
{ unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-head ] bi ] }
|
||||
{ signed-int-vector-rep [| src rep |
|
||||
src src rep ^^merge-vector-head :> merged
|
||||
rep rep-component-type heap-size 8 * :> bits
|
||||
merged bits rep ^widened-shr-vector-imm
|
||||
] }
|
||||
{ signed-int-vector-rep [| src rep |
|
||||
rep ^^zero-vector :> zero
|
||||
zero src rep cc> ^compare-vector :> sign
|
||||
src sign rep ^^merge-vector-head
|
||||
] }
|
||||
[ ^unpack-vector-head ]
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-vunpack-tail ( node -- )
|
||||
{
|
||||
[ ^^unpack-vector-tail ]
|
||||
[ [ ^^tail>head-vector ] [ ^^unpack-vector-head ] bi ]
|
||||
{ unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-tail ] bi ] }
|
||||
{ signed-int-vector-rep [| src rep |
|
||||
src src rep ^^merge-vector-tail :> merged
|
||||
rep rep-component-type heap-size 8 * :> bits
|
||||
merged bits rep widen-vector-rep ^widened-shr-vector-imm
|
||||
] }
|
||||
{ signed-int-vector-rep [| src rep |
|
||||
rep ^^zero-vector :> zero
|
||||
zero src rep cc> ^compare-vector :> sign
|
||||
src sign rep ^^merge-vector-tail
|
||||
] }
|
||||
[ ^unpack-vector-tail ]
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-with ( node -- )
|
||||
{
|
||||
[ ^^with-vector ]
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-gather-2 ( node -- )
|
||||
{
|
||||
[ ^^gather-vector-2 ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-gather-4 ( node -- )
|
||||
{
|
||||
[ ^^gather-vector-4 ]
|
||||
} emit-vvvv-vector-op ;
|
||||
|
||||
: emit-simd-select ( node -- )
|
||||
{
|
||||
[ ^^select-vector ]
|
||||
} [ integer? ] emit-vl-vector-op ;
|
||||
|
||||
: emit-alien-vector ( node -- )
|
||||
{
|
||||
[ ^^alien-vector ]
|
||||
} emit-alien-vector-op ;
|
||||
|
||||
: emit-set-alien-vector ( node -- )
|
||||
: emit-alien-vector-aligned ( node -- )
|
||||
: emit-set-alien-vector-aligned ( node -- )
|
||||
{
|
||||
[ ^^set-alien-vector ]
|
||||
} emit-set-alien-vector-op ;
|
||||
|
||||
: enable-simd ( -- )
|
||||
{
|
||||
|
@ -376,8 +564,6 @@ IN: compiler.cfg.intrinsics.simd
|
|||
{ (simd-select) [ emit-simd-select ] }
|
||||
{ alien-vector [ emit-alien-vector ] }
|
||||
{ set-alien-vector [ emit-set-alien-vector ] }
|
||||
{ alien-vector-aligned [ emit-alien-vector ] }
|
||||
{ set-alien-vector-aligned [ emit-set-alien-vector ] }
|
||||
} enable-intrinsics ;
|
||||
|
||||
enable-simd
|
||||
|
|
|
@ -1131,34 +1131,13 @@ M: x86 %max-vector-reps
|
|||
M: x86 %dot-vector
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ float-4-rep [
|
||||
sse4.1?
|
||||
[ HEX: ff DPPS ]
|
||||
[
|
||||
[ MULPS ] [
|
||||
drop 2dup float-4-rep
|
||||
[ %horizontal-add-vector ]
|
||||
[ %horizontal-add-vector ]
|
||||
[ nip %vector>scalar ] 3tri
|
||||
] 2bi
|
||||
] if
|
||||
] }
|
||||
{ double-2-rep [
|
||||
sse4.1?
|
||||
[ HEX: ff DPPD ]
|
||||
[
|
||||
[ MULPD ] [
|
||||
drop 2dup double-2-rep
|
||||
[ %horizontal-add-vector ]
|
||||
[ nip %vector>scalar ] 3bi
|
||||
] 2bi
|
||||
] if
|
||||
] }
|
||||
{ float-4-rep [ HEX: ff DPPS ] }
|
||||
{ double-2-rep [ HEX: ff DPPD ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %dot-vector-reps
|
||||
{
|
||||
{ sse3? { float-4-rep double-2-rep } }
|
||||
{ sse4.1? { float-4-rep double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %horizontal-add-vector ( dst src1 src2 rep -- )
|
||||
|
|
|
@ -77,9 +77,6 @@ GENERIC: new-underlying ( underlying seq -- seq' )
|
|||
: alien-vector ( c-ptr n rep -- value ) \ alien-vector bad-simd-call ;
|
||||
: set-alien-vector ( c-ptr n rep -- value ) \ set-alien-vector bad-simd-call ;
|
||||
|
||||
: alien-vector-aligned ( c-ptr n rep -- value ) \ alien-vector-aligned bad-simd-call ;
|
||||
: set-alien-vector-aligned ( c-ptr n rep -- value ) \ set-alien-vector-aligned bad-simd-call ;
|
||||
|
||||
! Helper for boolean vector literals
|
||||
|
||||
: vector-true-value ( class -- value )
|
||||
|
|
Loading…
Reference in New Issue