scalar fallbacks for simd intrinsics
parent
6794d14652
commit
d94ffe6d78
|
@ -1,57 +1,76 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors byte-arrays combinators fry sequences
|
||||
compiler.tree.propagation.info cpu.architecture kernel words math
|
||||
USING: accessors byte-arrays combinators continuations fry sequences
|
||||
compiler.tree.propagation.info cpu.architecture kernel words make math
|
||||
math.intervals math.vectors.simd.intrinsics ;
|
||||
IN: compiler.tree.propagation.simd
|
||||
|
||||
{
|
||||
(simd-v+)
|
||||
(simd-v-)
|
||||
(simd-vneg)
|
||||
(simd-vabs)
|
||||
(simd-v+-)
|
||||
(simd-v*)
|
||||
(simd-v/)
|
||||
(simd-vmin)
|
||||
(simd-vmax)
|
||||
(simd-sum)
|
||||
(simd-vsqrt)
|
||||
(simd-vbitand)
|
||||
(simd-vbitandn)
|
||||
(simd-vbitor)
|
||||
(simd-vbitxor)
|
||||
(simd-vbitnot)
|
||||
(simd-vand)
|
||||
(simd-vandn)
|
||||
(simd-vor)
|
||||
(simd-vxor)
|
||||
(simd-vnot)
|
||||
(simd-vlshift)
|
||||
(simd-vrshift)
|
||||
(simd-hlshift)
|
||||
(simd-hrshift)
|
||||
(simd-vshuffle-bytes)
|
||||
(simd-vshuffle-elements)
|
||||
(simd-vmerge-head)
|
||||
(simd-vmerge-tail)
|
||||
(simd-v>float)
|
||||
(simd-v>integer)
|
||||
(simd-vpack-signed)
|
||||
(simd-vpack-unsigned)
|
||||
(simd-vunpack-head)
|
||||
(simd-vunpack-tail)
|
||||
(simd-v<=)
|
||||
(simd-v<)
|
||||
(simd-v=)
|
||||
(simd-v>)
|
||||
(simd-v>=)
|
||||
(simd-vunordered?)
|
||||
(simd-with)
|
||||
(simd-gather-2)
|
||||
(simd-gather-4)
|
||||
alien-vector
|
||||
} [ { byte-array } "default-output-classes" set-word-prop ] each
|
||||
CONSTANT: vector>vector-intrinsics
|
||||
{
|
||||
(simd-v+)
|
||||
(simd-v-)
|
||||
(simd-vneg)
|
||||
(simd-v+-)
|
||||
(simd-vs+)
|
||||
(simd-vs-)
|
||||
(simd-vs*)
|
||||
(simd-v*)
|
||||
(simd-v/)
|
||||
(simd-vmin)
|
||||
(simd-vmax)
|
||||
(simd-vsqrt)
|
||||
(simd-vabs)
|
||||
(simd-vbitand)
|
||||
(simd-vbitandn)
|
||||
(simd-vbitor)
|
||||
(simd-vbitxor)
|
||||
(simd-vbitnot)
|
||||
(simd-vand)
|
||||
(simd-vandn)
|
||||
(simd-vor)
|
||||
(simd-vxor)
|
||||
(simd-vnot)
|
||||
(simd-vlshift)
|
||||
(simd-vrshift)
|
||||
(simd-hlshift)
|
||||
(simd-hrshift)
|
||||
(simd-vshuffle-elements)
|
||||
(simd-vshuffle-bytes)
|
||||
(simd-vmerge-head)
|
||||
(simd-vmerge-tail)
|
||||
(simd-v<=)
|
||||
(simd-v<)
|
||||
(simd-v=)
|
||||
(simd-v>)
|
||||
(simd-v>=)
|
||||
(simd-vunordered?)
|
||||
(simd-v>float)
|
||||
(simd-v>integer)
|
||||
(simd-vpack-signed)
|
||||
(simd-vpack-unsigned)
|
||||
(simd-vunpack-head)
|
||||
(simd-vunpack-tail)
|
||||
(simd-with)
|
||||
(simd-gather-2)
|
||||
(simd-gather-4)
|
||||
alien-vector
|
||||
}
|
||||
|
||||
CONSTANT: vector-other-intrinsics
|
||||
{
|
||||
(simd-v.)
|
||||
(simd-sum)
|
||||
(simd-vany?)
|
||||
(simd-vall?)
|
||||
(simd-vnone?)
|
||||
(simd-select)
|
||||
set-alien-vector
|
||||
}
|
||||
|
||||
: vector-intrinsics ( -- x )
|
||||
vector>vector-intrinsics vector-other-intrinsics append ;
|
||||
|
||||
vector>vector-intrinsics [ { byte-array } "default-output-classes" set-word-prop ] each
|
||||
|
||||
: scalar-output-class ( rep -- class )
|
||||
dup literal?>> [
|
||||
|
@ -79,12 +98,16 @@ IN: compiler.tree.propagation.simd
|
|||
real [0,inf] <class/interval-info> value-info-intersect
|
||||
] "outputs" set-word-prop
|
||||
|
||||
! If SIMD is not available, inline alien-vector and set-alien-vector
|
||||
! to get a speedup
|
||||
! Calls intrinsic-quot on node inside a { } make scope and throws away
! whatever was collected. Outputs t if the call returned normally; if it
! threw, the recovery quotation drops the node and the error and outputs f.
: try-intrinsic ( node intrinsic-quot -- ? )
|
||||
'[ [ _ call( node -- ) ] { } make drop t ] [ 2drop f ] recover ;
|
||||
|
||||
! Installs a "custom-inlining" hook on word. The hook receives the call
! node and outputs either f (do not inline) or the word's definition
! (inline the scalar fallback):
!   - word has an "intrinsic" and try-intrinsic succeeds on the node -> f
!   - word has an "intrinsic" but try-intrinsic fails -> word's def>>
!   - word has no "intrinsic" at all -> word's def>>
: inline-unless-intrinsic ( word -- )
    dup '[
        _ swap over "intrinsic" word-prop
        ! Stack here: word node intrinsic/f
        [ try-intrinsic [ drop f ] [ def>> ] if ]
        ! if* discards the f, leaving "word node"; the node must be
        ! dropped so that def>> is applied to the word, not the node.
        [ drop def>> ] if*
    ]
    "custom-inlining" set-word-prop ;
|
||||
|
||||
\ alien-vector inline-unless-intrinsic
|
||||
|
||||
\ set-alien-vector inline-unless-intrinsic
|
||||
vector-intrinsics [ inline-unless-intrinsic ] each
|
||||
|
|
|
@ -105,7 +105,7 @@ scalar-rep ;
|
|||
{ ushort-scalar-rep short-scalar-rep }
|
||||
{ uint-scalar-rep int-scalar-rep }
|
||||
{ ulonglong-scalar-rep longlong-scalar-rep }
|
||||
} ?at drop ;
|
||||
} ?at drop ; foldable
|
||||
|
||||
: widen-vector-rep ( rep -- rep' )
|
||||
{
|
||||
|
@ -116,7 +116,18 @@ scalar-rep ;
|
|||
{ ushort-8-rep uint-4-rep }
|
||||
{ uint-4-rep ulonglong-2-rep }
|
||||
{ float-4-rep double-2-rep }
|
||||
} at ;
|
||||
} at ; foldable
|
||||
|
||||
: narrow-vector-rep ( rep -- rep' )
|
||||
{
|
||||
{ short-8-rep char-16-rep }
|
||||
{ int-4-rep short-8-rep }
|
||||
{ longlong-2-rep int-4-rep }
|
||||
{ ushort-8-rep uchar-16-rep }
|
||||
{ uint-4-rep ushort-8-rep }
|
||||
{ ulonglong-2-rep uint-4-rep }
|
||||
{ double-2-rep float-4-rep }
|
||||
} at ; foldable
|
||||
|
||||
! Register classes
|
||||
SINGLETONS: int-regs float-regs ;
|
||||
|
|
|
@ -0,0 +1,232 @@
|
|||
! (c)2009 Slava Pestov, Joe Groff bsd license
|
||||
USING: accessors alien alien.c-types alien.data combinators
|
||||
cords cpu.architecture fry generalizations kernel libc locals
|
||||
math math.libm math.order math.ranges math.vectors sequences
|
||||
sequences.private specialized-arrays vocabs.loader ;
|
||||
QUALIFIED-WITH: alien.c-types c
|
||||
SPECIALIZED-ARRAYS:
|
||||
c:char c:short c:int c:longlong
|
||||
c:uchar c:ushort c:uint c:ulonglong
|
||||
c:float c:double ;
|
||||
IN: math.vectors.simd.intrinsics
|
||||
|
||||
! Identity word: the body is empty, so the input is returned unchanged.
! NOTE(review): despite the name, no positivity check is performed here.
: assert-positive ( x -- y ) ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: >bitwise-vector-rep ( rep -- rep' )
|
||||
{
|
||||
{ float-4-rep [ uint-4-rep ] }
|
||||
{ double-2-rep [ ulonglong-2-rep ] }
|
||||
[ ]
|
||||
} case ; foldable
|
||||
|
||||
: >uint-vector-rep ( rep -- rep' )
|
||||
{
|
||||
{ longlong-2-rep [ ulonglong-2-rep ] }
|
||||
{ int-4-rep [ uint-4-rep ] }
|
||||
{ short-8-rep [ ushort-8-rep ] }
|
||||
{ char-16-rep [ uchar-16-rep ] }
|
||||
[ ]
|
||||
} case ; foldable
|
||||
|
||||
: >int-vector-rep ( rep -- rep' )
|
||||
{
|
||||
{ float-4-rep [ int-4-rep ] }
|
||||
{ double-2-rep [ longlong-2-rep ] }
|
||||
} case ; foldable
|
||||
|
||||
: >float-vector-rep ( rep -- rep' )
|
||||
{
|
||||
{ int-4-rep [ float-4-rep ] }
|
||||
{ longlong-2-rep [ double-2-rep ] }
|
||||
} case ; foldable
|
||||
|
||||
: [byte>rep-array] ( rep -- class )
|
||||
{
|
||||
{ char-16-rep [ [ byte-array>char-array ] ] }
|
||||
{ uchar-16-rep [ [ byte-array>uchar-array ] ] }
|
||||
{ short-8-rep [ [ byte-array>short-array ] ] }
|
||||
{ ushort-8-rep [ [ byte-array>ushort-array ] ] }
|
||||
{ int-4-rep [ [ byte-array>int-array ] ] }
|
||||
{ uint-4-rep [ [ byte-array>uint-array ] ] }
|
||||
{ longlong-2-rep [ [ byte-array>longlong-array ] ] }
|
||||
{ ulonglong-2-rep [ [ byte-array>ulonglong-array ] ] }
|
||||
{ float-4-rep [ [ byte-array>float-array ] ] }
|
||||
{ double-2-rep [ [ byte-array>double-array ] ] }
|
||||
} case ; foldable
|
||||
|
||||
: [>rep-array] ( rep -- class )
|
||||
{
|
||||
{ char-16-rep [ [ >char-array ] ] }
|
||||
{ uchar-16-rep [ [ >uchar-array ] ] }
|
||||
{ short-8-rep [ [ >short-array ] ] }
|
||||
{ ushort-8-rep [ [ >ushort-array ] ] }
|
||||
{ int-4-rep [ [ >int-array ] ] }
|
||||
{ uint-4-rep [ [ >uint-array ] ] }
|
||||
{ longlong-2-rep [ [ >longlong-array ] ] }
|
||||
{ ulonglong-2-rep [ [ >ulonglong-array ] ] }
|
||||
{ float-4-rep [ [ >float-array ] ] }
|
||||
{ double-2-rep [ [ >double-array ] ] }
|
||||
} case ; foldable
|
||||
|
||||
: [<rep-array>] ( rep -- class )
|
||||
{
|
||||
{ char-16-rep [ [ 16 (char-array) ] ] }
|
||||
{ uchar-16-rep [ [ 16 (uchar-array) ] ] }
|
||||
{ short-8-rep [ [ 8 (short-array) ] ] }
|
||||
{ ushort-8-rep [ [ 8 (ushort-array) ] ] }
|
||||
{ int-4-rep [ [ 4 (int-array) ] ] }
|
||||
{ uint-4-rep [ [ 4 (uint-array) ] ] }
|
||||
{ longlong-2-rep [ [ 2 (longlong-array) ] ] }
|
||||
{ ulonglong-2-rep [ [ 2 (ulonglong-array) ] ] }
|
||||
{ float-4-rep [ [ 4 (float-array) ] ] }
|
||||
{ double-2-rep [ [ 2 (double-array) ] ] }
|
||||
} case ; foldable
|
||||
|
||||
! Outputs the pair of values the comparison fallbacks store for a true
! and a false result. For float vector reps these are the double whose
! bit pattern is -1 (all bits set) and 0.0; for every other rep, -1 and 0.
: rep-tf-values ( rep -- t f )
|
||||
float-vector-rep? [ -1 bits>double 0.0 ] [ -1 0 ] if ;
|
||||
|
||||
: >rep-array ( a rep -- a' )
|
||||
[byte>rep-array] call( a -- a' ) ; inline
|
||||
: 2>rep-array ( a b rep -- a' b' )
|
||||
[byte>rep-array] '[ _ call( a -- a' ) ] bi@ ; inline
|
||||
: <rep-array> ( rep -- a' )
|
||||
[<rep-array>] call( -- a' ) ; inline
|
||||
|
||||
: components-map ( a rep quot -- c )
|
||||
[ >rep-array ] dip map underlying>> ; inline
|
||||
: components-2map ( a b rep quot -- c )
|
||||
[ 2>rep-array ] dip 2map underlying>> ; inline
|
||||
: components-reduce ( a rep quot -- x )
|
||||
[ >rep-array [ ] ] dip map-reduce ; inline
|
||||
|
||||
: bitwise-components-map ( a rep quot -- c )
|
||||
[ >bitwise-vector-rep >rep-array ] dip map underlying>> ; inline
|
||||
: bitwise-components-2map ( a b rep quot -- c )
|
||||
[ >bitwise-vector-rep 2>rep-array ] dip 2map underlying>> ; inline
|
||||
: bitwise-components-reduce ( a rep quot -- x )
|
||||
[ >bitwise-vector-rep >rep-array [ ] ] dip map-reduce ; inline
|
||||
|
||||
! Scalar shuffle: for each position "to" in elts, copies the element of a
! selected by the index stored there ("from") into position "to" of a
! fresh rep array, then outputs that array's underlying byte array.
! The index is masked with rep-length - 1 BEFORE the lookup so that an
! out-of-range index wraps around instead of reaching nth-unsafe; the
! previous code masked the fetched element instead, corrupting the data
! and leaving the index unchecked.
:: (vshuffle) ( a elts rep -- c )
    a rep >rep-array :> a'
    rep <rep-array> :> c'
    elts [| from to |
        from rep rep-length 1 - bitand
        a' nth-unsafe
        to c' set-nth-unsafe
    ] each-index
    c' underlying>> ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: (simd-v+) ( a b rep -- c ) [ + ] components-2map ;
|
||||
: (simd-v-) ( a b rep -- c ) [ - ] components-2map ;
|
||||
: (simd-vneg) ( a rep -- c ) [ neg ] components-map ;
|
||||
! Scalar fallback for alternating subtract/add: even-indexed components
! get a - b, odd-indexed components get a + b. Outputs the underlying
! byte array of the result.
:: (simd-v+-) ( a b rep -- c )
    a b rep 2>rep-array :> ( a' b' )
    rep <rep-array> :> c'
    ! Visit the even indices 0, 2, ...; rep-length (not the sequence word
    ! length, which does not apply to a rep) gives the component count.
    0 rep rep-length 1 - 2 <range> [| n |
        n a' nth-unsafe n b' nth-unsafe -
        n c' set-nth-unsafe

        n 1 + a' nth-unsafe n 1 + b' nth-unsafe +
        n 1 + c' set-nth-unsafe
    ] each
    c' underlying>> ;
|
||||
! Saturating add: each component sum is clamped to the range of the rep's
! component C type. c-type-clamp is given rep-component-type (as the
! vpack words do) rather than the rep itself.
: (simd-vs+) ( a b rep -- c ) dup rep-component-type '[ + _ c-type-clamp ] components-2map ;
|
||||
! Saturating subtract: each component difference is clamped to the range
! of the rep's component C type. c-type-clamp is given rep-component-type
! (as the vpack words do) rather than the rep itself.
: (simd-vs-) ( a b rep -- c ) dup rep-component-type '[ - _ c-type-clamp ] components-2map ;
|
||||
! Saturating multiply: each component product is clamped to the range of
! the rep's component C type. The previous body subtracted instead of
! multiplying (copy of (simd-vs-)) and passed the rep, not its component
! type, to c-type-clamp.
: (simd-vs*) ( a b rep -- c ) dup rep-component-type '[ * _ c-type-clamp ] components-2map ;
|
||||
: (simd-v*) ( a b rep -- c ) [ * ] components-2map ;
|
||||
: (simd-v/) ( a b rep -- c ) [ / ] components-2map ;
|
||||
: (simd-vmin) ( a b rep -- c ) [ min ] components-2map ;
|
||||
: (simd-vmax) ( a b rep -- c ) [ max ] components-2map ;
|
||||
: (simd-v.) ( a b rep -- n )
|
||||
[ 2>rep-array [ [ first ] bi@ * ] 2keep ] keep
|
||||
1 swap rep-length [a,b) [ '[ _ swap nth-unsafe ] bi@ * + ] with with each ;
|
||||
: (simd-vsqrt) ( a rep -- c ) [ fsqrt ] components-map ;
|
||||
: (simd-sum) ( a rep -- n ) [ + ] components-reduce ;
|
||||
: (simd-vabs) ( a rep -- c ) [ abs ] components-map ;
|
||||
: (simd-vbitand) ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
|
||||
! Bitwise and-not: complements each component of a, then ands it with the
! matching component of b. Uses bitnot; the boolean word not would turn
! every integer into f and break the following bitand.
: (simd-vbitandn) ( a b rep -- c ) [ [ bitnot ] dip bitand ] bitwise-components-2map ;
|
||||
: (simd-vbitor) ( a b rep -- c ) [ bitor ] bitwise-components-2map ;
|
||||
: (simd-vbitxor) ( a b rep -- c ) [ bitxor ] bitwise-components-2map ;
|
||||
: (simd-vbitnot) ( a rep -- c ) [ bitnot ] bitwise-components-map ;
|
||||
: (simd-vand) ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
|
||||
! And-not on mask vectors: complements each component of a, then ands it
! with the matching component of b. Uses bitnot; the boolean word not
! would turn every integer into f and break the following bitand.
: (simd-vandn) ( a b rep -- c ) [ [ bitnot ] dip bitand ] bitwise-components-2map ;
|
||||
: (simd-vor) ( a b rep -- c ) [ bitor ] bitwise-components-2map ;
|
||||
: (simd-vxor) ( a b rep -- c ) [ bitxor ] bitwise-components-2map ;
|
||||
: (simd-vnot) ( a rep -- c ) [ bitnot ] bitwise-components-map ;
|
||||
: (simd-vlshift) ( a n rep -- c ) swap '[ _ shift ] bitwise-components-map ;
|
||||
: (simd-vrshift) ( a n rep -- c ) swap '[ _ neg shift ] bitwise-components-map ;
|
||||
! Shifts the whole 16-byte vector left by n bytes: the last n bytes are
! dropped and n zero bytes are inserted at the head, so byte i moves to
! byte i + n. (Dropping the first n bytes and padding the tail, as the
! previous body did, is the right shift.)
! NOTE(review): assumes byte 0 is the least significant byte of the
! vector (little-endian layout) -- confirm against the hardware intrinsic.
: (simd-hlshift) ( a n rep -- c )
    drop head-slice* 16 0 pad-head ;
|
||||
! Shifts the whole 16-byte vector right by n bytes: the first n bytes are
! dropped and n zero bytes are appended at the tail, so byte i moves to
! byte i - n. The previous body kept only the first n bytes, so a shift
! by 0 produced an all-zero vector.
! NOTE(review): assumes byte 0 is the least significant byte of the
! vector (little-endian layout) -- confirm against the hardware intrinsic.
: (simd-hrshift) ( a n rep -- c )
    drop tail-slice 16 0 pad-tail ;
|
||||
: (simd-vshuffle-elements) ( a n rep -- c ) [ rep-length 0 pad-tail ] keep (vshuffle) ;
|
||||
: (simd-vshuffle-bytes) ( a b rep -- c ) drop uchar-16-rep (vshuffle) ;
|
||||
:: (simd-vmerge-head) ( a b rep -- c )
|
||||
a b rep 2>rep-array :> ( a' b' )
|
||||
rep <rep-array> :> c'
|
||||
rep rep-length 2 /i iota [| n |
|
||||
n a' nth-unsafe n 2 * c' set-nth-unsafe
|
||||
n b' nth-unsafe n 2 * 1 + c' set-nth-unsafe
|
||||
] each
|
||||
c' underlying>> ;
|
||||
:: (simd-vmerge-tail) ( a b rep -- c )
|
||||
a b rep 2>rep-array :> ( a' b' )
|
||||
rep <rep-array> :> c'
|
||||
rep rep-length 2 /i :> len
|
||||
len iota [| n |
|
||||
n len + a' nth-unsafe n 2 * c' set-nth-unsafe
|
||||
n len + b' nth-unsafe n 2 * 1 + c' set-nth-unsafe
|
||||
] each
|
||||
c' underlying>> ;
|
||||
: (simd-v<=) ( a b rep -- c )
|
||||
dup rep-tf-values '[ <= _ _ ? ] components-2map ;
|
||||
: (simd-v<) ( a b rep -- c )
|
||||
dup rep-tf-values '[ < _ _ ? ] components-2map ;
|
||||
: (simd-v=) ( a b rep -- c )
|
||||
dup rep-tf-values '[ = _ _ ? ] components-2map ;
|
||||
: (simd-v>) ( a b rep -- c )
|
||||
dup rep-tf-values '[ > _ _ ? ] components-2map ;
|
||||
: (simd-v>=) ( a b rep -- c )
|
||||
dup rep-tf-values '[ >= _ _ ? ] components-2map ;
|
||||
: (simd-vunordered?) ( a b rep -- c )
|
||||
dup rep-tf-values '[ unordered? _ _ ? ] components-2map ;
|
||||
: (simd-vany?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? not ;
|
||||
: (simd-vall?) ( a rep -- ? ) [ bitand ] bitwise-components-reduce zero? not ;
|
||||
: (simd-vnone?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? ;
|
||||
! Converts each integer component of a to floating point: views a as an
! array of rep, converts it to an array of the matching float rep, and
! outputs that array's underlying byte array, like the other fallbacks
! (this word is declared to output a byte-array).
: (simd-v>float) ( a rep -- c )
    [ >rep-array ] [ >float-vector-rep [>rep-array] ] bi call( i -- f )
    underlying>> ;
|
||||
! Converts each floating-point component of a to an integer: views a as
! an array of rep, converts it to an array of the matching integer rep,
! and outputs that array's underlying byte array, like the other
! fallbacks (this word is declared to output a byte-array).
: (simd-v>integer) ( a rep -- c )
    [ >rep-array ] [ >int-vector-rep [>rep-array] ] bi call( i -- f )
    underlying>> ;
|
||||
! Packs the components of a followed by those of b into one vector of the
! narrower rep, clamping each value to the narrow component type.
! The map-as exemplar must be an actual array of the narrow rep
! (<rep-array>), not the constructor quotation ([<rep-array>]); the
! result is returned as its underlying byte array.
: (simd-vpack-signed) ( a b rep -- c )
    [ 2>rep-array cord-append ]
    [ narrow-vector-rep [ <rep-array> ] [ rep-component-type ] bi ] bi
    '[ _ c-type-clamp ] swap map-as underlying>> ;
|
||||
! Packs the components of a followed by those of b into one vector of the
! unsigned narrower rep, clamping each value to that component type.
! The map-as exemplar must be an actual array of the narrow rep
! (<rep-array>), not the constructor quotation ([<rep-array>]); the
! result is returned as its underlying byte array.
: (simd-vpack-unsigned) ( a b rep -- c )
    [ 2>rep-array cord-append ]
    [ narrow-vector-rep >uint-vector-rep [ <rep-array> ] [ rep-component-type ] bi ] bi
    '[ _ c-type-clamp ] swap map-as underlying>> ;
|
||||
: (simd-vunpack-head) ( a rep -- c )
|
||||
[ >rep-array ] [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ] bi
|
||||
[ head-slice ] dip call( a' -- c' ) underlying>> ;
|
||||
: (simd-vunpack-tail) ( a rep -- c )
|
||||
[ >rep-array ] [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ] bi
|
||||
[ tail-slice ] dip call( a' -- c' ) underlying>> ;
|
||||
! Builds a vector of rep whose every component is n, and outputs its
! underlying byte array, matching (simd-gather-2)/(simd-gather-4) and the
! byte-array output class declared for this word.
: (simd-with) ( n rep -- v )
    [ rep-length iota swap '[ _ ] ] [ <rep-array> ] bi replicate-as
    underlying>> ;
|
||||
: (simd-gather-2) ( m n rep -- v ) <rep-array> [ 2 set-firstn ] keep underlying>> ;
|
||||
: (simd-gather-4) ( m n o p rep -- v ) <rep-array> [ 4 set-firstn ] keep underlying>> ;
|
||||
: (simd-select) ( a n rep -- x ) [ swap ] dip >rep-array nth-unsafe ;
|
||||
|
||||
! Scalar fallback for loading a vector from memory: displaces c-ptr by n,
! then copies rep-size bytes from that address into a new byte array.
: alien-vector ( c-ptr n rep -- value )
|
||||
[ swap <displaced-alien> ] dip rep-size memory>byte-array ;
|
||||
! Scalar fallback for storing a vector to memory: displaces c-ptr by n,
! then memcpy's rep-size bytes from value to that address (the inner swap
! puts the destination below the source for memcpy).
: set-alien-vector ( value c-ptr n rep -- )
|
||||
[ swap <displaced-alien> swap ] dip rep-size memcpy ;
|
||||
|
||||
"compiler.cfg.intrinsics.simd" require
|
||||
"compiler.tree.propagation.simd" require
|
||||
"compiler.cfg.value-numbering.simd" require
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
USING: classes.tuple.private cpu.architecture help.markup
|
||||
help.syntax kernel.private math math.vectors
|
||||
help.syntax kernel.private math math.vectors math.vectors.simd.intrinsics
|
||||
sequences ;
|
||||
IN: math.vectors.simd
|
||||
|
||||
|
@ -163,7 +163,6 @@ ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives"
|
|||
{ $list
|
||||
"They operate on raw byte arrays, with a separate “representation” parameter passed in to determine the type of the operands and result."
|
||||
"They are unsafe; passing values which are not byte arrays, or byte arrays with the wrong size, will dereference invalid memory and possibly crash Factor."
|
||||
{ "They do not have software fallbacks; if the current CPU does not have SIMD support, a " { $link bad-simd-call } " error will be thrown." }
|
||||
}
|
||||
"The compiler converts " { $link "math-vectors" } " into SIMD primitives automatically in cases where it is safe; this means that the input types are known to be SIMD vectors, and the CPU supports SIMD."
|
||||
$nl
|
||||
|
|
|
@ -95,6 +95,9 @@ DEFER: simd-construct-op
|
|||
PRIVATE>
|
||||
>>
|
||||
|
||||
DEFER: simd-with
|
||||
DEFER: simd-cast
|
||||
|
||||
<<
|
||||
<PRIVATE
|
||||
|
||||
|
|
Loading…
Reference in New Issue