scalar fallbacks for simd intrinsics
							parent
							
								
									6794d14652
								
							
						
					
					
						commit
						d94ffe6d78
					
				| 
						 | 
				
			
			@ -1,57 +1,76 @@
 | 
			
		|||
! Copyright (C) 2009 Slava Pestov.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: accessors byte-arrays combinators fry sequences
 | 
			
		||||
compiler.tree.propagation.info cpu.architecture kernel words math
 | 
			
		||||
USING: accessors byte-arrays combinators continuations fry sequences
 | 
			
		||||
compiler.tree.propagation.info cpu.architecture kernel words make math
 | 
			
		||||
math.intervals math.vectors.simd.intrinsics ;
 | 
			
		||||
IN: compiler.tree.propagation.simd
 | 
			
		||||
 | 
			
		||||
{
 | 
			
		||||
    (simd-v+)
 | 
			
		||||
    (simd-v-)
 | 
			
		||||
    (simd-vneg)
 | 
			
		||||
    (simd-vabs)
 | 
			
		||||
    (simd-v+-)
 | 
			
		||||
    (simd-v*)
 | 
			
		||||
    (simd-v/)
 | 
			
		||||
    (simd-vmin)
 | 
			
		||||
    (simd-vmax)
 | 
			
		||||
    (simd-sum)
 | 
			
		||||
    (simd-vsqrt)
 | 
			
		||||
    (simd-vbitand)
 | 
			
		||||
    (simd-vbitandn)
 | 
			
		||||
    (simd-vbitor)
 | 
			
		||||
    (simd-vbitxor)
 | 
			
		||||
    (simd-vbitnot)
 | 
			
		||||
    (simd-vand)
 | 
			
		||||
    (simd-vandn)
 | 
			
		||||
    (simd-vor)
 | 
			
		||||
    (simd-vxor)
 | 
			
		||||
    (simd-vnot)
 | 
			
		||||
    (simd-vlshift)
 | 
			
		||||
    (simd-vrshift)
 | 
			
		||||
    (simd-hlshift)
 | 
			
		||||
    (simd-hrshift)
 | 
			
		||||
    (simd-vshuffle-bytes)
 | 
			
		||||
    (simd-vshuffle-elements)
 | 
			
		||||
    (simd-vmerge-head)
 | 
			
		||||
    (simd-vmerge-tail)
 | 
			
		||||
    (simd-v>float)
 | 
			
		||||
    (simd-v>integer)
 | 
			
		||||
    (simd-vpack-signed)
 | 
			
		||||
    (simd-vpack-unsigned)
 | 
			
		||||
    (simd-vunpack-head)
 | 
			
		||||
    (simd-vunpack-tail)
 | 
			
		||||
    (simd-v<=)
 | 
			
		||||
    (simd-v<)
 | 
			
		||||
    (simd-v=)
 | 
			
		||||
    (simd-v>)
 | 
			
		||||
    (simd-v>=)
 | 
			
		||||
    (simd-vunordered?)
 | 
			
		||||
    (simd-with)
 | 
			
		||||
    (simd-gather-2)
 | 
			
		||||
    (simd-gather-4)
 | 
			
		||||
    alien-vector
 | 
			
		||||
} [ { byte-array } "default-output-classes" set-word-prop ] each
 | 
			
		||||
CONSTANT: vector>vector-intrinsics
 | 
			
		||||
    {
 | 
			
		||||
        (simd-v+)
 | 
			
		||||
        (simd-v-)
 | 
			
		||||
        (simd-vneg)
 | 
			
		||||
        (simd-v+-)
 | 
			
		||||
        (simd-vs+)
 | 
			
		||||
        (simd-vs-)
 | 
			
		||||
        (simd-vs*)
 | 
			
		||||
        (simd-v*)
 | 
			
		||||
        (simd-v/)
 | 
			
		||||
        (simd-vmin)
 | 
			
		||||
        (simd-vmax)
 | 
			
		||||
        (simd-vsqrt)
 | 
			
		||||
        (simd-vabs)
 | 
			
		||||
        (simd-vbitand)
 | 
			
		||||
        (simd-vbitandn)
 | 
			
		||||
        (simd-vbitor)
 | 
			
		||||
        (simd-vbitxor)
 | 
			
		||||
        (simd-vbitnot)
 | 
			
		||||
        (simd-vand)
 | 
			
		||||
        (simd-vandn)
 | 
			
		||||
        (simd-vor)
 | 
			
		||||
        (simd-vxor)
 | 
			
		||||
        (simd-vnot)
 | 
			
		||||
        (simd-vlshift)
 | 
			
		||||
        (simd-vrshift)
 | 
			
		||||
        (simd-hlshift)
 | 
			
		||||
        (simd-hrshift)
 | 
			
		||||
        (simd-vshuffle-elements)
 | 
			
		||||
        (simd-vshuffle-bytes)
 | 
			
		||||
        (simd-vmerge-head)
 | 
			
		||||
        (simd-vmerge-tail)
 | 
			
		||||
        (simd-v<=)
 | 
			
		||||
        (simd-v<)
 | 
			
		||||
        (simd-v=)
 | 
			
		||||
        (simd-v>)
 | 
			
		||||
        (simd-v>=)
 | 
			
		||||
        (simd-vunordered?)
 | 
			
		||||
        (simd-v>float)
 | 
			
		||||
        (simd-v>integer)
 | 
			
		||||
        (simd-vpack-signed)
 | 
			
		||||
        (simd-vpack-unsigned)
 | 
			
		||||
        (simd-vunpack-head)
 | 
			
		||||
        (simd-vunpack-tail)
 | 
			
		||||
        (simd-with)
 | 
			
		||||
        (simd-gather-2)
 | 
			
		||||
        (simd-gather-4)
 | 
			
		||||
        alien-vector
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
CONSTANT: vector-other-intrinsics
 | 
			
		||||
    {
 | 
			
		||||
        (simd-v.)
 | 
			
		||||
        (simd-sum)
 | 
			
		||||
        (simd-vany?)
 | 
			
		||||
        (simd-vall?)
 | 
			
		||||
        (simd-vnone?)
 | 
			
		||||
        (simd-select)
 | 
			
		||||
        set-alien-vector
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
: vector-intrinsics ( -- x )
 | 
			
		||||
    vector>vector-intrinsics vector-other-intrinsics append ;
 | 
			
		||||
 | 
			
		||||
vector>vector-intrinsics [ { byte-array } "default-output-classes" set-word-prop ] each
 | 
			
		||||
 | 
			
		||||
: scalar-output-class ( rep -- class )
 | 
			
		||||
    dup literal?>> [
 | 
			
		||||
| 
						 | 
				
			
			@ -79,12 +98,16 @@ IN: compiler.tree.propagation.simd
 | 
			
		|||
    real [0,inf] <class/interval-info> value-info-intersect
 | 
			
		||||
] "outputs" set-word-prop
 | 
			
		||||
 | 
			
		||||
! If SIMD is not available, inline alien-vector and set-alien-vector
 | 
			
		||||
! to get a speedup
 | 
			
		||||
: try-intrinsic ( node intrinsic-quot -- ? )
 | 
			
		||||
    '[ [ _ call( node -- ) ] { } make drop t ] [ 2drop f ] recover ;
 | 
			
		||||
 | 
			
		||||
: inline-unless-intrinsic ( word -- )
 | 
			
		||||
    dup '[ drop _ dup "intrinsic" word-prop [ drop f ] [ def>> ] if ]
 | 
			
		||||
    dup '[
 | 
			
		||||
        _ swap over "intrinsic" word-prop
 | 
			
		||||
        ! word node intrinsic
 | 
			
		||||
        [ try-intrinsic [ drop f ] [ def>> ] if ]
 | 
			
		||||
        [ def>> ] if*
 | 
			
		||||
    ]
 | 
			
		||||
    "custom-inlining" set-word-prop ;
 | 
			
		||||
 | 
			
		||||
\ alien-vector inline-unless-intrinsic
 | 
			
		||||
 | 
			
		||||
\ set-alien-vector inline-unless-intrinsic
 | 
			
		||||
vector-intrinsics [ inline-unless-intrinsic ] each
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -105,7 +105,7 @@ scalar-rep ;
 | 
			
		|||
        { ushort-scalar-rep    short-scalar-rep }
 | 
			
		||||
        { uint-scalar-rep      int-scalar-rep }
 | 
			
		||||
        { ulonglong-scalar-rep longlong-scalar-rep }
 | 
			
		||||
    } ?at drop ;
 | 
			
		||||
    } ?at drop ; foldable
 | 
			
		||||
 | 
			
		||||
: widen-vector-rep ( rep -- rep' )
 | 
			
		||||
    {
 | 
			
		||||
| 
						 | 
				
			
			@ -116,7 +116,18 @@ scalar-rep ;
 | 
			
		|||
        { ushort-8-rep    uint-4-rep      }
 | 
			
		||||
        { uint-4-rep      ulonglong-2-rep }
 | 
			
		||||
        { float-4-rep     double-2-rep    }
 | 
			
		||||
    } at ;
 | 
			
		||||
    } at ; foldable
 | 
			
		||||
 | 
			
		||||
: narrow-vector-rep ( rep -- rep' )
 | 
			
		||||
    {
 | 
			
		||||
        { short-8-rep     char-16-rep     }
 | 
			
		||||
        { int-4-rep       short-8-rep     }
 | 
			
		||||
        { longlong-2-rep  int-4-rep       }
 | 
			
		||||
        { ushort-8-rep    uchar-16-rep    }
 | 
			
		||||
        { uint-4-rep      ushort-8-rep    }
 | 
			
		||||
        { ulonglong-2-rep uint-4-rep      }
 | 
			
		||||
        { double-2-rep    float-4-rep     }
 | 
			
		||||
    } at ; foldable
 | 
			
		||||
 | 
			
		||||
! Register classes
 | 
			
		||||
SINGLETONS: int-regs float-regs ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1,232 @@
 | 
			
		|||
! (c)2009 Slava Pestov, Joe Groff bsd license
 | 
			
		||||
USING: accessors alien alien.c-types alien.data combinators
 | 
			
		||||
cords cpu.architecture fry generalizations kernel libc locals
 | 
			
		||||
math math.libm math.order math.ranges math.vectors sequences
 | 
			
		||||
sequences.private specialized-arrays vocabs.loader ;
 | 
			
		||||
QUALIFIED-WITH: alien.c-types c
 | 
			
		||||
SPECIALIZED-ARRAYS:
 | 
			
		||||
    c:char c:short c:int c:longlong
 | 
			
		||||
    c:uchar c:ushort c:uint c:ulonglong
 | 
			
		||||
    c:float c:double ;
 | 
			
		||||
IN: math.vectors.simd.intrinsics
 | 
			
		||||
 | 
			
		||||
: assert-positive ( x -- y ) ;
 | 
			
		||||
 | 
			
		||||
<PRIVATE
 | 
			
		||||
 | 
			
		||||
: >bitwise-vector-rep ( rep -- rep' )
 | 
			
		||||
    {
 | 
			
		||||
        { float-4-rep    [ uint-4-rep      ] }
 | 
			
		||||
        { double-2-rep   [ ulonglong-2-rep ] }
 | 
			
		||||
        [ ]
 | 
			
		||||
    } case ; foldable
 | 
			
		||||
 | 
			
		||||
: >uint-vector-rep ( rep -- rep' )
 | 
			
		||||
    {
 | 
			
		||||
        { longlong-2-rep [ ulonglong-2-rep ] }
 | 
			
		||||
        { int-4-rep      [ uint-4-rep      ] }
 | 
			
		||||
        { short-8-rep    [ ushort-8-rep    ] }
 | 
			
		||||
        { char-16-rep    [ uchar-16-rep    ] }
 | 
			
		||||
        [ ]
 | 
			
		||||
    } case ; foldable
 | 
			
		||||
 | 
			
		||||
: >int-vector-rep ( rep -- rep' )
 | 
			
		||||
    {
 | 
			
		||||
        { float-4-rep  [ int-4-rep      ] }
 | 
			
		||||
        { double-2-rep [ longlong-2-rep ] }
 | 
			
		||||
    } case ; foldable
 | 
			
		||||
 | 
			
		||||
: >float-vector-rep ( rep -- rep' )
 | 
			
		||||
    {
 | 
			
		||||
        { int-4-rep      [ float-4-rep  ] }
 | 
			
		||||
        { longlong-2-rep [ double-2-rep ] }
 | 
			
		||||
    } case ; foldable
 | 
			
		||||
 | 
			
		||||
: [byte>rep-array] ( rep -- class )
 | 
			
		||||
    {
 | 
			
		||||
        { char-16-rep      [ [ byte-array>char-array      ] ] }
 | 
			
		||||
        { uchar-16-rep     [ [ byte-array>uchar-array     ] ] }
 | 
			
		||||
        { short-8-rep      [ [ byte-array>short-array     ] ] }
 | 
			
		||||
        { ushort-8-rep     [ [ byte-array>ushort-array    ] ] }
 | 
			
		||||
        { int-4-rep        [ [ byte-array>int-array       ] ] }
 | 
			
		||||
        { uint-4-rep       [ [ byte-array>uint-array      ] ] }
 | 
			
		||||
        { longlong-2-rep   [ [ byte-array>longlong-array  ] ] }
 | 
			
		||||
        { ulonglong-2-rep  [ [ byte-array>ulonglong-array ] ] }
 | 
			
		||||
        { float-4-rep      [ [ byte-array>float-array     ] ] }
 | 
			
		||||
        { double-2-rep     [ [ byte-array>double-array    ] ] }
 | 
			
		||||
    } case ; foldable
 | 
			
		||||
 | 
			
		||||
: [>rep-array] ( rep -- class )
 | 
			
		||||
    {
 | 
			
		||||
        { char-16-rep      [ [ >char-array      ] ] }
 | 
			
		||||
        { uchar-16-rep     [ [ >uchar-array     ] ] }
 | 
			
		||||
        { short-8-rep      [ [ >short-array     ] ] }
 | 
			
		||||
        { ushort-8-rep     [ [ >ushort-array    ] ] }
 | 
			
		||||
        { int-4-rep        [ [ >int-array       ] ] }
 | 
			
		||||
        { uint-4-rep       [ [ >uint-array      ] ] }
 | 
			
		||||
        { longlong-2-rep   [ [ >longlong-array  ] ] }
 | 
			
		||||
        { ulonglong-2-rep  [ [ >ulonglong-array ] ] }
 | 
			
		||||
        { float-4-rep      [ [ >float-array     ] ] }
 | 
			
		||||
        { double-2-rep     [ [ >double-array    ] ] }
 | 
			
		||||
    } case ; foldable
 | 
			
		||||
 | 
			
		||||
: [<rep-array>] ( rep -- class )
 | 
			
		||||
    {
 | 
			
		||||
        { char-16-rep      [ [ 16 (char-array)      ] ] }
 | 
			
		||||
        { uchar-16-rep     [ [ 16 (uchar-array)     ] ] }
 | 
			
		||||
        { short-8-rep      [ [  8 (short-array)     ] ] }
 | 
			
		||||
        { ushort-8-rep     [ [  8 (ushort-array)    ] ] }
 | 
			
		||||
        { int-4-rep        [ [  4 (int-array)       ] ] }
 | 
			
		||||
        { uint-4-rep       [ [  4 (uint-array)      ] ] }
 | 
			
		||||
        { longlong-2-rep   [ [  2 (longlong-array)  ] ] }
 | 
			
		||||
        { ulonglong-2-rep  [ [  2 (ulonglong-array) ] ] }
 | 
			
		||||
        { float-4-rep      [ [  4 (float-array)     ] ] }
 | 
			
		||||
        { double-2-rep     [ [  2 (double-array)    ] ] }
 | 
			
		||||
    } case ; foldable
 | 
			
		||||
 | 
			
		||||
: rep-tf-values ( rep -- t f )
 | 
			
		||||
    float-vector-rep? [ -1 bits>double 0.0 ] [ -1 0 ] if ;
 | 
			
		||||
 | 
			
		||||
: >rep-array ( a rep -- a' )
 | 
			
		||||
    [byte>rep-array] call( a -- a' ) ; inline
 | 
			
		||||
: 2>rep-array ( a b rep -- a' b' )
 | 
			
		||||
    [byte>rep-array] '[ _ call( a -- a' ) ] bi@ ; inline
 | 
			
		||||
: <rep-array> ( rep -- a' )
 | 
			
		||||
    [<rep-array>] call( -- a' ) ; inline
 | 
			
		||||
 | 
			
		||||
: components-map ( a rep quot -- c )
 | 
			
		||||
    [ >rep-array ] dip map underlying>> ; inline
 | 
			
		||||
: components-2map ( a b rep quot -- c )
 | 
			
		||||
    [ 2>rep-array ] dip 2map underlying>> ; inline
 | 
			
		||||
: components-reduce ( a rep quot -- x )
 | 
			
		||||
    [ >rep-array [ ] ] dip map-reduce ; inline
 | 
			
		||||
 | 
			
		||||
: bitwise-components-map ( a rep quot -- c )
 | 
			
		||||
    [ >bitwise-vector-rep >rep-array ] dip map underlying>> ; inline
 | 
			
		||||
: bitwise-components-2map ( a b rep quot -- c )
 | 
			
		||||
    [ >bitwise-vector-rep 2>rep-array ] dip 2map underlying>> ; inline
 | 
			
		||||
: bitwise-components-reduce ( a rep quot -- x )
 | 
			
		||||
    [ >bitwise-vector-rep >rep-array [ ] ] dip map-reduce ; inline
 | 
			
		||||
 | 
			
		||||
! Scalar shuffle helper. For every position `to` in the result, reads the
! source index `from` out of `elts` and copies that element of `a` (viewed
! as a `rep` array) into position `to` of a fresh `rep` array. Returns the
! underlying byte storage of the result.
:: (vshuffle) ( a elts rep -- c )
    a rep >rep-array :> a'
    rep <rep-array> :> c'
    elts [| from to |
        ! Mask the *index* (not the fetched value) so that the unchecked
        ! read below always stays inside the vector; the mask relies on
        ! rep-length being a power of two.
        from rep rep-length 1 - bitand
        a' nth-unsafe
        to c' set-nth-unsafe
    ] each-index
    c' underlying>> ; inline
 | 
			
		||||
 | 
			
		||||
PRIVATE>
 | 
			
		||||
 | 
			
		||||
: (simd-v+)                ( a b rep -- c ) [ + ] components-2map ;
 | 
			
		||||
: (simd-v-)                ( a b rep -- c ) [ - ] components-2map ;
 | 
			
		||||
: (simd-vneg)              ( a   rep -- c ) [ neg ] components-map ;
 | 
			
		||||
! Scalar fallback for v+-: walks the components in pairs, storing a - b
! at each even index and a + b at the following odd index, then returns
! the underlying byte storage of the result array.
:: (simd-v+-)              ( a b rep -- c )
    a b rep 2>rep-array :> ( a' b' )
    rep <rep-array> :> c'
    ! The component count comes from rep-length; `rep` itself is a
    ! representation symbol, not a sequence.
    0  rep rep-length 1 -  2 <range> [| n |
        n     a' nth-unsafe n     b' nth-unsafe -
        n     c' set-nth-unsafe

        n 1 + a' nth-unsafe n 1 + b' nth-unsafe +
        n 1 + c' set-nth-unsafe
    ] each
    c' underlying>> ;
 | 
			
		||||
! Saturating add: each component sum is clamped with c-type-clamp, which is
! handed the rep's component type (as the vpack words do), not the vector rep.
: (simd-vs+)               ( a b rep -- c ) dup rep-component-type '[ + _ c-type-clamp ] components-2map ;
 | 
			
		||||
! Saturating subtract: each component difference is clamped with c-type-clamp,
! which is handed the rep's component type (as the vpack words do), not the vector rep.
: (simd-vs-)               ( a b rep -- c ) dup rep-component-type '[ - _ c-type-clamp ] components-2map ;
 | 
			
		||||
! Saturating multiply: each component product (`*`, not `-`) is clamped with
! c-type-clamp, which is handed the rep's component type, not the vector rep.
: (simd-vs*)               ( a b rep -- c ) dup rep-component-type '[ * _ c-type-clamp ] components-2map ;
 | 
			
		||||
: (simd-v*)                ( a b rep -- c ) [ * ] components-2map ;
 | 
			
		||||
: (simd-v/)                ( a b rep -- c ) [ / ] components-2map ;
 | 
			
		||||
: (simd-vmin)              ( a b rep -- c ) [ min ] components-2map ;
 | 
			
		||||
: (simd-vmax)              ( a b rep -- c ) [ max ] components-2map ;
 | 
			
		||||
: (simd-v.)                ( a b rep -- n )
 | 
			
		||||
    [ 2>rep-array [ [ first ] bi@ * ] 2keep ] keep
 | 
			
		||||
    1 swap rep-length [a,b) [ '[ _ swap nth-unsafe ] bi@ * + ] with with each ;
 | 
			
		||||
: (simd-vsqrt)             ( a   rep -- c ) [ fsqrt ] components-map ;
 | 
			
		||||
: (simd-sum)               ( a   rep -- n ) [ + ] components-reduce ;
 | 
			
		||||
: (simd-vabs)              ( a   rep -- c ) [ abs ] components-map ;
 | 
			
		||||
: (simd-vbitand)           ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
 | 
			
		||||
! Bitwise and-not: complements each component of `a` with bitnot (boolean
! `not` would turn the integer into f) and ANDs it with the matching component of `b`.
: (simd-vbitandn)          ( a b rep -- c ) [ [ bitnot ] dip bitand ] bitwise-components-2map ;
 | 
			
		||||
: (simd-vbitor)            ( a b rep -- c ) [ bitor ] bitwise-components-2map ;
 | 
			
		||||
: (simd-vbitxor)           ( a b rep -- c ) [ bitxor ] bitwise-components-2map ;
 | 
			
		||||
: (simd-vbitnot)           ( a   rep -- c ) [ bitnot ] bitwise-components-map ;
 | 
			
		||||
: (simd-vand)              ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
 | 
			
		||||
! Mask and-not: complements each component of `a` with bitnot (boolean
! `not` would turn the integer into f) and ANDs it with the matching component of `b`.
: (simd-vandn)             ( a b rep -- c ) [ [ bitnot ] dip bitand ] bitwise-components-2map ;
 | 
			
		||||
: (simd-vor)               ( a b rep -- c ) [ bitor ] bitwise-components-2map ;
 | 
			
		||||
: (simd-vxor)              ( a b rep -- c ) [ bitxor ] bitwise-components-2map ;
 | 
			
		||||
: (simd-vnot)              ( a   rep -- c ) [ bitnot ] bitwise-components-map ;
 | 
			
		||||
: (simd-vlshift)           ( a n rep -- c ) swap '[ _ shift ] bitwise-components-map ;
 | 
			
		||||
: (simd-vrshift)           ( a n rep -- c ) swap '[ _ neg shift ] bitwise-components-map ;
 | 
			
		||||
: (simd-hlshift)           ( a n rep -- c )
 | 
			
		||||
    drop tail-slice 16 0 pad-tail ;
 | 
			
		||||
: (simd-hrshift)           ( a n rep -- c )
 | 
			
		||||
    drop head-slice 16 0 pad-head ;
 | 
			
		||||
: (simd-vshuffle-elements) ( a n rep -- c ) [ rep-length 0 pad-tail ] keep (vshuffle) ;
 | 
			
		||||
: (simd-vshuffle-bytes)    ( a b rep -- c ) drop uchar-16-rep (vshuffle) ;
 | 
			
		||||
:: (simd-vmerge-head)      ( a b rep -- c )
 | 
			
		||||
    a b rep 2>rep-array :> ( a' b' )
 | 
			
		||||
    rep <rep-array> :> c'
 | 
			
		||||
    rep rep-length 2 /i iota [| n |
 | 
			
		||||
        n a' nth-unsafe n 2 *     c' set-nth-unsafe
 | 
			
		||||
        n b' nth-unsafe n 2 * 1 + c' set-nth-unsafe
 | 
			
		||||
    ] each
 | 
			
		||||
    c' underlying>> ;
 | 
			
		||||
:: (simd-vmerge-tail)      ( a b rep -- c )
 | 
			
		||||
    a b rep 2>rep-array :> ( a' b' )
 | 
			
		||||
    rep <rep-array> :> c'
 | 
			
		||||
    rep rep-length 2 /i :> len
 | 
			
		||||
    len iota [| n |
 | 
			
		||||
        n len + a' nth-unsafe n 2 *     c' set-nth-unsafe
 | 
			
		||||
        n len + b' nth-unsafe n 2 * 1 + c' set-nth-unsafe
 | 
			
		||||
    ] each
 | 
			
		||||
    c' underlying>> ;
 | 
			
		||||
: (simd-v<=)               ( a b rep -- c )
 | 
			
		||||
    dup rep-tf-values '[ <= _ _ ? ] components-2map ; 
 | 
			
		||||
: (simd-v<)                ( a b rep -- c )
 | 
			
		||||
    dup rep-tf-values '[ <  _ _ ? ] components-2map ;
 | 
			
		||||
: (simd-v=)                ( a b rep -- c )
 | 
			
		||||
    dup rep-tf-values '[ =  _ _ ? ] components-2map ;
 | 
			
		||||
: (simd-v>)                ( a b rep -- c )
 | 
			
		||||
    dup rep-tf-values '[ >  _ _ ? ] components-2map ;
 | 
			
		||||
: (simd-v>=)               ( a b rep -- c )
 | 
			
		||||
    dup rep-tf-values '[ >= _ _ ? ] components-2map ;
 | 
			
		||||
: (simd-vunordered?)       ( a b rep -- c )
 | 
			
		||||
    dup rep-tf-values '[ unordered? _ _ ? ] components-2map ;
 | 
			
		||||
! ORs all components together (through the bitwise rep) and answers t when the result is nonzero.
: (simd-vany?)             ( a   rep -- ? ) [ bitor  ] bitwise-components-reduce zero? not ;
 | 
			
		||||
! ANDs all components together (through the bitwise rep) and answers t when the result is nonzero.
: (simd-vall?)             ( a   rep -- ? ) [ bitand ] bitwise-components-reduce zero? not ;
 | 
			
		||||
! ORs all components together (through the bitwise rep) and answers t when the result is zero.
: (simd-vnone?)            ( a   rep -- ? ) [ bitor  ] bitwise-components-reduce zero?     ;
 | 
			
		||||
! Scalar fallback for integer->float vector conversion: views `a` as a `rep`
! array, converts it with the array constructor of the matching float rep,
! and returns the underlying byte storage like the other vector-valued words.
: (simd-v>float)           ( a   rep -- c )
    [ >rep-array ] [ >float-vector-rep [>rep-array] ] bi call( i -- f )
    underlying>> ;
 | 
			
		||||
! Scalar fallback for float->integer vector conversion: views `a` as a `rep`
! array, converts it with the array constructor of the matching integer rep,
! and returns the underlying byte storage like the other vector-valued words.
: (simd-v>integer)         ( a   rep -- c )
    [ >rep-array ] [ >int-vector-rep [>rep-array] ] bi call( i -- f )
    underlying>> ;
 | 
			
		||||
! Scalar fallback for signed pack: concatenates the components of `a` and
! `b`, clamps each to the component type of the narrowed rep, and collects
! them into an array of that narrowed rep, returning its byte storage.
: (simd-vpack-signed)      ( a b rep -- c )
    [ 2>rep-array cord-append ]
    ! map-as needs an actual array as exemplar, so build one with
    ! <rep-array> rather than passing the constructor quotation.
    [ narrow-vector-rep [ <rep-array> ] [ rep-component-type ] bi ] bi
    '[ _ c-type-clamp ] swap map-as underlying>> ;
 | 
			
		||||
! Scalar fallback for unsigned pack: concatenates the components of `a` and
! `b`, clamps each to the component type of the narrowed unsigned rep, and
! collects them into an array of that rep, returning its byte storage.
: (simd-vpack-unsigned)    ( a b rep -- c )
    [ 2>rep-array cord-append ]
    ! map-as needs an actual array as exemplar, so build one with
    ! <rep-array> rather than passing the constructor quotation.
    [ narrow-vector-rep >uint-vector-rep [ <rep-array> ] [ rep-component-type ] bi ] bi
    '[ _ c-type-clamp ] swap map-as underlying>> ;
 | 
			
		||||
: (simd-vunpack-head)      ( a   rep -- c ) 
 | 
			
		||||
    [ >rep-array ] [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ] bi
 | 
			
		||||
    [ head-slice ] dip call( a' -- c' ) underlying>> ;
 | 
			
		||||
: (simd-vunpack-tail)      ( a   rep -- c )
 | 
			
		||||
    [ >rep-array ] [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ] bi
 | 
			
		||||
    [ tail-slice ] dip call( a' -- c' ) underlying>> ;
 | 
			
		||||
: (simd-with)              (   n rep -- v )
 | 
			
		||||
    [ rep-length iota swap '[ _ ] ] [ <rep-array> ] bi replicate-as ;
 | 
			
		||||
: (simd-gather-2)          ( m n rep -- v ) <rep-array> [ 2 set-firstn ] keep underlying>> ;
 | 
			
		||||
: (simd-gather-4)          ( m n o p rep -- v ) <rep-array> [ 4 set-firstn ] keep underlying>> ;
 | 
			
		||||
: (simd-select)            ( a n rep -- x ) [ swap ] dip >rep-array nth-unsafe ;
 | 
			
		||||
 | 
			
		||||
: alien-vector     (       c-ptr n rep -- value )
 | 
			
		||||
    [ swap <displaced-alien> ] dip rep-size memory>byte-array ;
 | 
			
		||||
: set-alien-vector ( value c-ptr n rep --       )
 | 
			
		||||
    [ swap <displaced-alien> swap ] dip rep-size memcpy ;
 | 
			
		||||
 | 
			
		||||
"compiler.cfg.intrinsics.simd" require
 | 
			
		||||
"compiler.tree.propagation.simd" require
 | 
			
		||||
"compiler.cfg.value-numbering.simd" require
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1,5 +1,5 @@
 | 
			
		|||
USING: classes.tuple.private cpu.architecture help.markup
 | 
			
		||||
help.syntax kernel.private math math.vectors
 | 
			
		||||
help.syntax kernel.private math math.vectors math.vectors.simd.intrinsics
 | 
			
		||||
sequences ;
 | 
			
		||||
IN: math.vectors.simd
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -163,7 +163,6 @@ ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives"
 | 
			
		|||
{ $list
 | 
			
		||||
    "They operate on raw byte arrays, with a separate “representation” parameter passed in to determine the type of the operands and result."
 | 
			
		||||
    "They are unsafe; passing values which are not byte arrays, or byte arrays with the wrong size, will dereference invalid memory and possibly crash Factor."
 | 
			
		||||
    { "They do not have software fallbacks; if the current CPU does not have SIMD support, a " { $link bad-simd-call } " error will be thrown." }
 | 
			
		||||
}
 | 
			
		||||
"The compiler converts " { $link "math-vectors" } " into SIMD primitives automatically in cases where it is safe; this means that the input types are known to be SIMD vectors, and the CPU supports SIMD."
 | 
			
		||||
$nl
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -95,6 +95,9 @@ DEFER: simd-construct-op
 | 
			
		|||
PRIVATE>
 | 
			
		||||
>>
 | 
			
		||||
 | 
			
		||||
DEFER: simd-with
 | 
			
		||||
DEFER: simd-cast
 | 
			
		||||
 | 
			
		||||
<<
 | 
			
		||||
<PRIVATE
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue