Software fallbacks for the low-level SIMD primitives (Factor).

Adds the vocabulary math.vectors.simd.intrinsics with plain-Factor
definitions of every (simd-...) word, registers all of them for custom
inlining, and drops the documentation sentence saying that no software
fallbacks exist.

Review notes on this revision of the patch text:
  * Restored tokens of the form <word> that were missing from the text:
    <PRIVATE, ": >bitwise-vector-rep", <rep-array>, [<rep-array>], <range>,
    <displaced-alien> and <class/interval-info>.
  * inline-unless-intrinsic: the no-intrinsic branch of if* now drops the
    node before taking def>> of the word.
  * (vshuffle): the length mask is applied to the source index, not to the
    element that was fetched.
  * (simd-v+-): uses rep-length instead of length on the representation.
  * (simd-vs+), (simd-vs-), (simd-vs*): clamp to the component type of the
    representation; (simd-vs*) multiplies instead of subtracting.
  * (simd-vbitandn), (simd-vandn): use bitnot instead of the boolean not.
  * Explanations are trailing "!" comments on added lines only, so the hunk
    line counts are unchanged.
  * The last hunk (basis/math/vectors/simd/simd.factor) is reproduced only as
    far as it was visible; it may be missing context lines.

diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor
index 679cc09273..93eb2a8ecc 100644
--- a/basis/compiler/tree/propagation/simd/simd.factor
+++ b/basis/compiler/tree/propagation/simd/simd.factor
@@ -1,57 +1,76 @@
 ! Copyright (C) 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: accessors byte-arrays combinators fry sequences
-compiler.tree.propagation.info cpu.architecture kernel words math
+USING: accessors byte-arrays combinators continuations fry sequences
+compiler.tree.propagation.info cpu.architecture kernel words make math
 math.intervals math.vectors.simd.intrinsics ;
 IN: compiler.tree.propagation.simd
 
-{
-    (simd-v+)
-    (simd-v-)
-    (simd-vneg)
-    (simd-vabs)
-    (simd-v+-)
-    (simd-v*)
-    (simd-v/)
-    (simd-vmin)
-    (simd-vmax)
-    (simd-sum)
-    (simd-vsqrt)
-    (simd-vbitand)
-    (simd-vbitandn)
-    (simd-vbitor)
-    (simd-vbitxor)
-    (simd-vbitnot)
-    (simd-vand)
-    (simd-vandn)
-    (simd-vor)
-    (simd-vxor)
-    (simd-vnot)
-    (simd-vlshift)
-    (simd-vrshift)
-    (simd-hlshift)
-    (simd-hrshift)
-    (simd-vshuffle-bytes)
-    (simd-vshuffle-elements)
-    (simd-vmerge-head)
-    (simd-vmerge-tail)
-    (simd-v>float)
-    (simd-v>integer)
-    (simd-vpack-signed)
-    (simd-vpack-unsigned)
-    (simd-vunpack-head)
-    (simd-vunpack-tail)
-    (simd-v<=)
-    (simd-v<)
-    (simd-v=)
-    (simd-v>)
-    (simd-v>=)
-    (simd-vunordered?)
-    (simd-with)
-    (simd-gather-2)
-    (simd-gather-4)
-    alien-vector
-} [ { byte-array } "default-output-classes" set-word-prop ] each
+CONSTANT: vector>vector-intrinsics ! words given byte-array as default output class below
+    {
+        (simd-v+)
+        (simd-v-)
+        (simd-vneg)
+        (simd-v+-)
+        (simd-vs+)
+        (simd-vs-)
+        (simd-vs*)
+        (simd-v*)
+        (simd-v/)
+        (simd-vmin)
+        (simd-vmax)
+        (simd-vsqrt)
+        (simd-vabs)
+        (simd-vbitand)
+        (simd-vbitandn)
+        (simd-vbitor)
+        (simd-vbitxor)
+        (simd-vbitnot)
+        (simd-vand)
+        (simd-vandn)
+        (simd-vor)
+        (simd-vxor)
+        (simd-vnot)
+        (simd-vlshift)
+        (simd-vrshift)
+        (simd-hlshift)
+        (simd-hrshift)
+        (simd-vshuffle-elements)
+        (simd-vshuffle-bytes)
+        (simd-vmerge-head)
+        (simd-vmerge-tail)
+        (simd-v<=)
+        (simd-v<)
+        (simd-v=)
+        (simd-v>)
+        (simd-v>=)
+        (simd-vunordered?)
+        (simd-v>float)
+        (simd-v>integer)
+        (simd-vpack-signed)
+        (simd-vpack-unsigned)
+        (simd-vunpack-head)
+        (simd-vunpack-tail)
+        (simd-with)
+        (simd-gather-2)
+        (simd-gather-4)
+        alien-vector
+    }
+
+CONSTANT: vector-other-intrinsics ! remaining intrinsics; no default output class is set for these
+    {
+        (simd-v.)
+        (simd-sum)
+        (simd-vany?)
+        (simd-vall?)
+        (simd-vnone?)
+        (simd-select)
+        set-alien-vector
+    }
+
+: vector-intrinsics ( -- x ) ! both lists above, appended
+    vector>vector-intrinsics vector-other-intrinsics append ;
+
+vector>vector-intrinsics [ { byte-array } "default-output-classes" set-word-prop ] each
 
 : scalar-output-class ( rep -- class )
     dup literal?>> [
@@ -79,12 +98,16 @@ IN: compiler.tree.propagation.simd
     real [0,inf] <class/interval-info> value-info-intersect
 ] "outputs" set-word-prop
 
-! If SIMD is not available, inline alien-vector and set-alien-vector
-! to get a speedup
+: try-intrinsic ( node intrinsic-quot -- ? ) ! t if the quotation runs on node without throwing, f otherwise
+    '[ [ _ call( node -- ) ] { } make drop t ] [ 2drop f ] recover ;
+
 : inline-unless-intrinsic ( word -- )
-    dup '[ drop _ dup "intrinsic" word-prop [ drop f ] [ def>> ] if ]
+    dup '[
+        _ swap over "intrinsic" word-prop
+        ! word node intrinsic
+        [ try-intrinsic [ drop f ] [ def>> ] if ]
+        [ drop def>> ] if* ! no intrinsic: if* leaves "word node", so drop the node first
+    ]
     "custom-inlining" set-word-prop ;
 
-\ alien-vector inline-unless-intrinsic
-
-\ set-alien-vector inline-unless-intrinsic
+vector-intrinsics [ inline-unless-intrinsic ] each
diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor
index 86d68caa66..6631cec189 100644
--- a/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@@ -105,7 +105,7 @@ scalar-rep ;
         { ushort-scalar-rep short-scalar-rep }
         { uint-scalar-rep int-scalar-rep }
         { ulonglong-scalar-rep longlong-scalar-rep }
-    } ?at drop ;
+    } ?at drop ; foldable
 
 : widen-vector-rep ( rep -- rep' )
     {
@@ -116,7 +116,18 @@ scalar-rep ;
         { ushort-8-rep uint-4-rep }
         { uint-4-rep ulonglong-2-rep }
         { float-4-rep double-2-rep }
-    } at ;
+    } at ; foldable
+
+: narrow-vector-rep ( rep -- rep' ) ! table lookup with at; reps not listed below have no entry
+    {
+        { short-8-rep char-16-rep }
+        { int-4-rep short-8-rep }
+        { longlong-2-rep int-4-rep }
+        { ushort-8-rep uchar-16-rep }
+        { uint-4-rep ushort-8-rep }
+        { ulonglong-2-rep uint-4-rep }
+        { double-2-rep float-4-rep }
+    } at ; foldable
 
 ! Register classes
 SINGLETONS: int-regs float-regs ;
diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor
new file mode 100644
index 0000000000..a236db00c9
--- /dev/null
+++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor
@@ -0,0 +1,232 @@
+! (c)2009 Slava Pestov, Joe Groff bsd license
+USING: accessors alien alien.c-types alien.data combinators
+cords cpu.architecture fry generalizations kernel libc locals
+math math.libm math.order math.ranges math.vectors sequences
+sequences.private specialized-arrays vocabs.loader ;
+QUALIFIED-WITH: alien.c-types c
+SPECIALIZED-ARRAYS:
+    c:char c:short c:int c:longlong
+    c:uchar c:ushort c:uint c:ulonglong
+    c:float c:double ;
+IN: math.vectors.simd.intrinsics
+
+: assert-positive ( x -- y ) ;
+
+<PRIVATE
+
+: >bitwise-vector-rep ( rep -- rep' ) ! float reps map to same-width unsigned integer reps; others pass through
+    {
+        { float-4-rep [ uint-4-rep ] }
+        { double-2-rep [ ulonglong-2-rep ] }
+        [ ]
+    } case ; foldable
+
+: >uint-vector-rep ( rep -- rep' )
+    {
+        { longlong-2-rep [ ulonglong-2-rep ] }
+        { int-4-rep [ uint-4-rep ] }
+        { short-8-rep [ ushort-8-rep ] }
+        { char-16-rep [ uchar-16-rep ] }
+        [ ]
+    } case ; foldable
+
+: >int-vector-rep ( rep -- rep' )
+    {
+        { float-4-rep [ int-4-rep ] }
+        { double-2-rep [ longlong-2-rep ] }
+    } case ; foldable
+
+: >float-vector-rep ( rep -- rep' )
+    {
+        { int-4-rep [ float-4-rep ] }
+        { longlong-2-rep [ double-2-rep ] }
+    } case ; foldable
+
+: [byte>rep-array] ( rep -- class ) ! yields a quotation that converts a byte array to the rep's specialized array
+    {
+        { char-16-rep [ [ byte-array>char-array ] ] }
+        { uchar-16-rep [ [ byte-array>uchar-array ] ] }
+        { short-8-rep [ [ byte-array>short-array ] ] }
+        { ushort-8-rep [ [ byte-array>ushort-array ] ] }
+        { int-4-rep [ [ byte-array>int-array ] ] }
+        { uint-4-rep [ [ byte-array>uint-array ] ] }
+        { longlong-2-rep [ [ byte-array>longlong-array ] ] }
+        { ulonglong-2-rep [ [ byte-array>ulonglong-array ] ] }
+        { float-4-rep [ [ byte-array>float-array ] ] }
+        { double-2-rep [ [ byte-array>double-array ] ] }
+    } case ; foldable
+
+: [>rep-array] ( rep -- class ) ! yields a quotation that converts a sequence to the rep's specialized array
+    {
+        { char-16-rep [ [ >char-array ] ] }
+        { uchar-16-rep [ [ >uchar-array ] ] }
+        { short-8-rep [ [ >short-array ] ] }
+        { ushort-8-rep [ [ >ushort-array ] ] }
+        { int-4-rep [ [ >int-array ] ] }
+        { uint-4-rep [ [ >uint-array ] ] }
+        { longlong-2-rep [ [ >longlong-array ] ] }
+        { ulonglong-2-rep [ [ >ulonglong-array ] ] }
+        { float-4-rep [ [ >float-array ] ] }
+        { double-2-rep [ [ >double-array ] ] }
+    } case ; foldable
+
+: [<rep-array>] ( rep -- class ) ! yields a quotation that allocates a specialized array of the rep's length
+    {
+        { char-16-rep [ [ 16 (char-array) ] ] }
+        { uchar-16-rep [ [ 16 (uchar-array) ] ] }
+        { short-8-rep [ [ 8 (short-array) ] ] }
+        { ushort-8-rep [ [ 8 (ushort-array) ] ] }
+        { int-4-rep [ [ 4 (int-array) ] ] }
+        { uint-4-rep [ [ 4 (uint-array) ] ] }
+        { longlong-2-rep [ [ 2 (longlong-array) ] ] }
+        { ulonglong-2-rep [ [ 2 (ulonglong-array) ] ] }
+        { float-4-rep [ [ 4 (float-array) ] ] }
+        { double-2-rep [ [ 2 (double-array) ] ] }
+    } case ; foldable
+
+: rep-tf-values ( rep -- t f ) ! the two values the comparison words store per lane
+    float-vector-rep? [ -1 bits>double 0.0 ] [ -1 0 ] if ;
+
+: >rep-array ( a rep -- a' )
+    [byte>rep-array] call( a -- a' ) ; inline
+: 2>rep-array ( a b rep -- a' b' )
+    [byte>rep-array] '[ _ call( a -- a' ) ] bi@ ; inline
+: <rep-array> ( rep -- a' )
+    [<rep-array>] call( -- a' ) ; inline
+
+: components-map ( a rep quot -- c ) ! map quot over the lanes and return the underlying bytes
+    [ >rep-array ] dip map underlying>> ; inline
+: components-2map ( a b rep quot -- c )
+    [ 2>rep-array ] dip 2map underlying>> ; inline
+: components-reduce ( a rep quot -- x )
+    [ >rep-array [ ] ] dip map-reduce ; inline
+
+: bitwise-components-map ( a rep quot -- c ) ! as components-map, after >bitwise-vector-rep
+    [ >bitwise-vector-rep >rep-array ] dip map underlying>> ; inline
+: bitwise-components-2map ( a b rep quot -- c )
+    [ >bitwise-vector-rep 2>rep-array ] dip 2map underlying>> ; inline
+: bitwise-components-reduce ( a rep quot -- x )
+    [ >bitwise-vector-rep >rep-array [ ] ] dip map-reduce ; inline
+
+:: (vshuffle) ( a elts rep -- c )
+    a rep >rep-array :> a'
+    rep <rep-array> :> c'
+    elts [| from to |
+        from rep rep-length 1 - bitand ! mask the source index, not the fetched element
+        a' nth-unsafe
+        to c' set-nth-unsafe
+    ] each-index
+    c' underlying>> ; inline
+
+PRIVATE>
+
+: (simd-v+) ( a b rep -- c ) [ + ] components-2map ;
+: (simd-v-) ( a b rep -- c ) [ - ] components-2map ;
+: (simd-vneg) ( a rep -- c ) [ neg ] components-map ;
+:: (simd-v+-) ( a b rep -- c )
+    a b rep 2>rep-array :> ( a' b' )
+    rep <rep-array> :> c'
+    0 rep rep-length 1 - 2 <range> [| n | ! n walks the even lanes: subtract at n, add at n+1
+        n a' nth-unsafe n b' nth-unsafe -
+        n c' set-nth-unsafe
+
+        n 1 + a' nth-unsafe n 1 + b' nth-unsafe +
+        n 1 + c' set-nth-unsafe
+    ] each
+    c' underlying>> ;
+: (simd-vs+) ( a b rep -- c ) dup rep-component-type '[ + _ c-type-clamp ] components-2map ; ! clamp to the lane type
+: (simd-vs-) ( a b rep -- c ) dup rep-component-type '[ - _ c-type-clamp ] components-2map ;
+: (simd-vs*) ( a b rep -- c ) dup rep-component-type '[ * _ c-type-clamp ] components-2map ; ! multiply, then clamp
+: (simd-v*) ( a b rep -- c ) [ * ] components-2map ;
+: (simd-v/) ( a b rep -- c ) [ / ] components-2map ;
+: (simd-vmin) ( a b rep -- c ) [ min ] components-2map ;
+: (simd-vmax) ( a b rep -- c ) [ max ] components-2map ;
+: (simd-v.) ( a b rep -- n )
+    [ 2>rep-array [ [ first ] bi@ * ] 2keep ] keep
+    1 swap rep-length [a,b) [ '[ _ swap nth-unsafe ] bi@ * + ] with with each ;
+: (simd-vsqrt) ( a rep -- c ) [ fsqrt ] components-map ;
+: (simd-sum) ( a rep -- n ) [ + ] components-reduce ;
+: (simd-vabs) ( a rep -- c ) [ abs ] components-map ;
+: (simd-vbitand) ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
+: (simd-vbitandn) ( a b rep -- c ) [ [ bitnot ] dip bitand ] bitwise-components-2map ; ! (bitnot a) bitand b
+: (simd-vbitor) ( a b rep -- c ) [ bitor ] bitwise-components-2map ;
+: (simd-vbitxor) ( a b rep -- c ) [ bitxor ] bitwise-components-2map ;
+: (simd-vbitnot) ( a rep -- c ) [ bitnot ] bitwise-components-map ;
+: (simd-vand) ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
+: (simd-vandn) ( a b rep -- c ) [ [ bitnot ] dip bitand ] bitwise-components-2map ; ! (bitnot a) bitand b
+: (simd-vor) ( a b rep -- c ) [ bitor ] bitwise-components-2map ;
+: (simd-vxor) ( a b rep -- c ) [ bitxor ] bitwise-components-2map ;
+: (simd-vnot) ( a rep -- c ) [ bitnot ] bitwise-components-map ;
+: (simd-vlshift) ( a n rep -- c ) swap '[ _ shift ] bitwise-components-map ;
+: (simd-vrshift) ( a n rep -- c ) swap '[ _ neg shift ] bitwise-components-map ;
+: (simd-hlshift) ( a n rep -- c )
+    drop tail-slice 16 0 pad-tail ;
+: (simd-hrshift) ( a n rep -- c )
+    drop head-slice 16 0 pad-head ;
+: (simd-vshuffle-elements) ( a n rep -- c ) [ rep-length 0 pad-tail ] keep (vshuffle) ;
+: (simd-vshuffle-bytes) ( a b rep -- c ) drop uchar-16-rep (vshuffle) ;
+:: (simd-vmerge-head) ( a b rep -- c )
+    a b rep 2>rep-array :> ( a' b' )
+    rep <rep-array> :> c'
+    rep rep-length 2 /i iota [| n |
+        n a' nth-unsafe n 2 * c' set-nth-unsafe
+        n b' nth-unsafe n 2 * 1 + c' set-nth-unsafe
+    ] each
+    c' underlying>> ;
+:: (simd-vmerge-tail) ( a b rep -- c )
+    a b rep 2>rep-array :> ( a' b' )
+    rep <rep-array> :> c'
+    rep rep-length 2 /i :> len
+    len iota [| n |
+        n len + a' nth-unsafe n 2 * c' set-nth-unsafe
+        n len + b' nth-unsafe n 2 * 1 + c' set-nth-unsafe
+    ] each
+    c' underlying>> ;
+: (simd-v<=) ( a b rep -- c )
+    dup rep-tf-values '[ <= _ _ ? ] components-2map ;
+: (simd-v<) ( a b rep -- c )
+    dup rep-tf-values '[ < _ _ ? ] components-2map ;
+: (simd-v=) ( a b rep -- c )
+    dup rep-tf-values '[ = _ _ ? ] components-2map ;
+: (simd-v>) ( a b rep -- c )
+    dup rep-tf-values '[ > _ _ ? ] components-2map ;
+: (simd-v>=) ( a b rep -- c )
+    dup rep-tf-values '[ >= _ _ ? ] components-2map ;
+: (simd-vunordered?) ( a b rep -- c )
+    dup rep-tf-values '[ unordered? _ _ ? ] components-2map ;
+: (simd-vany?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? not ;
+: (simd-vall?) ( a rep -- ? ) [ bitand ] bitwise-components-reduce zero? not ;
+: (simd-vnone?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? ;
+: (simd-v>float) ( a rep -- c )
+    [ >rep-array ] [ >float-vector-rep [>rep-array] ] bi call( i -- f ) ;
+: (simd-v>integer) ( a rep -- c )
+    [ >rep-array ] [ >int-vector-rep [>rep-array] ] bi call( i -- f ) ;
+: (simd-vpack-signed) ( a b rep -- c )
+    [ 2>rep-array cord-append ]
+    [ narrow-vector-rep [ [<rep-array>] ] [ rep-component-type ] bi ] bi ! NOTE(review): map-as exemplar is a quotation here -- confirm
+    '[ _ c-type-clamp ] swap map-as ;
+: (simd-vpack-unsigned) ( a b rep -- c )
+    [ 2>rep-array cord-append ]
+    [ narrow-vector-rep >uint-vector-rep [ [<rep-array>] ] [ rep-component-type ] bi ] bi ! NOTE(review): same exemplar question as (simd-vpack-signed)
+    '[ _ c-type-clamp ] swap map-as ;
+: (simd-vunpack-head) ( a rep -- c )
+    [ >rep-array ] [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ] bi
+    [ head-slice ] dip call( a' -- c' ) underlying>> ;
+: (simd-vunpack-tail) ( a rep -- c )
+    [ >rep-array ] [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ] bi
+    [ tail-slice ] dip call( a' -- c' ) underlying>> ;
+: (simd-with) ( n rep -- v )
+    [ rep-length iota swap '[ _ ] ] [ <rep-array> ] bi replicate-as ;
+: (simd-gather-2) ( m n rep -- v ) <rep-array> [ 2 set-firstn ] keep underlying>> ;
+: (simd-gather-4) ( m n o p rep -- v ) <rep-array> [ 4 set-firstn ] keep underlying>> ;
+: (simd-select) ( a n rep -- x ) [ swap ] dip >rep-array nth-unsafe ;
+
+: alien-vector ( c-ptr n rep -- value ) ! copies rep-size bytes starting n bytes past c-ptr
+    [ swap <displaced-alien> ] dip rep-size memory>byte-array ;
+: set-alien-vector ( value c-ptr n rep -- ) ! memcpy of rep-size bytes from value to c-ptr displaced by n
+    [ swap <displaced-alien> swap ] dip rep-size memcpy ;
+
+"compiler.cfg.intrinsics.simd" require
+"compiler.tree.propagation.simd" require
+"compiler.cfg.value-numbering.simd" require
+
diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor
index d600b0bc24..8aeea4267d 100644
--- a/basis/math/vectors/simd/simd-docs.factor
+++ b/basis/math/vectors/simd/simd-docs.factor
@@ -1,5 +1,5 @@
 USING: classes.tuple.private cpu.architecture help.markup
-help.syntax kernel.private math math.vectors
+help.syntax kernel.private math math.vectors math.vectors.simd.intrinsics
 sequences ;
 IN: math.vectors.simd
 
@@ -163,7 +163,6 @@ ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives"
 { $list
     "They operate on raw byte arrays, with a separate “representation” parameter passed in to determine the type of the operands and result."
     "They are unsafe; passing values which are not byte arrays, or byte arrays with the wrong size, will dereference invalid memory and possibly crash Factor."
-    { "They do not have software fallbacks; if the current CPU does not have SIMD support, a " { $link bad-simd-call } " error will be thrown." }
 }
 "The compiler converts " { $link "math-vectors" } " into SIMD primitives automatically in cases where it is safe; this means that the input types are known to be SIMD vectors, and the CPU supports SIMD."
 $nl
diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor
index 0e40e787df..5289f3f393 100644
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@@ -95,6 +95,9 @@ DEFER: simd-construct-op
 PRIVATE>
 
 >>
 
+DEFER: simd-with
+DEFER: simd-cast
+
 <<