removed: redadd these.

modern-harvey2
Doug Coleman 2017-12-28 16:03:25 -08:00
parent 032e819f3c
commit 7ccaf78071
26 changed files with 0 additions and 1755 deletions

View File

@ -1,4 +0,0 @@
USING: math.vectors.simd math.vectors.simd.cords tools.test ;
IN: math.vectors.simd.cords.tests
{ float-4{ 1.0 2.0 3.0 4.0 } } [ double-4{ 1.0 2.0 3.0 4.0 } >float-4 ] unit-test

View File

@ -1,97 +0,0 @@
USING: accessors alien.c-types arrays byte-arrays
cpu.architecture effects functors generalizations kernel lexer
math math.vectors.simd math.vectors.simd.intrinsics parser
prettyprint.custom quotations sequences sequences.cords words
classes ;
IN: math.vectors.simd.cords
<<
<PRIVATE
<FUNCTOR: (define-simd-128-cord) ( A/2 A -- )
A-rep IS ${A/2}-rep
>A/2 IS >${A/2}
A/2-boa IS ${A/2}-boa
A/2-with IS ${A/2}-with
A/2-cast IS ${A/2}-cast
>A DEFINES >${A}
A-boa DEFINES ${A}-boa
A-with DEFINES ${A}-with
A-cast DEFINES ${A}-cast
A{ DEFINES ${A}{
N [ A-rep rep-length ]
BOA-EFFECT [ N 2 * "n" <array> { "v" } <effect> ]
WHERE
: >A ( seq -- A )
[ N head-slice >A/2 ]
[ N tail-slice >A/2 ] bi cord-append ;
\ A-boa
{ N ndip A/2-boa cord-append } { A/2-boa } >quotation prefix >quotation
BOA-EFFECT define-inline
: A-with ( n -- v )
[ A/2-with ] [ A/2-with ] bi cord-append ; inline
: A-cast ( v -- v' )
[ A/2-cast ] cord-map ; inline
M: A new-sequence
2drop
N A/2 new new-sequence
N A/2 new new-sequence
\ A boa ;
M: A like
over \ A instance? [ drop ] [ call-next-method ] if ;
M: A >pprint-sequence ;
M: A pprint* pprint-object ;
M: A pprint-delims drop \ A{ \ } ;
SYNTAX: A{ \ } [ >A ] parse-literal ;
<c-type>
byte-array >>class
A >>boxed-class
[
[ A-rep alien-vector A/2 boa ]
[ 16 + A-rep alien-vector A/2 boa ] 2bi cord-append
] >>getter
[
[ [ head>> underlying>> ] 2dip A-rep set-alien-vector ]
[ [ tail>> underlying>> ] 2dip 16 + A-rep set-alien-vector ] 3bi
] >>setter
32 >>size
16 >>align
A-rep >>rep
\ A typedef
;FUNCTOR>
: define-simd-128-cord ( A/2 T -- )
[ define-specialized-cord ]
[ create-word-in (define-simd-128-cord) ] 2bi ;
SYNTAX: \SIMD-128-CORD:
scan-word scan-token define-simd-128-cord ;
PRIVATE>
>>
SIMD-128-CORD: char-16 char-32
SIMD-128-CORD: uchar-16 uchar-32
SIMD-128-CORD: short-8 short-16
SIMD-128-CORD: ushort-8 ushort-16
SIMD-128-CORD: int-4 int-8
SIMD-128-CORD: uint-4 uint-8
SIMD-128-CORD: longlong-2 longlong-4
SIMD-128-CORD: ulonglong-2 ulonglong-4
SIMD-128-CORD: float-4 float-8
SIMD-128-CORD: double-2 double-4

View File

@ -1,12 +0,0 @@
USING: help.markup help.syntax kernel sequences ;
IN: math.vectors.simd.intrinsics
HELP: (simd-select)
{ $values { "a" object } { "n" object } { "rep" object } { "x" object } }
{ $description "Word which implements " { $link nth } " for SIMD vectors." }
{ $examples
{ $unchecked-example
"float-4{ 3 4 9 1 } underlying>> 2 float-4-rep (simd-select)"
"9.0"
}
} ;

View File

@ -1,285 +0,0 @@
! Copyright (C) 2009 Slava Pestov, Joe Groff.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien alien.data combinators cpu.architecture fry
grouping kernel libc locals math math.libm math.order math.ranges
sequences sequences.cords sequences.generalizations sequences.private
sequences.unrolled sequences.unrolled.private specialized-arrays
vocabs ;
QUALIFIED-WITH: alien.c-types c
SPECIALIZED-ARRAYS:
c:char c:short c:int c:longlong
c:uchar c:ushort c:uint c:ulonglong
c:float c:double ;
IN: math.vectors.simd.intrinsics
! Word props are added later
: assert-positive ( x -- y ) ;
<PRIVATE
: >bitwise-vector-rep ( rep -- rep' )
{
{ float-4-rep [ uint-4-rep ] }
{ double-2-rep [ ulonglong-2-rep ] }
[ ]
} case ; foldable
: >uint-vector-rep ( rep -- rep' )
{
{ longlong-2-rep [ ulonglong-2-rep ] }
{ int-4-rep [ uint-4-rep ] }
{ short-8-rep [ ushort-8-rep ] }
{ char-16-rep [ uchar-16-rep ] }
[ ]
} case ; foldable
: >int-vector-rep ( rep -- rep' )
{
{ float-4-rep [ int-4-rep ] }
{ double-2-rep [ longlong-2-rep ] }
} case ; foldable
: >float-vector-rep ( rep -- rep' )
{
{ int-4-rep [ float-4-rep ] }
{ longlong-2-rep [ double-2-rep ] }
} case ; foldable
: byte>rep-array ( byte-array rep -- array )
{
{ char-16-rep [ 16 c:char <c-direct-array> ] }
{ uchar-16-rep [ 16 c:uchar <c-direct-array> ] }
{ short-8-rep [ 8 c:short <c-direct-array> ] }
{ ushort-8-rep [ 8 c:ushort <c-direct-array> ] }
{ int-4-rep [ 4 c:int <c-direct-array> ] }
{ uint-4-rep [ 4 c:uint <c-direct-array> ] }
{ longlong-2-rep [ 2 c:longlong <c-direct-array> ] }
{ ulonglong-2-rep [ 2 c:ulonglong <c-direct-array> ] }
{ float-4-rep [ 4 c:float <c-direct-array> ] }
{ double-2-rep [ 2 c:double <c-direct-array> ] }
} case ; inline
: >rep-array ( seq rep -- array )
{
{ char-16-rep [ c:char >c-array ] }
{ uchar-16-rep [ c:uchar >c-array ] }
{ short-8-rep [ c:short >c-array ] }
{ ushort-8-rep [ c:ushort >c-array ] }
{ int-4-rep [ c:int >c-array ] }
{ uint-4-rep [ c:uint >c-array ] }
{ longlong-2-rep [ c:longlong >c-array ] }
{ ulonglong-2-rep [ c:ulonglong >c-array ] }
{ float-4-rep [ c:float >c-array ] }
{ double-2-rep [ c:double >c-array ] }
} case ; inline
: <rep-array> ( rep -- array )
{
{ char-16-rep [ 16 c:char (c-array) ] }
{ uchar-16-rep [ 16 c:uchar (c-array) ] }
{ short-8-rep [ 8 c:short (c-array) ] }
{ ushort-8-rep [ 8 c:ushort (c-array) ] }
{ int-4-rep [ 4 c:int (c-array) ] }
{ uint-4-rep [ 4 c:uint (c-array) ] }
{ longlong-2-rep [ 2 c:longlong (c-array) ] }
{ ulonglong-2-rep [ 2 c:ulonglong (c-array) ] }
{ float-4-rep [ 4 c:float (c-array) ] }
{ double-2-rep [ 2 c:double (c-array) ] }
} case ; inline
: rep-tf-values ( rep -- t f )
float-vector-rep? [ -1 bits>double 0.0 ] [ -1 0 ] if ;
: 2byte>rep-array ( a b rep -- a' b' )
'[ _ byte>rep-array ] bi@ ; inline
: components-map ( a rep quot -- c )
[ [ byte>rep-array ] [ rep-length ] bi ] dip unrolled-map-unsafe underlying>> ; inline
: components-2map ( a b rep quot -- c )
[ [ 2byte>rep-array ] [ rep-length ] bi ] dip unrolled-2map-unsafe underlying>> ; inline
: components-reduce ( a rep quot -- x )
[ byte>rep-array [ ] ] dip map-reduce ; inline
: bitwise-components-map ( a rep quot -- c )
[ >bitwise-vector-rep [ byte>rep-array ] [ rep-length ] bi ] dip
unrolled-map-unsafe underlying>> ; inline
: bitwise-components-2map ( a b rep quot -- c )
[ >bitwise-vector-rep [ 2byte>rep-array ] [ rep-length ] bi ] dip
unrolled-2map-unsafe underlying>> ; inline
: bitwise-components-reduce ( a rep quot -- x )
[ >bitwise-vector-rep byte>rep-array [ ] ] dip map-reduce ; inline
: bitwise-components-reduce* ( a rep identity quot -- x )
[ >bitwise-vector-rep byte>rep-array ] 2dip reduce ; inline
:: (vshuffle) ( a elts rep -- c )
a rep byte>rep-array :> a'
rep <rep-array> :> c'
elts rep rep-length |[ from to |
from rep rep-length 1 - bitand
a' nth-unsafe
to c' set-nth-unsafe
] unrolled-each-index-unsafe
c' underlying>> ; inline
:: (vshuffle2) ( a b elts rep -- c )
a rep byte>rep-array :> a'
b rep byte>rep-array :> b'
a' b' cord-append :> ab'
rep <rep-array> :> c'
elts rep rep-length |[ from to |
from rep rep-length dup + 1 - bitand
ab' nth-unsafe
to c' set-nth-unsafe
] unrolled-each-index-unsafe
c' underlying>> ; inline
GENERIC: native/ ( x y -- x/y )
M: integer native/ /i ; inline
M: float native/ /f ; inline
: (vgetmask) ( a rep -- b )
0 [ [ 1 shift ] [ zero? 0 1 ? ] bi* bitor ] bitwise-components-reduce* ; inline
PRIVATE>
: (simd-v+) ( a b rep -- c ) [ + ] components-2map ;
: (simd-v-) ( a b rep -- c ) [ - ] components-2map ;
: (simd-vneg) ( a rep -- c ) [ neg ] components-map ;
:: (simd-v+-) ( a b rep -- c )
a b rep 2byte>rep-array :> ( a' b' )
rep <rep-array> :> c'
0 rep rep-length [ 1 - 2 <range> ] [ 2 /i ] bi |[ n |
n a' nth-unsafe n b' nth-unsafe -
n c' set-nth-unsafe
n 1 + a' nth-unsafe n 1 + b' nth-unsafe +
n 1 + c' set-nth-unsafe
] unrolled-each-unsafe
c' underlying>> ;
: (simd-vs+) ( a b rep -- c )
dup rep-component-type '[ + _ c:c-type-clamp ] components-2map ;
: (simd-vs-) ( a b rep -- c )
dup rep-component-type '[ - _ c:c-type-clamp ] components-2map ;
: (simd-vs*) ( a b rep -- c )
dup rep-component-type '[ * _ c:c-type-clamp ] components-2map ;
: (simd-v*) ( a b rep -- c ) [ * ] components-2map ;
: (simd-v*high) ( a b rep -- c )
dup rep-component-type c:heap-size -8 * '[ * _ shift ] components-2map ;
:: (simd-v*hs+) ( a b rep -- c )
rep { char-16-rep uchar-16-rep } member-eq?
[ uchar-16-rep char-16-rep ]
[ rep rep ] if :> ( a-rep b-rep )
b-rep widen-vector-rep signed-rep :> wide-rep
wide-rep rep-component-type :> wide-type
a a-rep byte>rep-array 2 <groups> :> a'
b b-rep byte>rep-array 2 <groups> :> b'
a' b' rep rep-length 2 /i [
[ [ first ] bi@ * ]
[ [ second ] bi@ * ] 2bi +
wide-type c:c-type-clamp
] wide-rep <rep-array> unrolled-2map-as-unsafe underlying>> ;
: (simd-v/) ( a b rep -- c ) [ native/ ] components-2map ;
: (simd-vavg) ( a b rep -- c )
[ + dup integer? [ 1 + -1 shift ] [ 0.5 * ] if ] components-2map ;
: (simd-vmin) ( a b rep -- c ) [ min ] components-2map ;
: (simd-vmax) ( a b rep -- c ) [ max ] components-2map ;
! XXX
: (simd-v.) ( a b rep -- n )
[ 2byte>rep-array [ [ first ] bi@ * ] 2keep ] keep
1 swap rep-length [a,b) [ '[ _ swap nth-unsafe ] bi@ * + ] 2with each ;
: (simd-vsqrt) ( a rep -- c ) [ fsqrt ] components-map ;
: (simd-vsad) ( a b rep -- c ) 2byte>rep-array [ - abs ] [ + ] 2map-reduce ;
: (simd-sum) ( a rep -- n ) [ + ] components-reduce ;
: (simd-vabs) ( a rep -- c ) [ abs ] components-map ;
: (simd-vbitand) ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
: (simd-vbitandn) ( a b rep -- c ) [ [ bitnot ] dip bitand ] bitwise-components-2map ;
: (simd-vbitor) ( a b rep -- c ) [ bitor ] bitwise-components-2map ;
: (simd-vbitxor) ( a b rep -- c ) [ bitxor ] bitwise-components-2map ;
: (simd-vbitnot) ( a rep -- c ) [ bitnot ] bitwise-components-map ;
: (simd-vand) ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
: (simd-vandn) ( a b rep -- c ) [ [ bitnot ] dip bitand ] bitwise-components-2map ;
: (simd-vor) ( a b rep -- c ) [ bitor ] bitwise-components-2map ;
: (simd-vxor) ( a b rep -- c ) [ bitxor ] bitwise-components-2map ;
: (simd-vnot) ( a rep -- c ) [ bitnot ] bitwise-components-map ;
: (simd-vlshift) ( a n rep -- c ) swap '[ _ shift ] bitwise-components-map ;
: (simd-vrshift) ( a n rep -- c ) swap '[ _ neg shift ] bitwise-components-map ;
! XXX
: (simd-hlshift) ( a n rep -- c )
drop head-slice* 16 0 pad-head ;
! XXX
: (simd-hrshift) ( a n rep -- c )
drop tail-slice 16 0 pad-tail ;
: (simd-vshuffle-elements) ( a n rep -- c ) [ rep-length 0 pad-tail ] keep (vshuffle) ;
: (simd-vshuffle2-elements) ( a b n rep -- c ) [ rep-length 0 pad-tail ] keep (vshuffle2) ;
: (simd-vshuffle-bytes) ( a b rep -- c ) drop uchar-16-rep (vshuffle) ;
:: (simd-vmerge-head) ( a b rep -- c )
a b rep 2byte>rep-array :> ( a' b' )
rep <rep-array> :> c'
rep rep-length 2 /i |[ n |
n a' nth-unsafe n 2 * c' set-nth-unsafe
n b' nth-unsafe n 2 * 1 + c' set-nth-unsafe
] unrolled-each-integer
c' underlying>> ;
:: (simd-vmerge-tail) ( a b rep -- c )
a b rep 2byte>rep-array :> ( a' b' )
rep <rep-array> :> c'
rep rep-length 2 /i :> len
len |[ n |
n len + a' nth-unsafe n 2 * c' set-nth-unsafe
n len + b' nth-unsafe n 2 * 1 + c' set-nth-unsafe
] unrolled-each-integer
c' underlying>> ;
: (simd-v<=) ( a b rep -- c )
dup rep-tf-values '[ <= _ _ ? ] components-2map ;
: (simd-v<) ( a b rep -- c )
dup rep-tf-values '[ < _ _ ? ] components-2map ;
: (simd-v=) ( a b rep -- c )
dup rep-tf-values '[ = _ _ ? ] components-2map ;
: (simd-v>) ( a b rep -- c )
dup rep-tf-values '[ > _ _ ? ] components-2map ;
: (simd-v>=) ( a b rep -- c )
dup rep-tf-values '[ >= _ _ ? ] components-2map ;
: (simd-vunordered?) ( a b rep -- c )
dup rep-tf-values '[ unordered? _ _ ? ] components-2map ;
: (simd-vany?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? not ;
: (simd-vall?) ( a rep -- ? ) [ bitand ] bitwise-components-reduce zero? not ;
: (simd-vnone?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? ;
: (simd-vgetmask) ( a rep -- n )
{ float-4-rep double-2-rep } member?
[ uint-4-rep (vgetmask) ] [ uchar-16-rep (vgetmask) ] if ;
: (simd-v>float) ( a rep -- c )
[ [ byte>rep-array ] [ rep-length ] bi [ >float ] ]
[ >float-vector-rep <rep-array> ] bi unrolled-map-as-unsafe underlying>> ;
: (simd-v>integer) ( a rep -- c )
[ [ byte>rep-array ] [ rep-length ] bi [ >integer ] ]
[ >int-vector-rep <rep-array> ] bi unrolled-map-as-unsafe underlying>> ;
: (simd-vpack-signed) ( a b rep -- c )
[ [ 2byte>rep-array cord-append ] [ rep-length 2 * ] bi ]
[ narrow-vector-rep [ <rep-array> ] [ rep-component-type ] bi ] bi
'[ _ c:c-type-clamp ] swap unrolled-map-as-unsafe underlying>> ;
: (simd-vpack-unsigned) ( a b rep -- c )
[ [ 2byte>rep-array cord-append ] [ rep-length 2 * ] bi ]
[ narrow-vector-rep >uint-vector-rep [ <rep-array> ] [ rep-component-type ] bi ] bi
'[ _ c:c-type-clamp ] swap unrolled-map-as-unsafe underlying>> ;
: (simd-vunpack-head) ( a rep -- c )
[ byte>rep-array ] [ widen-vector-rep [ rep-length ] [ '[ _ >rep-array ] ] bi ] bi
[ head-slice ] dip call( a' -- c' ) underlying>> ;
: (simd-vunpack-tail) ( a rep -- c )
[ byte>rep-array ] [ widen-vector-rep [ rep-length ] [ '[ _ >rep-array ] ] bi ] bi
[ tail-slice ] dip call( a' -- c' ) underlying>> ;
: (simd-with) ( n rep -- v )
[ rep-length swap '[ _ ] ] [ <rep-array> ] bi replicate-as
underlying>> ;
: (simd-gather-2) ( m n rep -- v ) <rep-array> [ 2 set-firstn-unsafe ] keep underlying>> ;
: (simd-gather-4) ( m n o p rep -- v ) <rep-array> [ 4 set-firstn-unsafe ] keep underlying>> ;
: (simd-select) ( a n rep -- x ) swapd byte>rep-array nth-unsafe ;
: alien-vector ( c-ptr n rep -- value )
[ swap <displaced-alien> ] dip rep-size memory>byte-array ;
: set-alien-vector ( value c-ptr n rep -- )
[ swap <displaced-alien> swap ] dip rep-size memcpy ;
"compiler.cfg.intrinsics.simd" require
"compiler.tree.propagation.simd" require
"compiler.cfg.value-numbering.simd" require

View File

@ -1,2 +0,0 @@
USING: math.vectors.simd mirrors ;
IN: math.vectors.simd.mirrors

View File

@ -1,205 +0,0 @@
USING: classes.tuple.private cpu.architecture help.markup
help.syntax kernel.private math math.vectors math.vectors.simd.intrinsics
sequences ;
IN: math.vectors.simd
ARTICLE: "math.vectors.simd.intro" "Introduction to SIMD support"
"Modern CPUs support a form of data-level parallelism, where arithmetic operations on fixed-size short vectors can be done on all components in parallel. This is known as single-instruction-multiple-data (SIMD)."
$nl
"SIMD support in the processor takes the form of instruction sets which operate on vector registers. By operating on multiple scalar values at the same time, code which operates on points, colors, and other vector data can be sped up."
$nl
"In Factor, SIMD support is exposed in the form of special-purpose SIMD " { $link "sequence-protocol" } " implementations. These are fixed-length, homogeneous sequences. They are referred to as vectors, but should not be confused with Factor's " { $link "vectors" } ", which can hold any type of object and can be resized."
$nl
"The words in the " { $vocab-link "math.vectors" } " vocabulary, which can be used with any sequence of numbers, are special-cased by the compiler. If the compiler can prove that only SIMD vectors are used, it expands " { $link "math-vectors" } " into " { $link "math.vectors.simd.intrinsics" } ". While in the general case, SIMD intrinsics operate on heap-allocated SIMD vectors, that too can be optimized since in many cases the compiler unbox SIMD vectors, storing them directly in registers."
$nl
"Since the only difference between ordinary code and SIMD-accelerated code is that the latter uses special fixed-length SIMD sequences, the SIMD library is very easy to use. To ensure your code compiles to use vector instructions without boxing and unboxing overhead, follow the guidelines for " { $link "math.vectors.simd.efficiency" } "."
$nl
"There should never be any reason to use " { $link "math.vectors.simd.intrinsics" } " directly, but they too have a straightforward, but lower-level, interface." ;
ARTICLE: "math.vectors.simd.support" "Supported SIMD instruction sets and operations"
"At present, the SIMD support makes use of a subset of SSE up to SSE4.1. The subset used depends on the current CPU type."
$nl
"SSE1 only supports single-precision SIMD (" { $snippet "float-4" } ")."
$nl
"SSE2 introduces double-precision SIMD (" { $snippet "double-2" } ") and integer SIMD (all types). Integer SIMD is missing a few features; in particular, the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "."
$nl
"SSE3 introduces horizontal adds (summing all components of a single vector register), which are useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "."
$nl
"SSSE3 introduces " { $link vabs } " for " { $snippet "char-16" } ", " { $snippet "short-8" } " and " { $snippet "int-4" } "."
$nl
"SSE4.1 introduces " { $link vmin } " and " { $link vmax } " for all remaining integer types, a faster instruction for " { $link v. } ", and a few other things."
$nl
"On PowerPC, or older x86 chips without SSE, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance."
$nl
"The primitives in the " { $vocab-link "math.vectors.simd.intrinsics" } " vocabulary do not have software fallbacks, but they should not be called directly in any case." ;
ARTICLE: "math.vectors.simd.types" "SIMD vector types"
"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type and " { $snippet "count" } " is a vector dimension."
$nl
"The following 128-bit vector types are defined in the " { $vocab-link "math.vectors.simd" } " vocabulary:"
{ $code
"char-16"
"uchar-16"
"short-8"
"ushort-8"
"int-4"
"uint-4"
"longlong-2"
"ulonglong-2"
"float-4"
"double-2"
}
"Double-width 256-bit vector types are defined in the " { $vocab-link "math.vectors.simd.cords" } " vocabulary:"
{ $code
"char-32"
"uchar-32"
"short-16"
"ushort-16"
"int-8"
"uint-8"
"longlong-4"
"ulonglong-4"
"float-8"
"double-4"
} ;
ARTICLE: "math.vectors.simd.words" "SIMD vector words"
"For each SIMD vector type, several words are defined, where " { $snippet "type" } " is the type in question:"
{ $table
{ "Word" "Stack effect" "Description" }
{ { $snippet "type-with" } { $snippet "( x -- simd-array )" } "creates a new instance where all components are set to a single scalar" }
{ { $snippet "type-boa" } { $snippet "( ... -- simd-array )" } "creates a new instance where components are read from the stack" }
{ { $snippet "type-cast" } { $snippet "( simd-array -- simd-array' )" } "creates a new SIMD array where the underlying data is taken from another SIMD array, with no format conversion" }
{ { $snippet ">type" } { $snippet "( seq -- simd-array )" } "creates a new instance initialized with the elements of an existing sequence, which must have the correct length" }
{ { $snippet "type{" } { $snippet "type{ elements... }" } "parsing word defining literal syntax for an SIMD vector; the correct number of elements must be given" }
}
"To actually perform vector arithmetic on SIMD vectors, use " { $link "math-vectors" } " words."
{ $see-also "c-types-specs" } ;
ARTICLE: "math.vectors.simd.efficiency" "Writing efficient SIMD code"
"Since SIMD vectors are heap-allocated objects, it is important to write code in a style which is conducive to the compiler being able to inline generic dispatch and eliminate allocation."
$nl
"If the inputs to a " { $vocab-link "math.vectors" } " word are statically known to be SIMD vectors, the call is converted into an SIMD primitive, and the output is then also known to be an SIMD vector (or scalar, depending on the operation); this information propagates forward within a single word (together with any inlined words and macro expansions). Any intermediate values which are not stored into collections, or returned from the word, are furthermore unboxed."
$nl
"To check if optimizations are being performed, pass a quotation to the " { $snippet "optimizer-report." } " and " { $snippet "optimized." } " words in the " { $vocab-link "compiler.tree.debugger" } " vocabulary, and look for calls to " { $link "math.vectors.simd.intrinsics" } " as opposed to high-level " { $link "math-vectors" } "."
$nl
"For example, in the following, no SIMD operations are used at all, because the compiler's propagation pass does not consider dynamic variable usage:"
{ $code
"USING: compiler.tree.debugger math.vectors
math.vectors.simd ;
SYMBOLS: x y ;
[
float-4{ 1.5 2.0 3.7 0.4 } x set
float-4{ 1.5 2.0 3.7 0.4 } y set
x get y get v+
] optimizer-report." }
"The following word benefits from SIMD optimization, because it begins with an unsafe declaration:"
{ $code
"USING: compiler.tree.debugger kernel.private
math.vectors math.vectors.simd ;
IN: simd-demo
: interpolate ( v a b -- w )
{ float-4 float-4 float-4 } declare
[ v* ] [ [ 1.0 ] dip n-v v* ] bi-curry* bi v+ ;
\\ interpolate optimizer-report." }
"Note that using " { $link declare } " is not recommended. Safer ways of getting type information for the input parameters to a word include defining methods on a generic word (the value being dispatched upon has a statically known type in the method body), as well as using " { $link "hints" } " and " { $link postpone: inline } " declarations."
$nl
"Here is a better version of the " { $snippet "interpolate" } " words above that uses hints:"
{ $code
"USING: compiler.tree.debugger hints
math.vectors math.vectors.simd ;
IN: simd-demo
: interpolate ( v a b -- w )
[ v* ] [ [ 1.0 ] dip n-v v* ] bi-curry* bi v+ ;
HINTS: interpolate float-4 float-4 float-4 ;
\\ interpolate optimizer-report. " }
"This time, the optimizer report lists calls to both SIMD primitives and high-level vector words, because hints cause two code paths to be generated. The " { $snippet "optimized." } " word can be used to make sure that the fast code path consists entirely of calls to primitives."
$nl
"If the " { $snippet "interpolate" } " word was to be used in several places with different types of vectors, it would be best to declare it " { $link postpone: inline } "."
$nl
"In the " { $snippet "interpolate" } " word, there is still a call to the " { $link <tuple-boa> } " primitive, because the return value at the end is being boxed on the heap. In the next example, no memory allocation occurs at all because the SIMD vectors are stored inside a struct class (see " { $link "classes.struct" } "); also note the use of inlining:"
{ $code
"USING: compiler.tree.debugger math.vectors math.vectors.simd ;
IN: simd-demo
STRUCT: actor
{ id int }
{ position float-4 }
{ velocity float-4 }
{ acceleration float-4 } ;
GENERIC: advance ( dt object -- )
: update-velocity ( dt actor -- )
[ acceleration>> n*v ] [ velocity>> v+ ] [ ] tri
velocity<< ; inline
: update-position ( dt actor -- )
[ velocity>> n*v ] [ position>> v+ ] [ ] tri
position<< ; inline
M: actor advance ( dt actor -- )
[ >float ] dip
[ update-velocity ] [ update-position ] 2bi ;
M\\ actor advance optimized."
}
"The " { $vocab-link "compiler.cfg.debugger" } " vocabulary can give a lower-level picture of the generated code, that includes register assignments and other low-level details. To look at low-level optimizer output, call " { $snippet "regs." } " on a word or quotation:"
{ $code
"USE: compiler.tree.debugger
M\\ actor advance regs." }
"Example of a high-performance algorithms that use SIMD primitives can be found in the following vocabularies:"
{ $list
{ $vocab-link "benchmark.nbody-simd" }
{ $vocab-link "benchmark.raytracer-simd" }
{ $vocab-link "random.sfmt" }
} ;
ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives"
"The words in the " { $vocab-link "math.vectors.simd.intrinsics" } " vocabulary are used to implement SIMD support. These words have three disadvantages compared to the higher-level " { $link "math-vectors" } " words:"
{ $list
"They operate on raw byte arrays, with a separate “representation” parameter passed in to determine the type of the operands and result."
"They are unsafe; passing values which are not byte arrays, or byte arrays with the wrong size, will dereference invalid memory and possibly crash Factor."
}
"The compiler converts " { $link "math-vectors" } " into SIMD primitives automatically in cases where it is safe; this means that the input types are known to be SIMD vectors, and the CPU supports SIMD."
$nl
"It is best to avoid calling SIMD primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "."
$nl
"There are two primitives which are used to implement accessing SIMD vector fields of " { $link "classes.struct" } ":"
{ $subsections
alien-vector
set-alien-vector
}
"For the most part, the above primitives correspond directly to vector arithmetic words. They take a representation parameter, which is one of the singleton members of the " { $link vector-rep } " union in the " { $vocab-link "cpu.architecture" } " vocabulary." ;
ARTICLE: "math.vectors.simd.alien" "SIMD data in struct classes"
"Struct classes may contain fields which store SIMD data; for each SIMD vector type listed in " { $snippet "math.vectors.simd.types" } " there is a C type with the same name."
$nl
"Only SIMD struct fields are allowed at the moment; passing SIMD data as function parameters is not yet supported." ;
ARTICLE: "math.vectors.simd.accuracy" "Numerical accuracy of SIMD primitives"
"No guarantees are made that " { $vocab-link "math.vectors.simd" } " words will give identical results on different SSE versions, or between the hardware intrinsics and the software fallbacks."
$nl
"In particular, horizontal operations on " { $snippet "float-4" } " vectors are affected by this. They are computed with lower precision in intrinsics than the software fallback. Horizontal operations include anything involving adding together the components of a vector, such as " { $link sum } " or " { $link normalize } "." ;
ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
"The " { $vocab-link "math.vectors.simd" } " vocabulary extends the " { $vocab-link "math.vectors" } " vocabulary to support efficient vector arithmetic on small, fixed-size vectors."
{ $subsections
"math.vectors.simd.intro"
"math.vectors.simd.types"
"math.vectors.simd.words"
"math.vectors.simd.support"
"math.vectors.simd.accuracy"
"math.vectors.simd.efficiency"
"math.vectors.simd.alien"
"math.vectors.simd.intrinsics"
} ;
ABOUT: "math.vectors.simd"

View File

@ -1,775 +0,0 @@
USING: accessors arrays classes compiler.test compiler.tree.debugger
effects fry io kernel kernel.private math math.functions
math.private math.vectors math.vectors.simd math.ranges
math.vectors.simd.private prettyprint random sequences system
tools.test vocabs assocs compiler.cfg.debugger words
locals combinators cpu.architecture namespaces byte-arrays alien
specialized-arrays classes.struct eval classes.algebra sets
quotations math.constants compiler.units splitting math.matrices
math.vectors.simd.cords alien.data ;
FROM: math.vectors.simd.intrinsics => alien-vector set-alien-vector ;
QUALIFIED-WITH: alien.c-types c
SPECIALIZED-ARRAY: c:float
IN: math.vectors.simd.tests
! Test type propagation
{ V{ float } } [ [ { float-4 } declare norm-sq ] final-classes ] unit-test
{ V{ float } } [ [ { float-4 } declare norm ] final-classes ] unit-test
{ V{ float-4 } } [ [ { float-4 } declare normalize ] final-classes ] unit-test
{ V{ float-4 } } [ [ { float-4 float-4 } declare v+ ] final-classes ] unit-test
{ V{ float } } [ [ { float-4 } declare second ] final-classes ] unit-test
{ V{ int-4 } } [ [ { int-4 int-4 } declare v+ ] final-classes ] unit-test
{ t } [ [ { int-4 } declare second ] final-classes first integer class<= ] unit-test
{ V{ longlong-2 } } [ [ { longlong-2 longlong-2 } declare v+ ] final-classes ] unit-test
{ V{ integer } } [ [ { longlong-2 } declare second ] final-classes ] unit-test
! Test puns; only on x86
cpu x86? [
[ double-2{ 4 1024 } ] [
float-4{ 0 1 0 2 }
[ { float-4 } declare dup v+ underlying>> double-2 boa dup v+ ] compile-call
] unit-test
] when
! Fuzz testing
CONSTANT: simd-classes
{
char-16
uchar-16
short-8
ushort-8
int-4
uint-4
longlong-2
ulonglong-2
float-4
double-2
}
SYMBOLS: -> +vector+ +any-vector+ +scalar+ +boolean+ +nonnegative+ +literal+ ;
CONSTANT: vector-words
H{
{ [v-] { +vector+ +vector+ -> +vector+ } }
{ distance { +vector+ +vector+ -> +nonnegative+ } }
{ n*v { +scalar+ +vector+ -> +vector+ } }
{ n+v { +scalar+ +vector+ -> +vector+ } }
{ n-v { +scalar+ +vector+ -> +vector+ } }
{ n/v { +scalar+ +vector+ -> +vector+ } }
{ norm { +vector+ -> +nonnegative+ } }
{ norm-sq { +vector+ -> +nonnegative+ } }
{ normalize { +vector+ -> +vector+ } }
{ v* { +vector+ +vector+ -> +vector+ } }
{ vs* { +vector+ +vector+ -> +vector+ } }
{ v*n { +vector+ +scalar+ -> +vector+ } }
{ v*high { +vector+ +vector+ -> +vector+ } }
{ v*hs+ { +vector+ +vector+ -> +vector+ } }
{ v+ { +vector+ +vector+ -> +vector+ } }
{ vs+ { +vector+ +vector+ -> +vector+ } }
{ v+- { +vector+ +vector+ -> +vector+ } }
{ v+n { +vector+ +scalar+ -> +vector+ } }
{ v- { +vector+ +vector+ -> +vector+ } }
{ vneg { +vector+ -> +vector+ } }
{ vs- { +vector+ +vector+ -> +vector+ } }
{ v-n { +vector+ +scalar+ -> +vector+ } }
{ v. { +vector+ +vector+ -> +scalar+ } }
{ vsad { +vector+ +vector+ -> +scalar+ } }
{ v/ { +vector+ +vector+ -> +vector+ } }
{ v/n { +vector+ +scalar+ -> +vector+ } }
{ vceiling { +vector+ -> +vector+ } }
{ vfloor { +vector+ -> +vector+ } }
{ vmax { +vector+ +vector+ -> +vector+ } }
{ vmin { +vector+ +vector+ -> +vector+ } }
{ vavg { +vector+ +vector+ -> +vector+ } }
{ vneg { +vector+ -> +vector+ } }
{ vtruncate { +vector+ -> +vector+ } }
{ sum { +vector+ -> +scalar+ } }
{ vcount { +vector+ -> +scalar+ } }
{ vabs { +vector+ -> +vector+ } }
{ vsqrt { +vector+ -> +vector+ } }
{ vbitand { +vector+ +vector+ -> +vector+ } }
{ vbitandn { +vector+ +vector+ -> +vector+ } }
{ vbitor { +vector+ +vector+ -> +vector+ } }
{ vbitxor { +vector+ +vector+ -> +vector+ } }
{ vbitnot { +vector+ -> +vector+ } }
{ vand { +vector+ +vector+ -> +vector+ } }
{ vandn { +vector+ +vector+ -> +vector+ } }
{ vor { +vector+ +vector+ -> +vector+ } }
{ vxor { +vector+ +vector+ -> +vector+ } }
{ vnot { +vector+ -> +vector+ } }
{ vlshift { +vector+ +scalar+ -> +vector+ } }
{ vrshift { +vector+ +scalar+ -> +vector+ } }
{ (vmerge-head) { +vector+ +vector+ -> +vector+ } }
{ (vmerge-tail) { +vector+ +vector+ -> +vector+ } }
{ v<= { +vector+ +vector+ -> +vector+ } }
{ v< { +vector+ +vector+ -> +vector+ } }
{ v= { +vector+ +vector+ -> +vector+ } }
{ v> { +vector+ +vector+ -> +vector+ } }
{ v>= { +vector+ +vector+ -> +vector+ } }
{ vunordered? { +vector+ +vector+ -> +vector+ } }
}
: vector-word-inputs ( schema -- seq ) { -> } split first ;
: with-ctors ( -- seq )
simd-classes [ [ name>> "-with" append ] [ vocabulary>> ] bi lookup-word ] map ;
: boa-ctors ( -- seq )
simd-classes [ [ name>> "-boa" append ] [ vocabulary>> ] bi lookup-word ] map ;
TUPLE: simd-test-failure
input
input-quot
unoptimized-result
optimized-result
nonintrinsic-result ;
:: check-optimizer (
seq
test-quot: ( input -- input-quot: ( -- ..v ) code-quot: ( ..v -- result ) )
eq-quot: ( resulta resultb -- ? )
--
failures
)
! Use test-quot to generate a bunch of test cases from the
! given inputs. Run each test case optimized and
! unoptimized. Compare results with eq-quot.
!
! seq: sequence of inputs
! test-quot: ( input -- input-quot: ( -- ..v ) code-quot: ( ..v -- result ) )
! eq-quot: ( result1 result2 -- ? )
seq |[ input |
input test-quot call :> ( input-quot code-quot )
input-quot [ class-of ] { } map-as :> input-classes
input-classes code-quot '[ _ declare @ ] :> code-quot'
"print-mr" get [ code-quot' regs. ] when
"print-checks" get [ input-quot . code-quot' . ] when
input-quot code-quot' [ [ call ] dip call ]
call( i c -- result ) :> unoptimized-result
input-quot code-quot' [ [ call ] dip compile-call ]
call( i c -- result ) :> optimized-result
input-quot code-quot' [
t "always-inline-simd-intrinsics" [
"print-inline-mr" get [ code-quot' regs. ] when
[ call ] dip compile-call
] with-variable
] call( i c -- result ) :> nonintrinsic-result
unoptimized-result optimized-result eq-quot call
optimized-result nonintrinsic-result eq-quot call
and
[ f ] [
input input-quot unoptimized-result optimized-result nonintrinsic-result
simd-test-failure boa
] if
] map sift
dup empty? [ dup ... ] unless ! Print full errors
; inline
"== Checking -new constructors" print
{ { } } [
simd-classes [ [ [ ] ] dip '[ _ new ] ] [ = ] check-optimizer
] unit-test
{ { } } [
simd-classes [ '[ _ new ] compile-call [ zero? ] all? ] reject
] unit-test
"== Checking -with constructors" print
{ { } } [
with-ctors [
[ 1000 random '[ _ ] ] dip '[ _ execute ]
] [ = ] check-optimizer
] unit-test
{ 0xffffffff } [ 0xffffffff uint-4-with first ] unit-test
{ 0xffffffff } [ 0xffffffff [ uint-4-with ] compile-call first ] unit-test
{ 0xffffffff } [ [ 0xffffffff uint-4-with ] compile-call first ] unit-test
"== Checking -boa constructors" print
{ { } } [
boa-ctors [
[ stack-effect in>> length [ 1000 random ] [ ] replicate-as ] keep
'[ _ execute ]
] [ = ] check-optimizer
] unit-test
{ 0xffffffff } [ 0xffffffff 2 3 4 [ uint-4-boa ] compile-call first ] unit-test
"== Checking vector operations" print
: random-int-vector ( class -- vec )
new [ drop 1000 random ] map ;
: random-float-vector ( class -- vec )
new [
drop
1000 random
10 swap <array> 0/0. suffix random
] map ;
: random-vector ( class elt-class -- vec )
float =
[ random-float-vector ]
[ random-int-vector ] if ;
:: check-vector-op ( word inputs class elt-class -- inputs quot )
inputs [
{
{ +vector+ [ class elt-class random-vector ] }
{ +scalar+ [ 1000 random elt-class float = [ >float ] when ] }
} case
] [ ] map-as
word '[ _ execute ] ;
: remove-float-words ( alist -- alist' )
{ distance vsqrt n/v v/n v/ normalize }
'[ drop _ member? ] assoc-reject ;
: remove-integer-words ( alist -- alist' )
{ vlshift vrshift v*high v*hs+ }
'[ drop _ member? ] assoc-reject ;
: boolean-ops ( -- words )
{ vand vandn vor vxor vnot vcount } ;
: remove-boolean-words ( alist -- alist' )
boolean-ops '[ drop _ member? ] assoc-reject ;
: ops-to-check ( elt-class -- alist )
[ vector-words >alist ] dip
float = [ remove-integer-words ] [ remove-float-words ] if
remove-boolean-words ;
: check-vector-ops ( class elt-class compare-quot -- failures )
[
[ nip ops-to-check ] 2keep
'[ first2 vector-word-inputs _ _ check-vector-op ]
] dip check-optimizer ; inline
: (approx=) ( x y -- ? )
{
{ [ 2dup [ fp-nan? ] both? ] [ 2drop t ] }
{ [ 2dup [ fp-nan? ] either? ] [ 2drop f ] }
{ [ 2dup [ fp-infinity? ] either? ] [ fp-bitwise= ] }
{ [ 2dup [ float? ] both? ] [ -1.e8 ~ ] }
[ = ]
} cond ;
: approx= ( x y -- ? )
2dup [ sequence? ] both?
[ [ (approx=) ] 2all? ] [ (approx=) ] if ;
: exact= ( x y -- ? )
{
{ [ 2dup [ float? ] both? ] [ fp-bitwise= ] }
{ [ 2dup [ sequence? ] both? ] [ [ fp-bitwise= ] 2all? ] }
[ = ]
} cond ;
: simd-classes&reps ( -- alist )
simd-classes [
{
{ [ dup name>> "float" head? ] [ float [ approx= ] ] }
{ [ dup name>> "double" head? ] [ float [ exact= ] ] }
[ fixnum [ = ] ]
} cond 3array
] map ;
simd-classes&reps [
[ [ { } ] ] dip first3 '[ _ _ _ check-vector-ops ] unit-test
] each
"== Checking boolean operations" print
: random-boolean-vector ( class -- vec )
new [ drop 2 random zero? ] map ;
:: check-boolean-op ( word inputs class elt-class -- inputs quot )
inputs [
{
{ +vector+ [ class random-boolean-vector ] }
{ +scalar+ [ 1000 random elt-class float = [ >float ] when ] }
} case
] [ ] map-as
word '[ _ execute ] ;
: check-boolean-ops ( class elt-class compare-quot -- seq )
[
[ boolean-ops [ dup vector-words at ] { } map>assoc ] 2dip
'[ first2 vector-word-inputs _ _ check-boolean-op ]
] dip check-optimizer ; inline
simd-classes&reps [
[ [ { } ] ] dip first3 '[ _ _ _ check-boolean-ops ] unit-test
] each
"== Checking vector blend" print
{ char-16{ 0 1 22 33 4 5 6 77 8 99 110 121 12 143 14 15 } }
[
char-16{ t t f f t t t f t f f f t f t t }
char-16{ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 }
char-16{ 0 11 22 33 44 55 66 77 88 99 110 121 132 143 154 165 } v?
] unit-test
{ char-16{ 0 1 22 33 4 5 6 77 8 99 110 121 12 143 14 15 } }
[
char-16{ t t f f t t t f t f f f t f t t }
char-16{ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 }
char-16{ 0 11 22 33 44 55 66 77 88 99 110 121 132 143 154 165 }
[ { char-16 char-16 char-16 } declare v? ] compile-call
] unit-test
{ int-4{ 1 22 33 4 } }
[ int-4{ t f f t } int-4{ 1 2 3 4 } int-4{ 11 22 33 44 } v? ] unit-test
{ int-4{ 1 22 33 4 } }
[
int-4{ t f f t } int-4{ 1 2 3 4 } int-4{ 11 22 33 44 }
[ { int-4 int-4 int-4 } declare v? ] compile-call
] unit-test
{ float-4{ 1.0 22.0 33.0 4.0 } }
[ float-4{ t f f t } float-4{ 1.0 2.0 3.0 4.0 } float-4{ 11.0 22.0 33.0 44.0 } v? ] unit-test
{ float-4{ 1.0 22.0 33.0 4.0 } }
[
float-4{ t f f t } float-4{ 1.0 2.0 3.0 4.0 } float-4{ 11.0 22.0 33.0 44.0 }
[ { float-4 float-4 float-4 } declare v? ] compile-call
] unit-test
"== Checking shifts and permutations" print
{ char-16{ 0 1 2 4 8 1 2 4 8 1 2 4 8 1 2 4 } }
[ char-16{ 1 2 4 8 1 2 4 8 1 2 4 8 1 2 4 8 } 1 hlshift ] unit-test
{ char-16{ 0 1 2 4 8 1 2 4 8 1 2 4 8 1 2 4 } }
[ char-16{ 1 2 4 8 1 2 4 8 1 2 4 8 1 2 4 8 } [ { char-16 } declare 1 hlshift ] compile-call ] unit-test
{ char-16{ 0 1 2 4 8 1 2 4 8 1 2 4 8 1 2 4 } }
[ char-16{ 1 2 4 8 1 2 4 8 1 2 4 8 1 2 4 8 } [ { char-16 } declare 1 >bignum hlshift ] compile-call ] unit-test
{ char-16{ 0 1 2 4 8 1 2 4 8 1 2 4 8 1 2 4 } }
[ char-16{ 1 2 4 8 1 2 4 8 1 2 4 8 1 2 4 8 } 1 [ { char-16 fixnum } declare hlshift ] compile-call ] unit-test
{ char-16{ 2 4 8 1 2 4 8 1 2 4 8 1 2 4 8 0 } }
[ char-16{ 1 2 4 8 1 2 4 8 1 2 4 8 1 2 4 8 } 1 hrshift ] unit-test
{ char-16{ 2 4 8 1 2 4 8 1 2 4 8 1 2 4 8 0 } }
[ char-16{ 1 2 4 8 1 2 4 8 1 2 4 8 1 2 4 8 } [ { char-16 } declare 1 hrshift ] compile-call ] unit-test
{ char-16{ 2 4 8 1 2 4 8 1 2 4 8 1 2 4 8 0 } }
[ char-16{ 1 2 4 8 1 2 4 8 1 2 4 8 1 2 4 8 } [ { char-16 } declare 1 >bignum hrshift ] compile-call ] unit-test
{ char-16{ 2 4 8 1 2 4 8 1 2 4 8 1 2 4 8 0 } }
[ char-16{ 1 2 4 8 1 2 4 8 1 2 4 8 1 2 4 8 } 1 [ { char-16 fixnum } declare hrshift ] compile-call ] unit-test
{ int-4{ 4 8 12 16 } }
[ int-4{ 1 2 3 4 } 2 vlshift ] unit-test
{ int-4{ 4 8 12 16 } }
[ int-4{ 1 2 3 4 } 2 [ { int-4 fixnum } declare vlshift ] compile-call ] unit-test
{ int-4{ 4 8 12 16 } }
[ int-4{ 1 2 3 4 } 2 >bignum [ { int-4 bignum } declare vlshift ] compile-call ] unit-test
! Invalid inputs should not cause the compiler to throw errors
{ } [
[ [ { int-4 } declare t hrshift ] ( a -- b ) define-temp drop ] with-compilation-unit
] unit-test
{ } [
[ [ { int-4 } declare { 3 2 1 } vshuffle ] ( a -- b ) define-temp drop ] with-compilation-unit
] unit-test
! Shuffles
: shuffles-for ( n -- shuffles )
{
{ 2 [
{
{ 0 1 }
{ 1 1 }
{ 1 0 }
{ 0 0 }
}
] }
{ 4 [
{
{ 1 2 3 0 }
{ 0 1 2 3 }
{ 1 1 2 2 }
{ 0 0 1 1 }
{ 2 2 3 3 }
{ 0 1 0 1 }
{ 2 3 2 3 }
{ 0 0 2 2 }
{ 1 1 3 3 }
{ 0 1 0 1 }
{ 2 2 3 3 }
}
] }
{ 8 [
4 shuffles-for
4 shuffles-for
[ [ 4 + ] map ] map
[ append ] 2map
] }
[ dup '[ _ random ] replicate 1array ]
} case ;
: 2shuffles-for ( n -- shuffles )
{
{ 2 [
{
{ 0 1 }
{ 0 3 }
{ 2 3 }
{ 2 0 }
}
] }
{ 4 [
{
{ 0 1 2 3 }
{ 4 1 2 3 }
{ 0 5 2 3 }
{ 0 1 6 3 }
{ 0 1 2 7 }
{ 4 5 2 3 }
{ 0 1 6 7 }
{ 4 5 6 7 }
{ 0 5 2 7 }
}
] }
{ 8 [
4 2shuffles-for
4 2shuffles-for
[ [ 8 + ] map ] map
[ append ] 2map
] }
[ dup 2 * '[ _ random ] replicate 1array ]
} case ;
simd-classes [
[ [ { } ] ] dip
[ new length shuffles-for ] keep
'[
_ [ [ _ new [ length <iota> ] keep like 1quotation ] dip '[ _ vshuffle ] ]
[ = ] check-optimizer
] unit-test
] each
simd-classes [
[ [ { } ] ] dip
[ new length 2shuffles-for ] keep
'[
_ [ [
_ new
[ [ length <iota> ] keep like ]
[ [ length dup dup + [a,b) ] keep like ] bi [ ] 2sequence
] dip '[ _ vshuffle2-elements ] ]
[ = ] check-optimizer
] unit-test
] each
"== Checking variable shuffles" print
: random-shift-vector ( class -- vec )
new [ drop 16 random ] map ;
:: test-shift-vector ( class -- ? )
[
class random-int-vector :> src
char-16 random-shift-vector :> perm
{ class char-16 } :> decl
src perm vshuffle
src perm [ decl declare vshuffle ] compile-call
=
] call( -- ? ) ;
{ char-16 uchar-16 short-8 ushort-8 int-4 uint-4 longlong-2 ulonglong-2 }
[ 10 swap '[ [ t ] [ _ test-shift-vector ] unit-test ] times ] each
"== Checking vector tests" print
:: test-vector-tests-bool ( vector declaration -- none? any? all? )
[
vector
[ [ declaration declare vnone? ] compile-call ]
[ [ declaration declare vany? ] compile-call ]
[ [ declaration declare vall? ] compile-call ] tri
] call( -- none? any? all? ) ;
: yes ( -- x ) t ;
: no ( -- x ) f ;
:: test-vector-tests-branch ( vector declaration -- none? any? all? )
[
vector
[ [ declaration declare vnone? [ yes ] [ no ] if ] compile-call ]
[ [ declaration declare vany? [ yes ] [ no ] if ] compile-call ]
[ [ declaration declare vall? [ yes ] [ no ] if ] compile-call ] tri
] call( -- none? any? all? ) ;
TUPLE: inconsistent-vector-test bool branch ;
: ?inconsistent ( bool branch -- ?/inconsistent )
2dup = [ drop ] [ inconsistent-vector-test boa ] if ;
:: test-vector-tests ( vector decl -- none? any? all? )
[
vector decl test-vector-tests-bool :> ( bool-none bool-any bool-all )
vector decl test-vector-tests-branch :> ( branch-none branch-any branch-all )
bool-none branch-none ?inconsistent
bool-any branch-any ?inconsistent
bool-all branch-all ?inconsistent
] call( -- none? any? all? ) ;
{ f t t }
[ float-4{ t t t t } { float-4 } test-vector-tests ] unit-test
{ f t f }
[ float-4{ f t t t } { float-4 } test-vector-tests ] unit-test
{ t f f }
[ float-4{ f f f f } { float-4 } test-vector-tests ] unit-test
{ f t t }
[ double-2{ t t } { double-2 } test-vector-tests ] unit-test
{ f t f }
[ double-2{ f t } { double-2 } test-vector-tests ] unit-test
{ t f f }
[ double-2{ f f } { double-2 } test-vector-tests ] unit-test
{ f t t }
[ int-4{ t t t t } { int-4 } test-vector-tests ] unit-test
{ f t f }
[ int-4{ f t t t } { int-4 } test-vector-tests ] unit-test
{ t f f }
[ int-4{ f f f f } { int-4 } test-vector-tests ] unit-test
"== Checking element access" print
! Test element access -- it should box bignums for int-4 on x86
: test-accesses ( seq -- failures )
[ length <iota> dup [ >bignum ] map append ] keep
'[ [ _ 1quotation ] dip '[ _ swap nth ] ] [ = ] check-optimizer ; inline
{ { } } [ float-4{ 1.0 2.0 3.0 4.0 } test-accesses ] unit-test
{ { } } [ int-4{ 0x7fffffff 3 4 -8 } test-accesses ] unit-test
{ { } } [ uint-4{ 0xffffffff 2 3 4 } test-accesses ] unit-test
{ 0x7fffffff } [ int-4{ 0x7fffffff 3 4 -8 } first ] unit-test
{ -8 } [ int-4{ 0x7fffffff 3 4 -8 } last ] unit-test
{ 0xffffffff } [ uint-4{ 0xffffffff 2 3 4 } first ] unit-test
{ { } } [ double-2{ 1.0 2.0 } test-accesses ] unit-test
{ { } } [ longlong-2{ 1 2 } test-accesses ] unit-test
{ { } } [ ulonglong-2{ 1 2 } test-accesses ] unit-test
"== Checking broadcast" print
: test-broadcast ( seq -- failures )
[ length <iota> >array ] keep
'[ [ _ 1quotation ] dip '[ _ vbroadcast ] ] [ = ] check-optimizer ;
{ { } } [ float-4{ 1.0 2.0 3.0 4.0 } test-broadcast ] unit-test
{ { } } [ int-4{ 0x7fffffff 3 4 -8 } test-broadcast ] unit-test
{ { } } [ uint-4{ 0xffffffff 2 3 4 } test-broadcast ] unit-test
{ { } } [ double-2{ 1.0 2.0 } test-broadcast ] unit-test
{ { } } [ longlong-2{ 1 2 } test-broadcast ] unit-test
{ { } } [ ulonglong-2{ 1 2 } test-broadcast ] unit-test
! Make sure we use the fallback in the correct situations
{ int-4{ 3 3 3 3 } } [ int-4{ 12 34 3 17 } 2 [ { int-4 fixnum } declare vbroadcast ] compile-call ] unit-test
"== Checking alien operations" print
{ float-4{ 1 2 3 4 } } [
[
float-4{ 1 2 3 4 }
underlying>> 0 float-4-rep alien-vector
] compile-call float-4 boa
] unit-test
{ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } } [
16 [ 1 ] B{ } replicate-as 16 <byte-array>
[
0 [
{ byte-array c-ptr fixnum } declare
float-4-rep set-alien-vector
] compile-call
] keep
] unit-test
{ float-array{ 1 2 3 4 } } [
[
float-array{ 1 2 3 4 } underlying>>
float-array{ 4 3 2 1 } clone
[ underlying>> 0 float-4-rep set-alien-vector ] keep
] compile-call
] unit-test
STRUCT: simd-struct
{ x float-4 }
{ y longlong-2 }
{ z double-2 }
{ w int-4 } ;
{ t } [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
{
float-4{ 1 2 3 4 }
longlong-2{ 2 1 }
double-2{ 4 3 }
int-4{ 1 2 3 4 }
} [
simd-struct <struct>
float-4{ 1 2 3 4 } >>x
longlong-2{ 2 1 } >>y
double-2{ 4 3 } >>z
int-4{ 1 2 3 4 } >>w
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
] unit-test
{
float-4{ 1 2 3 4 }
longlong-2{ 2 1 }
double-2{ 4 3 }
int-4{ 1 2 3 4 }
} [
[
simd-struct <struct>
float-4{ 1 2 3 4 } >>x
longlong-2{ 2 1 } >>y
double-2{ 4 3 } >>z
int-4{ 1 2 3 4 } >>w
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
] compile-call
] unit-test
"== Misc tests" print
{ } [ char-16 new 1array stack. ] unit-test
! Test some sequence protocol stuff
{ t } [ 4 double-4{ 1 2 3 4 } new-sequence double-4? ] unit-test
{ double-4{ 2 3 4 5 } } [ double-4{ 1 2 3 4 } [ 1 + ] map ] unit-test
! Test cross product
{ float-4{ 0.0 0.0 1.0 0.0 } } [ float-4{ 1.0 0.0 0.0 0.0 } float-4{ 0.0 1.0 0.0 0.0 } cross ] unit-test
{ float-4{ 0.0 0.0 1.0 0.0 } } [ float-4{ 1.0 0.0 0.0 0.0 } float-4{ 0.0 1.0 0.0 0.0 } [ { float-4 float-4 } declare cross ] compile-call ] unit-test
{ float-4{ 0.0 -1.0 0.0 0.0 } } [ float-4{ 1.0 0.0 0.0 0.0 } float-4{ 0.0 0.0 1.0 0.0 } cross ] unit-test
{ float-4{ 0.0 -1.0 0.0 0.0 } } [ float-4{ 1.0 0.0 0.0 0.0 } float-4{ 0.0 0.0 1.0 0.0 } [ { float-4 float-4 } declare cross ] compile-call ] unit-test
{ double-4{ 0.0 0.0 1.0 0.0 } } [ double-4{ 1.0 0.0 0.0 0.0 } double-4{ 0.0 1.0 0.0 0.0 } cross ] unit-test
{ double-4{ 0.0 0.0 1.0 0.0 } } [ double-4{ 1.0 0.0 0.0 0.0 } double-4{ 0.0 1.0 0.0 0.0 } [ { double-4 double-4 } declare cross ] compile-call ] unit-test
{ double-4{ 0.0 -1.0 0.0 0.0 } } [ double-4{ 1.0 0.0 0.0 0.0 } double-4{ 0.0 0.0 1.0 0.0 } cross ] unit-test
{ double-4{ 0.0 -1.0 0.0 0.0 } } [ double-4{ 1.0 0.0 0.0 0.0 } double-4{ 0.0 0.0 1.0 0.0 } [ { double-4 double-4 } declare cross ] compile-call ] unit-test
! CSSA bug
{ 4000000 } [
int-4{ 1000 1000 1000 1000 }
[ { int-4 } declare dup [ * ] [ + ] 2map-reduce ] compile-call
] unit-test
! Coalescing was too aggressive
:: broken ( axis theta -- a b c )
axis { float-4 } declare drop
theta { float } declare drop
theta cos float-4-with :> cc
theta sin float-4-with :> ss
axis cc v+ :> diagonal
diagonal cc ss ; inline
{ t } [
float-4{ 1.0 0.0 1.0 0.0 } pi [ broken 3array ]
[ compile-call ] [ call ] 3bi =
] unit-test
! Spilling SIMD values -- this basically just tests that the
! stack was aligned properly by the runtime
: simd-spill-test-1 ( a b c -- v )
{ float-4 float-4 float } declare
[ v+ ] dip sin v*n ;
{ float-4{ 0 0 0 0 } }
[ float-4{ 1 2 3 4 } float-4{ 4 5 6 7 } 0.0 simd-spill-test-1 ] unit-test
: simd-spill-test-2 ( a b d c -- v )
{ float float-4 float-4 float } declare
[ [ 3.0 + ] 2dip v+ ] dip sin v*n n*v ;
{ float-4{ 0 0 0 0 } }
[ 5.0 float-4{ 1 2 3 4 } float-4{ 4 5 6 7 } 0.0 simd-spill-test-2 ] unit-test
: callback-1 ( -- c )
c:int { c:int c:int c:int c:int c:int } cdecl [ + + + + ] alien-callback ;
: indirect-1 ( x x x x x c -- y )
c:int { c:int c:int c:int c:int c:int } cdecl alien-indirect ; inline
: simd-spill-test-3 ( a b d c -- v )
{ float float-4 float-4 float } declare
[ [ 3.0 + ] 2dip v+ ] dip sin v*n n*v
10 5 100 50 500 callback-1 indirect-1 665 assert= ;
{ float-4{ 0 0 0 0 } }
[ 5.0 float-4{ 1 2 3 4 } float-4{ 4 5 6 7 } 0.0 simd-spill-test-3 ] unit-test
! Stack allocation of SIMD values -- make sure that everything is
! aligned right
: simd-stack-test ( -- b c )
{ c:int float-4 } [
[ 123 swap 0 c:int c:set-alien-value ]
[ float-4{ 1 2 3 4 } swap 0 float-4 c:set-alien-value ] bi*
] with-out-parameters ;
{ 123 float-4{ 1 2 3 4 } } [ simd-stack-test ] unit-test
! Stack allocation + spilling
: (simd-stack-spill-test) ( -- n ) 17 ;
: simd-stack-spill-test ( x -- b c )
{ c:int } [
123 swap 0 c:int c:set-alien-value
>float (simd-stack-spill-test) float-4-with swap cos v*n
] with-out-parameters ;
{ } [
1.047197551196598 simd-stack-spill-test
[ float-4{ 8.5 8.5 8.5 8.5 } approx= t assert= ]
[ 123 assert= ]
bi*
] unit-test
! #1308
: test-1308 ( a b -- c )
{ double-4 double-4 } declare
v+ dup first 10 > [ first ] [ third ] if 1array ;
! Before the fix, this evaluated to an uninitialized value.
{ 33.0 } [
double-4{ 2 20 30 40 } double-4{ 2 4 3 2 } test-1308 first
] unit-test

View File

@ -1,373 +0,0 @@
USING: accessors alien arrays byte-arrays classes combinators
combinators.smart.syntax cpu.architecture effects fry functors
generalizations generic generic.parser kernel lexer literals
locals macros math math.bitwise math.functions math.vectors
math.vectors.private math.vectors.simd.intrinsics namespaces
parser prettyprint.custom quotations sequences
sequences.generalizations sequences.private vocabs vocabs.loader
words ;
QUALIFIED-WITH: alien.c-types c
IN: math.vectors.simd
ERROR: bad-simd-length got expected ;
ERROR: bad-simd-vector obj ;
<<
<PRIVATE
! Primitive SIMD constructors
GENERIC: new-underlying ( underlying seq -- seq' )
: make-underlying ( seq quot -- seq' )
dip new-underlying ; inline
: change-underlying ( seq quot -- seq' )
'[ underlying>> @ ] keep new-underlying ; inline
PRIVATE>
>>
<PRIVATE
! Helper for boolean vector literals
: vector-true-value ( class -- value )
{ c:float c:double } member? [ -1 bits>double ] [ -1 ] if ; foldable
: vector-false-value ( type -- value )
{ c:float c:double } member? [ 0.0 ] [ 0 ] if ; foldable
: boolean>element ( bool/elt type -- elt )
swap {
{ t [ vector-true-value ] }
{ f [ vector-false-value ] }
[ nip ]
} case ; inline
PRIVATE>
! SIMD base type
TUPLE: simd-128
{ underlying byte-array read-only initial: 1[ 16 <byte-array> ] } ;
GENERIC: simd-element-type ( obj -- c-type )
GENERIC: simd-rep ( simd -- rep )
GENERIC: simd-with ( n exemplar -- v )
M: object simd-element-type drop f ;
M: object simd-rep drop f ;
<<
<PRIVATE
DEFER: simd-construct-op
! Unboxers for SIMD operations
: if-both-vectors ( a b rep t f -- )
[ 2over [ simd-128? ] both? ] 2dip if ; inline
: if-both-vectors-match ( a b rep t f -- )
[ 3dup [ drop [ simd-128? ] both? ] [ '[ simd-rep _ eq? ] both? ] 3bi and ]
2dip if ; inline
: simd-unbox ( a -- a (a) )
[ ] [ underlying>> ] bi ; inline
: v->v-op ( a rep quot: ( (a) rep -- (c) ) fallback-quot -- c )
drop [ simd-unbox ] 2dip 2curry make-underlying ; inline
: vx->v-op ( a obj rep quot: ( (a) obj rep -- (c) ) fallback-quot -- c )
drop [ simd-unbox ] 3dip 3curry make-underlying ; inline
: vn->v-op ( a n rep quot: ( (a) n rep -- (c) ) fallback-quot -- c )
drop [ [ simd-unbox ] [ >fixnum ] bi* ] 2dip 3curry make-underlying ; inline
: vx->x-op ( a obj rep quot: ( (a) obj rep -- obj ) fallback-quot -- obj )
drop [ underlying>> ] 3dip call ; inline
: v->x-op ( a rep quot: ( (a) rep -- obj ) fallback-quot -- obj )
drop [ underlying>> ] 2dip call ; inline
: (vv->v-op) ( a b rep quot: ( (a) (b) rep -- (c) ) -- c )
[ [ simd-unbox ] [ underlying>> ] bi* ] 2dip 3curry make-underlying ; inline
: (vv->x-op) ( a b rep quot: ( (a) (b) rep -- n ) -- n )
[ [ underlying>> ] bi@ ] 2dip 3curry call ; inline
: (vvx->v-op) ( a b obj rep quot: ( (a) (b) obj rep -- (c) ) -- c )
[ [ simd-unbox ] [ underlying>> ] bi* ] 3dip 2curry 2curry make-underlying ; inline
: vv->v-op ( a b rep quot: ( (a) (b) rep -- (c) ) fallback-quot -- c )
[ '[ _ (vv->v-op) ] ] [ '[ drop @ ] ] bi* if-both-vectors-match ; inline
:: vvx->v-op ( a b obj rep quot: ( (a) (b) obj rep -- (c) ) fallback-quot -- c )
a b rep
[ obj swap quot (vvx->v-op) ]
[ drop obj fallback-quot call ] if-both-vectors-match ; inline
: vv'->v-op ( a b rep quot: ( (a) (b) rep -- (c) ) fallback-quot -- c )
[ '[ _ (vv->v-op) ] ] [ '[ drop @ ] ] bi* if-both-vectors ; inline
: vv->x-op ( a b rep quot: ( (a) (b) rep -- obj ) fallback-quot -- obj )
[ '[ _ (vv->x-op) ] ] [ '[ drop @ ] ] bi* if-both-vectors-match ; inline
: mask>count ( n rep -- n' )
[ bit-count ] dip {
{ float-4-rep [ ] }
{ double-2-rep [ -1 shift ] }
{ uchar-16-rep [ ] }
{ char-16-rep [ ] }
{ ushort-8-rep [ -1 shift ] }
{ short-8-rep [ -1 shift ] }
{ ushort-8-rep [ -1 shift ] }
{ int-4-rep [ -2 shift ] }
{ uint-4-rep [ -2 shift ] }
{ longlong-2-rep [ -3 shift ] }
{ ulonglong-2-rep [ -3 shift ] }
} case ; inline
PRIVATE>
>>
<<
! SIMD vectors as sequences
M: simd-128 hashcode* underlying>> hashcode* ; inline
M: simd-128 clone [ clone ] change-underlying ; inline
M: simd-128 byte-length drop 16 ; inline
M: simd-128 new-sequence
2dup length =
[ nip [ 16 (byte-array) ] make-underlying ]
[ length bad-simd-length ] if ; inline
M: simd-128 equal?
dup simd-rep [ drop v= vall? ] [ 3drop f ] if-both-vectors-match ; inline
! SIMD primitive operations
M: simd-128 v+
dup simd-rep [ (simd-v+) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 v-
dup simd-rep [ (simd-v-) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 vneg
dup simd-rep [ (simd-vneg) ] [ call-next-method ] v->v-op ; inline
M: simd-128 v+-
dup simd-rep [ (simd-v+-) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 vs+
dup simd-rep [ (simd-vs+) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 vs-
dup simd-rep [ (simd-vs-) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 vs*
dup simd-rep [ (simd-vs*) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 v*
dup simd-rep [ (simd-v*) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 v*high
dup simd-rep [ (simd-v*high) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 v/
dup simd-rep [ (simd-v/) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 vavg
dup simd-rep [ (simd-vavg) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 vmin
dup simd-rep [ (simd-vmin) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 vmax
dup simd-rep [ (simd-vmax) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 v.
dup simd-rep [ (simd-v.) ] [ call-next-method ] vv->x-op ; inline
M: simd-128 vsad
dup simd-rep [ (simd-vsad) ] [ call-next-method ] vv->x-op ; inline
M: simd-128 vsqrt
dup simd-rep [ (simd-vsqrt) ] [ call-next-method ] v->v-op ; inline
M: simd-128 sum
dup simd-rep [ (simd-sum) ] [ call-next-method ] v->x-op ; inline
M: simd-128 vabs
dup simd-rep [ (simd-vabs) ] [ call-next-method ] v->v-op ; inline
M: simd-128 vbitand
dup simd-rep [ (simd-vbitand) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 vbitandn
dup simd-rep [ (simd-vbitandn) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 vbitor
dup simd-rep [ (simd-vbitor) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 vbitxor
dup simd-rep [ (simd-vbitxor) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 vbitnot
dup simd-rep [ (simd-vbitnot) ] [ call-next-method ] v->v-op ; inline
M: simd-128 vand
dup simd-rep [ (simd-vand) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 vandn
dup simd-rep [ (simd-vandn) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 vor
dup simd-rep [ (simd-vor) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 vxor
dup simd-rep [ (simd-vxor) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 vnot
dup simd-rep [ (simd-vnot) ] [ call-next-method ] v->v-op ; inline
M: simd-128 vlshift
over simd-rep [ (simd-vlshift) ] [ call-next-method ] vn->v-op ; inline
M: simd-128 vrshift
over simd-rep [ (simd-vrshift) ] [ call-next-method ] vn->v-op ; inline
M: simd-128 hlshift
over simd-rep [ (simd-hlshift) ] [ call-next-method ] vn->v-op ; inline
M: simd-128 hrshift
over simd-rep [ (simd-hrshift) ] [ call-next-method ] vn->v-op ; inline
M: simd-128 vshuffle-elements
over simd-rep [ (simd-vshuffle-elements) ] [ call-next-method ] vx->v-op ; inline
M: simd-128 vshuffle2-elements
over simd-rep [ (simd-vshuffle2-elements) ] [ call-next-method ] vvx->v-op ; inline
M: simd-128 vshuffle-bytes
dup simd-rep [ (simd-vshuffle-bytes) ] [ call-next-method ] vv'->v-op ; inline
M: simd-128 (vmerge-head)
dup simd-rep [ (simd-vmerge-head) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 (vmerge-tail)
dup simd-rep [ (simd-vmerge-tail) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 v<=
dup simd-rep [ (simd-v<=) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 v<
dup simd-rep [ (simd-v<) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 v=
dup simd-rep [ (simd-v=) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 v>
dup simd-rep [ (simd-v>) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 v>=
dup simd-rep [ (simd-v>=) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 vunordered?
dup simd-rep [ (simd-vunordered?) ] [ call-next-method ] vv->v-op ; inline
M: simd-128 vany?
dup simd-rep [ (simd-vany?) ] [ call-next-method ] v->x-op ; inline
M: simd-128 vall?
dup simd-rep [ (simd-vall?) ] [ call-next-method ] v->x-op ; inline
M: simd-128 vnone?
dup simd-rep [ (simd-vnone?) ] [ call-next-method ] v->x-op ; inline
M: simd-128 vcount
dup simd-rep
[ [ (simd-vgetmask) assert-positive ] [ call-next-method ] v->x-op ]
[ mask>count ] bi ; inline
! SIMD high-level specializations
M: simd-128 vbroadcast swap [ nth ] [ simd-with ] bi ; inline
M: simd-128 n+v [ simd-with ] keep v+ ; inline
M: simd-128 n-v [ simd-with ] keep v- ; inline
M: simd-128 n*v [ simd-with ] keep v* ; inline
M: simd-128 n/v [ simd-with ] keep v/ ; inline
M: simd-128 v+n over simd-with v+ ; inline
M: simd-128 v-n over simd-with v- ; inline
M: simd-128 v*n over simd-with v* ; inline
M: simd-128 v/n over simd-with v/ ; inline
M: simd-128 norm-sq dup v. assert-positive ; inline
M: simd-128 distance v- norm ; inline
M: simd-128 >pprint-sequence ;
M: simd-128 pprint* pprint-object ;
<PRIVATE
! SIMD concrete type functor
<FUNCTOR: define-simd-128 ( T -- )
A DEFINES-CLASS ${T}
A-rep IS ${T}-rep
>A DEFINES >${T}
A-boa DEFINES ${T}-boa
A-with DEFINES ${T}-with
A-cast DEFINES ${T}-cast
A{ DEFINES ${T}{
ELT [ A-rep rep-component-type ]
N [ A-rep rep-length ]
COERCER [ ELT c:c-type-class "coercer" word-prop [ ] or ]
BOA-EFFECT [ N "n" <array> { "v" } <effect> ]
WHERE
TUPLE: A < simd-128 ; final
M: A new-underlying drop \ A boa ; inline
M: A simd-rep drop A-rep ; inline
M: A simd-element-type drop ELT ; inline
M: A simd-with drop A-with ; inline
M: A nth-unsafe
swap \ A-rep [ (simd-select) ] [ call-next-method ] vx->x-op ; inline
M: A set-nth-unsafe
[ ELT boolean>element ] 2dip
underlying>> ELT c:set-alien-element ; inline
: >A ( seq -- simd ) \ A new clone-like ; inline
M: A like drop dup \ A instance? [ >A ] unless ; inline
: A-with ( n -- v ) COERCER call \ A-rep (simd-with) \ A boa ; inline
: A-cast ( v -- v' ) underlying>> \ A boa ; inline
M: A length drop N ; inline
\ A-boa
[ COERCER N napply ] N {
{ 2 [ [ A-rep (simd-gather-2) A boa ] ] }
{ 4 [ [ A-rep (simd-gather-4) A boa ] ] }
[ \ A new '[ _ _ nsequence ] ]
} case compose
BOA-EFFECT define-inline
M: A pprint-delims drop \ A{ \ } ;
SYNTAX: A{ \ } [ >A ] parse-literal ;
INSTANCE: A sequence
c:<c-type>
byte-array >>class
A >>boxed-class
{ A-rep alien-vector A boa } >quotation >>getter
{
[ dup simd-128? [ bad-simd-vector ] unless underlying>> ] 2dip
A-rep set-alien-vector
} >quotation >>setter
16 >>size
16 >>align
A-rep >>rep
\ A c:typedef
;FUNCTOR>
SYNTAX: \SIMD-128:
scan-token define-simd-128 ;
PRIVATE>
>>
! SIMD instances
SIMD-128: char-16
SIMD-128: uchar-16
SIMD-128: short-8
SIMD-128: ushort-8
SIMD-128: int-4
SIMD-128: uint-4
SIMD-128: longlong-2
SIMD-128: ulonglong-2
SIMD-128: float-4
SIMD-128: double-2
! misc
M: simd-128 vshuffle ( u perm -- v )
vshuffle-bytes ; inline
M: uchar-16 v*hs+
uchar-16-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op short-8-cast ; inline
M: ushort-8 v*hs+
ushort-8-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op uint-4-cast ; inline
M: uint-4 v*hs+
uint-4-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op ulonglong-2-cast ; inline
M: char-16 v*hs+
char-16-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op short-8-cast ; inline
M: short-8 v*hs+
short-8-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op int-4-cast ; inline
M: int-4 v*hs+
int-4-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op longlong-2-cast ; inline
{ "math.vectors.simd" "mirrors" } "math.vectors.simd.mirrors" require-when

View File

@ -1 +0,0 @@
Single-instruction-multiple-data parallel vector operations

View File

@ -1 +0,0 @@
Slava Pestov