More integer SIMD work

- move generated vocab support from specialized-arrays to vocabs.generated
- add fuzz testing to math.vectors.simd
- add alien type support for integer SIMD vectors
- SIMD: parsing word generates a SIMD type, instead of pre-generating them all in math.vectors.simd
Slava Pestov 2009-09-20 16:48:17 -05:00
parent ea2bcd69c7
commit dfb43bd2ca
15 changed files with 346 additions and 501 deletions

View File

@ -24,6 +24,7 @@ IN: compiler.tree.propagation.simd
literal>> scalar-rep-of { literal>> scalar-rep-of {
{ float-rep [ float ] } { float-rep [ float ] }
{ double-rep [ float ] } { double-rep [ float ] }
{ int-rep [ integer ] }
} case } case
] [ drop real ] if ] [ drop real ] if
<class-info> <class-info>

View File

@ -417,8 +417,7 @@ M: x86 %horizontal-add-vector ( dst src rep -- )
M: x86 %horizontal-add-vector-reps M: x86 %horizontal-add-vector-reps
{ {
{ sse? { float-4-rep } } { sse3? { float-4-rep double-2-rep } }
{ sse2? { double-2-rep short-8-rep uchar-16-rep } }
} available-reps ; } available-reps ;
M: x86 %unbox-alien ( dst src -- ) M: x86 %unbox-alien ( dst src -- )

View File

@ -1,70 +0,0 @@
USING: cpu.architecture math.vectors.simd
math.vectors.simd.intrinsics accessors math.vectors.simd.alien
kernel classes.struct tools.test compiler sequences byte-arrays
alien math kernel.private specialized-arrays combinators ;
SPECIALIZED-ARRAY: float
IN: math.vectors.simd.alien.tests
! Vector alien intrinsics
[ float-4{ 1 2 3 4 } ] [
[
float-4{ 1 2 3 4 }
underlying>> 0 float-4-rep alien-vector
] compile-call float-4 boa
] unit-test
[ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [
16 [ 1 ] B{ } replicate-as 16 <byte-array>
[
0 [
{ byte-array c-ptr fixnum } declare
float-4-rep set-alien-vector
] compile-call
] keep
] unit-test
[ float-array{ 1 2 3 4 } ] [
[
float-array{ 1 2 3 4 } underlying>>
float-array{ 4 3 2 1 } clone
[ underlying>> 0 float-4-rep set-alien-vector ] keep
] compile-call
] unit-test
STRUCT: simd-struct
{ x float-4 }
{ y double-2 }
{ z double-4 }
{ w float-8 } ;
[ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
[
float-4{ 1 2 3 4 }
double-2{ 2 1 }
double-4{ 4 3 2 1 }
float-8{ 1 2 3 4 5 6 7 8 }
] [
simd-struct <struct>
float-4{ 1 2 3 4 } >>x
double-2{ 2 1 } >>y
double-4{ 4 3 2 1 } >>z
float-8{ 1 2 3 4 5 6 7 8 } >>w
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
] unit-test
[
float-4{ 1 2 3 4 }
double-2{ 2 1 }
double-4{ 4 3 2 1 }
float-8{ 1 2 3 4 5 6 7 8 }
] [
[
simd-struct <struct>
float-4{ 1 2 3 4 } >>x
double-2{ 2 1 } >>y
double-4{ 4 3 2 1 } >>z
float-8{ 1 2 3 4 5 6 7 8 } >>w
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
] compile-call
] unit-test

View File

@ -1,42 +0,0 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: alien accessors alien.c-types byte-arrays compiler.units
cpu.architecture locals kernel math math.vectors.simd
math.vectors.simd.intrinsics ;
IN: math.vectors.simd.alien
:: define-simd-128-type ( class rep -- )
<c-type>
byte-array >>class
class >>boxed-class
[ rep alien-vector class boa ] >>getter
[ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
16 >>size
8 >>align
rep >>rep
class name>> typedef ;
:: define-simd-256-type ( class rep -- )
<c-type>
class >>class
class >>boxed-class
[
[ rep alien-vector ]
[ 16 + >fixnum rep alien-vector ] 2bi
class boa
] >>getter
[
[ [ underlying1>> ] 2dip rep set-alien-vector ]
[ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ]
3bi
] >>setter
32 >>size
8 >>align
rep >>rep
class name>> typedef ;
[
float-4 float-4-rep define-simd-128-type
double-2 double-2-rep define-simd-128-type
float-8 float-4-rep define-simd-256-type
double-4 double-2-rep define-simd-256-type
] with-compilation-unit

View File

@ -5,6 +5,7 @@ effects fry functors generalizations kernel literals locals
math math.functions math.vectors math.vectors.simd.intrinsics math math.functions math.vectors math.vectors.simd.intrinsics
math.vectors.specialization parser prettyprint.custom sequences math.vectors.specialization parser prettyprint.custom sequences
sequences.private strings words definitions macros cpu.architecture ; sequences.private strings words definitions macros cpu.architecture ;
QUALIFIED-WITH: math m
IN: math.vectors.simd.functor IN: math.vectors.simd.functor
ERROR: bad-length got expected ; ERROR: bad-length got expected ;
@ -51,11 +52,10 @@ MACRO: simd-boa ( rep class -- simd-array )
'[ nip _ swap supported-simd-op? ] assoc-filter '[ nip _ swap supported-simd-op? ] assoc-filter
'[ drop _ key? ] assoc-filter ; '[ drop _ key? ] assoc-filter ;
:: high-level-ops ( ctor -- assoc ) :: high-level-ops ( ctor elt-class -- assoc )
! Some SIMD operations are defined in terms of others. ! Some SIMD operations are defined in terms of others.
{ {
{ vneg [ [ dup v- ] keep v- ] } { vneg [ [ dup v- ] keep v- ] }
{ v. [ v* sum ] }
{ n+v [ [ ctor execute ] dip v+ ] } { n+v [ [ ctor execute ] dip v+ ] }
{ v+n [ ctor execute v+ ] } { v+n [ ctor execute v+ ] }
{ n-v [ [ ctor execute ] dip v- ] } { n-v [ [ ctor execute ] dip v- ] }
@ -67,16 +67,36 @@ MACRO: simd-boa ( rep class -- simd-array )
{ norm-sq [ dup v. assert-positive ] } { norm-sq [ dup v. assert-positive ] }
{ norm [ norm-sq sqrt ] } { norm [ norm-sq sqrt ] }
{ normalize [ dup norm v/n ] } { normalize [ dup norm v/n ] }
{ distance [ v- norm ] } }
} ; ! To compute dot product and distance with integer vectors, we
! have to do things less efficiently, with integer overflow checks,
! in the general case.
elt-class m:float = [
{
{ distance [ v- norm ] }
{ v. [ v* sum ] }
} append
] when ;
:: simd-vector-words ( class ctor rep assoc -- ) :: simd-vector-words ( class ctor rep assoc -- )
rep rep-component-type c-type-boxed-class :> elt-class
class class
rep rep-component-type c-type-boxed-class elt-class
assoc rep supported-simd-ops assoc rep supported-simd-ops
ctor high-level-ops assoc-union ctor elt-class high-level-ops assoc-union
specialize-vector-words ; specialize-vector-words ;
:: define-simd-128-type ( class rep -- )
<c-type>
byte-array >>class
class >>boxed-class
[ rep alien-vector class boa ] >>getter
[ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
16 >>size
8 >>align
rep >>rep
class typedef ;
FUNCTOR: define-simd-128 ( T -- ) FUNCTOR: define-simd-128 ( T -- )
N [ 16 T heap-size /i ] N [ 16 T heap-size /i ]
@ -159,11 +179,35 @@ INSTANCE: A sequence
{ sum [ [ (simd-sum) ] \ A-v->n-op execute ] } { sum [ [ (simd-sum) ] \ A-v->n-op execute ] }
} simd-vector-words } simd-vector-words
\ A \ A-rep define-simd-128-type
PRIVATE> PRIVATE>
;FUNCTOR ;FUNCTOR
! Synthesize 256-bit vectors from a pair of 128-bit vectors ! Synthesize 256-bit vectors from a pair of 128-bit vectors
SLOT: underlying1
SLOT: underlying2
:: define-simd-256-type ( class rep -- )
<c-type>
class >>class
class >>boxed-class
[
[ rep alien-vector ]
[ 16 + >fixnum rep alien-vector ] 2bi
class boa
] >>getter
[
[ [ underlying1>> ] 2dip rep set-alien-vector ]
[ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ]
3bi
] >>setter
32 >>size
8 >>align
rep >>rep
class typedef ;
FUNCTOR: define-simd-256 ( T -- ) FUNCTOR: define-simd-256 ( T -- )
N [ 32 T heap-size /i ] N [ 32 T heap-size /i ]
@ -235,7 +279,7 @@ M: A pprint* pprint-object ;
: A-boa ( ... -- simd-array ) : A-boa ( ... -- simd-array )
[ A/2-boa ] N/2 ndip A/2-boa [ underlying>> ] bi@ [ A/2-boa ] N/2 ndip A/2-boa [ underlying>> ] bi@
\ A boa ; \ A boa ; inline
\ A-rep 2 boa-effect \ A-boa set-stack-effect \ A-rep 2 boa-effect \ A-boa set-stack-effect
@ -260,4 +304,6 @@ INSTANCE: A sequence
{ sum [ [ (simd-v+) ] [ (simd-sum) ] \ A-v->n-op execute ] } { sum [ [ (simd-v+) ] [ (simd-sum) ] \ A-v->n-op execute ] }
} simd-vector-words } simd-vector-words
\ A \ A-rep define-simd-256-type
;FUNCTOR ;FUNCTOR

View File

@ -17,23 +17,45 @@ $nl
"There should never be any reason to use " { $link "math.vectors.simd.intrinsics" } " directly, but they too have a straightforward, but lower-level, interface." ; "There should never be any reason to use " { $link "math.vectors.simd.intrinsics" } " directly, but they too have a straightforward, but lower-level, interface." ;
ARTICLE: "math.vectors.simd.support" "Supported SIMD instruction sets and operations" ARTICLE: "math.vectors.simd.support" "Supported SIMD instruction sets and operations"
"At present, the SIMD support makes use of SSE2 and a few SSE3 instructions on x86 CPUs." "At present, the SIMD support makes use of SSE, SSE2 and a few SSE3 instructions on x86 CPUs."
$nl $nl
"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } ". If SSE3 is not available, software fallbacks are used for " { $link sum } " and related words, decreasing performance." "SSE1 only supports single-precision SIMD (" { $snippet "float-4" } " and " { $snippet "float-8" } ")."
$nl $nl
"On PowerPC, or older x86 chips without SSE2, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance." "SSE2 introduces double-precision and integer SIMD."
$nl
"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } ". If SSE3 is not available, software fallbacks are used for " { $link sum } " and related words."
$nl
"On PowerPC, or older x86 chips without SSE, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance."
$nl $nl
"The primities in the " { $vocab-link "math.vectors.simd.intrinsics" } " vocabulary do not have software fallbacks, but they should not be called directly in any case." ; "The primities in the " { $vocab-link "math.vectors.simd.intrinsics" } " vocabulary do not have software fallbacks, but they should not be called directly in any case." ;
ARTICLE: "math.vectors.simd.types" "SIMD vector types" ARTICLE: "math.vectors.simd.types" "SIMD vector types"
"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type such as " { $snippet "float" } " or " { $snippet "double" } ", and " { $snippet "count" } " is a vector dimension, such as 2, 4, or 8." "Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type and " { $snippet "count" } " is a vector dimension."
$nl $nl
"The following vector types are defined:" "To use a SIMD vector type, a parsing word is used to generate the relevant code and bring it into the vocabulary search path; this is the same idea as with " { $link "specialized-arrays" } ":"
{ $subsection float-4 } { $subsection POSTPONE: SIMD: }
{ $subsection double-2 } "The following vector types are supported:"
{ $subsection float-8 } { $code
{ $subsection double-4 } "char-16"
"For each vector type, several words are defined:" "uchar-16"
"char-32"
"uchar-32"
"short-8"
"ushort-8"
"short-16"
"ushort-16"
"int-4"
"uint-4"
"int-8"
"uint-8"
"float-4"
"float-8"
"double-2"
"double-4"
} ;
ARTICLE: "math.vectors.simd.words" "SIMD vector words"
"For each SIMD vector type, several words are defined:"
{ $table { $table
{ "Word" "Stack effect" "Description" } { "Word" "Stack effect" "Description" }
{ { $snippet "type-with" } { $snippet "( x -- simd-array )" } "creates a new instance where all components are set to a single scalar" } { { $snippet "type-with" } { $snippet "( x -- simd-array )" } "creates a new instance where all components are set to a single scalar" }
@ -41,8 +63,6 @@ $nl
{ { $snippet ">type" } { $snippet "( seq -- simd-array )" } "creates a new instance initialized with the elements of an existing sequence, which must have the correct length" } { { $snippet ">type" } { $snippet "( seq -- simd-array )" } "creates a new instance initialized with the elements of an existing sequence, which must have the correct length" }
{ { $snippet "type{" } { $snippet "type{ elements... }" } "parsing word defining literal syntax for an SIMD vector; the correct number of elements must be given" } { { $snippet "type{" } { $snippet "type{ elements... }" } "parsing word defining literal syntax for an SIMD vector; the correct number of elements must be given" }
} }
"The " { $link float-4 } " and " { $link double-2 } " types correspond to 128-bit vector registers. The " { $link float-8 } " and " { $link double-4 } " types are not directly supported in hardware, and instead unbox to a pair of 128-bit vector registers."
$nl
"To actually perform vector arithmetic on SIMD vectors, use " { $link "math-vectors" } " words." "To actually perform vector arithmetic on SIMD vectors, use " { $link "math-vectors" } " words."
{ $see-also "c-types-specs" } ; { $see-also "c-types-specs" } ;
@ -68,6 +88,8 @@ SYMBOLS: x y ;
{ $code { $code
<" USING: compiler.tree.debugger kernel.private <" USING: compiler.tree.debugger kernel.private
math.vectors math.vectors.simd ; math.vectors math.vectors.simd ;
SIMD: float-4
IN: simd-demo
: interpolate ( v a b -- w ) : interpolate ( v a b -- w )
{ float-4 float-4 float-4 } declare { float-4 float-4 float-4 } declare
@ -80,6 +102,8 @@ $nl
{ $code { $code
<" USING: compiler.tree.debugger hints <" USING: compiler.tree.debugger hints
math.vectors math.vectors.simd ; math.vectors math.vectors.simd ;
SIMD: float-4
IN: simd-demo
: interpolate ( v a b -- w ) : interpolate ( v a b -- w )
[ v* ] [ [ 1.0 ] dip n-v v* ] bi-curry* bi v+ ; [ v* ] [ [ 1.0 ] dip n-v v* ] bi-curry* bi v+ ;
@ -94,6 +118,7 @@ $nl
"In the " { $snippet "interpolate" } " word, there is still a call to the " { $link <tuple-boa> } " primitive, because the return value at the end is being boxed on the heap. In the next example, no memory allocation occurs at all because the SIMD vectors are stored inside a struct class (see " { $link "classes.struct" } "); also note the use of inlining:" "In the " { $snippet "interpolate" } " word, there is still a call to the " { $link <tuple-boa> } " primitive, because the return value at the end is being boxed on the heap. In the next example, no memory allocation occurs at all because the SIMD vectors are stored inside a struct class (see " { $link "classes.struct" } "); also note the use of inlining:"
{ $code { $code
<" USING: compiler.tree.debugger math.vectors math.vectors.simd ; <" USING: compiler.tree.debugger math.vectors math.vectors.simd ;
SIMD: float-4
IN: simd-demo IN: simd-demo
STRUCT: actor STRUCT: actor
@ -151,21 +176,23 @@ $nl
"For the most part, the above primitives correspond directly to vector arithmetic words. They take a representation parameter, which is one of the singleton members of the " { $link vector-rep } " union in the " { $vocab-link "cpu.architecture" } " vocabulary." ; "For the most part, the above primitives correspond directly to vector arithmetic words. They take a representation parameter, which is one of the singleton members of the " { $link vector-rep } " union in the " { $vocab-link "cpu.architecture" } " vocabulary." ;
ARTICLE: "math.vectors.simd.alien" "SIMD data in struct classes" ARTICLE: "math.vectors.simd.alien" "SIMD data in struct classes"
"Struct classes may contain fields which store SIMD data; use one of the following C type names:" "Struct classes may contain fields which store SIMD data; for each SIMD vector type listed in " { $snippet "math.vectors.simd.types" } " there is a C type with the same name."
{ $code $nl
<" float-4 "Only SIMD struct fields are allowed at the moment; passing SIMD data as function parameters is not yet supported." ;
double-2
float-8
double-4"> }
"Passing SIMD data as function parameters is not yet supported." ;
ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)" ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
"The " { $vocab-link "math.vectors.simd" } " vocabulary extends the " { $vocab-link "math.vectors" } " vocabulary to support efficient vector arithmetic on small, fixed-size vectors." "The " { $vocab-link "math.vectors.simd" } " vocabulary extends the " { $vocab-link "math.vectors" } " vocabulary to support efficient vector arithmetic on small, fixed-size vectors."
{ $subsection "math.vectors.simd.intro" } { $subsection "math.vectors.simd.intro" }
{ $subsection "math.vectors.simd.types" } { $subsection "math.vectors.simd.types" }
{ $subsection "math.vectors.simd.words" }
{ $subsection "math.vectors.simd.support" } { $subsection "math.vectors.simd.support" }
{ $subsection "math.vectors.simd.efficiency" } { $subsection "math.vectors.simd.efficiency" }
{ $subsection "math.vectors.simd.alien" } { $subsection "math.vectors.simd.alien" }
{ $subsection "math.vectors.simd.intrinsics" } ; { $subsection "math.vectors.simd.intrinsics" } ;
HELP: SIMD:
{ $syntax "SIMD: type-length" }
{ $values { "type" "a scalar C type" } { "length" "a vector dimension" } }
{ $description "Brings a SIMD array for holding " { $snippet "length" } " values of " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
ABOUT: "math.vectors.simd" ABOUT: "math.vectors.simd"

View File

@ -1,8 +1,30 @@
USING: accessors arrays classes compiler compiler.tree.debugger
effects fry io kernel kernel.private math math.functions
math.private math.vectors math.vectors.simd
math.vectors.simd.private prettyprint random sequences system
tools.test vocabs assocs compiler.cfg.debugger words
locals math.vectors.specialization combinators cpu.architecture
math.vectors.simd.intrinsics namespaces byte-arrays alien
specialized-arrays classes.struct ;
FROM: alien.c-types => c-type-boxed-class ;
SPECIALIZED-ARRAY: float
SIMD: char-16
SIMD: uchar-16
SIMD: char-32
SIMD: uchar-32
SIMD: short-8
SIMD: ushort-8
SIMD: short-16
SIMD: ushort-16
SIMD: int-4
SIMD: uint-4
SIMD: int-8
SIMD: uint-8
SIMD: float-4
SIMD: float-8
SIMD: double-2
SIMD: double-4
IN: math.vectors.simd.tests IN: math.vectors.simd.tests
USING: math math.vectors.simd math.vectors.simd.private
math.vectors math.functions math.private kernel.private compiler
sequences tools.test compiler.tree.debugger accessors kernel
system ;
[ float-4{ 0 0 0 0 } ] [ float-4 new ] unit-test [ float-4{ 0 0 0 0 } ] [ float-4 new ] unit-test
@ -12,344 +34,6 @@ system ;
[ V{ float } ] [ [ { float-4 } declare norm ] final-classes ] unit-test [ V{ float } ] [ [ { float-4 } declare norm ] final-classes ] unit-test
[ float-4{ 12 12 12 12 } ] [
12 [ float-4-with ] compile-call
] unit-test
[ float-4{ 1 2 3 4 } ] [
1 2 3 4 [ float-4-boa ] compile-call
] unit-test
[ float-4{ 11 22 33 44 } ] [
float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
[ { float-4 float-4 } declare v+ ] compile-call
] unit-test
[ float-4{ -9 -18 -27 -36 } ] [
float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
[ { float-4 float-4 } declare v- ] compile-call
] unit-test
[ float-4{ 10 40 90 160 } ] [
float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
[ { float-4 float-4 } declare v* ] compile-call
] unit-test
[ float-4{ 10 100 1000 10000 } ] [
float-4{ 100 2000 30000 400000 } float-4{ 10 20 30 40 }
[ { float-4 float-4 } declare v/ ] compile-call
] unit-test
[ float-4{ -10 -20 -30 -40 } ] [
float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 }
[ { float-4 float-4 } declare vmin ] compile-call
] unit-test
[ float-4{ 10 20 30 40 } ] [
float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 }
[ { float-4 float-4 } declare vmax ] compile-call
] unit-test
[ 10.0 ] [
float-4{ 1 2 3 4 }
[ { float-4 } declare sum ] compile-call
] unit-test
[ 13.0 ] [
float-4{ 1 2 3 4 }
[ { float-4 } declare sum 3.0 + ] compile-call
] unit-test
[ 8.0 ] [
float-4{ 1 2 3 4 } float-4{ 2 0 2 0 }
[ { float-4 float-4 } declare v. ] compile-call
] unit-test
[ float-4{ 5 10 15 20 } ] [
5.0 float-4{ 1 2 3 4 }
[ { float float-4 } declare n*v ] compile-call
] unit-test
[ float-4{ 5 10 15 20 } ] [
float-4{ 1 2 3 4 } 5.0
[ { float float-4 } declare v*n ] compile-call
] unit-test
[ float-4{ 10 5 2 5 } ] [
10.0 float-4{ 1 2 5 2 }
[ { float float-4 } declare n/v ] compile-call
] unit-test
[ float-4{ 0.5 1 1.5 2 } ] [
float-4{ 1 2 3 4 } 2
[ { float float-4 } declare v/n ] compile-call
] unit-test
[ float-4{ 1 0 0 0 } ] [
float-4{ 10 0 0 0 }
[ { float-4 } declare normalize ] compile-call
] unit-test
[ 30.0 ] [
float-4{ 1 2 3 4 }
[ { float-4 } declare norm-sq ] compile-call
] unit-test
[ t ] [
float-4{ 1 0 0 0 }
float-4{ 0 1 0 0 }
[ { float-4 float-4 } declare distance ] compile-call
2 sqrt 1.0e-6 ~
] unit-test
[ double-2{ 12 12 } ] [
12 [ double-2-with ] compile-call
] unit-test
[ double-2{ 1 2 } ] [
1 2 [ double-2-boa ] compile-call
] unit-test
[ double-2{ 11 22 } ] [
double-2{ 1 2 } double-2{ 10 20 }
[ { double-2 double-2 } declare v+ ] compile-call
] unit-test
[ double-2{ -9 -18 } ] [
double-2{ 1 2 } double-2{ 10 20 }
[ { double-2 double-2 } declare v- ] compile-call
] unit-test
[ double-2{ 10 40 } ] [
double-2{ 1 2 } double-2{ 10 20 }
[ { double-2 double-2 } declare v* ] compile-call
] unit-test
[ double-2{ 10 100 } ] [
double-2{ 100 2000 } double-2{ 10 20 }
[ { double-2 double-2 } declare v/ ] compile-call
] unit-test
[ double-2{ -10 -20 } ] [
double-2{ -10 20 } double-2{ 10 -20 }
[ { double-2 double-2 } declare vmin ] compile-call
] unit-test
[ double-2{ 10 20 } ] [
double-2{ -10 20 } double-2{ 10 -20 }
[ { double-2 double-2 } declare vmax ] compile-call
] unit-test
[ 3.0 ] [
double-2{ 1 2 }
[ { double-2 } declare sum ] compile-call
] unit-test
[ 7.0 ] [
double-2{ 1 2 }
[ { double-2 } declare sum 4.0 + ] compile-call
] unit-test
[ 16.0 ] [
double-2{ 1 2 } double-2{ 2 7 }
[ { double-2 double-2 } declare v. ] compile-call
] unit-test
[ double-2{ 5 10 } ] [
5.0 double-2{ 1 2 }
[ { float double-2 } declare n*v ] compile-call
] unit-test
[ double-2{ 5 10 } ] [
double-2{ 1 2 } 5.0
[ { float double-2 } declare v*n ] compile-call
] unit-test
[ double-2{ 10 5 } ] [
10.0 double-2{ 1 2 }
[ { float double-2 } declare n/v ] compile-call
] unit-test
[ double-2{ 0.5 1 } ] [
double-2{ 1 2 } 2
[ { float double-2 } declare v/n ] compile-call
] unit-test
[ double-2{ 0 0 } ] [ double-2 new ] unit-test
[ double-2{ 1 0 } ] [
double-2{ 10 0 }
[ { double-2 } declare normalize ] compile-call
] unit-test
[ 5.0 ] [
double-2{ 1 2 }
[ { double-2 } declare norm-sq ] compile-call
] unit-test
[ t ] [
double-2{ 1 0 }
double-2{ 0 1 }
[ { double-2 double-2 } declare distance ] compile-call
2 sqrt 1.0e-6 ~
] unit-test
[ double-4{ 0 0 0 0 } ] [ double-4 new ] unit-test
[ double-4{ 1 2 3 4 } ] [
1 2 3 4 double-4-boa
] unit-test
[ double-4{ 1 1 1 1 } ] [
1 double-4-with
] unit-test
[ double-4{ 0 1 2 3 } ] [
1 double-4-with [ * ] map-index
] unit-test
[ V{ float } ] [ [ { double-4 } declare norm-sq ] final-classes ] unit-test
[ V{ float } ] [ [ { double-4 } declare norm ] final-classes ] unit-test
[ double-4{ 12 12 12 12 } ] [
12 [ double-4-with ] compile-call
] unit-test
[ double-4{ 1 2 3 4 } ] [
1 2 3 4 [ double-4-boa ] compile-call
] unit-test
[ double-4{ 11 22 33 44 } ] [
double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
[ { double-4 double-4 } declare v+ ] compile-call
] unit-test
[ double-4{ -9 -18 -27 -36 } ] [
double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
[ { double-4 double-4 } declare v- ] compile-call
] unit-test
[ double-4{ 10 40 90 160 } ] [
double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
[ { double-4 double-4 } declare v* ] compile-call
] unit-test
[ double-4{ 10 100 1000 10000 } ] [
double-4{ 100 2000 30000 400000 } double-4{ 10 20 30 40 }
[ { double-4 double-4 } declare v/ ] compile-call
] unit-test
[ double-4{ -10 -20 -30 -40 } ] [
double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 }
[ { double-4 double-4 } declare vmin ] compile-call
] unit-test
[ double-4{ 10 20 30 40 } ] [
double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 }
[ { double-4 double-4 } declare vmax ] compile-call
] unit-test
[ 10.0 ] [
double-4{ 1 2 3 4 }
[ { double-4 } declare sum ] compile-call
] unit-test
[ 13.0 ] [
double-4{ 1 2 3 4 }
[ { double-4 } declare sum 3.0 + ] compile-call
] unit-test
[ 8.0 ] [
double-4{ 1 2 3 4 } double-4{ 2 0 2 0 }
[ { double-4 double-4 } declare v. ] compile-call
] unit-test
[ double-4{ 5 10 15 20 } ] [
5.0 double-4{ 1 2 3 4 }
[ { float double-4 } declare n*v ] compile-call
] unit-test
[ double-4{ 5 10 15 20 } ] [
double-4{ 1 2 3 4 } 5.0
[ { float double-4 } declare v*n ] compile-call
] unit-test
[ double-4{ 10 5 2 5 } ] [
10.0 double-4{ 1 2 5 2 }
[ { float double-4 } declare n/v ] compile-call
] unit-test
[ double-4{ 0.5 1 1.5 2 } ] [
double-4{ 1 2 3 4 } 2
[ { float double-4 } declare v/n ] compile-call
] unit-test
[ double-4{ 1 0 0 0 } ] [
double-4{ 10 0 0 0 }
[ { double-4 } declare normalize ] compile-call
] unit-test
[ 30.0 ] [
double-4{ 1 2 3 4 }
[ { double-4 } declare norm-sq ] compile-call
] unit-test
[ t ] [
double-4{ 1 0 0 0 }
double-4{ 0 1 0 0 }
[ { double-4 double-4 } declare distance ] compile-call
2 sqrt 1.0e-6 ~
] unit-test
[ float-8{ 0 0 0 0 0 0 0 0 } ] [ float-8 new ] unit-test
[ float-8{ 0 0 0 0 0 0 0 0 } ] [ [ float-8 new ] compile-call ] unit-test
[ float-8{ 1 1 1 1 1 1 1 1 } ] [ 1 float-8-with ] unit-test
[ float-8{ 1 1 1 1 1 1 1 1 } ] [ [ 1 float-8-with ] compile-call ] unit-test
[ float-8{ 1 2 3 4 5 6 7 8 } ] [ 1 2 3 4 5 6 7 8 float-8-boa ] unit-test
[ float-8{ 1 2 3 4 5 6 7 8 } ] [ [ 1 2 3 4 5 6 7 8 float-8-boa ] compile-call ] unit-test
[ float-8{ 3 6 9 12 15 18 21 24 } ] [
float-8{ 1 2 3 4 5 6 7 8 }
float-8{ 2 4 6 8 10 12 14 16 }
[ { float-8 float-8 } declare v+ ] compile-call
] unit-test
[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
float-8{ 1 2 3 4 5 6 7 8 }
float-8{ 2 4 6 8 10 12 14 16 }
[ { float-8 float-8 } declare v- ] compile-call
] unit-test
[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
-0.5
float-8{ 2 4 6 8 10 12 14 16 }
[ { float float-8 } declare n*v ] compile-call
] unit-test
[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
float-8{ 2 4 6 8 10 12 14 16 }
-0.5
[ { float-8 float } declare v*n ] compile-call
] unit-test
[ float-8{ 256 128 64 32 16 8 4 2 } ] [
256.0
float-8{ 1 2 4 8 16 32 64 128 }
[ { float float-8 } declare n/v ] compile-call
] unit-test
[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
float-8{ 2 4 6 8 10 12 14 16 }
-2.0
[ { float-8 float } declare v/n ] compile-call
] unit-test
! Test puns; only on x86 ! Test puns; only on x86
cpu x86? [ cpu x86? [
[ double-2{ 4 1024 } ] [ [ double-2{ 4 1024 } ] [
@ -362,3 +46,179 @@ cpu x86? [
[ { double-2 double-2 } declare v+ underlying>> 3.0 float* ] compile-call [ { double-2 double-2 } declare v+ underlying>> 3.0 float* ] compile-call
] unit-test ] unit-test
] when ] when
! Fuzz testing
CONSTANT: simd-classes
{
char-16
uchar-16
char-32
uchar-32
short-8
ushort-8
short-16
ushort-16
int-4
uint-4
int-8
uint-8
float-4
float-8
double-2
double-4
}
: with-ctors ( -- seq )
simd-classes [ [ name>> "-with" append ] [ vocabulary>> ] bi lookup ] map ;
: boa-ctors ( -- seq )
simd-classes [ [ name>> "-boa" append ] [ vocabulary>> ] bi lookup ] map ;
: check-optimizer ( seq inputs quot -- )
[
[ "print-mr" get [ nip test-mr mr. ] [ 2drop ] if ]
[ [ call ] dip call ]
[ [ call ] dip compile-call ] 2tri = not
] compose filter ; inline
"== Checking -new constructors" print
[ { } ] [
simd-classes [ [ [ ] ] dip '[ _ new ] ] check-optimizer
] unit-test
[ { } ] [
simd-classes [ '[ _ new ] compile-call [ zero? ] all? not ] filter
] unit-test
"== Checking -with constructors" print
[ { } ] [
with-ctors [
[ 1000 random '[ _ ] ] dip '[ { fixnum } declare _ execute ]
] check-optimizer
] unit-test
"== Checking -boa constructors" print
[ { } ] [
boa-ctors [
dup stack-effect in>> length
[ nip [ 1000 random ] [ ] replicate-as ]
[ fixnum <array> swap '[ _ declare _ execute ] ]
2bi
] check-optimizer
] unit-test
"== Checking vector operations" print
: random-vector ( class -- vec )
new [ drop 1000 random ] map ;
:: check-vector-op ( word inputs class elt-class -- inputs quot )
inputs [
[
{
{ +vector+ [ class random-vector ] }
{ +scalar+ [ 1000 random elt-class float = [ >float ] when ] }
} case
] [ ] map-as
] [
[
{
{ +vector+ [ class ] }
{ +scalar+ [ elt-class ] }
} case
] map
] bi
word '[ _ declare _ execute ] ;
: ops-to-check ( elt-class -- alist )
[ vector-words >alist ] dip float = [
[ drop { n/v v/n v/ normalize } member? not ] assoc-filter
] unless ;
: check-vector-ops ( class elt-class -- )
[ nip ops-to-check ] 2keep
'[ first2 inputs _ _ check-vector-op ] check-optimizer ; inline
: simd-classes&reps ( -- alist )
simd-classes [
dup name>> [ "float" head? ] [ "double" head? ] bi or
float fixnum ?
] { } map>assoc ;
simd-classes&reps [
[ [ { } ] ] 2dip '[ _ _ check-vector-ops ] unit-test
] assoc-each
! Other regressions
[ 8000000 ] [
int-8{ 1000 1000 1000 1000 1000 1000 1000 1000 }
[ { int-8 } declare dup [ * ] [ + ] 2map-reduce ] compile-call
] unit-test
! Vector alien intrinsics
[ float-4{ 1 2 3 4 } ] [
[
float-4{ 1 2 3 4 }
underlying>> 0 float-4-rep alien-vector
] compile-call float-4 boa
] unit-test
[ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [
16 [ 1 ] B{ } replicate-as 16 <byte-array>
[
0 [
{ byte-array c-ptr fixnum } declare
float-4-rep set-alien-vector
] compile-call
] keep
] unit-test
[ float-array{ 1 2 3 4 } ] [
[
float-array{ 1 2 3 4 } underlying>>
float-array{ 4 3 2 1 } clone
[ underlying>> 0 float-4-rep set-alien-vector ] keep
] compile-call
] unit-test
STRUCT: simd-struct
{ x float-4 }
{ y double-2 }
{ z double-4 }
{ w float-8 } ;
[ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
[
float-4{ 1 2 3 4 }
double-2{ 2 1 }
double-4{ 4 3 2 1 }
float-8{ 1 2 3 4 5 6 7 8 }
] [
simd-struct <struct>
float-4{ 1 2 3 4 } >>x
double-2{ 2 1 } >>y
double-4{ 4 3 2 1 } >>z
float-8{ 1 2 3 4 5 6 7 8 } >>w
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
] unit-test
[
float-4{ 1 2 3 4 }
double-2{ 2 1 }
double-4{ 4 3 2 1 }
float-8{ 1 2 3 4 5 6 7 8 }
] [
[
simd-struct <struct>
float-4{ 1 2 3 4 } >>x
double-2{ 2 1 } >>y
double-4{ 4 3 2 1 } >>z
float-8{ 1 2 3 4 5 6 7 8 } >>w
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
] compile-call
] unit-test

View File

@ -1,15 +1,32 @@
! Copyright (C) 2009 Slava Pestov. ! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: alien.c-types cpu.architecture kernel USING: alien.c-types combinators fry kernel lexer math math.parser
math.vectors.simd.functor vocabs.loader ; math.vectors.simd.functor sequences splitting vocabs.generated
FROM: sequences => each ; vocabs.loader vocabs.parser words ;
IN: math.vectors.simd IN: math.vectors.simd
<< ERROR: bad-vector-size bits ;
{ double float char uchar short ushort int uint } <PRIVATE
[ [ define-simd-128 ] [ define-simd-256 ] bi ] each
>> : simd-vocab ( type -- vocab )
"math.vectors.simd.instances." prepend ;
"math.vectors.simd.alien" require : parse-simd-name ( string -- c-type quot )
"-" split1
[ "alien.c-types" lookup dup heap-size ] [ string>number ] bi*
* 8 * {
{ 128 [ [ define-simd-128 ] ] }
{ 256 [ [ define-simd-256 ] ] }
[ bad-vector-size ]
} case ;
PRIVATE>
: define-simd-vocab ( type -- vocab )
[ simd-vocab ]
[ '[ _ parse-simd-name call( type -- ) ] ] bi
generate-vocab ;
SYNTAX: SIMD:
scan define-simd-vocab use-vocab ;

View File

@ -4,7 +4,7 @@ USING: accessors alien alien.c-types alien.data alien.parser assocs
byte-arrays classes compiler.units functors kernel lexer libc math byte-arrays classes compiler.units functors kernel lexer libc math
math.vectors.specialization namespaces parser prettyprint.custom math.vectors.specialization namespaces parser prettyprint.custom
sequences sequences.private strings summary vocabs vocabs.loader sequences sequences.private strings summary vocabs vocabs.loader
vocabs.parser words fry combinators ; vocabs.parser vocabs.generated words fry combinators ;
IN: specialized-arrays IN: specialized-arrays
MIXIN: specialized-array MIXIN: specialized-array
@ -123,15 +123,6 @@ M: word (underlying-type) "c-type" word-prop ;
PRIVATE> PRIVATE>
: generate-vocab ( vocab-name quot -- vocab )
[ dup vocab [ ] ] dip '[
[
[
_ with-current-vocab
] with-compilation-unit
] keep
] ?if ; inline
: define-array-vocab ( type -- vocab ) : define-array-vocab ( type -- vocab )
underlying-type-name underlying-type-name
[ specialized-array-vocab ] [ '[ _ define-array ] ] bi [ specialized-array-vocab ] [ '[ _ define-array ] ] bi

View File

@ -3,7 +3,7 @@
USING: accessors alien.c-types assocs compiler.units functors USING: accessors alien.c-types assocs compiler.units functors
growable kernel lexer namespaces parser prettyprint.custom growable kernel lexer namespaces parser prettyprint.custom
sequences specialized-arrays specialized-arrays.private strings sequences specialized-arrays specialized-arrays.private strings
vocabs vocabs.parser fry ; vocabs vocabs.parser vocabs.generated fry ;
QUALIFIED: vectors.functor QUALIFIED: vectors.functor
IN: specialized-vectors IN: specialized-vectors

View File

@ -0,0 +1,13 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: compiler.units fry kernel vocabs vocabs.parser ;
IN: vocabs.generated
: generate-vocab ( vocab-name quot -- vocab )
[ dup vocab [ ] ] dip '[
[
[
_ with-current-vocab
] with-compilation-unit
] keep
] ?if ; inline

View File

@ -4,6 +4,7 @@ USING: accessors fry kernel locals math math.constants
math.functions math.vectors math.vectors.simd prettyprint math.functions math.vectors math.vectors.simd prettyprint
combinators.smart sequences hints classes.struct combinators.smart sequences hints classes.struct
specialized-arrays ; specialized-arrays ;
SIMD: double-4
IN: benchmark.nbody-simd IN: benchmark.nbody-simd
: solar-mass ( -- x ) 4 pi sq * ; inline : solar-mass ( -- x ) 4 pi sq * ; inline

View File

@ -5,6 +5,7 @@ USING: arrays accessors io io.files io.files.temp
io.encodings.binary kernel math math.constants math.functions io.encodings.binary kernel math math.constants math.functions
math.vectors math.vectors.simd math.parser make sequences math.vectors math.vectors.simd math.parser make sequences
sequences.private words hints classes.struct ; sequences.private words hints classes.struct ;
SIMD: double-4
IN: benchmark.raytracer-simd IN: benchmark.raytracer-simd
! parameters ! parameters

View File

@ -2,6 +2,7 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel io math math.functions math.parser math.vectors USING: kernel io math math.functions math.parser math.vectors
math.vectors.simd sequences specialized-arrays ; math.vectors.simd sequences specialized-arrays ;
SIMD: float-4
SPECIALIZED-ARRAY: float-4 SPECIALIZED-ARRAY: float-4
IN: benchmark.simd-1 IN: benchmark.simd-1