math.vectors.simd: redesign to be more flexible, integer SIMD work in progress
parent
e5af501429
commit
ea2bcd69c7
|
@ -472,3 +472,11 @@ SYMBOLS:
|
||||||
\ ulong \ size_t typedef
|
\ ulong \ size_t typedef
|
||||||
] with-compilation-unit
|
] with-compilation-unit
|
||||||
|
|
||||||
|
M: char-16-rep rep-component-type drop char ;
|
||||||
|
M: uchar-16-rep rep-component-type drop uchar ;
|
||||||
|
M: short-8-rep rep-component-type drop short ;
|
||||||
|
M: ushort-8-rep rep-component-type drop ushort ;
|
||||||
|
M: int-4-rep rep-component-type drop int ;
|
||||||
|
M: uint-4-rep rep-component-type drop uint ;
|
||||||
|
M: float-4-rep rep-component-type drop float ;
|
||||||
|
M: double-2-rep rep-component-type drop double ;
|
||||||
|
|
|
@ -22,8 +22,6 @@ SINGLETONS: float-rep double-rep ;
|
||||||
|
|
||||||
! On x86, floating point registers are really vector registers
|
! On x86, floating point registers are really vector registers
|
||||||
SINGLETONS:
|
SINGLETONS:
|
||||||
float-4-rep
|
|
||||||
double-2-rep
|
|
||||||
char-16-rep
|
char-16-rep
|
||||||
uchar-16-rep
|
uchar-16-rep
|
||||||
short-8-rep
|
short-8-rep
|
||||||
|
@ -31,9 +29,11 @@ ushort-8-rep
|
||||||
int-4-rep
|
int-4-rep
|
||||||
uint-4-rep ;
|
uint-4-rep ;
|
||||||
|
|
||||||
UNION: vector-rep
|
SINGLETONS:
|
||||||
float-4-rep
|
float-4-rep
|
||||||
double-2-rep
|
double-2-rep ;
|
||||||
|
|
||||||
|
UNION: int-vector-rep
|
||||||
char-16-rep
|
char-16-rep
|
||||||
uchar-16-rep
|
uchar-16-rep
|
||||||
short-8-rep
|
short-8-rep
|
||||||
|
@ -41,6 +41,14 @@ ushort-8-rep
|
||||||
int-4-rep
|
int-4-rep
|
||||||
uint-4-rep ;
|
uint-4-rep ;
|
||||||
|
|
||||||
|
UNION: float-vector-rep
|
||||||
|
float-4-rep
|
||||||
|
double-2-rep ;
|
||||||
|
|
||||||
|
UNION: vector-rep
|
||||||
|
int-vector-rep
|
||||||
|
float-vector-rep ;
|
||||||
|
|
||||||
UNION: representation
|
UNION: representation
|
||||||
any-rep
|
any-rep
|
||||||
tagged-rep
|
tagged-rep
|
||||||
|
@ -76,10 +84,15 @@ M: double-rep rep-size drop 8 ;
|
||||||
M: stack-params rep-size drop cell ;
|
M: stack-params rep-size drop cell ;
|
||||||
M: vector-rep rep-size drop 16 ;
|
M: vector-rep rep-size drop 16 ;
|
||||||
|
|
||||||
|
GENERIC: rep-component-type ( rep -- n )
|
||||||
|
|
||||||
|
! Methods defined in alien.c-types
|
||||||
|
|
||||||
GENERIC: scalar-rep-of ( rep -- rep' )
|
GENERIC: scalar-rep-of ( rep -- rep' )
|
||||||
|
|
||||||
M: float-4-rep scalar-rep-of drop float-rep ;
|
M: float-4-rep scalar-rep-of drop float-rep ;
|
||||||
M: double-2-rep scalar-rep-of drop double-rep ;
|
M: double-2-rep scalar-rep-of drop double-rep ;
|
||||||
|
M: int-vector-rep scalar-rep-of drop int-rep ;
|
||||||
|
|
||||||
! Mapping from register class to machine registers
|
! Mapping from register class to machine registers
|
||||||
HOOK: machine-registers cpu ( -- assoc )
|
HOOK: machine-registers cpu ( -- assoc )
|
||||||
|
@ -167,7 +180,6 @@ HOOK: %unbox-vector cpu ( dst src rep -- )
|
||||||
HOOK: %broadcast-vector cpu ( dst src rep -- )
|
HOOK: %broadcast-vector cpu ( dst src rep -- )
|
||||||
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
|
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
|
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
|
||||||
|
|
||||||
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
|
||||||
|
@ -177,6 +189,18 @@ HOOK: %max-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %sqrt-vector cpu ( dst src rep -- )
|
HOOK: %sqrt-vector cpu ( dst src rep -- )
|
||||||
HOOK: %horizontal-add-vector cpu ( dst src rep -- )
|
HOOK: %horizontal-add-vector cpu ( dst src rep -- )
|
||||||
|
|
||||||
|
HOOK: %broadcast-vector-reps cpu ( -- reps )
|
||||||
|
HOOK: %gather-vector-2-reps cpu ( -- reps )
|
||||||
|
HOOK: %gather-vector-4-reps cpu ( -- reps )
|
||||||
|
HOOK: %add-vector-reps cpu ( -- reps )
|
||||||
|
HOOK: %sub-vector-reps cpu ( -- reps )
|
||||||
|
HOOK: %mul-vector-reps cpu ( -- reps )
|
||||||
|
HOOK: %div-vector-reps cpu ( -- reps )
|
||||||
|
HOOK: %min-vector-reps cpu ( -- reps )
|
||||||
|
HOOK: %max-vector-reps cpu ( -- reps )
|
||||||
|
HOOK: %sqrt-vector-reps cpu ( -- reps )
|
||||||
|
HOOK: %horizontal-add-vector-reps cpu ( -- reps )
|
||||||
|
|
||||||
HOOK: %unbox-alien cpu ( dst src -- )
|
HOOK: %unbox-alien cpu ( dst src -- )
|
||||||
HOOK: %unbox-any-c-ptr cpu ( dst src temp -- )
|
HOOK: %unbox-any-c-ptr cpu ( dst src temp -- )
|
||||||
HOOK: %box-alien cpu ( dst src temp -- )
|
HOOK: %box-alien cpu ( dst src temp -- )
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
! Copyright (C) 2009 Slava Pestov.
|
! Copyright (C) 2009 Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: system kernel math math.order math.parser namespaces
|
USING: system kernel memoize math math.order math.parser
|
||||||
alien.c-types alien.syntax combinators locals init io cpu.x86
|
namespaces alien.c-types alien.syntax combinators locals init io
|
||||||
compiler compiler.units accessors ;
|
compiler compiler.units accessors ;
|
||||||
IN: cpu.x86.features
|
IN: cpu.x86.features
|
||||||
|
|
||||||
|
@ -13,7 +13,16 @@ FUNCTION: longlong read_timestamp_counter ( ) ;
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
ALIAS: sse-version sse_version
|
: sse-version ( -- n )
|
||||||
|
sse_version
|
||||||
|
"sse-version" get string>number [ min ] when* ; foldable
|
||||||
|
|
||||||
|
: sse? ( -- ? ) sse-version 10 >= ; foldable
|
||||||
|
: sse2? ( -- ? ) sse-version 20 >= ; foldable
|
||||||
|
: sse3? ( -- ? ) sse-version 30 >= ; foldable
|
||||||
|
: ssse3? ( -- ? ) sse-version 33 >= ; foldable
|
||||||
|
: sse4.1? ( -- ? ) sse-version 41 >= ; foldable
|
||||||
|
: sse4.2? ( -- ? ) sse-version 42 >= ; foldable
|
||||||
|
|
||||||
: sse-string ( version -- string )
|
: sse-string ( version -- string )
|
||||||
{
|
{
|
||||||
|
@ -32,37 +41,3 @@ M: x86 instruction-count read_timestamp_counter ;
|
||||||
|
|
||||||
: count-instructions ( quot -- n )
|
: count-instructions ( quot -- n )
|
||||||
instruction-count [ call ] dip instruction-count swap - ; inline
|
instruction-count [ call ] dip instruction-count swap - ; inline
|
||||||
|
|
||||||
USING: cpu.x86.features cpu.x86.features.private ;
|
|
||||||
|
|
||||||
:: install-sse-check ( version -- )
|
|
||||||
[
|
|
||||||
sse-version version < [
|
|
||||||
"This image was built to use " write
|
|
||||||
version sse-string write
|
|
||||||
" but your CPU only supports " write
|
|
||||||
sse-version sse-string write "." print
|
|
||||||
"You will need to bootstrap Factor again." print
|
|
||||||
flush
|
|
||||||
1 exit
|
|
||||||
] when
|
|
||||||
] "cpu.x86" add-init-hook ;
|
|
||||||
|
|
||||||
: enable-sse ( version -- )
|
|
||||||
{
|
|
||||||
{ 00 [ ] }
|
|
||||||
{ 10 [ ] }
|
|
||||||
{ 20 [ enable-sse2 ] }
|
|
||||||
{ 30 [ enable-sse3 ] }
|
|
||||||
{ 33 [ enable-sse3 ] }
|
|
||||||
{ 41 [ enable-sse3 ] }
|
|
||||||
{ 42 [ enable-sse3 ] }
|
|
||||||
} case ;
|
|
||||||
|
|
||||||
[ { sse_version } compile ] with-optimizer
|
|
||||||
|
|
||||||
"Checking for multimedia extensions: " write sse-version
|
|
||||||
"sse-version" get [ string>number min ] when*
|
|
||||||
[ sse-string write " detected" print ]
|
|
||||||
[ install-sse-check ]
|
|
||||||
[ enable-sse ] tri
|
|
||||||
|
|
|
@ -4,7 +4,8 @@ USING: accessors assocs alien alien.c-types arrays strings
|
||||||
cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands
|
cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands
|
||||||
cpu.architecture kernel kernel.private math memory namespaces make
|
cpu.architecture kernel kernel.private math memory namespaces make
|
||||||
sequences words system layouts combinators math.order fry locals
|
sequences words system layouts combinators math.order fry locals
|
||||||
compiler.constants byte-arrays
|
compiler.constants byte-arrays io macros quotations cpu.x86.features
|
||||||
|
cpu.x86.features.private compiler compiler.units init
|
||||||
compiler.cfg.registers
|
compiler.cfg.registers
|
||||||
compiler.cfg.instructions
|
compiler.cfg.instructions
|
||||||
compiler.cfg.intrinsics
|
compiler.cfg.intrinsics
|
||||||
|
@ -250,12 +251,26 @@ M:: x86 %unbox-vector ( dst src rep -- )
|
||||||
dst src byte-array-offset [+]
|
dst src byte-array-offset [+]
|
||||||
rep copy-register ;
|
rep copy-register ;
|
||||||
|
|
||||||
|
MACRO: available-reps ( alist -- )
|
||||||
|
! Each SSE version adds new representations and supports
|
||||||
|
! all old ones
|
||||||
|
unzip { } [ append ] accumulate rest swap suffix
|
||||||
|
[ [ 1quotation ] map ] bi@ zip
|
||||||
|
reverse [ { } ] suffix
|
||||||
|
'[ _ cond ] ;
|
||||||
|
|
||||||
M: x86 %broadcast-vector ( dst src rep -- )
|
M: x86 %broadcast-vector ( dst src rep -- )
|
||||||
{
|
{
|
||||||
{ float-4-rep [ [ MOVSS ] [ drop dup 0 SHUFPS ] 2bi ] }
|
{ float-4-rep [ [ MOVSS ] [ drop dup 0 SHUFPS ] 2bi ] }
|
||||||
{ double-2-rep [ [ MOVSD ] [ drop dup UNPCKLPD ] 2bi ] }
|
{ double-2-rep [ [ MOVSD ] [ drop dup UNPCKLPD ] 2bi ] }
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
|
M: x86 %broadcast-vector-reps
|
||||||
|
{
|
||||||
|
{ sse? { float-4-rep } }
|
||||||
|
{ sse2? { double-2-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
|
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
|
||||||
rep {
|
rep {
|
||||||
{
|
{
|
||||||
|
@ -269,6 +284,11 @@ M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
|
||||||
}
|
}
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
|
M: x86 %gather-vector-4-reps
|
||||||
|
{
|
||||||
|
{ sse? { float-4-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
|
M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
|
||||||
rep {
|
rep {
|
||||||
{
|
{
|
||||||
|
@ -280,6 +300,11 @@ M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
|
||||||
}
|
}
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
|
M: x86 %gather-vector-2-reps
|
||||||
|
{
|
||||||
|
{ sse2? { double-2-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %add-vector ( dst src1 src2 rep -- )
|
M: x86 %add-vector ( dst src1 src2 rep -- )
|
||||||
{
|
{
|
||||||
{ float-4-rep [ ADDPS ] }
|
{ float-4-rep [ ADDPS ] }
|
||||||
|
@ -292,6 +317,12 @@ M: x86 %add-vector ( dst src1 src2 rep -- )
|
||||||
{ uint-4-rep [ PADDD ] }
|
{ uint-4-rep [ PADDD ] }
|
||||||
} case drop ;
|
} case drop ;
|
||||||
|
|
||||||
|
M: x86 %add-vector-reps
|
||||||
|
{
|
||||||
|
{ sse? { float-4-rep } }
|
||||||
|
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %sub-vector ( dst src1 src2 rep -- )
|
M: x86 %sub-vector ( dst src1 src2 rep -- )
|
||||||
{
|
{
|
||||||
{ float-4-rep [ SUBPS ] }
|
{ float-4-rep [ SUBPS ] }
|
||||||
|
@ -304,43 +335,92 @@ M: x86 %sub-vector ( dst src1 src2 rep -- )
|
||||||
{ uint-4-rep [ PSUBD ] }
|
{ uint-4-rep [ PSUBD ] }
|
||||||
} case drop ;
|
} case drop ;
|
||||||
|
|
||||||
|
M: x86 %sub-vector-reps
|
||||||
|
{
|
||||||
|
{ sse? { float-4-rep } }
|
||||||
|
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %mul-vector ( dst src1 src2 rep -- )
|
M: x86 %mul-vector ( dst src1 src2 rep -- )
|
||||||
{
|
{
|
||||||
{ float-4-rep [ MULPS ] }
|
{ float-4-rep [ MULPS ] }
|
||||||
{ double-2-rep [ MULPD ] }
|
{ double-2-rep [ MULPD ] }
|
||||||
{ int-4-rep [ PMULLW ] }
|
{ short-8-rep [ PMULLW ] }
|
||||||
|
{ ushort-8-rep [ PMULLW ] }
|
||||||
|
{ int-4-rep [ PMULLD ] }
|
||||||
|
{ uint-4-rep [ PMULLD ] }
|
||||||
} case drop ;
|
} case drop ;
|
||||||
|
|
||||||
|
M: x86 %mul-vector-reps
|
||||||
|
{
|
||||||
|
{ sse? { float-4-rep } }
|
||||||
|
{ sse2? { double-2-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %div-vector ( dst src1 src2 rep -- )
|
M: x86 %div-vector ( dst src1 src2 rep -- )
|
||||||
{
|
{
|
||||||
{ float-4-rep [ DIVPS ] }
|
{ float-4-rep [ DIVPS ] }
|
||||||
{ double-2-rep [ DIVPD ] }
|
{ double-2-rep [ DIVPD ] }
|
||||||
} case drop ;
|
} case drop ;
|
||||||
|
|
||||||
|
M: x86 %div-vector-reps
|
||||||
|
{
|
||||||
|
{ sse? { float-4-rep } }
|
||||||
|
{ sse2? { double-2-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %min-vector ( dst src1 src2 rep -- )
|
M: x86 %min-vector ( dst src1 src2 rep -- )
|
||||||
{
|
{
|
||||||
{ float-4-rep [ MINPS ] }
|
{ float-4-rep [ MINPS ] }
|
||||||
{ double-2-rep [ MINPD ] }
|
{ double-2-rep [ MINPD ] }
|
||||||
|
{ uchar-16-rep [ PMINUB ] }
|
||||||
|
{ short-8-rep [ PMINSW ] }
|
||||||
} case drop ;
|
} case drop ;
|
||||||
|
|
||||||
|
M: x86 %min-vector-reps
|
||||||
|
{
|
||||||
|
{ sse? { float-4-rep } }
|
||||||
|
{ sse2? { double-2-rep short-8-rep uchar-16-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %max-vector ( dst src1 src2 rep -- )
|
M: x86 %max-vector ( dst src1 src2 rep -- )
|
||||||
{
|
{
|
||||||
{ float-4-rep [ MAXPS ] }
|
{ float-4-rep [ MAXPS ] }
|
||||||
{ double-2-rep [ MAXPD ] }
|
{ double-2-rep [ MAXPD ] }
|
||||||
|
{ uchar-16-rep [ PMAXUB ] }
|
||||||
|
{ short-8-rep [ PMAXSW ] }
|
||||||
} case drop ;
|
} case drop ;
|
||||||
|
|
||||||
|
M: x86 %max-vector-reps
|
||||||
|
{
|
||||||
|
{ sse? { float-4-rep } }
|
||||||
|
{ sse2? { double-2-rep short-8-rep uchar-16-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %sqrt-vector ( dst src rep -- )
|
M: x86 %sqrt-vector ( dst src rep -- )
|
||||||
{
|
{
|
||||||
{ float-4-rep [ SQRTPS ] }
|
{ float-4-rep [ SQRTPS ] }
|
||||||
{ double-2-rep [ SQRTPD ] }
|
{ double-2-rep [ SQRTPD ] }
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
|
M: x86 %sqrt-vector-reps
|
||||||
|
{
|
||||||
|
{ sse? { float-4-rep } }
|
||||||
|
{ sse2? { double-2-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %horizontal-add-vector ( dst src rep -- )
|
M: x86 %horizontal-add-vector ( dst src rep -- )
|
||||||
{
|
{
|
||||||
{ float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
|
{ float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
|
||||||
{ double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
|
{ double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
|
M: x86 %horizontal-add-vector-reps
|
||||||
|
{
|
||||||
|
{ sse? { float-4-rep } }
|
||||||
|
{ sse2? { double-2-rep short-8-rep uchar-16-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %unbox-alien ( dst src -- )
|
M: x86 %unbox-alien ( dst src -- )
|
||||||
alien-offset [+] MOV ;
|
alien-offset [+] MOV ;
|
||||||
|
|
||||||
|
@ -775,3 +855,34 @@ M: x86 small-enough? ( n -- ? )
|
||||||
enable-sse3-simd ;
|
enable-sse3-simd ;
|
||||||
|
|
||||||
enable-min/max
|
enable-min/max
|
||||||
|
|
||||||
|
:: install-sse-check ( version -- )
|
||||||
|
[
|
||||||
|
sse-version version < [
|
||||||
|
"This image was built to use " write
|
||||||
|
version sse-string write
|
||||||
|
" but your CPU only supports " write
|
||||||
|
sse-version sse-string write "." print
|
||||||
|
"You will need to bootstrap Factor again." print
|
||||||
|
flush
|
||||||
|
1 exit
|
||||||
|
] when
|
||||||
|
] "cpu.x86" add-init-hook ;
|
||||||
|
|
||||||
|
: enable-sse ( version -- )
|
||||||
|
{
|
||||||
|
{ 00 [ ] }
|
||||||
|
{ 10 [ ] }
|
||||||
|
{ 20 [ enable-sse2 ] }
|
||||||
|
{ 30 [ enable-sse3 ] }
|
||||||
|
{ 33 [ enable-sse3 ] }
|
||||||
|
{ 41 [ enable-sse3 ] }
|
||||||
|
{ 42 [ enable-sse3 ] }
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
[ { sse_version } compile ] with-optimizer
|
||||||
|
|
||||||
|
"Checking for multimedia extensions: " write sse-version 30 min
|
||||||
|
[ sse-string write " detected" print ]
|
||||||
|
[ install-sse-check ]
|
||||||
|
[ enable-sse ] tri
|
||||||
|
|
|
@ -31,9 +31,7 @@ M: x87-env (set-fp-env-register)
|
||||||
set_x87_env ;
|
set_x87_env ;
|
||||||
|
|
||||||
M: x86 (fp-env-registers)
|
M: x86 (fp-env-registers)
|
||||||
sse-version 20 >=
|
sse2? [ <sse-env> <x87-env> 2array ] [ <x87-env> 1array ] if ;
|
||||||
[ <sse-env> <x87-env> 2array ]
|
|
||||||
[ <x87-env> 1array ] if ;
|
|
||||||
|
|
||||||
CONSTANT: sse-exception-flag-bits HEX: 3f
|
CONSTANT: sse-exception-flag-bits HEX: 3f
|
||||||
CONSTANT: sse-exception-flag>bit
|
CONSTANT: sse-exception-flag>bit
|
||||||
|
|
|
@ -1,24 +1,94 @@
|
||||||
! Copyright (C) 2009 Slava Pestov.
|
! Copyright (C) 2009 Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors alien.c-types byte-arrays classes functors
|
USING: accessors alien.c-types assocs byte-arrays classes
|
||||||
kernel math parser prettyprint.custom sequences
|
effects fry functors generalizations kernel literals locals
|
||||||
sequences.private literals ;
|
math math.functions math.vectors math.vectors.simd.intrinsics
|
||||||
|
math.vectors.specialization parser prettyprint.custom sequences
|
||||||
|
sequences.private strings words definitions macros cpu.architecture ;
|
||||||
IN: math.vectors.simd.functor
|
IN: math.vectors.simd.functor
|
||||||
|
|
||||||
ERROR: bad-length got expected ;
|
ERROR: bad-length got expected ;
|
||||||
|
|
||||||
|
MACRO: simd-boa ( rep class -- simd-array )
|
||||||
|
[ rep-components ] [ new ] bi* '[ _ _ nsequence ] ;
|
||||||
|
|
||||||
|
:: define-boa-custom-inlining ( word rep class -- )
|
||||||
|
word [
|
||||||
|
drop
|
||||||
|
rep rep rep-gather-word supported-simd-op? [
|
||||||
|
[ rep (simd-boa) class boa ]
|
||||||
|
] [ word def>> ] if
|
||||||
|
] "custom-inlining" set-word-prop ;
|
||||||
|
|
||||||
|
: simd-with ( rep class x -- simd-array )
|
||||||
|
[ rep-components ] [ new ] [ '[ _ ] ] tri* swap replicate-as ; inline
|
||||||
|
|
||||||
|
:: define-with-custom-inlining ( word rep class -- )
|
||||||
|
word [
|
||||||
|
drop
|
||||||
|
rep \ (simd-broadcast) supported-simd-op? [
|
||||||
|
[ rep rep-coerce rep (simd-broadcast) class boa ]
|
||||||
|
] [ word def>> ] if
|
||||||
|
] "custom-inlining" set-word-prop ;
|
||||||
|
|
||||||
|
: boa-effect ( rep n -- effect )
|
||||||
|
[ rep-components ] dip *
|
||||||
|
[ CHAR: a + 1string ] map
|
||||||
|
{ "simd-vector" } <effect> ;
|
||||||
|
|
||||||
|
: supported-simd-ops ( assoc rep -- assoc' )
|
||||||
|
[
|
||||||
|
{
|
||||||
|
{ v+ (simd-v+) }
|
||||||
|
{ v- (simd-v-) }
|
||||||
|
{ v* (simd-v*) }
|
||||||
|
{ v/ (simd-v/) }
|
||||||
|
{ vmin (simd-vmin) }
|
||||||
|
{ vmax (simd-vmax) }
|
||||||
|
{ sum (simd-sum) }
|
||||||
|
}
|
||||||
|
] dip
|
||||||
|
'[ nip _ swap supported-simd-op? ] assoc-filter
|
||||||
|
'[ drop _ key? ] assoc-filter ;
|
||||||
|
|
||||||
|
:: high-level-ops ( ctor -- assoc )
|
||||||
|
! Some SIMD operations are defined in terms of others.
|
||||||
|
{
|
||||||
|
{ vneg [ [ dup v- ] keep v- ] }
|
||||||
|
{ v. [ v* sum ] }
|
||||||
|
{ n+v [ [ ctor execute ] dip v+ ] }
|
||||||
|
{ v+n [ ctor execute v+ ] }
|
||||||
|
{ n-v [ [ ctor execute ] dip v- ] }
|
||||||
|
{ v-n [ ctor execute v- ] }
|
||||||
|
{ n*v [ [ ctor execute ] dip v* ] }
|
||||||
|
{ v*n [ ctor execute v* ] }
|
||||||
|
{ n/v [ [ ctor execute ] dip v/ ] }
|
||||||
|
{ v/n [ ctor execute v/ ] }
|
||||||
|
{ norm-sq [ dup v. assert-positive ] }
|
||||||
|
{ norm [ norm-sq sqrt ] }
|
||||||
|
{ normalize [ dup norm v/n ] }
|
||||||
|
{ distance [ v- norm ] }
|
||||||
|
} ;
|
||||||
|
|
||||||
|
:: simd-vector-words ( class ctor rep assoc -- )
|
||||||
|
class
|
||||||
|
rep rep-component-type c-type-boxed-class
|
||||||
|
assoc rep supported-simd-ops
|
||||||
|
ctor high-level-ops assoc-union
|
||||||
|
specialize-vector-words ;
|
||||||
|
|
||||||
FUNCTOR: define-simd-128 ( T -- )
|
FUNCTOR: define-simd-128 ( T -- )
|
||||||
|
|
||||||
T-TYPE IS ${T}
|
N [ 16 T heap-size /i ]
|
||||||
|
|
||||||
N [ 16 T-TYPE heap-size /i ]
|
|
||||||
|
|
||||||
A DEFINES-CLASS ${T}-${N}
|
A DEFINES-CLASS ${T}-${N}
|
||||||
|
A-boa DEFINES ${A}-boa
|
||||||
|
A-with DEFINES ${A}-with
|
||||||
>A DEFINES >${A}
|
>A DEFINES >${A}
|
||||||
A{ DEFINES ${A}{
|
A{ DEFINES ${A}{
|
||||||
|
|
||||||
NTH [ T-TYPE dup c-type-getter-boxer array-accessor ]
|
NTH [ T dup c-type-getter-boxer array-accessor ]
|
||||||
SET-NTH [ T-TYPE dup c-setter array-accessor ]
|
SET-NTH [ T dup c-setter array-accessor ]
|
||||||
|
|
||||||
A-rep IS ${A}-rep
|
A-rep IS ${A}-rep
|
||||||
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
||||||
|
@ -59,6 +129,16 @@ M: A pprint* pprint-object ;
|
||||||
|
|
||||||
SYNTAX: A{ \ } [ >A ] parse-literal ;
|
SYNTAX: A{ \ } [ >A ] parse-literal ;
|
||||||
|
|
||||||
|
: A-with ( x -- simd-array ) [ A-rep A ] dip simd-with ;
|
||||||
|
|
||||||
|
\ A-with \ A-rep \ A define-with-custom-inlining
|
||||||
|
|
||||||
|
\ A-boa [ \ A-rep \ A simd-boa ] \ A-rep 1 boa-effect define-declared
|
||||||
|
|
||||||
|
\ A-rep rep-gather-word [
|
||||||
|
\ A-boa \ A-rep \ A define-boa-custom-inlining
|
||||||
|
] when
|
||||||
|
|
||||||
INSTANCE: A sequence
|
INSTANCE: A sequence
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
@ -69,6 +149,16 @@ INSTANCE: A sequence
|
||||||
: A-v->n-op ( v quot -- n )
|
: A-v->n-op ( v quot -- n )
|
||||||
[ underlying>> A-rep ] dip call ; inline
|
[ underlying>> A-rep ] dip call ; inline
|
||||||
|
|
||||||
|
\ A \ A-with \ A-rep H{
|
||||||
|
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
|
||||||
|
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
|
||||||
|
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
|
||||||
|
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
|
||||||
|
{ vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] }
|
||||||
|
{ vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] }
|
||||||
|
{ sum [ [ (simd-sum) ] \ A-v->n-op execute ] }
|
||||||
|
} simd-vector-words
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
;FUNCTOR
|
;FUNCTOR
|
||||||
|
@ -76,14 +166,16 @@ PRIVATE>
|
||||||
! Synthesize 256-bit vectors from a pair of 128-bit vectors
|
! Synthesize 256-bit vectors from a pair of 128-bit vectors
|
||||||
FUNCTOR: define-simd-256 ( T -- )
|
FUNCTOR: define-simd-256 ( T -- )
|
||||||
|
|
||||||
T-TYPE IS ${T}
|
N [ 32 T heap-size /i ]
|
||||||
|
|
||||||
N [ 32 T-TYPE heap-size /i ]
|
|
||||||
|
|
||||||
N/2 [ N 2 / ]
|
N/2 [ N 2 / ]
|
||||||
A/2 IS ${T}-${N/2}
|
A/2 IS ${T}-${N/2}
|
||||||
|
A/2-boa IS ${A/2}-boa
|
||||||
|
A/2-with IS ${A/2}-with
|
||||||
|
|
||||||
A DEFINES-CLASS ${T}-${N}
|
A DEFINES-CLASS ${T}-${N}
|
||||||
|
A-boa DEFINES ${A}-boa
|
||||||
|
A-with DEFINES ${A}-with
|
||||||
>A DEFINES >${A}
|
>A DEFINES >${A}
|
||||||
A{ DEFINES ${A}{
|
A{ DEFINES ${A}{
|
||||||
|
|
||||||
|
@ -137,6 +229,16 @@ M: A >pprint-sequence ;
|
||||||
|
|
||||||
M: A pprint* pprint-object ;
|
M: A pprint* pprint-object ;
|
||||||
|
|
||||||
|
: A-with ( x -- simd-array )
|
||||||
|
[ A/2-with ] [ A/2-with ] bi [ underlying>> ] bi@
|
||||||
|
\ A boa ; inline
|
||||||
|
|
||||||
|
: A-boa ( ... -- simd-array )
|
||||||
|
[ A/2-boa ] N/2 ndip A/2-boa [ underlying>> ] bi@
|
||||||
|
\ A boa ;
|
||||||
|
|
||||||
|
\ A-rep 2 boa-effect \ A-boa set-stack-effect
|
||||||
|
|
||||||
INSTANCE: A sequence
|
INSTANCE: A sequence
|
||||||
|
|
||||||
: A-vv->v-op ( v1 v2 quot -- v3 )
|
: A-vv->v-op ( v1 v2 quot -- v3 )
|
||||||
|
@ -148,4 +250,14 @@ INSTANCE: A sequence
|
||||||
[ [ [ underlying1>> ] [ underlying2>> ] bi A-rep ] dip call A-rep ]
|
[ [ [ underlying1>> ] [ underlying2>> ] bi A-rep ] dip call A-rep ]
|
||||||
dip call ; inline
|
dip call ; inline
|
||||||
|
|
||||||
|
\ A \ A-with \ A-rep H{
|
||||||
|
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
|
||||||
|
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
|
||||||
|
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
|
||||||
|
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
|
||||||
|
{ vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] }
|
||||||
|
{ vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] }
|
||||||
|
{ sum [ [ (simd-v+) ] [ (simd-sum) ] \ A-v->n-op execute ] }
|
||||||
|
} simd-vector-words
|
||||||
|
|
||||||
;FUNCTOR
|
;FUNCTOR
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
IN: math.vectors.simd.intrinsics.tests
|
||||||
|
USING: math.vectors.simd.intrinsics cpu.architecture tools.test ;
|
||||||
|
|
||||||
|
[ 16 ] [ uchar-16-rep rep-components ] unit-test
|
||||||
|
[ 16 ] [ char-16-rep rep-components ] unit-test
|
||||||
|
[ 8 ] [ ushort-8-rep rep-components ] unit-test
|
||||||
|
[ 8 ] [ short-8-rep rep-components ] unit-test
|
||||||
|
[ 4 ] [ uint-4-rep rep-components ] unit-test
|
||||||
|
[ 4 ] [ int-4-rep rep-components ] unit-test
|
||||||
|
[ 4 ] [ float-4-rep rep-components ] unit-test
|
||||||
|
[ 2 ] [ double-2-rep rep-components ] unit-test
|
||||||
|
|
||||||
|
{ 4 1 } [ uint-4-rep (simd-boa) ] must-infer-as
|
||||||
|
{ 4 1 } [ int-4-rep (simd-boa) ] must-infer-as
|
||||||
|
{ 4 1 } [ float-4-rep (simd-boa) ] must-infer-as
|
||||||
|
{ 2 1 } [ double-2-rep (simd-boa) ] must-infer-as
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
! Copyright (C) 2009 Slava Pestov.
|
! Copyright (C) 2009 Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: kernel alien alien.data cpu.architecture libc ;
|
USING: alien alien.c-types alien.data assocs combinators
|
||||||
|
cpu.architecture fry generalizations kernel libc macros math
|
||||||
|
sequences ;
|
||||||
IN: math.vectors.simd.intrinsics
|
IN: math.vectors.simd.intrinsics
|
||||||
|
|
||||||
ERROR: bad-simd-call ;
|
ERROR: bad-simd-call ;
|
||||||
|
@ -26,3 +28,53 @@ ERROR: bad-simd-call ;
|
||||||
! Inefficient version for when intrinsics are missing
|
! Inefficient version for when intrinsics are missing
|
||||||
[ swap <displaced-alien> swap ] dip rep-size memcpy ;
|
[ swap <displaced-alien> swap ] dip rep-size memcpy ;
|
||||||
|
|
||||||
|
<<
|
||||||
|
|
||||||
|
: rep-components ( rep -- n )
|
||||||
|
16 swap rep-component-type heap-size /i ; foldable
|
||||||
|
|
||||||
|
: rep-coercer ( rep -- quot )
|
||||||
|
{
|
||||||
|
{ [ dup int-vector-rep? ] [ [ >fixnum ] ] }
|
||||||
|
{ [ dup float-vector-rep? ] [ [ >float ] ] }
|
||||||
|
} cond nip ; foldable
|
||||||
|
|
||||||
|
: rep-coerce ( value rep -- value' )
|
||||||
|
rep-coercer call( value -- value' ) ; inline
|
||||||
|
|
||||||
|
CONSTANT: rep-gather-words
|
||||||
|
{
|
||||||
|
{ 2 (simd-gather-2) }
|
||||||
|
{ 4 (simd-gather-4) }
|
||||||
|
}
|
||||||
|
|
||||||
|
: rep-gather-word ( rep -- word )
|
||||||
|
rep-components rep-gather-words at ;
|
||||||
|
|
||||||
|
>>
|
||||||
|
|
||||||
|
MACRO: (simd-boa) ( rep -- quot )
|
||||||
|
{
|
||||||
|
[ rep-coercer ]
|
||||||
|
[ rep-components ]
|
||||||
|
[ ]
|
||||||
|
[ rep-gather-word ]
|
||||||
|
} cleave
|
||||||
|
'[ _ _ napply _ _ execute ] ;
|
||||||
|
|
||||||
|
GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
|
||||||
|
|
||||||
|
M: vector-rep supported-simd-op?
|
||||||
|
{
|
||||||
|
{ \ (simd-v+) [ %add-vector-reps ] }
|
||||||
|
{ \ (simd-v-) [ %sub-vector-reps ] }
|
||||||
|
{ \ (simd-v*) [ %mul-vector-reps ] }
|
||||||
|
{ \ (simd-v/) [ %div-vector-reps ] }
|
||||||
|
{ \ (simd-vmin) [ %min-vector-reps ] }
|
||||||
|
{ \ (simd-vmax) [ %max-vector-reps ] }
|
||||||
|
{ \ (simd-vsqrt) [ %sqrt-vector-reps ] }
|
||||||
|
{ \ (simd-sum) [ %horizontal-add-vector-reps ] }
|
||||||
|
{ \ (simd-broadcast) [ %broadcast-vector-reps ] }
|
||||||
|
{ \ (simd-gather-2) [ %gather-vector-2-reps ] }
|
||||||
|
{ \ (simd-gather-4) [ %gather-vector-4-reps ] }
|
||||||
|
} case member? ;
|
||||||
|
|
|
@ -43,22 +43,6 @@ $nl
|
||||||
}
|
}
|
||||||
"The " { $link float-4 } " and " { $link double-2 } " types correspond to 128-bit vector registers. The " { $link float-8 } " and " { $link double-4 } " types are not directly supported in hardware, and instead unbox to a pair of 128-bit vector registers."
|
"The " { $link float-4 } " and " { $link double-2 } " types correspond to 128-bit vector registers. The " { $link float-8 } " and " { $link double-4 } " types are not directly supported in hardware, and instead unbox to a pair of 128-bit vector registers."
|
||||||
$nl
|
$nl
|
||||||
"Operations on " { $link float-4 } " instances:"
|
|
||||||
{ $subsection float-4-with }
|
|
||||||
{ $subsection float-4-boa }
|
|
||||||
{ $subsection POSTPONE: float-4{ }
|
|
||||||
"Operations on " { $link double-2 } " instances:"
|
|
||||||
{ $subsection double-2-with }
|
|
||||||
{ $subsection double-2-boa }
|
|
||||||
{ $subsection POSTPONE: double-2{ }
|
|
||||||
"Operations on " { $link float-8 } " instances:"
|
|
||||||
{ $subsection float-8-with }
|
|
||||||
{ $subsection float-8-boa }
|
|
||||||
{ $subsection POSTPONE: float-8{ }
|
|
||||||
"Operations on " { $link double-4 } " instances:"
|
|
||||||
{ $subsection double-4-with }
|
|
||||||
{ $subsection double-4-boa }
|
|
||||||
{ $subsection POSTPONE: double-4{ }
|
|
||||||
"To actually perform vector arithmetic on SIMD vectors, use " { $link "math-vectors" } " words."
|
"To actually perform vector arithmetic on SIMD vectors, use " { $link "math-vectors" } " words."
|
||||||
{ $see-also "c-types-specs" } ;
|
{ $see-also "c-types-specs" } ;
|
||||||
|
|
||||||
|
@ -184,72 +168,4 @@ ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
|
||||||
{ $subsection "math.vectors.simd.alien" }
|
{ $subsection "math.vectors.simd.alien" }
|
||||||
{ $subsection "math.vectors.simd.intrinsics" } ;
|
{ $subsection "math.vectors.simd.intrinsics" } ;
|
||||||
|
|
||||||
! ! ! float-4
|
|
||||||
|
|
||||||
HELP: float-4
|
|
||||||
{ $class-description "A sequence of four single-precision floating point values. New instances can be created with " { $link float-4-with } " or " { $link float-4-boa } "." } ;
|
|
||||||
|
|
||||||
HELP: float-4-with
|
|
||||||
{ $values { "x" float } { "simd-array" float-4 } }
|
|
||||||
{ $description "Creates a new vector with all four components equal to a scalar." } ;
|
|
||||||
|
|
||||||
HELP: float-4-boa
|
|
||||||
{ $values { "a" float } { "b" float } { "c" float } { "d" float } { "simd-array" float-4 } }
|
|
||||||
{ $description "Creates a new vector from four scalar components." } ;
|
|
||||||
|
|
||||||
HELP: float-4{
|
|
||||||
{ $syntax "float-4{ a b c d }" }
|
|
||||||
{ $description "Literal syntax for a " { $link float-4 } "." } ;
|
|
||||||
|
|
||||||
! ! ! double-2
|
|
||||||
|
|
||||||
HELP: double-2
|
|
||||||
{ $class-description "A sequence of two double-precision floating point values. New instances can be created with " { $link double-2-with } " or " { $link double-2-boa } "." } ;
|
|
||||||
|
|
||||||
HELP: double-2-with
|
|
||||||
{ $values { "x" float } { "simd-array" double-2 } }
|
|
||||||
{ $description "Creates a new vector with both components equal to a scalar." } ;
|
|
||||||
|
|
||||||
HELP: double-2-boa
|
|
||||||
{ $values { "a" float } { "b" float } { "simd-array" double-2 } }
|
|
||||||
{ $description "Creates a new vector from two scalar components." } ;
|
|
||||||
|
|
||||||
HELP: double-2{
|
|
||||||
{ $syntax "double-2{ a b }" }
|
|
||||||
{ $description "Literal syntax for a " { $link double-2 } "." } ;
|
|
||||||
|
|
||||||
! ! ! float-8
|
|
||||||
|
|
||||||
HELP: float-8
|
|
||||||
{ $class-description "A sequence of eight single-precision floating point values. New instances can be created with " { $link float-8-with } " or " { $link float-8-boa } "." } ;
|
|
||||||
|
|
||||||
HELP: float-8-with
|
|
||||||
{ $values { "x" float } { "simd-array" float-8 } }
|
|
||||||
{ $description "Creates a new vector with all eight components equal to a scalar." } ;
|
|
||||||
|
|
||||||
HELP: float-8-boa
|
|
||||||
{ $values { "a" float } { "b" float } { "c" float } { "d" float } { "e" float } { "f" float } { "g" float } { "h" float } { "simd-array" float-8 } }
|
|
||||||
{ $description "Creates a new vector from eight scalar components." } ;
|
|
||||||
|
|
||||||
HELP: float-8{
|
|
||||||
{ $syntax "float-8{ a b c d e f g h }" }
|
|
||||||
{ $description "Literal syntax for a " { $link float-8 } "." } ;
|
|
||||||
|
|
||||||
! ! ! double-4
|
|
||||||
|
|
||||||
HELP: double-4
|
|
||||||
{ $class-description "A sequence of four double-precision floating point values. New instances can be created with " { $link double-4-with } " or " { $link double-4-boa } "." } ;
|
|
||||||
|
|
||||||
HELP: double-4-with
|
|
||||||
{ $values { "x" float } { "simd-array" double-4 } }
|
|
||||||
{ $description "Creates a new vector with all four components equal to a scalar." } ;
|
|
||||||
|
|
||||||
HELP: double-4-boa
|
|
||||||
{ $values { "a" float } { "b" float } { "c" float } { "d" float } { "simd-array" double-4 } }
|
|
||||||
{ $description "Creates a new vector from four scalar components." } ;
|
|
||||||
|
|
||||||
HELP: double-4{
|
|
||||||
{ $syntax "double-4{ a b c d }" }
|
|
||||||
{ $description "Literal syntax for a " { $link double-4 } "." } ;
|
|
||||||
|
|
||||||
ABOUT: "math.vectors.simd"
|
ABOUT: "math.vectors.simd"
|
||||||
|
|
|
@ -1,185 +1,15 @@
|
||||||
! Copyright (C) 2009 Slava Pestov.
|
! Copyright (C) 2009 Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors alien.c-types byte-arrays cpu.architecture
|
USING: alien.c-types cpu.architecture kernel
|
||||||
kernel math math.functions math.vectors
|
math.vectors.simd.functor vocabs.loader ;
|
||||||
math.vectors.simd.functor math.vectors.simd.intrinsics
|
FROM: sequences => each ;
|
||||||
math.vectors.specialization parser prettyprint.custom sequences
|
|
||||||
sequences.private locals assocs words fry ;
|
|
||||||
FROM: alien.c-types => float ;
|
|
||||||
QUALIFIED-WITH: math m
|
|
||||||
IN: math.vectors.simd
|
IN: math.vectors.simd
|
||||||
|
|
||||||
<<
|
<<
|
||||||
|
|
||||||
DEFER: float-4
|
{ double float char uchar short ushort int uint }
|
||||||
DEFER: double-2
|
[ [ define-simd-128 ] [ define-simd-256 ] bi ] each
|
||||||
DEFER: float-8
|
|
||||||
DEFER: double-4
|
|
||||||
|
|
||||||
"double" define-simd-128
|
|
||||||
"float" define-simd-128
|
|
||||||
"double" define-simd-256
|
|
||||||
"float" define-simd-256
|
|
||||||
|
|
||||||
>>
|
>>
|
||||||
|
|
||||||
: float-4-with ( x -- simd-array )
|
|
||||||
[ 4 ] dip >float '[ _ ] \ float-4 new replicate-as ;
|
|
||||||
|
|
||||||
: float-4-boa ( a b c d -- simd-array )
|
|
||||||
\ float-4 new 4sequence ;
|
|
||||||
|
|
||||||
: double-2-with ( x -- simd-array )
|
|
||||||
[ 2 ] dip >float '[ _ ] \ double-2 new replicate-as ;
|
|
||||||
|
|
||||||
: double-2-boa ( a b -- simd-array )
|
|
||||||
\ double-2 new 2sequence ;
|
|
||||||
|
|
||||||
! More efficient expansions for the above, used when SIMD is
|
|
||||||
! actually available.
|
|
||||||
|
|
||||||
<<
|
|
||||||
|
|
||||||
\ float-4-with [
|
|
||||||
drop
|
|
||||||
\ (simd-broadcast) "intrinsic" word-prop [
|
|
||||||
[ >float float-4-rep (simd-broadcast) \ float-4 boa ]
|
|
||||||
] [ \ float-4-with def>> ] if
|
|
||||||
] "custom-inlining" set-word-prop
|
|
||||||
|
|
||||||
\ float-4-boa [
|
|
||||||
drop
|
|
||||||
\ (simd-gather-4) "intrinsic" word-prop [
|
|
||||||
[| a b c d |
|
|
||||||
a >float b >float c >float d >float
|
|
||||||
float-4-rep (simd-gather-4) \ float-4 boa
|
|
||||||
]
|
|
||||||
] [ \ float-4-boa def>> ] if
|
|
||||||
] "custom-inlining" set-word-prop
|
|
||||||
|
|
||||||
\ double-2-with [
|
|
||||||
drop
|
|
||||||
\ (simd-broadcast) "intrinsic" word-prop [
|
|
||||||
[ >float double-2-rep (simd-broadcast) \ double-2 boa ]
|
|
||||||
] [ \ double-2-with def>> ] if
|
|
||||||
] "custom-inlining" set-word-prop
|
|
||||||
|
|
||||||
\ double-2-boa [
|
|
||||||
drop
|
|
||||||
\ (simd-gather-4) "intrinsic" word-prop [
|
|
||||||
[ [ >float ] bi@ double-2-rep (simd-gather-2) \ double-2 boa ]
|
|
||||||
] [ \ double-2-boa def>> ] if
|
|
||||||
] "custom-inlining" set-word-prop
|
|
||||||
|
|
||||||
>>
|
|
||||||
|
|
||||||
: float-8-with ( x -- simd-array )
|
|
||||||
[ float-4-with ] [ float-4-with ] bi [ underlying>> ] bi@
|
|
||||||
\ float-8 boa ; inline
|
|
||||||
|
|
||||||
:: float-8-boa ( a b c d e f g h -- simd-array )
|
|
||||||
a b c d float-4-boa
|
|
||||||
e f g h float-4-boa
|
|
||||||
[ underlying>> ] bi@
|
|
||||||
\ float-8 boa ; inline
|
|
||||||
|
|
||||||
: double-4-with ( x -- simd-array )
|
|
||||||
[ double-2-with ] [ double-2-with ] bi [ underlying>> ] bi@
|
|
||||||
\ double-4 boa ; inline
|
|
||||||
|
|
||||||
:: double-4-boa ( a b c d -- simd-array )
|
|
||||||
a b double-2-boa
|
|
||||||
c d double-2-boa
|
|
||||||
[ underlying>> ] bi@
|
|
||||||
\ double-4 boa ; inline
|
|
||||||
|
|
||||||
<<
|
|
||||||
|
|
||||||
<PRIVATE
|
|
||||||
|
|
||||||
! Filter out operations that are not available, eg horizontal adds
|
|
||||||
! on SSE2. Fallback code in math.vectors is used in that case.
|
|
||||||
|
|
||||||
: supported-simd-ops ( assoc -- assoc' )
|
|
||||||
{
|
|
||||||
{ v+ (simd-v+) }
|
|
||||||
{ v- (simd-v-) }
|
|
||||||
{ v* (simd-v*) }
|
|
||||||
{ v/ (simd-v/) }
|
|
||||||
{ vmin (simd-vmin) }
|
|
||||||
{ vmax (simd-vmax) }
|
|
||||||
{ sum (simd-sum) }
|
|
||||||
} [ nip "intrinsic" word-prop ] assoc-filter
|
|
||||||
'[ drop _ key? ] assoc-filter ;
|
|
||||||
|
|
||||||
! Some SIMD operations are defined in terms of others.
|
|
||||||
|
|
||||||
:: high-level-ops ( ctor -- assoc )
|
|
||||||
{
|
|
||||||
{ vneg [ [ dup v- ] keep v- ] }
|
|
||||||
{ v. [ v* sum ] }
|
|
||||||
{ n+v [ [ ctor execute ] dip v+ ] }
|
|
||||||
{ v+n [ ctor execute v+ ] }
|
|
||||||
{ n-v [ [ ctor execute ] dip v- ] }
|
|
||||||
{ v-n [ ctor execute v- ] }
|
|
||||||
{ n*v [ [ ctor execute ] dip v* ] }
|
|
||||||
{ v*n [ ctor execute v* ] }
|
|
||||||
{ n/v [ [ ctor execute ] dip v/ ] }
|
|
||||||
{ v/n [ ctor execute v/ ] }
|
|
||||||
{ norm-sq [ dup v. assert-positive ] }
|
|
||||||
{ norm [ norm-sq sqrt ] }
|
|
||||||
{ normalize [ dup norm v/n ] }
|
|
||||||
{ distance [ v- norm ] }
|
|
||||||
} ;
|
|
||||||
|
|
||||||
:: simd-vector-words ( class ctor elt-type assoc -- )
|
|
||||||
class elt-type assoc supported-simd-ops ctor high-level-ops assoc-union
|
|
||||||
specialize-vector-words ;
|
|
||||||
|
|
||||||
PRIVATE>
|
|
||||||
|
|
||||||
\ float-4 \ float-4-with m:float H{
|
|
||||||
{ v+ [ [ (simd-v+) ] float-4-vv->v-op ] }
|
|
||||||
{ v- [ [ (simd-v-) ] float-4-vv->v-op ] }
|
|
||||||
{ v* [ [ (simd-v*) ] float-4-vv->v-op ] }
|
|
||||||
{ v/ [ [ (simd-v/) ] float-4-vv->v-op ] }
|
|
||||||
{ vmin [ [ (simd-vmin) ] float-4-vv->v-op ] }
|
|
||||||
{ vmax [ [ (simd-vmax) ] float-4-vv->v-op ] }
|
|
||||||
{ sum [ [ (simd-sum) ] float-4-v->n-op ] }
|
|
||||||
} simd-vector-words
|
|
||||||
|
|
||||||
\ double-2 \ double-2-with m:float H{
|
|
||||||
{ v+ [ [ (simd-v+) ] double-2-vv->v-op ] }
|
|
||||||
{ v- [ [ (simd-v-) ] double-2-vv->v-op ] }
|
|
||||||
{ v* [ [ (simd-v*) ] double-2-vv->v-op ] }
|
|
||||||
{ v/ [ [ (simd-v/) ] double-2-vv->v-op ] }
|
|
||||||
{ vmin [ [ (simd-vmin) ] double-2-vv->v-op ] }
|
|
||||||
{ vmax [ [ (simd-vmax) ] double-2-vv->v-op ] }
|
|
||||||
{ sum [ [ (simd-sum) ] double-2-v->n-op ] }
|
|
||||||
} simd-vector-words
|
|
||||||
|
|
||||||
\ float-8 \ float-8-with m:float H{
|
|
||||||
{ v+ [ [ (simd-v+) ] float-8-vv->v-op ] }
|
|
||||||
{ v- [ [ (simd-v-) ] float-8-vv->v-op ] }
|
|
||||||
{ v* [ [ (simd-v*) ] float-8-vv->v-op ] }
|
|
||||||
{ v/ [ [ (simd-v/) ] float-8-vv->v-op ] }
|
|
||||||
{ vmin [ [ (simd-vmin) ] float-8-vv->v-op ] }
|
|
||||||
{ vmax [ [ (simd-vmax) ] float-8-vv->v-op ] }
|
|
||||||
{ sum [ [ (simd-sum) ] [ + ] float-8-v->n-op ] }
|
|
||||||
} simd-vector-words
|
|
||||||
|
|
||||||
\ double-4 \ double-4-with m:float H{
|
|
||||||
{ v+ [ [ (simd-v+) ] double-4-vv->v-op ] }
|
|
||||||
{ v- [ [ (simd-v-) ] double-4-vv->v-op ] }
|
|
||||||
{ v* [ [ (simd-v*) ] double-4-vv->v-op ] }
|
|
||||||
{ v/ [ [ (simd-v/) ] double-4-vv->v-op ] }
|
|
||||||
{ vmin [ [ (simd-vmin) ] double-4-vv->v-op ] }
|
|
||||||
{ vmax [ [ (simd-vmax) ] double-4-vv->v-op ] }
|
|
||||||
{ sum [ [ (simd-v+) ] [ (simd-sum) ] double-4-v->n-op ] }
|
|
||||||
} simd-vector-words
|
|
||||||
|
|
||||||
>>
|
|
||||||
|
|
||||||
USE: vocabs.loader
|
|
||||||
|
|
||||||
"math.vectors.simd.alien" require
|
"math.vectors.simd.alien" require
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Single-instruction-multiple-data parallel vector operations
|
Loading…
Reference in New Issue