Merge branch 'master' of git://factorcode.org/git/factor
commit
9b64d206ff
|
@ -235,8 +235,13 @@ M:: ppc %box-float ( dst src temp -- )
|
||||||
dst 16 float temp %allot
|
dst 16 float temp %allot
|
||||||
src dst float-offset STFD ;
|
src dst float-offset STFD ;
|
||||||
|
|
||||||
: float-function-param ( i spill-slot -- )
|
GENERIC: float-function-param* ( dst src -- )
|
||||||
[ float-regs param-regs nth 1 ] [ n>> spill@ ] bi* LFD ;
|
|
||||||
|
M: spill-slot float-function-param* [ 1 ] dip n>> spill@ LFD ;
|
||||||
|
M: integer float-function-param* FMR ;
|
||||||
|
|
||||||
|
: float-function-param ( i src -- )
|
||||||
|
[ float-regs param-regs nth ] dip float-function-param* ;
|
||||||
|
|
||||||
: float-function-return ( reg -- )
|
: float-function-return ( reg -- )
|
||||||
float-regs return-reg double-rep %copy ;
|
float-regs return-reg double-rep %copy ;
|
||||||
|
|
|
@ -618,11 +618,15 @@ ALIAS: PINSRQ PINSRD
|
||||||
: MOVDQA ( dest src -- ) { HEX: 6f HEX: 7f } HEX: 66 2-operand-rm-mr-sse ;
|
: MOVDQA ( dest src -- ) { HEX: 6f HEX: 7f } HEX: 66 2-operand-rm-mr-sse ;
|
||||||
: MOVDQU ( dest src -- ) { HEX: 6f HEX: 7f } HEX: f3 2-operand-rm-mr-sse ;
|
: MOVDQU ( dest src -- ) { HEX: 6f HEX: 7f } HEX: f3 2-operand-rm-mr-sse ;
|
||||||
|
|
||||||
|
<PRIVATE
|
||||||
|
|
||||||
: 2shuffler ( indexes/mask -- mask )
|
: 2shuffler ( indexes/mask -- mask )
|
||||||
dup integer? [ first2 { 1 0 } bitfield ] unless ;
|
dup integer? [ first2 { 1 0 } bitfield ] unless ;
|
||||||
: 4shuffler ( indexes/mask -- mask )
|
: 4shuffler ( indexes/mask -- mask )
|
||||||
dup integer? [ first4 { 6 4 2 0 } bitfield ] unless ;
|
dup integer? [ first4 { 6 4 2 0 } bitfield ] unless ;
|
||||||
|
|
||||||
|
PRIVATE>
|
||||||
|
|
||||||
: PSHUFD ( dest src imm -- ) 4shuffler HEX: 70 HEX: 66 3-operand-rm-sse ;
|
: PSHUFD ( dest src imm -- ) 4shuffler HEX: 70 HEX: 66 3-operand-rm-sse ;
|
||||||
: PSHUFLW ( dest src imm -- ) 4shuffler HEX: 70 HEX: f2 3-operand-rm-sse ;
|
: PSHUFLW ( dest src imm -- ) 4shuffler HEX: 70 HEX: f2 3-operand-rm-sse ;
|
||||||
: PSHUFHW ( dest src imm -- ) 4shuffler HEX: 70 HEX: f3 3-operand-rm-sse ;
|
: PSHUFHW ( dest src imm -- ) 4shuffler HEX: 70 HEX: f3 3-operand-rm-sse ;
|
||||||
|
|
|
@ -579,12 +579,12 @@ MACRO: available-reps ( alist -- )
|
||||||
'[ _ cond ] ;
|
'[ _ cond ] ;
|
||||||
|
|
||||||
: unsign-rep ( rep -- rep' )
|
: unsign-rep ( rep -- rep' )
|
||||||
dup {
|
{
|
||||||
{ uint-4-rep int-4-rep }
|
{ uint-4-rep int-4-rep }
|
||||||
{ ulonglong-2-rep longlong-2-rep }
|
{ ulonglong-2-rep longlong-2-rep }
|
||||||
{ ushort-8-rep short-8-rep }
|
{ ushort-8-rep short-8-rep }
|
||||||
{ uchar-16-rep char-16-rep }
|
{ uchar-16-rep char-16-rep }
|
||||||
} at* [ nip ] [ drop ] if ;
|
} ?at drop ;
|
||||||
|
|
||||||
M:: x86 %broadcast-vector ( dst src rep -- )
|
M:: x86 %broadcast-vector ( dst src rep -- )
|
||||||
rep unsign-rep {
|
rep unsign-rep {
|
||||||
|
@ -592,22 +592,23 @@ M:: x86 %broadcast-vector ( dst src rep -- )
|
||||||
dst src float-4-rep %copy
|
dst src float-4-rep %copy
|
||||||
dst dst { 0 0 0 0 } SHUFPS
|
dst dst { 0 0 0 0 } SHUFPS
|
||||||
] }
|
] }
|
||||||
{ double-2-rep [
|
{ double-2-rep [
|
||||||
dst src MOVDDUP
|
dst src MOVDDUP
|
||||||
] }
|
] }
|
||||||
{ longlong-2-rep [
|
{ longlong-2-rep [
|
||||||
dst src = [
|
dst src =
|
||||||
dst dst PUNPCKLQDQ
|
[ dst dst PUNPCKLQDQ ]
|
||||||
] [
|
[ dst src { 0 1 0 1 } PSHUFD ]
|
||||||
dst src { 0 1 0 1 } PSHUFD
|
if
|
||||||
] if
|
|
||||||
] }
|
] }
|
||||||
{ int-4-rep [ dst src { 0 0 0 0 } PSHUFD ] }
|
{ int-4-rep [
|
||||||
{ short-8-rep [
|
dst src { 0 0 0 0 } PSHUFD
|
||||||
|
] }
|
||||||
|
{ short-8-rep [
|
||||||
dst src { 0 0 0 0 } PSHUFLW
|
dst src { 0 0 0 0 } PSHUFLW
|
||||||
dst dst PUNPCKLQDQ
|
dst dst PUNPCKLQDQ
|
||||||
] }
|
] }
|
||||||
{ char-16-rep [
|
{ char-16-rep [
|
||||||
dst src char-16-rep %copy
|
dst src char-16-rep %copy
|
||||||
dst dst PUNPCKLBW
|
dst dst PUNPCKLBW
|
||||||
dst dst { 0 0 0 0 } PSHUFLW
|
dst dst { 0 0 0 0 } PSHUFLW
|
||||||
|
@ -619,13 +620,7 @@ M: x86 %broadcast-vector-reps
|
||||||
{
|
{
|
||||||
! Can't do this with sse1 since it will want to unbox
|
! Can't do this with sse1 since it will want to unbox
|
||||||
! a double-precision float and convert to single precision
|
! a double-precision float and convert to single precision
|
||||||
{ sse2? {
|
{ sse2? { float-4-rep double-2-rep longlong-2-rep ulonglong-2-rep int-4-rep uint-4-rep short-8-rep ushort-8-rep char-16-rep uchar-16-rep } }
|
||||||
float-4-rep double-2-rep
|
|
||||||
longlong-2-rep ulonglong-2-rep
|
|
||||||
int-4-rep uint-4-rep
|
|
||||||
short-8-rep ushort-8-rep
|
|
||||||
char-16-rep uchar-16-rep
|
|
||||||
} }
|
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
|
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
|
||||||
|
|
|
@ -20,8 +20,9 @@ ERROR: bad-base-type type ;
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
: define-simd-vocab ( type -- vocab )
|
: define-simd-vocab ( type -- vocab )
|
||||||
|
parse-base-type
|
||||||
[ simd-vocab ] keep '[
|
[ simd-vocab ] keep '[
|
||||||
_ parse-base-type
|
_
|
||||||
[ define-simd-128 ]
|
[ define-simd-128 ]
|
||||||
[ define-simd-256 ] bi
|
[ define-simd-256 ] bi
|
||||||
] generate-vocab ;
|
] generate-vocab ;
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
USING: specialized-arrays sequences.complex
|
USING: specialized-arrays sequences.complex
|
||||||
kernel sequences tools.test arrays accessors ;
|
kernel sequences tools.test arrays accessors ;
|
||||||
SPECIALIZED-ARRAY: float
|
QUALIFIED-WITH: alien.c-types c
|
||||||
|
SPECIALIZED-ARRAY: c:float
|
||||||
IN: sequences.complex.tests
|
IN: sequences.complex.tests
|
||||||
|
|
||||||
: test-array ( -- x )
|
: test-array ( -- x )
|
||||||
|
|
|
@ -5,7 +5,8 @@ USING: arrays accessors io io.files io.files.temp
|
||||||
io.encodings.binary kernel math math.constants math.functions
|
io.encodings.binary kernel math math.constants math.functions
|
||||||
math.vectors math.vectors.simd math.parser make sequences
|
math.vectors math.vectors.simd math.parser make sequences
|
||||||
sequences.private words hints classes.struct ;
|
sequences.private words hints classes.struct ;
|
||||||
SIMD: double
|
QUALIFIED-WITH: alien.c-types c
|
||||||
|
SIMD: c:double
|
||||||
IN: benchmark.raytracer-simd
|
IN: benchmark.raytracer-simd
|
||||||
|
|
||||||
! parameters
|
! parameters
|
||||||
|
|
|
@ -2,7 +2,8 @@
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: kernel io math math.functions math.parser math.vectors
|
USING: kernel io math math.functions math.parser math.vectors
|
||||||
math.vectors.simd sequences specialized-arrays ;
|
math.vectors.simd sequences specialized-arrays ;
|
||||||
SIMD: float
|
QUALIFIED-WITH: alien.c-types c
|
||||||
|
SIMD: c:float
|
||||||
SPECIALIZED-ARRAY: float-4
|
SPECIALIZED-ARRAY: float-4
|
||||||
IN: benchmark.simd-1
|
IN: benchmark.simd-1
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue