Add longlong-2, ulonglong-2, longlong-4, ulonglong-4 SIMD types, fix int-4 multiplication on SSE2

db4
Slava Pestov 2009-09-23 20:23:25 -05:00
parent 27fed2746a
commit dfc9fd071e
7 changed files with 94 additions and 80 deletions

View File

@ -479,6 +479,8 @@ M: short-8-rep rep-component-type drop short ;
M: ushort-8-rep rep-component-type drop ushort ; M: ushort-8-rep rep-component-type drop ushort ;
M: int-4-rep rep-component-type drop int ; M: int-4-rep rep-component-type drop int ;
M: uint-4-rep rep-component-type drop uint ; M: uint-4-rep rep-component-type drop uint ;
M: longlong-2-rep rep-component-type drop longlong ;
M: ulonglong-2-rep rep-component-type drop ulonglong ;
M: float-4-rep rep-component-type drop float ; M: float-4-rep rep-component-type drop float ;
M: double-2-rep rep-component-type drop double ; M: double-2-rep rep-component-type drop double ;

View File

@ -27,7 +27,9 @@ uchar-16-rep
short-8-rep short-8-rep
ushort-8-rep ushort-8-rep
int-4-rep int-4-rep
uint-4-rep ; uint-4-rep
longlong-2-rep
ulonglong-2-rep ;
SINGLETONS: SINGLETONS:
float-4-rep float-4-rep
@ -39,7 +41,9 @@ uchar-16-rep
short-8-rep short-8-rep
ushort-8-rep ushort-8-rep
int-4-rep int-4-rep
uint-4-rep ; uint-4-rep
longlong-2-rep
ulonglong-2-rep ;
UNION: float-vector-rep UNION: float-vector-rep
float-4-rep float-4-rep

View File

@ -223,9 +223,13 @@ GENERIC: CALL ( op -- )
M: integer CALL HEX: e8 , 4, ; M: integer CALL HEX: e8 , 4, ;
M: operand CALL { BIN: 010 t HEX: ff } 1-operand ; M: operand CALL { BIN: 010 t HEX: ff } 1-operand ;
<PRIVATE
GENERIC# JUMPcc 1 ( addr opcode -- ) GENERIC# JUMPcc 1 ( addr opcode -- )
M: integer JUMPcc extended-opcode, 4, ; M: integer JUMPcc extended-opcode, 4, ;
PRIVATE>
: JO ( dst -- ) HEX: 80 JUMPcc ; : JO ( dst -- ) HEX: 80 JUMPcc ;
: JNO ( dst -- ) HEX: 81 JUMPcc ; : JNO ( dst -- ) HEX: 81 JUMPcc ;
: JB ( dst -- ) HEX: 82 JUMPcc ; : JB ( dst -- ) HEX: 82 JUMPcc ;

View File

@ -2,10 +2,10 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors assocs alien alien.c-types arrays strings USING: accessors assocs alien alien.c-types arrays strings
cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands
cpu.architecture kernel kernel.private math memory namespaces make cpu.x86.features cpu.x86.features.private cpu.architecture kernel
sequences words system layouts combinators math.order fry locals kernel.private math memory namespaces make sequences words system
compiler.constants byte-arrays io macros quotations cpu.x86.features layouts combinators math.order fry locals compiler.constants
cpu.x86.features.private compiler compiler.units init vm byte-arrays io macros quotations compiler compiler.units init vm
compiler.cfg.registers compiler.cfg.registers
compiler.cfg.instructions compiler.cfg.instructions
compiler.cfg.intrinsics compiler.cfg.intrinsics
@ -259,8 +259,8 @@ MACRO: available-reps ( alist -- )
M: x86 %broadcast-vector ( dst src rep -- ) M: x86 %broadcast-vector ( dst src rep -- )
{ {
{ float-4-rep [ [ MOVSS ] [ drop dup 0 SHUFPS ] 2bi ] } { float-4-rep [ [ float-4-rep copy-register ] [ drop dup 0 SHUFPS ] 2bi ] }
{ double-2-rep [ [ MOVSD ] [ drop dup UNPCKLPD ] 2bi ] } { double-2-rep [ [ double-2-rep copy-register ] [ drop dup UNPCKLPD ] 2bi ] }
} case ; } case ;
M: x86 %broadcast-vector-reps M: x86 %broadcast-vector-reps
@ -274,7 +274,7 @@ M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
{ {
float-4-rep float-4-rep
[ [
dst src1 MOVSS dst src1 float-4-rep copy-register
dst src2 UNPCKLPS dst src2 UNPCKLPS
src3 src4 UNPCKLPS src3 src4 UNPCKLPS
dst src3 MOVLHPS dst src3 MOVLHPS
@ -292,7 +292,7 @@ M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
{ {
double-2-rep double-2-rep
[ [
dst src1 MOVSD dst src1 double-2-rep copy-register
dst src2 UNPCKLPD dst src2 UNPCKLPD
] ]
} }
@ -313,12 +313,14 @@ M: x86 %add-vector ( dst src1 src2 rep -- )
{ ushort-8-rep [ PADDW ] } { ushort-8-rep [ PADDW ] }
{ int-4-rep [ PADDD ] } { int-4-rep [ PADDD ] }
{ uint-4-rep [ PADDD ] } { uint-4-rep [ PADDD ] }
{ longlong-2-rep [ PADDQ ] }
{ ulonglong-2-rep [ PADDQ ] }
} case drop ; } case drop ;
M: x86 %add-vector-reps M: x86 %add-vector-reps
{ {
{ sse? { float-4-rep } } { sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ; } available-reps ;
M: x86 %saturated-add-vector ( dst src1 src2 rep -- ) M: x86 %saturated-add-vector ( dst src1 src2 rep -- )
@ -355,12 +357,14 @@ M: x86 %sub-vector ( dst src1 src2 rep -- )
{ ushort-8-rep [ PSUBW ] } { ushort-8-rep [ PSUBW ] }
{ int-4-rep [ PSUBD ] } { int-4-rep [ PSUBD ] }
{ uint-4-rep [ PSUBD ] } { uint-4-rep [ PSUBD ] }
{ longlong-2-rep [ PSUBQ ] }
{ ulonglong-2-rep [ PSUBQ ] }
} case drop ; } case drop ;
M: x86 %sub-vector-reps M: x86 %sub-vector-reps
{ {
{ sse? { float-4-rep } } { sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ; } available-reps ;
M: x86 %saturated-sub-vector ( dst src1 src2 rep -- ) M: x86 %saturated-sub-vector ( dst src1 src2 rep -- )
@ -389,7 +393,8 @@ M: x86 %mul-vector ( dst src1 src2 rep -- )
M: x86 %mul-vector-reps M: x86 %mul-vector-reps
{ {
{ sse? { float-4-rep } } { sse? { float-4-rep } }
{ sse2? { double-2-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } { sse2? { double-2-rep short-8-rep ushort-8-rep } }
{ sse4.1? { int-4-rep uint-4-rep } }
} available-reps ; } available-reps ;
M: x86 %saturated-mul-vector-reps M: x86 %saturated-mul-vector-reps
@ -448,8 +453,8 @@ M: x86 %max-vector-reps
M: x86 %horizontal-add-vector ( dst src rep -- ) M: x86 %horizontal-add-vector ( dst src rep -- )
{ {
{ float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] } { float-4-rep [ [ float-4-rep copy-register ] [ HADDPS ] [ HADDPS ] 2tri ] }
{ double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] } { double-2-rep [ [ double-2-rep copy-register ] [ HADDPD ] 2bi ] }
} case ; } case ;
M: x86 %horizontal-add-vector-reps M: x86 %horizontal-add-vector-reps
@ -485,54 +490,39 @@ M: x86 %and-vector ( dst src1 src2 rep -- )
{ {
{ float-4-rep [ ANDPS ] } { float-4-rep [ ANDPS ] }
{ double-2-rep [ ANDPD ] } { double-2-rep [ ANDPD ] }
{ char-16-rep [ PAND ] } [ drop PAND ]
{ uchar-16-rep [ PAND ] }
{ short-8-rep [ PAND ] }
{ ushort-8-rep [ PAND ] }
{ int-4-rep [ PAND ] }
{ uint-4-rep [ PAND ] }
} case drop ; } case drop ;
M: x86 %and-vector-reps M: x86 %and-vector-reps
{ {
{ sse? { float-4-rep } } { sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ; } available-reps ;
M: x86 %or-vector ( dst src1 src2 rep -- ) M: x86 %or-vector ( dst src1 src2 rep -- )
{ {
{ float-4-rep [ ORPS ] } { float-4-rep [ ORPS ] }
{ double-2-rep [ ORPD ] } { double-2-rep [ ORPD ] }
{ char-16-rep [ POR ] } [ drop POR ]
{ uchar-16-rep [ POR ] }
{ short-8-rep [ POR ] }
{ ushort-8-rep [ POR ] }
{ int-4-rep [ POR ] }
{ uint-4-rep [ POR ] }
} case drop ; } case drop ;
M: x86 %or-vector-reps M: x86 %or-vector-reps
{ {
{ sse? { float-4-rep } } { sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ; } available-reps ;
M: x86 %xor-vector ( dst src1 src2 rep -- ) M: x86 %xor-vector ( dst src1 src2 rep -- )
{ {
{ float-4-rep [ XORPS ] } { float-4-rep [ XORPS ] }
{ double-2-rep [ XORPD ] } { double-2-rep [ XORPD ] }
{ char-16-rep [ PXOR ] } [ drop PXOR ]
{ uchar-16-rep [ PXOR ] }
{ short-8-rep [ PXOR ] }
{ ushort-8-rep [ PXOR ] }
{ int-4-rep [ PXOR ] }
{ uint-4-rep [ PXOR ] }
} case drop ; } case drop ;
M: x86 %xor-vector-reps M: x86 %xor-vector-reps
{ {
{ sse? { float-4-rep } } { sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ; } available-reps ;
M: x86 %unbox-alien ( dst src -- ) M: x86 %unbox-alien ( dst src -- )
@ -648,9 +638,6 @@ M: x86.64 has-small-reg? 2drop t ;
[ quot call ] with-save/restore [ quot call ] with-save/restore
] if ; inline ] if ; inline
: ?MOV ( dst src -- )
2dup = [ 2drop ] [ MOV ] if ; inline
M:: x86 %string-nth ( dst src index temp -- ) M:: x86 %string-nth ( dst src index temp -- )
! We request a small-reg of size 8 since those of size 16 are ! We request a small-reg of size 8 since those of size 16 are
! a superset. ! a superset.
@ -678,12 +665,12 @@ M:: x86 %string-nth ( dst src index temp -- )
! Compute code point ! Compute code point
new-dst temp XOR new-dst temp XOR
"end" resolve-label "end" resolve-label
dst new-dst ?MOV dst new-dst int-rep copy-register
] with-small-register ; ] with-small-register ;
M:: x86 %set-string-nth-fast ( ch str index temp -- ) M:: x86 %set-string-nth-fast ( ch str index temp -- )
ch { index str temp } 8 [| new-ch | ch { index str temp } 8 [| new-ch |
new-ch ch ?MOV new-ch ch int-rep copy-register
temp str index [+] LEA temp str index [+] LEA
temp string-offset [+] new-ch 8-bit-version-of MOV temp string-offset [+] new-ch 8-bit-version-of MOV
] with-small-register ; ] with-small-register ;
@ -692,7 +679,7 @@ M:: x86 %set-string-nth-fast ( ch str index temp -- )
dst { src } size [| new-dst | dst { src } size [| new-dst |
new-dst dup size n-bit-version-of dup src [] MOV new-dst dup size n-bit-version-of dup src [] MOV
quot call quot call
dst new-dst ?MOV dst new-dst int-rep copy-register
] with-small-register ; inline ] with-small-register ; inline
: %alien-unsigned-getter ( dst src size -- ) : %alien-unsigned-getter ( dst src size -- )
@ -716,7 +703,7 @@ M: x86 %alien-vector [ [] ] dip copy-register ;
:: %alien-integer-setter ( ptr value size -- ) :: %alien-integer-setter ( ptr value size -- )
value { ptr } size [| new-value | value { ptr } size [| new-value |
new-value value ?MOV new-value value int-rep copy-register
ptr [] new-value size n-bit-version-of MOV ptr [] new-value size n-bit-version-of MOV
] with-small-register ; inline ] with-small-register ; inline

View File

@ -52,6 +52,10 @@ $nl
"uint-4" "uint-4"
"int-8" "int-8"
"uint-8" "uint-8"
"longlong-2"
"ulonglong-2"
"longlong-4"
"ulonglong-4"
"float-4" "float-4"
"float-8" "float-8"
"double-2" "double-2"
@ -92,7 +96,7 @@ SYMBOLS: x y ;
{ $code { $code
"""USING: compiler.tree.debugger kernel.private """USING: compiler.tree.debugger kernel.private
math.vectors math.vectors.simd ; math.vectors math.vectors.simd ;
SIMD: float-4 SIMD: float
IN: simd-demo IN: simd-demo
: interpolate ( v a b -- w ) : interpolate ( v a b -- w )
@ -106,7 +110,7 @@ $nl
{ $code { $code
"""USING: compiler.tree.debugger hints """USING: compiler.tree.debugger hints
math.vectors math.vectors.simd ; math.vectors math.vectors.simd ;
SIMD: float-4 SIMD: float
IN: simd-demo IN: simd-demo
: interpolate ( v a b -- w ) : interpolate ( v a b -- w )
@ -122,7 +126,7 @@ $nl
"In the " { $snippet "interpolate" } " word, there is still a call to the " { $link <tuple-boa> } " primitive, because the return value at the end is being boxed on the heap. In the next example, no memory allocation occurs at all because the SIMD vectors are stored inside a struct class (see " { $link "classes.struct" } "); also note the use of inlining:" "In the " { $snippet "interpolate" } " word, there is still a call to the " { $link <tuple-boa> } " primitive, because the return value at the end is being boxed on the heap. In the next example, no memory allocation occurs at all because the SIMD vectors are stored inside a struct class (see " { $link "classes.struct" } "); also note the use of inlining:"
{ $code { $code
"""USING: compiler.tree.debugger math.vectors math.vectors.simd ; """USING: compiler.tree.debugger math.vectors math.vectors.simd ;
SIMD: float-4 SIMD: float
IN: simd-demo IN: simd-demo
STRUCT: actor STRUCT: actor
@ -192,8 +196,8 @@ ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
{ $subsection "math.vectors.simd.intrinsics" } ; { $subsection "math.vectors.simd.intrinsics" } ;
HELP: SIMD: HELP: SIMD:
{ $syntax "SIMD: type-length" } { $syntax "SIMD: type" }
{ $values { "type" "a scalar C type" } { "length" "a vector dimension" } } { $values { "type" "a scalar C type" } }
{ $description "Brings a SIMD array for holding " { $snippet "length" } " values of " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ; { $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
ABOUT: "math.vectors.simd" ABOUT: "math.vectors.simd"

View File

@ -5,35 +5,35 @@ math.vectors.simd.private prettyprint random sequences system
tools.test vocabs assocs compiler.cfg.debugger words tools.test vocabs assocs compiler.cfg.debugger words
locals math.vectors.specialization combinators cpu.architecture locals math.vectors.specialization combinators cpu.architecture
math.vectors.simd.intrinsics namespaces byte-arrays alien math.vectors.simd.intrinsics namespaces byte-arrays alien
specialized-arrays classes.struct ; specialized-arrays classes.struct eval ;
FROM: alien.c-types => c-type-boxed-class ; FROM: alien.c-types => c-type-boxed-class ;
SPECIALIZED-ARRAY: float SPECIALIZED-ARRAY: float
SIMD: char-16 SIMD: char
SIMD: uchar-16 SIMD: uchar
SIMD: char-32 SIMD: short
SIMD: uchar-32 SIMD: ushort
SIMD: short-8 SIMD: int
SIMD: ushort-8 SIMD: uint
SIMD: short-16 SIMD: longlong
SIMD: ushort-16 SIMD: ulonglong
SIMD: int-4 SIMD: float
SIMD: uint-4 SIMD: double
SIMD: int-8
SIMD: uint-8
SIMD: float-4
SIMD: float-8
SIMD: double-2
SIMD: double-4
IN: math.vectors.simd.tests IN: math.vectors.simd.tests
[ float-4{ 0 0 0 0 } ] [ float-4 new ] unit-test ! Make sure the functor doesn't generate bogus vocabularies
2 [ [ "USE: math.vectors.simd SIMD: rubinius" eval( -- ) ] must-fail ] times
[ float-4{ 0 0 0 0 } ] [ [ float-4 new ] compile-call ] unit-test [ f ] [ "math.vectors.simd.instances.rubinius" vocab ] unit-test
! Test type propagation
[ V{ float } ] [ [ { float-4 } declare norm-sq ] final-classes ] unit-test [ V{ float } ] [ [ { float-4 } declare norm-sq ] final-classes ] unit-test
[ V{ float } ] [ [ { float-4 } declare norm ] final-classes ] unit-test [ V{ float } ] [ [ { float-4 } declare norm ] final-classes ] unit-test
[ V{ float-4 } ] [ [ { float-4 } declare normalize ] final-classes ] unit-test
[ V{ float-4 } ] [ [ { float-4 float-4 } declare v+ ] final-classes ] unit-test
! Test puns; only on x86 ! Test puns; only on x86
cpu x86? [ cpu x86? [
[ double-2{ 4 1024 } ] [ [ double-2{ 4 1024 } ] [
@ -62,6 +62,10 @@ CONSTANT: simd-classes
uint-4 uint-4
int-8 int-8
uint-8 uint-8
longlong-2
ulonglong-2
longlong-4
ulonglong-4
float-4 float-4
float-8 float-8
double-2 double-2

View File

@ -3,30 +3,39 @@
USING: alien.c-types combinators fry kernel lexer math math.parser USING: alien.c-types combinators fry kernel lexer math math.parser
math.vectors.simd.functor sequences splitting vocabs.generated math.vectors.simd.functor sequences splitting vocabs.generated
vocabs.loader vocabs.parser words ; vocabs.loader vocabs.parser words ;
QUALIFIED-WITH: alien.c-types c
IN: math.vectors.simd IN: math.vectors.simd
ERROR: bad-vector-size bits ; ERROR: bad-base-type type ;
<PRIVATE <PRIVATE
: simd-vocab ( type -- vocab ) : simd-vocab ( base-type -- vocab )
"math.vectors.simd.instances." prepend ; "math.vectors.simd.instances." prepend ;
: parse-simd-name ( string -- c-type quot ) : parse-base-type ( string -- c-type )
"-" split1 {
[ "alien.c-types" lookup dup heap-size ] [ string>number ] bi* { "char" [ c:char ] }
* 8 * { { "uchar" [ c:uchar ] }
{ 128 [ [ define-simd-128 ] ] } { "short" [ c:short ] }
{ 256 [ [ define-simd-256 ] ] } { "ushort" [ c:ushort ] }
[ bad-vector-size ] { "int" [ c:int ] }
{ "uint" [ c:uint ] }
{ "longlong" [ c:longlong ] }
{ "ulonglong" [ c:ulonglong ] }
{ "float" [ c:float ] }
{ "double" [ c:double ] }
[ bad-base-type ]
} case ; } case ;
PRIVATE> PRIVATE>
: define-simd-vocab ( type -- vocab ) : define-simd-vocab ( type -- vocab )
[ simd-vocab ] [ simd-vocab ] keep '[
[ '[ _ parse-simd-name call( type -- ) ] ] bi _ parse-base-type
generate-vocab ; [ define-simd-128 ]
[ define-simd-256 ] bi
] generate-vocab ;
SYNTAX: SIMD: SYNTAX: SIMD:
scan define-simd-vocab use-vocab ; scan define-simd-vocab use-vocab ;