diff --git a/basis/alien/c-types/c-types.factor b/basis/alien/c-types/c-types.factor index 1ad4f75a3c..0ed111c077 100755 --- a/basis/alien/c-types/c-types.factor +++ b/basis/alien/c-types/c-types.factor @@ -479,6 +479,8 @@ M: short-8-rep rep-component-type drop short ; M: ushort-8-rep rep-component-type drop ushort ; M: int-4-rep rep-component-type drop int ; M: uint-4-rep rep-component-type drop uint ; +M: longlong-2-rep rep-component-type drop longlong ; +M: ulonglong-2-rep rep-component-type drop ulonglong ; M: float-4-rep rep-component-type drop float ; M: double-2-rep rep-component-type drop double ; diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 2dbe724f0a..dd817117b6 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -27,7 +27,9 @@ uchar-16-rep short-8-rep ushort-8-rep int-4-rep -uint-4-rep ; +uint-4-rep +longlong-2-rep +ulonglong-2-rep ; SINGLETONS: float-4-rep @@ -39,7 +41,9 @@ uchar-16-rep short-8-rep ushort-8-rep int-4-rep -uint-4-rep ; +uint-4-rep +longlong-2-rep +ulonglong-2-rep ; UNION: float-vector-rep float-4-rep diff --git a/basis/cpu/x86/assembler/assembler.factor b/basis/cpu/x86/assembler/assembler.factor index 3cb41fd544..ceb9c54e6e 100644 --- a/basis/cpu/x86/assembler/assembler.factor +++ b/basis/cpu/x86/assembler/assembler.factor @@ -223,9 +223,13 @@ GENERIC: CALL ( op -- ) M: integer CALL HEX: e8 , 4, ; M: operand CALL { BIN: 010 t HEX: ff } 1-operand ; + + : JO ( dst -- ) HEX: 80 JUMPcc ; : JNO ( dst -- ) HEX: 81 JUMPcc ; : JB ( dst -- ) HEX: 82 JUMPcc ; diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 1a96e93c63..efc6ace101 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -2,10 +2,10 @@ ! See http://factorcode.org/license.txt for BSD license. 
USING: accessors assocs alien alien.c-types arrays strings cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands -cpu.architecture kernel kernel.private math memory namespaces make -sequences words system layouts combinators math.order fry locals -compiler.constants byte-arrays io macros quotations cpu.x86.features -cpu.x86.features.private compiler compiler.units init vm +cpu.x86.features cpu.x86.features.private cpu.architecture kernel +kernel.private math memory namespaces make sequences words system +layouts combinators math.order fry locals compiler.constants +byte-arrays io macros quotations compiler compiler.units init vm compiler.cfg.registers compiler.cfg.instructions compiler.cfg.intrinsics @@ -259,8 +259,8 @@ MACRO: available-reps ( alist -- ) M: x86 %broadcast-vector ( dst src rep -- ) { - { float-4-rep [ [ MOVSS ] [ drop dup 0 SHUFPS ] 2bi ] } - { double-2-rep [ [ MOVSD ] [ drop dup UNPCKLPD ] 2bi ] } + { float-4-rep [ [ float-4-rep copy-register ] [ drop dup 0 SHUFPS ] 2bi ] } + { double-2-rep [ [ double-2-rep copy-register ] [ drop dup UNPCKLPD ] 2bi ] } } case ; M: x86 %broadcast-vector-reps @@ -274,7 +274,7 @@ M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- ) { float-4-rep [ - dst src1 MOVSS + dst src1 float-4-rep copy-register dst src2 UNPCKLPS src3 src4 UNPCKLPS dst src3 MOVLHPS @@ -292,7 +292,7 @@ M:: x86 %gather-vector-2 ( dst src1 src2 rep -- ) { double-2-rep [ - dst src1 MOVSD + dst src1 double-2-rep copy-register dst src2 UNPCKLPD ] } @@ -313,12 +313,14 @@ M: x86 %add-vector ( dst src1 src2 rep -- ) { ushort-8-rep [ PADDW ] } { int-4-rep [ PADDD ] } { uint-4-rep [ PADDD ] } + { longlong-2-rep [ PADDQ ] } + { ulonglong-2-rep [ PADDQ ] } } case drop ; M: x86 %add-vector-reps { { sse? { float-4-rep } } - { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } + { sse2? 
{ double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; M: x86 %saturated-add-vector ( dst src1 src2 rep -- ) @@ -355,12 +357,14 @@ M: x86 %sub-vector ( dst src1 src2 rep -- ) { ushort-8-rep [ PSUBW ] } { int-4-rep [ PSUBD ] } { uint-4-rep [ PSUBD ] } + { longlong-2-rep [ PSUBQ ] } + { ulonglong-2-rep [ PSUBQ ] } } case drop ; M: x86 %sub-vector-reps { { sse? { float-4-rep } } - { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } + { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; M: x86 %saturated-sub-vector ( dst src1 src2 rep -- ) @@ -389,7 +393,8 @@ M: x86 %mul-vector ( dst src1 src2 rep -- ) M: x86 %mul-vector-reps { { sse? { float-4-rep } } - { sse2? { double-2-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } + { sse2? { double-2-rep short-8-rep ushort-8-rep } } + { sse4.1? { int-4-rep uint-4-rep } } } available-reps ; M: x86 %saturated-mul-vector-reps @@ -448,8 +453,8 @@ M: x86 %max-vector-reps M: x86 %horizontal-add-vector ( dst src rep -- ) { - { float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] } - { double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] } + { float-4-rep [ [ float-4-rep copy-register ] [ HADDPS ] [ HADDPS ] 2tri ] } + { double-2-rep [ [ double-2-rep copy-register ] [ HADDPD ] 2bi ] } } case ; M: x86 %horizontal-add-vector-reps @@ -485,54 +490,39 @@ M: x86 %and-vector ( dst src1 src2 rep -- ) { { float-4-rep [ ANDPS ] } { double-2-rep [ ANDPD ] } - { char-16-rep [ PAND ] } - { uchar-16-rep [ PAND ] } - { short-8-rep [ PAND ] } - { ushort-8-rep [ PAND ] } - { int-4-rep [ PAND ] } - { uint-4-rep [ PAND ] } + [ drop PAND ] } case drop ; M: x86 %and-vector-reps { { sse? { float-4-rep } } - { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } + { sse2? 
{ double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; M: x86 %or-vector ( dst src1 src2 rep -- ) { { float-4-rep [ ORPS ] } { double-2-rep [ ORPD ] } - { char-16-rep [ POR ] } - { uchar-16-rep [ POR ] } - { short-8-rep [ POR ] } - { ushort-8-rep [ POR ] } - { int-4-rep [ POR ] } - { uint-4-rep [ POR ] } + [ drop POR ] } case drop ; M: x86 %or-vector-reps { { sse? { float-4-rep } } - { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } + { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; M: x86 %xor-vector ( dst src1 src2 rep -- ) { { float-4-rep [ XORPS ] } { double-2-rep [ XORPD ] } - { char-16-rep [ PXOR ] } - { uchar-16-rep [ PXOR ] } - { short-8-rep [ PXOR ] } - { ushort-8-rep [ PXOR ] } - { int-4-rep [ PXOR ] } - { uint-4-rep [ PXOR ] } + [ drop PXOR ] } case drop ; M: x86 %xor-vector-reps { { sse? { float-4-rep } } - { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } + { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; M: x86 %unbox-alien ( dst src -- ) @@ -648,9 +638,6 @@ M: x86.64 has-small-reg? 2drop t ; [ quot call ] with-save/restore ] if ; inline -: ?MOV ( dst src -- ) - 2dup = [ 2drop ] [ MOV ] if ; inline - M:: x86 %string-nth ( dst src index temp -- ) ! We request a small-reg of size 8 since those of size 16 are ! a superset. @@ -678,12 +665,12 @@ M:: x86 %string-nth ( dst src index temp -- ) ! 
Compute code point new-dst temp XOR "end" resolve-label - dst new-dst ?MOV + dst new-dst int-rep copy-register ] with-small-register ; M:: x86 %set-string-nth-fast ( ch str index temp -- ) ch { index str temp } 8 [| new-ch | - new-ch ch ?MOV + new-ch ch int-rep copy-register temp str index [+] LEA temp string-offset [+] new-ch 8-bit-version-of MOV ] with-small-register ; @@ -692,7 +679,7 @@ M:: x86 %set-string-nth-fast ( ch str index temp -- ) dst { src } size [| new-dst | new-dst dup size n-bit-version-of dup src [] MOV quot call - dst new-dst ?MOV + dst new-dst int-rep copy-register ] with-small-register ; inline : %alien-unsigned-getter ( dst src size -- ) @@ -716,7 +703,7 @@ M: x86 %alien-vector [ [] ] dip copy-register ; :: %alien-integer-setter ( ptr value size -- ) value { ptr } size [| new-value | - new-value value ?MOV + new-value value int-rep copy-register ptr [] new-value size n-bit-version-of MOV ] with-small-register ; inline diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor index 9b832526d8..2fdb9ff88c 100644 --- a/basis/math/vectors/simd/simd-docs.factor +++ b/basis/math/vectors/simd/simd-docs.factor @@ -52,6 +52,10 @@ $nl "uint-4" "int-8" "uint-8" + "longlong-2" + "ulonglong-2" + "longlong-4" + "ulonglong-4" "float-4" "float-8" "double-2" @@ -92,7 +96,7 @@ SYMBOLS: x y ; { $code """USING: compiler.tree.debugger kernel.private math.vectors math.vectors.simd ; -SIMD: float-4 +SIMD: float IN: simd-demo : interpolate ( v a b -- w ) @@ -106,7 +110,7 @@ $nl { $code """USING: compiler.tree.debugger hints math.vectors math.vectors.simd ; -SIMD: float-4 +SIMD: float IN: simd-demo : interpolate ( v a b -- w ) @@ -122,7 +126,7 @@ $nl "In the " { $snippet "interpolate" } " word, there is still a call to the " { $link } " primitive, because the return value at the end is being boxed on the heap. 
In the next example, no memory allocation occurs at all because the SIMD vectors are stored inside a struct class (see " { $link "classes.struct" } "); also note the use of inlining:" { $code """USING: compiler.tree.debugger math.vectors math.vectors.simd ; -SIMD: float-4 +SIMD: float IN: simd-demo STRUCT: actor @@ -192,8 +196,8 @@ ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)" { $subsection "math.vectors.simd.intrinsics" } ; HELP: SIMD: -{ $syntax "SIMD: type-length" } -{ $values { "type" "a scalar C type" } { "length" "a vector dimension" } } -{ $description "Brings a SIMD array for holding " { $snippet "length" } " values of " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ; +{ $syntax "SIMD: type" } +{ $values { "type" "a scalar C type" } } +{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of " { $snippet "type" } " and brings them into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "."
} ; ABOUT: "math.vectors.simd" diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index db8597fc9d..312dfc2cbd 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -5,35 +5,35 @@ math.vectors.simd.private prettyprint random sequences system tools.test vocabs assocs compiler.cfg.debugger words locals math.vectors.specialization combinators cpu.architecture math.vectors.simd.intrinsics namespaces byte-arrays alien -specialized-arrays classes.struct ; +specialized-arrays classes.struct eval ; FROM: alien.c-types => c-type-boxed-class ; SPECIALIZED-ARRAY: float -SIMD: char-16 -SIMD: uchar-16 -SIMD: char-32 -SIMD: uchar-32 -SIMD: short-8 -SIMD: ushort-8 -SIMD: short-16 -SIMD: ushort-16 -SIMD: int-4 -SIMD: uint-4 -SIMD: int-8 -SIMD: uint-8 -SIMD: float-4 -SIMD: float-8 -SIMD: double-2 -SIMD: double-4 +SIMD: char +SIMD: uchar +SIMD: short +SIMD: ushort +SIMD: int +SIMD: uint +SIMD: longlong +SIMD: ulonglong +SIMD: float +SIMD: double IN: math.vectors.simd.tests -[ float-4{ 0 0 0 0 } ] [ float-4 new ] unit-test +! Make sure the functor doesn't generate bogus vocabularies +2 [ [ "USE: math.vectors.simd SIMD: rubinius" eval( -- ) ] must-fail ] times -[ float-4{ 0 0 0 0 } ] [ [ float-4 new ] compile-call ] unit-test +[ f ] [ "math.vectors.simd.instances.rubinius" vocab ] unit-test +! Test type propagation [ V{ float } ] [ [ { float-4 } declare norm-sq ] final-classes ] unit-test [ V{ float } ] [ [ { float-4 } declare norm ] final-classes ] unit-test +[ V{ float-4 } ] [ [ { float-4 } declare normalize ] final-classes ] unit-test + +[ V{ float-4 } ] [ [ { float-4 float-4 } declare v+ ] final-classes ] unit-test + ! Test puns; only on x86 cpu x86? 
[ [ double-2{ 4 1024 } ] [ @@ -62,6 +62,10 @@ CONSTANT: simd-classes uint-4 int-8 uint-8 + longlong-2 + ulonglong-2 + longlong-4 + ulonglong-4 float-4 float-8 double-2 diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index fe043032b8..71936b2657 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -3,30 +3,39 @@ USING: alien.c-types combinators fry kernel lexer math math.parser math.vectors.simd.functor sequences splitting vocabs.generated vocabs.loader vocabs.parser words ; +QUALIFIED-WITH: alien.c-types c IN: math.vectors.simd -ERROR: bad-vector-size bits ; +ERROR: bad-base-type type ; number ] bi* - * 8 * { - { 128 [ [ define-simd-128 ] ] } - { 256 [ [ define-simd-256 ] ] } - [ bad-vector-size ] +: parse-base-type ( string -- c-type ) + { + { "char" [ c:char ] } + { "uchar" [ c:uchar ] } + { "short" [ c:short ] } + { "ushort" [ c:ushort ] } + { "int" [ c:int ] } + { "uint" [ c:uint ] } + { "longlong" [ c:longlong ] } + { "ulonglong" [ c:ulonglong ] } + { "float" [ c:float ] } + { "double" [ c:double ] } + [ bad-base-type ] } case ; PRIVATE> : define-simd-vocab ( type -- vocab ) - [ simd-vocab ] - [ '[ _ parse-simd-name call( type -- ) ] ] bi - generate-vocab ; + [ simd-vocab ] keep '[ + _ parse-base-type + [ define-simd-128 ] + [ define-simd-256 ] bi + ] generate-vocab ; SYNTAX: SIMD: scan define-simd-vocab use-vocab ;