math.vectors.simd: add hlshift, hrshift (128-bit shift), vbitandn intrinsics
parent
2de6ab9e21
commit
b2ea3afd84
|
@ -340,6 +340,21 @@ def: dst/scalar-rep
|
|||
use: src
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##horizontal-sub-vector
|
||||
def: dst/scalar-rep
|
||||
use: src
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##horizontal-shl-vector
|
||||
def: dst
|
||||
use: src1
|
||||
literal: src2 rep ;
|
||||
|
||||
PURE-INSN: ##horizontal-shr-vector
|
||||
def: dst
|
||||
use: src1
|
||||
literal: src2 rep ;
|
||||
|
||||
PURE-INSN: ##abs-vector
|
||||
def: dst
|
||||
use: src
|
||||
|
@ -355,6 +370,11 @@ def: dst
|
|||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##andn-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##or-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
|
|
|
@ -167,10 +167,13 @@ IN: compiler.cfg.intrinsics
|
|||
{ math.vectors.simd.intrinsics:(simd-vabs) [ [ ^^abs-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vbitandn) [ [ ^^andn-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vlshift) [ [ ^^shl-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vrshift) [ [ ^^shr-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-hlshift) [ [ ^^horizontal-shl-vector ] emit-horizontal-shift ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-hrshift) [ [ ^^horizontal-shr-vector ] emit-horizontal-shift ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors byte-arrays fry cpu.architecture kernel
|
||||
USING: accessors byte-arrays fry cpu.architecture kernel math
|
||||
sequences compiler.tree.propagation.info
|
||||
compiler.cfg.builder.blocks compiler.cfg.stacks
|
||||
compiler.cfg.stacks.local compiler.cfg.hats
|
||||
|
@ -9,8 +9,8 @@ compiler.cfg.intrinsics.alien ;
|
|||
IN: compiler.cfg.intrinsics.simd
|
||||
|
||||
: emit-vector-op ( node quot: ( rep -- ) -- )
|
||||
[ dup node-input-infos last literal>> ] dip over representation?
|
||||
[ [ drop ] 2dip call ] [ 2drop emit-primitive ] if ; inline
|
||||
[ dup node-input-infos last literal>> dup representation? ] dip
|
||||
'[ nip @ ] [ drop emit-primitive ] if ; inline
|
||||
|
||||
: emit-binary-vector-op ( node quot -- )
|
||||
'[ [ ds-drop 2inputs ] dip @ ds-push ] emit-vector-op ; inline
|
||||
|
@ -18,6 +18,16 @@ IN: compiler.cfg.intrinsics.simd
|
|||
: emit-unary-vector-op ( node quot -- )
|
||||
'[ [ ds-drop ds-pop ] dip @ ds-push ] emit-vector-op ; inline
|
||||
|
||||
: emit-horizontal-shift ( node quot -- )
|
||||
[
|
||||
dup node-input-infos
|
||||
[ second literal>> ] [ third literal>> ] bi
|
||||
2dup [ integer? ] [ representation? ] bi* and
|
||||
] dip
|
||||
'[ [ drop ds-drop ds-drop ds-pop ] 2dip @ ds-push ]
|
||||
[ 2drop emit-primitive ]
|
||||
if ; inline
|
||||
|
||||
: emit-gather-vector-2 ( node -- )
|
||||
[ ^^gather-vector-2 ] emit-binary-vector-op ;
|
||||
|
||||
|
|
|
@ -176,8 +176,12 @@ CODEGEN: ##min-vector %min-vector
|
|||
CODEGEN: ##max-vector %max-vector
|
||||
CODEGEN: ##sqrt-vector %sqrt-vector
|
||||
CODEGEN: ##horizontal-add-vector %horizontal-add-vector
|
||||
CODEGEN: ##horizontal-sub-vector %horizontal-sub-vector
|
||||
CODEGEN: ##horizontal-shl-vector %horizontal-shl-vector
|
||||
CODEGEN: ##horizontal-shr-vector %horizontal-shr-vector
|
||||
CODEGEN: ##abs-vector %abs-vector
|
||||
CODEGEN: ##and-vector %and-vector
|
||||
CODEGEN: ##andn-vector %andn-vector
|
||||
CODEGEN: ##or-vector %or-vector
|
||||
CODEGEN: ##xor-vector %xor-vector
|
||||
CODEGEN: ##shl-vector %shl-vector
|
||||
|
|
|
@ -17,10 +17,13 @@ IN: compiler.tree.propagation.simd
|
|||
(simd-vabs)
|
||||
(simd-vsqrt)
|
||||
(simd-vbitand)
|
||||
(simd-vbitandn)
|
||||
(simd-vbitor)
|
||||
(simd-vbitxor)
|
||||
(simd-vlshift)
|
||||
(simd-vrshift)
|
||||
(simd-hlshift)
|
||||
(simd-hrshift)
|
||||
(simd-broadcast)
|
||||
(simd-gather-2)
|
||||
(simd-gather-4)
|
||||
|
|
|
@ -225,12 +225,16 @@ HOOK: %min-vector cpu ( dst src1 src2 rep -- )
|
|||
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %sqrt-vector cpu ( dst src rep -- )
|
||||
HOOK: %horizontal-add-vector cpu ( dst src rep -- )
|
||||
HOOK: %horizontal-sub-vector cpu ( dst src rep -- )
|
||||
HOOK: %abs-vector cpu ( dst src rep -- )
|
||||
HOOK: %and-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %andn-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %or-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %xor-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %shl-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %shr-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %horizontal-shl-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %horizontal-shr-vector cpu ( dst src1 src2 rep -- )
|
||||
|
||||
HOOK: %integer>scalar cpu ( dst src rep -- )
|
||||
HOOK: %scalar>integer cpu ( dst src rep -- )
|
||||
|
@ -250,12 +254,16 @@ HOOK: %min-vector-reps cpu ( -- reps )
|
|||
HOOK: %max-vector-reps cpu ( -- reps )
|
||||
HOOK: %sqrt-vector-reps cpu ( -- reps )
|
||||
HOOK: %horizontal-add-vector-reps cpu ( -- reps )
|
||||
HOOK: %horizontal-sub-vector-reps cpu ( -- reps )
|
||||
HOOK: %abs-vector-reps cpu ( -- reps )
|
||||
HOOK: %and-vector-reps cpu ( -- reps )
|
||||
HOOK: %andn-vector-reps cpu ( -- reps )
|
||||
HOOK: %or-vector-reps cpu ( -- reps )
|
||||
HOOK: %xor-vector-reps cpu ( -- reps )
|
||||
HOOK: %shl-vector-reps cpu ( -- reps )
|
||||
HOOK: %shr-vector-reps cpu ( -- reps )
|
||||
HOOK: %horizontal-shl-vector-reps cpu ( -- reps )
|
||||
HOOK: %horizontal-shr-vector-reps cpu ( -- reps )
|
||||
|
||||
HOOK: %unbox-alien cpu ( dst src -- )
|
||||
HOOK: %unbox-any-c-ptr cpu ( dst src temp -- )
|
||||
|
|
|
@ -412,6 +412,22 @@ M: x86 %horizontal-add-vector-reps
|
|||
{ sse3? { float-4-rep double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %horizontal-shl-vector ( dst src1 src2 rep -- )
|
||||
two-operand PSLLDQ ;
|
||||
|
||||
M: x86 %horizontal-shl-vector-reps
|
||||
{
|
||||
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %horizontal-shr-vector ( dst src1 src2 rep -- )
|
||||
two-operand PSRLDQ ;
|
||||
|
||||
M: x86 %horizontal-shr-vector-reps
|
||||
{
|
||||
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %abs-vector ( dst src rep -- )
|
||||
{
|
||||
{ char-16-rep [ PABSB ] }
|
||||
|
@ -450,6 +466,20 @@ M: x86 %and-vector-reps
|
|||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %andn-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ float-4-rep [ ANDNPS ] }
|
||||
{ double-2-rep [ ANDNPD ] }
|
||||
[ drop PANDN ]
|
||||
} case ;
|
||||
|
||||
M: x86 %andn-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %or-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
|
|
|
@ -40,14 +40,18 @@ SIMD-OP: vsqrt
|
|||
SIMD-OP: sum
|
||||
SIMD-OP: vabs
|
||||
SIMD-OP: vbitand
|
||||
SIMD-OP: vbitandn
|
||||
SIMD-OP: vbitor
|
||||
SIMD-OP: vbitxor
|
||||
SIMD-OP: vlshift
|
||||
SIMD-OP: vrshift
|
||||
SIMD-OP: hlshift
|
||||
SIMD-OP: hrshift
|
||||
|
||||
: (simd-broadcast) ( x rep -- v ) bad-simd-call ;
|
||||
: (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
|
||||
: (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ;
|
||||
|
||||
: assert-positive ( x -- y ) ;
|
||||
|
||||
: alien-vector ( c-ptr n rep -- value )
|
||||
|
@ -110,10 +114,13 @@ M: vector-rep supported-simd-op?
|
|||
{ \ (simd-sum) [ %horizontal-add-vector-reps ] }
|
||||
{ \ (simd-vabs) [ %abs-vector-reps ] }
|
||||
{ \ (simd-vbitand) [ %and-vector-reps ] }
|
||||
{ \ (simd-vbitandn) [ %andn-vector-reps ] }
|
||||
{ \ (simd-vbitor) [ %or-vector-reps ] }
|
||||
{ \ (simd-vbitxor) [ %xor-vector-reps ] }
|
||||
{ \ (simd-vlshift) [ %shl-vector-reps ] }
|
||||
{ \ (simd-vrshift) [ %shr-vector-reps ] }
|
||||
{ \ (simd-hlshift) [ %horizontal-shl-vector-reps ] }
|
||||
{ \ (simd-hrshift) [ %horizontal-shr-vector-reps ] }
|
||||
{ \ (simd-broadcast) [ %broadcast-vector-reps ] }
|
||||
{ \ (simd-gather-2) [ %gather-vector-2-reps ] }
|
||||
{ \ (simd-gather-4) [ %gather-vector-4-reps ] }
|
||||
|
|
|
@ -63,12 +63,12 @@ $nl
|
|||
} ;
|
||||
|
||||
ARTICLE: "math.vectors.simd.words" "SIMD vector words"
|
||||
"For each SIMD vector type, several words are defined:"
|
||||
"For each SIMD vector type, several words are defined, where " { $snippet "type" } " is the type in question:"
|
||||
{ $table
|
||||
{ "Word" "Stack effect" "Description" }
|
||||
{ { $snippet "type-with" } { $snippet "( x -- simd-array )" } "creates a new instance where all components are set to a single scalar" }
|
||||
{ { $snippet "type-boa" } { $snippet "( ... -- simd-array )" } "creates a new instance where components are read from the stack" }
|
||||
{ { $snipept "type-cast" } { $snippet "( simd-array -- simd-array' )" } "creates a new SIMD array where the underlying data is taken from another SIMD array, with no format conversion" }
|
||||
{ { $snippet "type-cast" } { $snippet "( simd-array -- simd-array' )" } "creates a new SIMD array where the underlying data is taken from another SIMD array, with no format conversion" }
|
||||
{ { $snippet ">type" } { $snippet "( seq -- simd-array )" } "creates a new instance initialized with the elements of an existing sequence, which must have the correct length" }
|
||||
{ { $snippet "type{" } { $snippet "type{ elements... }" } "parsing word defining literal syntax for an SIMD vector; the correct number of elements must be given" }
|
||||
}
|
||||
|
|
|
@ -148,9 +148,13 @@ CONSTANT: simd-classes
|
|||
: remove-integer-words ( alist -- alist' )
|
||||
[ drop { vlshift vrshift } member? not ] assoc-filter ;
|
||||
|
||||
: remove-horizontal-shifts ( alist -- alist' )
|
||||
[ drop { hlshift hrshift } member? not ] assoc-filter ;
|
||||
|
||||
: ops-to-check ( elt-class -- alist )
|
||||
[ vector-words >alist ] dip
|
||||
float = [ remove-integer-words ] [ remove-float-words ] if ;
|
||||
float = [ remove-integer-words ] [ remove-float-words ] if
|
||||
remove-horizontal-shifts ;
|
||||
|
||||
: check-vector-ops ( class elt-class compare-quot -- )
|
||||
[
|
||||
|
@ -255,3 +259,15 @@ STRUCT: simd-struct
|
|||
] unit-test
|
||||
|
||||
[ ] [ char-16 new 1array stack. ] unit-test
|
||||
|
||||
[ int-4{ 256 512 1024 2048 } ]
|
||||
[ int-4{ 1 2 4 8 } 1 hlshift ] unit-test
|
||||
|
||||
[ int-4{ 256 512 1024 2048 } ]
|
||||
[ int-4{ 1 2 4 8 } [ { int-4 } declare 1 hlshift ] compile-call ] unit-test
|
||||
|
||||
[ int-4{ 1 2 4 8 } ]
|
||||
[ int-4{ 256 512 1024 2048 } 1 hrshift ] unit-test
|
||||
|
||||
[ int-4{ 1 2 4 8 } ]
|
||||
[ int-4{ 256 512 1024 2048 } [ { int-4 } declare 1 hrshift ] compile-call ] unit-test
|
||||
|
|
|
@ -82,10 +82,13 @@ H{
|
|||
{ vabs { +vector+ -> +vector+ } }
|
||||
{ vsqrt { +vector+ -> +vector+ } }
|
||||
{ vbitand { +vector+ +vector+ -> +vector+ } }
|
||||
{ vbitandn { +vector+ +vector+ -> +vector+ } }
|
||||
{ vbitor { +vector+ +vector+ -> +vector+ } }
|
||||
{ vbitxor { +vector+ +vector+ -> +vector+ } }
|
||||
{ vlshift { +vector+ +scalar+ -> +vector+ } }
|
||||
{ vrshift { +vector+ +scalar+ -> +vector+ } }
|
||||
{ hlshift { +vector+ +scalar+ -> +vector+ } }
|
||||
{ hrshift { +vector+ +scalar+ -> +vector+ } }
|
||||
}
|
||||
|
||||
PREDICATE: vector-word < word vector-words key? ;
|
||||
|
@ -116,7 +119,7 @@ M: vector-word subwords specializations values [ word? ] filter ;
|
|||
:: input-signature ( word array-type elt-type -- signature )
|
||||
array-type elt-type word word-schema inputs signature-for-schema ;
|
||||
|
||||
: vector-words-for-type ( elt-type -- alist )
|
||||
: vector-words-for-type ( elt-type -- words )
|
||||
{
|
||||
! Can't do shifts on floats
|
||||
{ [ dup float class<= ] [ vector-words keys { vlshift vrshift } diff ] }
|
||||
|
@ -125,10 +128,13 @@ M: vector-word subwords specializations values [ word? ] filter ;
|
|||
! Can't compute square root of complex numbers (vsqrt uses fsqrt not sqrt)
|
||||
{ [ dup complex class<= ] [ vector-words keys { vsqrt } diff ] }
|
||||
[ { } ]
|
||||
} cond nip ;
|
||||
} cond
|
||||
! Don't specialize horizontal shifts at all, they're only for SIMD
|
||||
{ hlshift hrshift } diff
|
||||
nip ;
|
||||
|
||||
:: specialize-vector-words ( array-type elt-type simd -- )
|
||||
elt-type vector-words-for-type [
|
||||
elt-type vector-words-for-type simd keys union [
|
||||
[ array-type elt-type simd specialize-vector-word ]
|
||||
[ array-type elt-type input-signature ]
|
||||
[ ]
|
||||
|
|
|
@ -36,6 +36,7 @@ $nl
|
|||
{ $subsection vmin }
|
||||
"Bitwise operations:"
|
||||
{ $subsection vbitand }
|
||||
{ $subsection vbitandn }
|
||||
{ $subsection vbitor }
|
||||
{ $subsection vbitxor }
|
||||
{ $subsection vlshift }
|
||||
|
@ -197,6 +198,11 @@ HELP: vbitand
|
|||
{ $description "Takes the bitwise and of " { $snippet "u" } " and " { $snippet "v" } " component-wise." }
|
||||
{ $notes "Unlike " { $link bitand } ", this word may be used on a specialized array of floats or doubles, in which case the bitwise representation of the floating point numbers is operated upon." } ;
|
||||
|
||||
HELP: vbitandn
|
||||
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } }
|
||||
{ $description "Takes the bitwise and-not of " { $snippet "u" } " and " { $snippet "v" } " component-wise, where " { $snippet "x and-not y" } " is defined as " { $snippet "not(x) and y" } "." }
|
||||
{ $notes "This word may be used on a specialized array of floats or doubles, in which case the bitwise representation of the floating point numbers is operated upon." } ;
|
||||
|
||||
HELP: vbitor
|
||||
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } }
|
||||
{ $description "Takes the bitwise or of " { $snippet "u" } " and " { $snippet "v" } " component-wise." }
|
||||
|
@ -217,6 +223,14 @@ HELP: vrshift
|
|||
{ $description "Shifts each element of " { $snippet "u" } " to the right by " { $snippet "n" } " bits." }
|
||||
{ $notes "Undefined behavior will result if " { $snippet "n" } " is negative." } ;
|
||||
|
||||
HELP: hlshift
|
||||
{ $values { "u" "a SIMD array" } { "n" "a non-negative integer" } { "w" "a SIMD array" } }
|
||||
{ $description "Shifts the entire SIMD array to the left by " { $snippet "n" } " bytes. This word may only be used in a context where the compiler can statically infer that the input is a SIMD array." } ;
|
||||
|
||||
HELP: hrshift
|
||||
{ $values { "u" "a SIMD array" } { "n" "a non-negative integer" } { "w" "a SIMD array" } }
|
||||
{ $description "Shifts the entire SIMD array to the right by " { $snippet "n" } " bytes. This word may only be used in a context where the compiler can statically infer that the input is a SIMD array." } ;
|
||||
|
||||
HELP: norm-sq
|
||||
{ $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } }
|
||||
{ $description "Computes the squared length of a mathematical vector." } ;
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
! Copyright (C) 2005, 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: arrays alien.c-types kernel sequences math math.functions
|
||||
hints math.order math.libm fry combinators ;
|
||||
hints math.order math.libm fry combinators byte-arrays accessors ;
|
||||
QUALIFIED-WITH: alien.c-types c
|
||||
IN: math.vectors
|
||||
|
||||
|
@ -62,9 +62,12 @@ PRIVATE>
|
|||
[ drop call ]
|
||||
} case ; inline
|
||||
|
||||
: bitandn ( x y -- z ) [ bitnot ] dip bitand ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: vbitand ( u v -- w ) over '[ _ [ bitand ] fp-bitwise-op ] 2map ;
|
||||
: vbitandn ( u v -- w ) over '[ _ [ bitandn ] fp-bitwise-op ] 2map ;
|
||||
: vbitor ( u v -- w ) over '[ _ [ bitor ] fp-bitwise-op ] 2map ;
|
||||
: vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ;
|
||||
: vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ;
|
||||
|
@ -86,6 +89,14 @@ PRIVATE>
|
|||
: vlshift ( u n -- w ) '[ _ shift ] map ;
|
||||
: vrshift ( u n -- w ) neg '[ _ shift ] map ;
|
||||
|
||||
: hlshift ( u n -- w )
|
||||
[ clone ] dip
|
||||
'[ _ <byte-array> append 16 tail* ] change-underlying ;
|
||||
|
||||
: hrshift ( u n -- w )
|
||||
[ clone ] dip
|
||||
'[ _ <byte-array> prepend 16 head* ] change-underlying ;
|
||||
|
||||
: vfloor ( u -- v ) [ floor ] map ;
|
||||
: vceiling ( u -- v ) [ ceiling ] map ;
|
||||
: vtruncate ( u -- v ) [ truncate ] map ;
|
||||
|
|
Loading…
Reference in New Issue