math.vectors.simd: add hlshift, hrshift (128-bit shift), vbitandn intrinsics

2009-09-28 02:17:46 -05:00 · 2009-09-28 02:17:46 -05:00 · b2ea3afd84
parent 2de6ab9e21
commit b2ea3afd84
13 changed files with 142 additions and 10 deletions
--- a/basis/compiler/cfg/instructions/instructions.factor
+++ b/basis/compiler/cfg/instructions/instructions.factor
@ -340,6 +340,21 @@ def: dst/scalar-rep
 use: src
 literal: rep ;

+PURE-INSN: ##horizontal-sub-vector
+def: dst/scalar-rep
+use: src
+literal: rep ;
+
+PURE-INSN: ##horizontal-shl-vector
+def: dst
+use: src1
+literal: src2 rep ;
+
+PURE-INSN: ##horizontal-shr-vector
+def: dst
+use: src1
+literal: src2 rep ;
+
 PURE-INSN: ##abs-vector
 def: dst
 use: src
@ -355,6 +370,11 @@ def: dst
 use: src1 src2
 literal: rep ;

+PURE-INSN: ##andn-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
 PURE-INSN: ##or-vector
 def: dst
 use: src1 src2
--- a/basis/compiler/cfg/intrinsics/intrinsics.factor
+++ b/basis/compiler/cfg/intrinsics/intrinsics.factor
@ -167,10 +167,13 @@ IN: compiler.cfg.intrinsics
        { math.vectors.simd.intrinsics:(simd-vabs) [ [ ^^abs-vector ] emit-unary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-vbitandn) [ [ ^^andn-vector ] emit-binary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-vlshift) [ [ ^^shl-vector ] emit-binary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-vrshift) [ [ ^^shr-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-hlshift) [ [ ^^horizontal-shl-vector ] emit-horizontal-shift ] }
+        { math.vectors.simd.intrinsics:(simd-hrshift) [ [ ^^horizontal-shr-vector ] emit-horizontal-shift ] }
        { math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
        { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
--- a/basis/compiler/cfg/intrinsics/simd/simd.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd.factor
@ -1,6 +1,6 @@
 ! Copyright (C) 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: accessors byte-arrays fry cpu.architecture kernel
+USING: accessors byte-arrays fry cpu.architecture kernel math
 sequences compiler.tree.propagation.info
 compiler.cfg.builder.blocks compiler.cfg.stacks
 compiler.cfg.stacks.local compiler.cfg.hats
@ -9,8 +9,8 @@ compiler.cfg.intrinsics.alien ;
 IN: compiler.cfg.intrinsics.simd

 : emit-vector-op ( node quot: ( rep -- ) -- )
-    [ dup node-input-infos last literal>> ] dip over representation?
-    [ [ drop ] 2dip call ] [ 2drop emit-primitive ] if ; inline
+    [ dup node-input-infos last literal>> dup representation? ] dip
+    '[ nip @ ] [ drop emit-primitive ] if ; inline

 : emit-binary-vector-op ( node quot -- )
    '[ [ ds-drop 2inputs ] dip @ ds-push ] emit-vector-op ; inline
@ -18,6 +18,16 @@ IN: compiler.cfg.intrinsics.simd
 : emit-unary-vector-op ( node quot -- )
    '[ [ ds-drop ds-pop ] dip @ ds-push ] emit-vector-op ; inline

+: emit-horizontal-shift ( node quot -- )
+    [
+        dup node-input-infos
+        [ second literal>> ] [ third literal>> ] bi
+        2dup [ integer? ] [ representation? ] bi* and
+    ] dip
+    '[ [ drop ds-drop ds-drop ds-pop ] 2dip @ ds-push ]
+    [ 2drop emit-primitive ]
+    if ; inline
+
 : emit-gather-vector-2 ( node -- )
    [ ^^gather-vector-2 ] emit-binary-vector-op ;

--- a/basis/compiler/codegen/codegen.factor
+++ b/basis/compiler/codegen/codegen.factor
@ -176,8 +176,12 @@ CODEGEN: ##min-vector %min-vector
 CODEGEN: ##max-vector %max-vector
 CODEGEN: ##sqrt-vector %sqrt-vector
 CODEGEN: ##horizontal-add-vector %horizontal-add-vector
+CODEGEN: ##horizontal-sub-vector %horizontal-sub-vector
+CODEGEN: ##horizontal-shl-vector %horizontal-shl-vector
+CODEGEN: ##horizontal-shr-vector %horizontal-shr-vector
 CODEGEN: ##abs-vector %abs-vector
 CODEGEN: ##and-vector %and-vector
+CODEGEN: ##andn-vector %andn-vector
 CODEGEN: ##or-vector %or-vector
 CODEGEN: ##xor-vector %xor-vector
 CODEGEN: ##shl-vector %shl-vector
--- a/basis/compiler/tree/propagation/simd/simd.factor
+++ b/basis/compiler/tree/propagation/simd/simd.factor
@ -17,10 +17,13 @@ IN: compiler.tree.propagation.simd
    (simd-vabs)
    (simd-vsqrt)
    (simd-vbitand)
+    (simd-vbitandn)
    (simd-vbitor)
    (simd-vbitxor)
    (simd-vlshift)
    (simd-vrshift)
+    (simd-hlshift)
+    (simd-hrshift)
    (simd-broadcast)
    (simd-gather-2)
    (simd-gather-4)
--- a/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@ -225,12 +225,16 @@ HOOK: %min-vector cpu ( dst src1 src2 rep -- )
 HOOK: %max-vector cpu ( dst src1 src2 rep -- )
 HOOK: %sqrt-vector cpu ( dst src rep -- )
 HOOK: %horizontal-add-vector cpu ( dst src rep -- )
+HOOK: %horizontal-sub-vector cpu ( dst src rep -- )
 HOOK: %abs-vector cpu ( dst src rep -- )
 HOOK: %and-vector cpu ( dst src1 src2 rep -- )
+HOOK: %andn-vector cpu ( dst src1 src2 rep -- )
 HOOK: %or-vector cpu ( dst src1 src2 rep -- )
 HOOK: %xor-vector cpu ( dst src1 src2 rep -- )
 HOOK: %shl-vector cpu ( dst src1 src2 rep -- )
 HOOK: %shr-vector cpu ( dst src1 src2 rep -- )
+HOOK: %horizontal-shl-vector cpu ( dst src1 src2 rep -- )
+HOOK: %horizontal-shr-vector cpu ( dst src1 src2 rep -- )

 HOOK: %integer>scalar cpu ( dst src rep -- )
 HOOK: %scalar>integer cpu ( dst src rep -- )
@ -250,12 +254,16 @@ HOOK: %min-vector-reps cpu ( -- reps )
 HOOK: %max-vector-reps cpu ( -- reps )
 HOOK: %sqrt-vector-reps cpu ( -- reps )
 HOOK: %horizontal-add-vector-reps cpu ( -- reps )
+HOOK: %horizontal-sub-vector-reps cpu ( -- reps )
 HOOK: %abs-vector-reps cpu ( -- reps )
 HOOK: %and-vector-reps cpu ( -- reps )
+HOOK: %andn-vector-reps cpu ( -- reps )
 HOOK: %or-vector-reps cpu ( -- reps )
 HOOK: %xor-vector-reps cpu ( -- reps )
 HOOK: %shl-vector-reps cpu ( -- reps )
 HOOK: %shr-vector-reps cpu ( -- reps )
+HOOK: %horizontal-shl-vector-reps cpu ( -- reps )
+HOOK: %horizontal-shr-vector-reps cpu ( -- reps )

 HOOK: %unbox-alien cpu ( dst src -- )
 HOOK: %unbox-any-c-ptr cpu ( dst src temp -- )
--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@ -412,6 +412,22 @@ M: x86 %horizontal-add-vector-reps
        { sse3? { float-4-rep double-2-rep } }
    } available-reps ;

+M: x86 %horizontal-shl-vector ( dst src1 src2 rep -- )
+    two-operand PSLLDQ ;
+
+M: x86 %horizontal-shl-vector-reps
+    {
+        { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+    } available-reps ;
+
+M: x86 %horizontal-shr-vector ( dst src1 src2 rep -- )
+    two-operand PSRLDQ ;
+
+M: x86 %horizontal-shr-vector-reps
+    {
+        { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+    } available-reps ;
+
 M: x86 %abs-vector ( dst src rep -- )
    {
        { char-16-rep [ PABSB ] }
@ -450,6 +466,20 @@ M: x86 %and-vector-reps
        { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
    } available-reps ;

+M: x86 %andn-vector ( dst src1 src2 rep -- )
+    [ two-operand ] keep
+    {
+        { float-4-rep [ ANDNPS ] }
+        { double-2-rep [ ANDNPD ] }
+        [ drop PANDN ]
+    } case ;
+
+M: x86 %andn-vector-reps
+    {
+        { sse? { float-4-rep } }
+        { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+    } available-reps ;
+
 M: x86 %or-vector ( dst src1 src2 rep -- )
    [ two-operand ] keep
    {
--- a/basis/math/vectors/simd/intrinsics/intrinsics.factor
+++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor
@ -40,14 +40,18 @@ SIMD-OP: vsqrt
 SIMD-OP: sum
 SIMD-OP: vabs
 SIMD-OP: vbitand
+SIMD-OP: vbitandn
 SIMD-OP: vbitor
 SIMD-OP: vbitxor
 SIMD-OP: vlshift
 SIMD-OP: vrshift
+SIMD-OP: hlshift
+SIMD-OP: hrshift

 : (simd-broadcast) ( x rep -- v ) bad-simd-call ;
 : (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
 : (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ;
+
 : assert-positive ( x -- y ) ;

 : alien-vector ( c-ptr n rep -- value )
@ -110,10 +114,13 @@ M: vector-rep supported-simd-op?
        { \ (simd-sum)       [ %horizontal-add-vector-reps ] }
        { \ (simd-vabs)      [ %abs-vector-reps            ] }
        { \ (simd-vbitand)   [ %and-vector-reps            ] }
+        { \ (simd-vbitandn)  [ %andn-vector-reps           ] }
        { \ (simd-vbitor)    [ %or-vector-reps             ] }
        { \ (simd-vbitxor)   [ %xor-vector-reps            ] }
        { \ (simd-vlshift)   [ %shl-vector-reps            ] }
        { \ (simd-vrshift)   [ %shr-vector-reps            ] }
+        { \ (simd-hlshift)   [ %horizontal-shl-vector-reps ] }
+        { \ (simd-hrshift)   [ %horizontal-shr-vector-reps ] }
        { \ (simd-broadcast) [ %broadcast-vector-reps      ] }
        { \ (simd-gather-2)  [ %gather-vector-2-reps       ] }
        { \ (simd-gather-4)  [ %gather-vector-4-reps       ] }
--- a/basis/math/vectors/simd/simd-docs.factor
+++ b/basis/math/vectors/simd/simd-docs.factor
@ -63,12 +63,12 @@ $nl
 } ;

 ARTICLE: "math.vectors.simd.words" "SIMD vector words"
-"For each SIMD vector type, several words are defined:"
+"For each SIMD vector type, several words are defined, where " { $snippet "type" } " is the type in question:"
 { $table
    { "Word" "Stack effect" "Description" }
    { { $snippet "type-with" } { $snippet "( x -- simd-array )" } "creates a new instance where all components are set to a single scalar" }
    { { $snippet "type-boa" } { $snippet "( ... -- simd-array )" } "creates a new instance where components are read from the stack" }
-    { { $snipept "type-cast" } { $snippet "( simd-array -- simd-array' )" } "creates a new SIMD array where the underlying data is taken from another SIMD array, with no format conversion" }
+    { { $snippet "type-cast" } { $snippet "( simd-array -- simd-array' )" } "creates a new SIMD array where the underlying data is taken from another SIMD array, with no format conversion" }
    { { $snippet ">type" } { $snippet "( seq -- simd-array )" } "creates a new instance initialized with the elements of an existing sequence, which must have the correct length" }
    { { $snippet "type{" } { $snippet "type{ elements... }" } "parsing word defining literal syntax for an SIMD vector; the correct number of elements must be given" }
 }
--- a/basis/math/vectors/simd/simd-tests.factor
+++ b/basis/math/vectors/simd/simd-tests.factor
@ -148,9 +148,13 @@ CONSTANT: simd-classes
 : remove-integer-words ( alist -- alist' )
    [ drop { vlshift vrshift } member? not ] assoc-filter ;

+: remove-horizontal-shifts ( alist -- alist' )
+    [ drop { hlshift hrshift } member? not ] assoc-filter ;
+
 : ops-to-check ( elt-class -- alist )
    [ vector-words >alist ] dip
-    float = [ remove-integer-words ] [ remove-float-words ] if ;
+    float = [ remove-integer-words ] [ remove-float-words ] if
+    remove-horizontal-shifts ;

 : check-vector-ops ( class elt-class compare-quot -- )
    [
@ -255,3 +259,15 @@ STRUCT: simd-struct
 ] unit-test

 [ ] [ char-16 new 1array stack. ] unit-test
+
+[ int-4{ 256 512 1024 2048 } ]
+[ int-4{ 1 2 4 8 } 1 hlshift ] unit-test
+
+[ int-4{ 256 512 1024 2048 } ]
+[ int-4{ 1 2 4 8 } [ { int-4 } declare 1 hlshift ] compile-call ] unit-test
+
+[ int-4{ 1 2 4 8 } ]
+[ int-4{ 256 512 1024 2048 } 1 hrshift ] unit-test
+
+[ int-4{ 1 2 4 8 } ]
+[ int-4{ 256 512 1024 2048 } [ { int-4 } declare 1 hrshift ] compile-call ] unit-test
--- a/basis/math/vectors/specialization/specialization.factor
+++ b/basis/math/vectors/specialization/specialization.factor
@ -82,10 +82,13 @@ H{
    { vabs { +vector+ -> +vector+ } }
    { vsqrt { +vector+ -> +vector+ } }
    { vbitand { +vector+ +vector+ -> +vector+ } }
+    { vbitandn { +vector+ +vector+ -> +vector+ } }
    { vbitor { +vector+ +vector+ -> +vector+ } }
    { vbitxor { +vector+ +vector+ -> +vector+ } }
    { vlshift { +vector+ +scalar+ -> +vector+ } }
    { vrshift { +vector+ +scalar+ -> +vector+ } }
+    { hlshift { +vector+ +scalar+ -> +vector+ } }
+    { hrshift { +vector+ +scalar+ -> +vector+ } }
 }

 PREDICATE: vector-word < word vector-words key? ;
@ -116,7 +119,7 @@ M: vector-word subwords specializations values [ word? ] filter ;
 :: input-signature ( word array-type elt-type -- signature )
    array-type elt-type word word-schema inputs signature-for-schema ;

-: vector-words-for-type ( elt-type -- alist )
+: vector-words-for-type ( elt-type -- words )
    {
        ! Can't do shifts on floats
        { [ dup float class<= ] [ vector-words keys { vlshift vrshift } diff ] }
@ -125,10 +128,13 @@ M: vector-word subwords specializations values [ word? ] filter ;
        ! Can't compute square root of complex numbers (vsqrt uses fsqrt not sqrt)
        { [ dup complex class<= ] [ vector-words keys { vsqrt } diff ] }
        [ { } ]
-    } cond nip ;
+    } cond
+    ! Don't specialize horizontal shifts at all, they're only for SIMD
+    { hlshift hrshift } diff
+    nip ;

 :: specialize-vector-words ( array-type elt-type simd -- )
-    elt-type vector-words-for-type [
+    elt-type vector-words-for-type simd keys union [
        [ array-type elt-type simd specialize-vector-word ]
        [ array-type elt-type input-signature ]
        [ ]
--- a/basis/math/vectors/vectors-docs.factor
+++ b/basis/math/vectors/vectors-docs.factor
@ -36,6 +36,7 @@ $nl
 { $subsection vmin }
 "Bitwise operations:"
 { $subsection vbitand }
+{ $subsection vbitandn }
 { $subsection vbitor }
 { $subsection vbitxor }
 { $subsection vlshift }
@ -197,6 +198,11 @@ HELP: vbitand
 { $description "Takes the bitwise and of " { $snippet "u" } " and " { $snippet "v" } " component-wise." }
 { $notes "Unlike " { $link bitand } ", this word may be used on a specialized array of floats or doubles, in which case the bitwise representation of the floating point numbers is operated upon." } ;

+HELP: vbitandn
+{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } }
+{ $description "Takes the bitwise and-not of " { $snippet "u" } " and " { $snippet "v" } " component-wise, where " { $snippet "x and-not y" } " is defined as " { $snippet "not(x) and y" } "." }
+{ $notes "This word may be used on a specialized array of floats or doubles, in which case the bitwise representation of the floating point numbers is operated upon." } ;
+
 HELP: vbitor
 { $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } }
 { $description "Takes the bitwise or of " { $snippet "u" } " and " { $snippet "v" } " component-wise." }
@ -217,6 +223,14 @@ HELP: vrshift
 { $description "Shifts each element of " { $snippet "u" } " to the right by " { $snippet "n" } " bits." }
 { $notes "Undefined behavior will result if " { $snippet "n" } " is negative." } ;

+HELP: hlshift
+{ $values { "u" "a SIMD array" } { "n" "a non-negative integer" } { "w" "a SIMD array" } }
+{ $description "Shifts the entire SIMD array to the left by " { $snippet "n" } " bytes. This word may only be used in a context where the compiler can statically infer that the input is a SIMD array." } ;
+
+HELP: hrshift
+{ $values { "u" "a SIMD array" } { "n" "a non-negative integer" } { "w" "a SIMD array" } }
+{ $description "Shifts the entire SIMD array to the right by " { $snippet "n" } " bytes. This word may only be used in a context where the compiler can statically infer that the input is a SIMD array." } ;
+
 HELP: norm-sq
 { $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } }
 { $description "Computes the squared length of a mathematical vector." } ;
--- a/basis/math/vectors/vectors.factor
+++ b/basis/math/vectors/vectors.factor
@ -1,7 +1,7 @@
 ! Copyright (C) 2005, 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: arrays alien.c-types kernel sequences math math.functions
-hints math.order math.libm fry combinators ;
+hints math.order math.libm fry combinators byte-arrays accessors ;
 QUALIFIED-WITH: alien.c-types c
 IN: math.vectors

@ -62,9 +62,12 @@ PRIVATE>
        [ drop call ]
    } case ; inline

+: bitandn ( x y -- z ) [ bitnot ] dip bitand ; inline
+
 PRIVATE>

 : vbitand ( u v -- w ) over '[ _ [ bitand ] fp-bitwise-op ] 2map ;
+: vbitandn ( u v -- w ) over '[ _ [ bitandn ] fp-bitwise-op ] 2map ;
 : vbitor ( u v -- w ) over '[ _ [ bitor ] fp-bitwise-op ] 2map ;
 : vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ;
 : vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ;
@ -86,6 +89,14 @@ PRIVATE>
 : vlshift ( u n -- w ) '[ _ shift ] map ;
 : vrshift ( u n -- w ) neg '[ _ shift ] map ;

+: hlshift ( u n -- w )
+    [ clone ] dip
+    '[ _ <byte-array> append 16 tail* ] change-underlying ;
+
+: hrshift ( u n -- w )
+    [ clone ] dip
+    '[ _ <byte-array> prepend 16 head* ] change-underlying ;
+
 : vfloor    ( u -- v ) [ floor ] map ;
 : vceiling  ( u -- v ) [ ceiling ] map ;
 : vtruncate ( u -- v ) [ truncate ] map ;