From b2ea3afd84e77778774ec2a2d3b17c85ec677e61 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 28 Sep 2009 02:17:46 -0500 Subject: [PATCH] math.vectors.simd: add hlshift, hrshift (128-bit shift), vbitandn intrinsics --- .../cfg/instructions/instructions.factor | 20 +++++++++++++ .../compiler/cfg/intrinsics/intrinsics.factor | 3 ++ .../compiler/cfg/intrinsics/simd/simd.factor | 16 ++++++++-- basis/compiler/codegen/codegen.factor | 4 +++ .../tree/propagation/simd/simd.factor | 3 ++ basis/cpu/architecture/architecture.factor | 8 +++++ basis/cpu/x86/x86.factor | 30 +++++++++++++++++++ .../vectors/simd/intrinsics/intrinsics.factor | 7 +++++ basis/math/vectors/simd/simd-docs.factor | 4 +-- basis/math/vectors/simd/simd-tests.factor | 18 ++++++++++- .../specialization/specialization.factor | 12 ++++++-- basis/math/vectors/vectors-docs.factor | 14 +++++++++ basis/math/vectors/vectors.factor | 13 +++++++- 13 files changed, 142 insertions(+), 10 deletions(-) diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index f06d31b9df..d560cfbab6 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -340,6 +340,21 @@ def: dst/scalar-rep use: src literal: rep ; +PURE-INSN: ##horizontal-sub-vector +def: dst/scalar-rep +use: src +literal: rep ; + +PURE-INSN: ##horizontal-shl-vector +def: dst +use: src1 +literal: src2 rep ; + +PURE-INSN: ##horizontal-shr-vector +def: dst +use: src1 +literal: src2 rep ; + PURE-INSN: ##abs-vector def: dst use: src @@ -355,6 +370,11 @@ def: dst use: src1 src2 literal: rep ; +PURE-INSN: ##andn-vector +def: dst +use: src1 src2 +literal: rep ; + PURE-INSN: ##or-vector def: dst use: src1 src2 diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index 056e2471ef..7b8e0c5989 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -167,10 +167,13 @@ IN: compiler.cfg.intrinsics { math.vectors.simd.intrinsics:(simd-vabs) [ [ ^^abs-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vbitandn) [ [ ^^andn-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vlshift) [ [ ^^shl-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vrshift) [ [ ^^shr-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-hlshift) [ [ ^^horizontal-shl-vector ] emit-horizontal-shift ] } + { math.vectors.simd.intrinsics:(simd-hrshift) [ [ ^^horizontal-shr-vector ] emit-horizontal-shift ] } { math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] } { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] } diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index f1a6f986df..f975c7b39f 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors byte-arrays fry cpu.architecture kernel +USING: accessors byte-arrays fry cpu.architecture kernel math sequences compiler.tree.propagation.info compiler.cfg.builder.blocks compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats @@ -9,8 +9,8 @@ compiler.cfg.intrinsics.alien ; IN: compiler.cfg.intrinsics.simd : emit-vector-op ( node quot: ( rep -- ) -- ) - [ dup node-input-infos last literal>> ] dip over representation? - [ [ drop ] 2dip call ] [ 2drop emit-primitive ] if ; inline + [ dup node-input-infos last literal>> dup representation? ] dip + '[ nip @ ] [ drop emit-primitive ] if ; inline : emit-binary-vector-op ( node quot -- ) '[ [ ds-drop 2inputs ] dip @ ds-push ] emit-vector-op ; inline @@ -18,6 +18,16 @@ IN: compiler.cfg.intrinsics.simd : emit-unary-vector-op ( node quot -- ) '[ [ ds-drop ds-pop ] dip @ ds-push ] emit-vector-op ; inline +: emit-horizontal-shift ( node quot -- ) + [ + dup node-input-infos + [ second literal>> ] [ third literal>> ] bi + 2dup [ integer? ] [ representation? ] bi* and + ] dip + '[ [ drop ds-drop ds-drop ds-pop ] 2dip @ ds-push ] + [ 2drop emit-primitive ] + if ; inline + : emit-gather-vector-2 ( node -- ) [ ^^gather-vector-2 ] emit-binary-vector-op ; diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index 0fd4e63e02..7a8253a35e 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -176,8 +176,12 @@ CODEGEN: ##min-vector %min-vector CODEGEN: ##max-vector %max-vector CODEGEN: ##sqrt-vector %sqrt-vector CODEGEN: ##horizontal-add-vector %horizontal-add-vector +CODEGEN: ##horizontal-sub-vector %horizontal-sub-vector +CODEGEN: ##horizontal-shl-vector %horizontal-shl-vector +CODEGEN: ##horizontal-shr-vector %horizontal-shr-vector CODEGEN: ##abs-vector %abs-vector CODEGEN: ##and-vector %and-vector +CODEGEN: ##andn-vector %andn-vector CODEGEN: ##or-vector %or-vector CODEGEN: ##xor-vector %xor-vector CODEGEN: ##shl-vector %shl-vector diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index 6a619b298e..5ad483405a 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -17,10 +17,13 @@ IN: compiler.tree.propagation.simd (simd-vabs) (simd-vsqrt) (simd-vbitand) + (simd-vbitandn) (simd-vbitor) (simd-vbitxor) (simd-vlshift) (simd-vrshift) + (simd-hlshift) + (simd-hrshift) (simd-broadcast) (simd-gather-2) (simd-gather-4) diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 78525e6d00..1e4e27b6e4 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -225,12 +225,16 @@ HOOK: %min-vector cpu ( dst src1 src2 rep -- ) HOOK: %max-vector cpu ( dst src1 src2 rep -- ) HOOK: %sqrt-vector cpu ( dst src rep -- ) HOOK: %horizontal-add-vector cpu ( dst src rep -- ) +HOOK: %horizontal-sub-vector cpu ( dst src rep -- ) HOOK: %abs-vector cpu ( dst src rep -- ) HOOK: %and-vector cpu ( dst src1 src2 rep -- ) +HOOK: %andn-vector cpu ( dst src1 src2 rep -- ) HOOK: %or-vector cpu ( dst src1 src2 rep -- ) HOOK: %xor-vector cpu ( dst src1 src2 rep -- ) HOOK: %shl-vector cpu ( dst src1 src2 rep -- ) HOOK: %shr-vector cpu ( dst src1 src2 rep -- ) +HOOK: %horizontal-shl-vector cpu ( dst src1 src2 rep -- ) +HOOK: %horizontal-shr-vector cpu ( dst src1 src2 rep -- ) HOOK: %integer>scalar cpu ( dst src rep -- ) HOOK: %scalar>integer cpu ( dst src rep -- ) @@ -250,12 +254,16 @@ HOOK: %min-vector-reps cpu ( -- reps ) HOOK: %max-vector-reps cpu ( -- reps ) HOOK: %sqrt-vector-reps cpu ( -- reps ) HOOK: %horizontal-add-vector-reps cpu ( -- reps ) +HOOK: %horizontal-sub-vector-reps cpu ( -- reps ) HOOK: %abs-vector-reps cpu ( -- reps ) HOOK: %and-vector-reps cpu ( -- reps ) +HOOK: %andn-vector-reps cpu ( -- reps ) HOOK: %or-vector-reps cpu ( -- reps ) HOOK: %xor-vector-reps cpu ( -- reps ) HOOK: %shl-vector-reps cpu ( -- reps ) HOOK: %shr-vector-reps cpu ( -- reps ) +HOOK: %horizontal-shl-vector-reps cpu ( -- reps ) +HOOK: %horizontal-shr-vector-reps cpu ( -- reps ) HOOK: %unbox-alien cpu ( dst src -- ) HOOK: %unbox-any-c-ptr cpu ( dst src temp -- ) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index bf2f9f1f98..d6a9e4df26 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -412,6 +412,22 @@ M: x86 %horizontal-add-vector-reps { sse3? { float-4-rep double-2-rep } } } available-reps ; +M: x86 %horizontal-shl-vector ( dst src1 src2 rep -- ) + two-operand PSLLDQ ; + +M: x86 %horizontal-shl-vector-reps + { + { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } + } available-reps ; + +M: x86 %horizontal-shr-vector ( dst src1 src2 rep -- ) + two-operand PSRLDQ ; + +M: x86 %horizontal-shr-vector-reps + { + { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } + } available-reps ; + M: x86 %abs-vector ( dst src rep -- ) { { char-16-rep [ PABSB ] } @@ -450,6 +466,20 @@ M: x86 %and-vector-reps { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; +M: x86 %andn-vector ( dst src1 src2 rep -- ) + [ two-operand ] keep + { + { float-4-rep [ ANDNPS ] } + { double-2-rep [ ANDNPD ] } + [ drop PANDN ] + } case ; + +M: x86 %andn-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } + } available-reps ; + M: x86 %or-vector ( dst src1 src2 rep -- ) [ two-operand ] keep { diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index 6989ac2bc2..9d31dfa813 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -40,14 +40,18 @@ SIMD-OP: vsqrt SIMD-OP: sum SIMD-OP: vabs SIMD-OP: vbitand +SIMD-OP: vbitandn SIMD-OP: vbitor SIMD-OP: vbitxor SIMD-OP: vlshift SIMD-OP: vrshift +SIMD-OP: hlshift +SIMD-OP: hrshift : (simd-broadcast) ( x rep -- v ) bad-simd-call ; : (simd-gather-2) ( a b rep -- v ) bad-simd-call ; : (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ; + : assert-positive ( x -- y ) ; : alien-vector ( c-ptr n rep -- value ) @@ -110,10 +114,13 @@ M: vector-rep supported-simd-op? { \ (simd-sum) [ %horizontal-add-vector-reps ] } { \ (simd-vabs) [ %abs-vector-reps ] } { \ (simd-vbitand) [ %and-vector-reps ] } + { \ (simd-vbitandn) [ %andn-vector-reps ] } { \ (simd-vbitor) [ %or-vector-reps ] } { \ (simd-vbitxor) [ %xor-vector-reps ] } { \ (simd-vlshift) [ %shl-vector-reps ] } { \ (simd-vrshift) [ %shr-vector-reps ] } + { \ (simd-hlshift) [ %horizontal-shl-vector-reps ] } + { \ (simd-hrshift) [ %horizontal-shr-vector-reps ] } { \ (simd-broadcast) [ %broadcast-vector-reps ] } { \ (simd-gather-2) [ %gather-vector-2-reps ] } { \ (simd-gather-4) [ %gather-vector-4-reps ] } diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor index 6dc0f87dd4..7a3ba727e0 100644 --- a/basis/math/vectors/simd/simd-docs.factor +++ b/basis/math/vectors/simd/simd-docs.factor @@ -63,12 +63,12 @@ $nl } ; ARTICLE: "math.vectors.simd.words" "SIMD vector words" -"For each SIMD vector type, several words are defined:" +"For each SIMD vector type, several words are defined, where " { $snippet "type" } " is the type in question:" { $table { "Word" "Stack effect" "Description" } { { $snippet "type-with" } { $snippet "( x -- simd-array )" } "creates a new instance where all components are set to a single scalar" } { { $snippet "type-boa" } { $snippet "( ... -- simd-array )" } "creates a new instance where components are read from the stack" } - { { $snipept "type-cast" } { $snippet "( simd-array -- simd-array' )" } "creates a new SIMD array where the underlying data is taken from another SIMD array, with no format conversion" } + { { $snippet "type-cast" } { $snippet "( simd-array -- simd-array' )" } "creates a new SIMD array where the underlying data is taken from another SIMD array, with no format conversion" } { { $snippet ">type" } { $snippet "( seq -- simd-array )" } "creates a new instance initialized with the elements of an existing sequence, which must have the correct length" } { { $snippet "type{" } { $snippet "type{ elements... }" } "parsing word defining literal syntax for an SIMD vector; the correct number of elements must be given" } } diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index f7c051fdce..001f1be814 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -148,9 +148,13 @@ CONSTANT: simd-classes : remove-integer-words ( alist -- alist' ) [ drop { vlshift vrshift } member? not ] assoc-filter ; +: remove-horizontal-shifts ( alist -- alist' ) + [ drop { hlshift hrshift } member? not ] assoc-filter ; + : ops-to-check ( elt-class -- alist ) [ vector-words >alist ] dip - float = [ remove-integer-words ] [ remove-float-words ] if ; + float = [ remove-integer-words ] [ remove-float-words ] if + remove-horizontal-shifts ; : check-vector-ops ( class elt-class compare-quot -- ) [ @@ -255,3 +259,15 @@ STRUCT: simd-struct ] unit-test [ ] [ char-16 new 1array stack. ] unit-test + +[ int-4{ 256 512 1024 2048 } ] +[ int-4{ 1 2 4 8 } 1 hlshift ] unit-test + +[ int-4{ 256 512 1024 2048 } ] +[ int-4{ 1 2 4 8 } [ { int-4 } declare 1 hlshift ] compile-call ] unit-test + +[ int-4{ 1 2 4 8 } ] +[ int-4{ 256 512 1024 2048 } 1 hrshift ] unit-test + +[ int-4{ 1 2 4 8 } ] +[ int-4{ 256 512 1024 2048 } [ { int-4 } declare 1 hrshift ] compile-call ] unit-test diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor index 6c8ffd6f61..8b78f798ec 100644 --- a/basis/math/vectors/specialization/specialization.factor +++ b/basis/math/vectors/specialization/specialization.factor @@ -82,10 +82,13 @@ H{ { vabs { +vector+ -> +vector+ } } { vsqrt { +vector+ -> +vector+ } } { vbitand { +vector+ +vector+ -> +vector+ } } + { vbitandn { +vector+ +vector+ -> +vector+ } } { vbitor { +vector+ +vector+ -> +vector+ } } { vbitxor { +vector+ +vector+ -> +vector+ } } { vlshift { +vector+ +scalar+ -> +vector+ } } { vrshift { +vector+ +scalar+ -> +vector+ } } + { hlshift { +vector+ +scalar+ -> +vector+ } } + { hrshift { +vector+ +scalar+ -> +vector+ } } } PREDICATE: vector-word < word vector-words key? ; @@ -116,7 +119,7 @@ M: vector-word subwords specializations values [ word? ] filter ; :: input-signature ( word array-type elt-type -- signature ) array-type elt-type word word-schema inputs signature-for-schema ; -: vector-words-for-type ( elt-type -- alist ) +: vector-words-for-type ( elt-type -- words ) { ! Can't do shifts on floats { [ dup float class<= ] [ vector-words keys { vlshift vrshift } diff ] } @@ -125,10 +128,13 @@ M: vector-word subwords specializations values [ word? ] filter ; ! Can't compute square root of complex numbers (vsqrt uses fsqrt not sqrt) { [ dup complex class<= ] [ vector-words keys { vsqrt } diff ] } [ { } ] - } cond nip ; + } cond + ! Don't specialize horizontal shifts at all, they're only for SIMD + { hlshift hrshift } diff + nip ; :: specialize-vector-words ( array-type elt-type simd -- ) - elt-type vector-words-for-type [ + elt-type vector-words-for-type simd keys union [ [ array-type elt-type simd specialize-vector-word ] [ array-type elt-type input-signature ] [ ] diff --git a/basis/math/vectors/vectors-docs.factor b/basis/math/vectors/vectors-docs.factor index 2d9a70ad58..32c354354f 100644 --- a/basis/math/vectors/vectors-docs.factor +++ b/basis/math/vectors/vectors-docs.factor @@ -36,6 +36,7 @@ $nl { $subsection vmin } "Bitwise operations:" { $subsection vbitand } +{ $subsection vbitandn } { $subsection vbitor } { $subsection vbitxor } { $subsection vlshift } @@ -197,6 +198,11 @@ HELP: vbitand { $description "Takes the bitwise and of " { $snippet "u" } " and " { $snippet "v" } " component-wise." } { $notes "Unlike " { $link bitand } ", this word may be used on a specialized array of floats or doubles, in which case the bitwise representation of the floating point numbers is operated upon." } ; +HELP: vbitandn +{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } } +{ $description "Takes the bitwise and-not of " { $snippet "u" } " and " { $snippet "v" } " component-wise, where " { $snippet "x and-not y" } " is defined as " { $snippet "not(x) and y" } "." } +{ $notes "This word may be used on a specialized array of floats or doubles, in which case the bitwise representation of the floating point numbers is operated upon." } ; + HELP: vbitor { $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } } { $description "Takes the bitwise or of " { $snippet "u" } " and " { $snippet "v" } " component-wise." } @@ -217,6 +223,14 @@ HELP: vrshift { $description "Shifts each element of " { $snippet "u" } " to the right by " { $snippet "n" } " bits." } { $notes "Undefined behavior will result if " { $snippet "n" } " is negative." } ; +HELP: hlshift +{ $values { "u" "a SIMD array" } { "n" "a non-negative integer" } { "w" "a SIMD array" } } +{ $description "Shifts the entire SIMD array to the left by " { $snippet "n" } " bytes. This word may only be used in a context where the compiler can statically infer that the input is a SIMD array." } ; + +HELP: hrshift +{ $values { "u" "a SIMD array" } { "n" "a non-negative integer" } { "w" "a SIMD array" } } +{ $description "Shifts the entire SIMD array to the right by " { $snippet "n" } " bytes. This word may only be used in a context where the compiler can statically infer that the input is a SIMD array." } ; + HELP: norm-sq { $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } } { $description "Computes the squared length of a mathematical vector." } ; diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index fb4c7592d3..1dcff8e8a9 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2005, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: arrays alien.c-types kernel sequences math math.functions -hints math.order math.libm fry combinators ; +hints math.order math.libm fry combinators byte-arrays accessors ; QUALIFIED-WITH: alien.c-types c IN: math.vectors @@ -62,9 +62,12 @@ PRIVATE> [ drop call ] } case ; inline +: bitandn ( x y -- z ) [ bitnot ] dip bitand ; inline + PRIVATE> : vbitand ( u v -- w ) over '[ _ [ bitand ] fp-bitwise-op ] 2map ; +: vbitandn ( u v -- w ) over '[ _ [ bitandn ] fp-bitwise-op ] 2map ; : vbitor ( u v -- w ) over '[ _ [ bitor ] fp-bitwise-op ] 2map ; : vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ; : vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ; @@ -86,6 +89,14 @@ PRIVATE> : vlshift ( u n -- w ) '[ _ shift ] map ; : vrshift ( u n -- w ) neg '[ _ shift ] map ; +: hlshift ( u n -- w ) + [ clone ] dip + '[ _ append 16 tail* ] change-underlying ; + +: hrshift ( u n -- w ) + [ clone ] dip + '[ _ prepend 16 head* ] change-underlying ; + : vfloor ( u -- v ) [ floor ] map ; : vceiling ( u -- v ) [ ceiling ] map ; : vtruncate ( u -- v ) [ truncate ] map ;