From a8ea929ad97724d9cd587de5b3b45d238a8ad411 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 28 Sep 2009 17:31:34 -0500 Subject: [PATCH 1/2] Work in progress --- basis/compiler/cfg/hats/hats.factor | 3 -- .../cfg/instructions/instructions.factor | 23 +++++++++++ .../compiler/cfg/intrinsics/intrinsics.factor | 3 ++ .../compiler/cfg/intrinsics/simd/simd.factor | 7 ++++ basis/compiler/codegen/codegen.factor | 5 +++ basis/cpu/architecture/architecture.factor | 9 +++++ basis/cpu/x86/x86.factor | 40 +++++++++++++++++++ .../math/vectors/simd/functor/functor.factor | 9 +---- .../vectors/simd/intrinsics/intrinsics.factor | 6 +++ basis/math/vectors/vectors.factor | 20 +++++----- 10 files changed, 106 insertions(+), 19 deletions(-) diff --git a/basis/compiler/cfg/hats/hats.factor b/basis/compiler/cfg/hats/hats.factor index 36fa631050..4bfcb3dac8 100644 --- a/basis/compiler/cfg/hats/hats.factor +++ b/basis/compiler/cfg/hats/hats.factor @@ -51,9 +51,6 @@ insn-classes get [ : ^^unbox-c-ptr ( src class -- dst ) [ next-vreg dup ] 2dip next-vreg ##unbox-c-ptr ; -: ^^neg ( src -- dst ) - [ 0 ^^load-literal ] dip ^^sub ; - : ^^allot-tuple ( n -- dst ) 2 + cells tuple ^^allot ; diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index d560cfbab6..2e9d2863c3 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -186,6 +186,10 @@ PURE-INSN: ##not def: dst/int-rep use: src/int-rep ; +PURE-INSN: ##neg +def: dst/int-rep +use: src/int-rep ; + PURE-INSN: ##log2 def: dst/int-rep use: src/int-rep ; @@ -270,6 +274,10 @@ def: dst use: src/int-rep literal: rep ; +PURE-INSN: ##zero-vector +def: dst +literal: rep ; + PURE-INSN: ##broadcast-vector def: dst use: src/scalar-rep @@ -285,6 +293,16 @@ def: dst use: src1/scalar-rep src2/scalar-rep src3/scalar-rep src4/scalar-rep literal: rep ; +PURE-INSN: ##shuffle-vector +def: dst +use: src +literal: shuffle 
rep ; + +PURE-INSN: ##select-vector +def: dst +use: src +literal: n rep ; + PURE-INSN: ##add-vector def: dst use: src1 src2 @@ -335,6 +353,11 @@ def: dst use: src1 src2 literal: rep ; +PURE-INSN: ##dot-vector +def: dst/scalar-rep +use: src1 src2 +literal: rep ; + PURE-INSN: ##horizontal-add-vector def: dst/scalar-rep use: src diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index 7b8e0c5989..2f6529692d 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -164,6 +164,7 @@ IN: compiler.cfg.intrinsics { math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-v.) [ [ ^^dot-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vabs) [ [ ^^abs-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] } @@ -177,6 +178,8 @@ IN: compiler.cfg.intrinsics { math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] } { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] } + { math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] } + { math.vectors.simd.intrinsics:(simd-vselect) [ emit-select-vector ] } { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] } { math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] } diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor 
b/basis/compiler/cfg/intrinsics/simd/simd.factor index f975c7b39f..68012cfeb5 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -45,6 +45,13 @@ IN: compiler.cfg.intrinsics.simd ds-push ] emit-vector-op ; +: emit-shuffle-vector ( node -- ) + ; + +: emit-select-vector ( node -- ) + + ; + : emit-alien-vector ( node -- ) dup [ '[ diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index 7a8253a35e..12cf303b4c 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -142,6 +142,7 @@ CODEGEN: ##sar-imm %sar-imm CODEGEN: ##min %min CODEGEN: ##max %max CODEGEN: ##not %not +CODEGEN: ##neg %neg CODEGEN: ##log2 %log2 CODEGEN: ##copy %copy CODEGEN: ##unbox-float %unbox-float @@ -160,9 +161,12 @@ CODEGEN: ##double>single-float %double>single-float CODEGEN: ##integer>float %integer>float CODEGEN: ##float>integer %float>integer CODEGEN: ##unbox-vector %unbox-vector +CODEGEN: ##zero-vector %zero-vector CODEGEN: ##broadcast-vector %broadcast-vector CODEGEN: ##gather-vector-2 %gather-vector-2 CODEGEN: ##gather-vector-4 %gather-vector-4 +CODEGEN: ##shuffle-vector %shuffle-vector +CODEGEN: ##select-vector %select-vector CODEGEN: ##box-vector %box-vector CODEGEN: ##add-vector %add-vector CODEGEN: ##saturated-add-vector %saturated-add-vector @@ -174,6 +178,7 @@ CODEGEN: ##saturated-mul-vector %saturated-mul-vector CODEGEN: ##div-vector %div-vector CODEGEN: ##min-vector %min-vector CODEGEN: ##max-vector %max-vector +CODEGEN: ##dot-vector %dot-vector CODEGEN: ##sqrt-vector %sqrt-vector CODEGEN: ##horizontal-add-vector %horizontal-add-vector CODEGEN: ##horizontal-sub-vector %horizontal-sub-vector diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 1e4e27b6e4..9222fcc17c 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -180,6 +180,7 @@ 
HOOK: %sar-imm cpu ( dst src1 src2 -- ) HOOK: %min cpu ( dst src1 src2 -- ) HOOK: %max cpu ( dst src1 src2 -- ) HOOK: %not cpu ( dst src -- ) +HOOK: %neg cpu ( dst src -- ) HOOK: %log2 cpu ( dst src -- ) HOOK: %copy cpu ( dst src rep -- ) @@ -210,9 +211,12 @@ HOOK: %float>integer cpu ( dst src -- ) HOOK: %box-vector cpu ( dst src temp rep -- ) HOOK: %unbox-vector cpu ( dst src rep -- ) +HOOK: %zero-vector cpu ( dst rep -- ) HOOK: %broadcast-vector cpu ( dst src rep -- ) HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- ) HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) +HOOK: %shuffle-vector cpu ( dst src shuffle rep -- ) +HOOK: %select-vector cpu ( dst src n rep -- ) HOOK: %add-vector cpu ( dst src1 src2 rep -- ) HOOK: %saturated-add-vector cpu ( dst src1 src2 rep -- ) HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- ) @@ -223,6 +227,7 @@ HOOK: %saturated-mul-vector cpu ( dst src1 src2 rep -- ) HOOK: %div-vector cpu ( dst src1 src2 rep -- ) HOOK: %min-vector cpu ( dst src1 src2 rep -- ) HOOK: %max-vector cpu ( dst src1 src2 rep -- ) +HOOK: %dot-vector cpu ( dst src1 src2 rep -- ) HOOK: %sqrt-vector cpu ( dst src rep -- ) HOOK: %horizontal-add-vector cpu ( dst src rep -- ) HOOK: %horizontal-sub-vector cpu ( dst src rep -- ) @@ -239,9 +244,12 @@ HOOK: %horizontal-shr-vector cpu ( dst src1 src2 rep -- ) HOOK: %integer>scalar cpu ( dst src rep -- ) HOOK: %scalar>integer cpu ( dst src rep -- ) +HOOK: %zero-vector-reps cpu ( -- reps ) HOOK: %broadcast-vector-reps cpu ( -- reps ) HOOK: %gather-vector-2-reps cpu ( -- reps ) HOOK: %gather-vector-4-reps cpu ( -- reps ) +HOOK: %shuffle-vector-reps cpu ( -- reps ) +HOOK: %select-vector-reps cpu ( -- reps ) HOOK: %add-vector-reps cpu ( -- reps ) HOOK: %saturated-add-vector-reps cpu ( -- reps ) HOOK: %add-sub-vector-reps cpu ( -- reps ) @@ -252,6 +260,7 @@ HOOK: %saturated-mul-vector-reps cpu ( -- reps ) HOOK: %div-vector-reps cpu ( -- reps ) HOOK: %min-vector-reps cpu ( -- reps ) HOOK: %max-vector-reps cpu ( -- 
reps ) +HOOK: %dot-vector-reps cpu ( -- reps ) HOOK: %sqrt-vector-reps cpu ( -- reps ) HOOK: %horizontal-add-vector-reps cpu ( -- reps ) HOOK: %horizontal-sub-vector-reps cpu ( -- reps ) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 857e1ede6b..4d9c53d774 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -129,6 +129,7 @@ M: x86 %min int-rep two-operand [ CMP ] [ CMOVG ] 2bi ; M: x86 %max int-rep two-operand [ CMP ] [ CMOVL ] 2bi ; M: x86 %not int-rep one-operand NOT ; +M: x86 %neg int-rep one-operand NEG ; M: x86 %log2 BSR ; GENERIC: copy-register* ( dst src rep -- ) @@ -578,6 +579,19 @@ MACRO: available-reps ( alist -- ) reverse [ { } ] suffix '[ _ cond ] ; +M: x86 %zero-vector + { + { double-2-rep [ dup XORPD ] } + { float-4-rep [ dup XORPS ] } + [ drop dup PXOR ] + } case ; + +M: x86 %zero-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } + } available-reps ; + : unsign-rep ( rep -- rep' ) { { uint-4-rep int-4-rep } @@ -663,6 +677,10 @@ M: x86 %gather-vector-2-reps { sse2? { double-2-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; +M: x86 %shuffle-vector-reps { } ; + +M: x86 %select-vector-reps { } ; + M: x86 %add-vector ( dst src1 src2 rep -- ) [ two-operand ] keep { @@ -820,6 +838,28 @@ M: x86 %max-vector-reps { sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } } } available-reps ; +M: x86 %dot-vector + [ two-operand ] keep + { + { float-4-rep [ + sse4.1? + [ HEX: ff DPPS ] + [ [ MULPS ] [ drop dup float-4-rep %horizontal-add-vector ] 2bi ] + if + ] } + { double-2-rep [ + sse4.1? + [ HEX: ff DPPD ] + [ [ MULPD ] [ drop dup double-2-rep %horizontal-add-vector ] 2bi ] + if + ] } + } case ; + +M: x86 %dot-vector-reps + { + { sse3? 
{ float-4-rep double-2-rep } } + } available-reps ; + M: x86 %horizontal-add-vector ( dst src rep -- ) { { float-4-rep [ [ float-4-rep %copy ] [ HADDPS ] [ HADDPS ] 2tri ] } diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index bc42bddf02..c2eb85e3a6 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -55,7 +55,7 @@ ERROR: bad-schema schema ; :: high-level-ops ( ctor elt-class -- assoc ) ! Some SIMD operations are defined in terms of others. { - { vneg [ [ dup v- ] keep v- ] } + { vneg [ [ dup vbitxor ] keep v- ] } { n+v [ [ ctor execute ] dip v+ ] } { v+n [ ctor execute v+ ] } { n-v [ [ ctor execute ] dip v- ] } @@ -71,12 +71,7 @@ ERROR: bad-schema schema ; ! To compute dot product and distance with integer vectors, we ! have to do things less efficiently, with integer overflow checks, ! in the general case. - elt-class m:float = [ - { - { distance [ v- norm ] } - { v. [ v* sum ] } - } append - ] when ; + elt-class m:float = [ { distance [ v- norm ] } suffix ] when ; :: simd-vector-words ( class ctor rep vv->v vn->v v->v v->n -- ) rep rep-component-type c-type-boxed-class :> elt-class diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index 9d31dfa813..78301ae3a5 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -36,6 +36,7 @@ SIMD-OP: v* SIMD-OP: v/ SIMD-OP: vmin SIMD-OP: vmax +SIMD-OP: v. 
SIMD-OP: vsqrt SIMD-OP: sum SIMD-OP: vabs @@ -47,10 +48,12 @@ SIMD-OP: vlshift SIMD-OP: vrshift SIMD-OP: hlshift SIMD-OP: hrshift +SIMD-OP: vshuffle : (simd-broadcast) ( x rep -- v ) bad-simd-call ; : (simd-gather-2) ( a b rep -- v ) bad-simd-call ; : (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ; +: (simd-select) ( v n rep -- x ) bad-simd-call ; : assert-positive ( x -- y ) ; @@ -110,6 +113,7 @@ M: vector-rep supported-simd-op? { \ (simd-v/) [ %div-vector-reps ] } { \ (simd-vmin) [ %min-vector-reps ] } { \ (simd-vmax) [ %max-vector-reps ] } + { \ (simd-v.) [ %dot-vector-reps ] } { \ (simd-vsqrt) [ %sqrt-vector-reps ] } { \ (simd-sum) [ %horizontal-add-vector-reps ] } { \ (simd-vabs) [ %abs-vector-reps ] } @@ -121,7 +125,9 @@ M: vector-rep supported-simd-op? { \ (simd-vrshift) [ %shr-vector-reps ] } { \ (simd-hlshift) [ %horizontal-shl-vector-reps ] } { \ (simd-hrshift) [ %horizontal-shr-vector-reps ] } + { \ (simd-vshuffle) [ %shuffle-vector-reps ] } { \ (simd-broadcast) [ %broadcast-vector-reps ] } { \ (simd-gather-2) [ %gather-vector-2-reps ] } { \ (simd-gather-4) [ %gather-vector-4-reps ] } + { \ (simd-select) [ %select-vector-reps ] } } case member? 
; diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index de9ba51aec..5f9b7e395b 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -66,6 +66,9 @@ PRIVATE> GENERIC: new-underlying ( underlying seq -- seq' ) +: change-underlying ( seq quot -- seq' ) + '[ underlying>> @ ] keep new-underlying ; inline + PRIVATE> : vbitand ( u v -- w ) over '[ _ [ bitand ] fp-bitwise-op ] 2map ; @@ -74,6 +77,14 @@ PRIVATE> : vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ; : vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ; +: vshuffle ( u perm -- v ) swap nths ; + +: vlshift ( u n -- w ) '[ _ shift ] map ; +: vrshift ( u n -- w ) neg '[ _ shift ] map ; + +: hlshift ( u n -- w ) '[ _ prepend 16 head ] change-underlying ; +: hrshift ( u n -- w ) '[ _ append 16 tail* ] change-underlying ; + : vand ( u v -- w ) [ and ] 2map ; : vor ( u v -- w ) [ or ] 2map ; : vxor ( u v -- w ) [ xor ] 2map ; @@ -88,15 +99,6 @@ PRIVATE> : v? ( ? u v -- w ) [ ? 
] pick 3map-as ; -: vlshift ( u n -- w ) '[ _ shift ] map ; -: vrshift ( u n -- w ) neg '[ _ shift ] map ; - -: hlshift ( u n -- w ) - [ [ underlying>> ] dip prepend 16 head ] [ drop ] 2bi new-underlying ; - -: hrshift ( u n -- w ) - [ [ underlying>> ] dip append 16 tail* ] [ drop ] 2bi new-underlying ; - : vfloor ( u -- v ) [ floor ] map ; : vceiling ( u -- v ) [ ceiling ] map ; : vtruncate ( u -- v ) [ truncate ] map ; From e40a95c1e10b1b0722a5c5a6444a9fe8865a8c8e Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 28 Sep 2009 23:12:13 -0500 Subject: [PATCH 2/2] math.vectors.simd: add vshuffle intrinsic --- .../compiler/cfg/intrinsics/intrinsics.factor | 2 +- .../compiler/cfg/intrinsics/simd/simd.factor | 54 +++++++++++++------ .../tree/propagation/simd/simd.factor | 15 ++++-- basis/cpu/x86/x86.factor | 51 ++++++++++++++++-- .../math/vectors/simd/functor/functor.factor | 18 +++++-- basis/math/vectors/simd/simd-tests.factor | 51 ++++++++++++++++-- .../specialization/specialization.factor | 17 +++--- basis/math/vectors/vectors-docs.factor | 14 +++++ basis/math/vectors/vectors.factor | 2 +- 9 files changed, 186 insertions(+), 38 deletions(-) diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index 2f6529692d..9784855b6d 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -179,7 +179,7 @@ IN: compiler.cfg.intrinsics { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] } { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] } { math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] } - { math.vectors.simd.intrinsics:(simd-vselect) [ emit-select-vector ] } + { math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] } { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] } { 
math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] } diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index 68012cfeb5..7f393fdc83 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -1,32 +1,51 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: accessors byte-arrays fry cpu.architecture kernel math -sequences compiler.tree.propagation.info +sequences macros generalizations combinators +combinators.short-circuit arrays compiler.tree.propagation.info compiler.cfg.builder.blocks compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats compiler.cfg.instructions compiler.cfg.registers compiler.cfg.intrinsics.alien ; IN: compiler.cfg.intrinsics.simd +MACRO: check-elements ( quots -- ) + [ length '[ _ firstn ] ] + [ '[ _ spread ] ] + [ length 1 - \ and <repetition> [ ] like ] + tri 3append ; + +MACRO: if-literals-match ( quots -- ) + [ length ] [ ] [ length ] tri + ! n quots n n + '[ + ! node quot + [ + dup node-input-infos + _ tail-slice* [ literal>> ] map + dup _ check-elements + ] dip + swap [ + ! node literals quot + [ _ firstn ] dip call + drop + ] [ 2drop emit-primitive ] if + ] ; + : emit-vector-op ( node quot: ( rep -- ) -- ) - [ dup node-input-infos last literal>> dup representation? ] dip - '[ nip @ ] [ drop emit-primitive ] if ; inline + { [ representation? ] } if-literals-match ; inline : emit-binary-vector-op ( node quot -- ) - '[ [ ds-drop 2inputs ] dip @ ds-push ] emit-vector-op ; inline + '[ [ ds-drop 2inputs ] dip @ ds-push ] + emit-vector-op ; inline : emit-unary-vector-op ( node quot -- ) - '[ [ ds-drop ds-pop ] dip @ ds-push ] emit-vector-op ; inline + '[ [ ds-drop ds-pop ] dip @ ds-push ] + emit-vector-op ; inline : emit-horizontal-shift ( node quot -- ) - [ - dup node-input-infos - [ second literal>> ] [ third literal>> ] bi - 2dup [ integer? ] [ representation? 
] bi* and - ] dip - '[ [ drop ds-drop ds-drop ds-pop ] 2dip @ ds-push ] - [ 2drop emit-primitive ] - if ; inline + '[ [ -2 inc-d ds-pop ] 2dip @ ds-push ] + { [ integer? ] [ representation? ] } if-literals-match ; inline : emit-gather-vector-2 ( node -- ) [ ^^gather-vector-2 ] emit-binary-vector-op ; @@ -45,12 +64,15 @@ IN: compiler.cfg.intrinsics.simd ds-push ] emit-vector-op ; +: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ; + : emit-shuffle-vector ( node -- ) - ; + [ [ -2 inc-d ds-pop ] 2dip ^^shuffle-vector ds-push ] + { [ shuffle? ] [ representation? ] } if-literals-match ; inline : emit-select-vector ( node -- ) - - ; + [ [ -2 inc-d ds-pop ] 2dip ^^select-vector ds-push ] + { [ integer? ] [ representation? ] } if-literals-match ; inline : emit-alien-vector ( node -- ) dup [ diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index 5ad483405a..552ab799ba 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -24,22 +24,27 @@ IN: compiler.tree.propagation.simd (simd-vrshift) (simd-hlshift) (simd-hrshift) + (simd-vshuffle) (simd-broadcast) (simd-gather-2) (simd-gather-4) + (simd-select) alien-vector } [ { byte-array } "default-output-classes" set-word-prop ] each -\ (simd-sum) [ - nip dup literal?>> [ +: scalar-output-class ( rep -- class ) + dup literal?>> [ literal>> scalar-rep-of { { float-rep [ float ] } { double-rep [ float ] } - [ integer ] + [ drop integer ] } case ] [ drop real ] if - -] "outputs" set-word-prop + ; + +\ (simd-sum) [ nip scalar-output-class ] "outputs" set-word-prop + +\ (simd-v.) 
[ 2nip scalar-output-class ] "outputs" set-word-prop \ assert-positive [ real [0,inf] value-info-intersect diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 4d9c53d774..397a26a464 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -602,8 +602,8 @@ M: x86 %zero-vector-reps M:: x86 %broadcast-vector ( dst src rep -- ) rep unsign-rep { - { float-4-rep [ - dst src float-4-rep %copy + { float-4-rep [ + dst src float-4-rep %copy dst dst { 0 0 0 0 } SHUFPS ] } { double-2-rep [ @@ -677,7 +677,52 @@ M: x86 %gather-vector-2-reps { sse2? { double-2-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; -M: x86 %shuffle-vector-reps { } ; +: double-2-shuffle ( dst shuffle -- ) + { + { { 0 1 } [ drop ] } + { { 0 0 } [ dup UNPCKLPD ] } + { { 1 1 } [ dup UNPCKHPD ] } + [ dupd SHUFPD ] + } case ; + +: float-4-shuffle ( dst shuffle -- ) + { + { { 0 1 2 3 } [ drop ] } + { { 0 0 2 2 } [ dup MOVSLDUP ] } + { { 1 1 3 3 } [ dup MOVSHDUP ] } + { { 0 1 0 1 } [ dup MOVLHPS ] } + { { 2 3 2 3 } [ dup MOVHLPS ] } + { { 0 0 1 1 } [ dup UNPCKLPS ] } + { { 2 2 3 3 } [ dup UNPCKHPS ] } + [ dupd SHUFPS ] + } case ; + +: int-4-shuffle ( dst shuffle -- ) + { + { { 0 1 2 3 } [ drop ] } + { { 0 0 1 1 } [ dup PUNPCKLDQ ] } + { { 2 2 3 3 } [ dup PUNPCKHDQ ] } + { { 0 1 0 1 } [ dup PUNPCKLQDQ ] } + { { 2 3 2 3 } [ dup PUNPCKHQDQ ] } + [ dupd PSHUFD ] + } case ; + +: longlong-2-shuffle ( dst shuffle -- ) + first2 [ 2 * dup 1 + ] bi@ 4array int-4-shuffle ; + +M:: x86 %shuffle-vector ( dst src shuffle rep -- ) + dst src rep %copy + dst shuffle rep unsign-rep { + { double-2-rep [ double-2-shuffle ] } + { float-4-rep [ float-4-shuffle ] } + { int-4-rep [ int-4-shuffle ] } + { longlong-2-rep [ longlong-2-shuffle ] } + } case ; + +M: x86 %shuffle-vector-reps + { + { sse2? 
{ double-2-rep float-4-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } + } available-reps ; M: x86 %select-vector-reps { } ; diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index c2eb85e3a6..ba045cda60 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -73,13 +73,15 @@ ERROR: bad-schema schema ; ! in the general case. elt-class m:float = [ { distance [ v- norm ] } suffix ] when ; -:: simd-vector-words ( class ctor rep vv->v vn->v v->v v->n -- ) +:: simd-vector-words ( class ctor rep vv->v vn->v vv->n v->v v->n -- ) rep rep-component-type c-type-boxed-class :> elt-class class elt-class { { { +vector+ +vector+ -> +vector+ } vv->v } { { +vector+ +scalar+ -> +vector+ } vn->v } + { { +vector+ +literal+ -> +vector+ } vn->v } + { { +vector+ +vector+ -> +scalar+ } vv->n } { { +vector+ -> +vector+ } v->v } { { +vector+ -> +scalar+ } v->n } { { +vector+ -> +nonnegative+ } v->n } @@ -116,6 +118,7 @@ SET-NTH [ T dup c-setter array-accessor ] A-rep [ A name>> "-rep" append "cpu.architecture" lookup ] A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op +A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op A-v->v-op DEFINES-PRIVATE ${A}-v->v-op A-v->n-op DEFINES-PRIVATE ${A}-v->n-op @@ -181,13 +184,16 @@ INSTANCE: A sequence : A-vn->v-op ( v1 v2 quot -- v3 ) [ [ underlying>> ] dip A-rep ] dip call \ A boa ; inline +: A-vv->n-op ( v1 v2 quot -- n ) + [ [ underlying>> ] bi@ A-rep ] dip call ; inline + : A-v->v-op ( v1 quot -- v2 ) [ underlying>> A-rep ] dip call \ A boa ; inline : A-v->n-op ( v quot -- n ) [ underlying>> A-rep ] dip call ; inline -\ A \ A-with \ A-rep \ A-vv->v-op \ A-vn->v-op \ A-v->v-op \ A-v->n-op simd-vector-words +\ A \ A-with \ A-rep \ A-vv->v-op \ A-vn->v-op \ A-vv->n-op \ A-v->v-op \ A-v->n-op simd-vector-words \ A \ A-rep define-simd-128-type PRIVATE> @@ -238,6 +244,7 @@ A-deref DEFINES-PRIVATE 
${A}-deref A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ] A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op +A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op A-v->v-op DEFINES-PRIVATE ${A}-v->v-op A-v->n-op DEFINES-PRIVATE ${A}-v->n-op @@ -312,6 +319,11 @@ INSTANCE: A sequence [ [ [ underlying2>> ] dip A-rep ] dip call ] 3bi \ A boa ; inline +: A-vv->n-op ( v1 v2 quot -- v3 ) + [ [ [ underlying1>> ] bi@ A-rep ] dip call ] + [ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi + + ; inline + : A-v->v-op ( v1 combine-quot -- v2 ) [ [ underlying1>> A-rep ] dip call ] [ [ underlying2>> A-rep ] dip call ] 2bi @@ -320,7 +332,7 @@ INSTANCE: A sequence : A-v->n-op ( v1 combine-quot -- v2 ) [ [ underlying1>> ] [ underlying2>> ] bi A-rep (simd-v+) A-rep ] dip call ; inline -\ A \ A-with \ A-rep \ A-vv->v-op \ A-vn->v-op \ A-v->v-op \ A-v->n-op simd-vector-words +\ A \ A-with \ A-rep \ A-vv->v-op \ A-vn->v-op \ A-vv->n-op \ A-v->v-op \ A-v->n-op simd-vector-words \ A \ A-rep define-simd-256-type ;FUNCTOR diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index c1428b9c33..588ef83816 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -148,13 +148,14 @@ CONSTANT: simd-classes : remove-integer-words ( alist -- alist' ) [ drop { vlshift vrshift } member? not ] assoc-filter ; -: remove-horizontal-shifts ( alist -- alist' ) - [ drop { hlshift hrshift } member? not ] assoc-filter ; +: remove-special-words ( alist -- alist' ) + ! These have their own tests later + [ drop { hlshift hrshift vshuffle } member? 
not ] assoc-filter ; : ops-to-check ( elt-class -- alist ) [ vector-words >alist ] dip float = [ remove-integer-words ] [ remove-float-words ] if - remove-horizontal-shifts ; + remove-special-words ; : check-vector-ops ( class elt-class compare-quot -- ) [ @@ -271,3 +272,47 @@ STRUCT: simd-struct [ int-4{ 1 2 4 8 } ] [ int-4{ 256 512 1024 2048 } [ { int-4 } declare 1 hrshift ] compile-call ] unit-test + +! Shuffles +: test-shuffle ( input shuffle -- failures ) + [ dup class 1array ] dip + '[ _ declare _ vshuffle ] + [ call ] [ compile-call ] 2bi = not ; inline + +: shuffles-for ( seq -- shuffles ) + length { + { 2 [ + { + { 0 1 } + { 1 1 } + { 1 0 } + { 0 0 } + } + ] } + { 4 [ + { + { 1 2 3 0 } + { 0 1 2 3 } + { 1 1 2 2 } + { 0 0 1 1 } + { 2 2 3 3 } + { 0 1 0 1 } + { 2 3 2 3 } + { 0 0 2 2 } + { 1 1 3 3 } + { 0 1 0 1 } + { 2 2 3 3 } + } + ] } + } case ; + +: test-shuffles ( input -- failures ) + dup shuffles-for [ test-shuffle ] with filter ; inline + +[ { } ] [ float-4{ 1.0 2.0 3.0 4.0 } test-shuffles ] unit-test +[ { } ] [ int-4{ 1 2 3 4 } test-shuffles ] unit-test +[ { } ] [ uint-4{ 1 2 3 4 } test-shuffles ] unit-test + +[ { } ] [ double-2{ 1.0 2.0 } test-shuffles ] unit-test +[ { } ] [ longlong-2{ 1 2 } test-shuffles ] unit-test +[ { } ] [ ulonglong-2{ 1 2 } test-shuffles ] unit-test diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor index 8b78f798ec..333e787086 100644 --- a/basis/math/vectors/specialization/specialization.factor +++ b/basis/math/vectors/specialization/specialization.factor @@ -6,7 +6,7 @@ namespaces assocs fry splitting classes.algebra generalizations locals compiler.tree.propagation.info ; IN: math.vectors.specialization -SYMBOLS: -> +vector+ +scalar+ +nonnegative+ ; +SYMBOLS: -> +vector+ +scalar+ +nonnegative+ +literal+ ; : signature-for-schema ( array-type elt-type schema -- signature ) [ @@ -14,6 +14,7 @@ SYMBOLS: -> +vector+ +scalar+ +nonnegative+ ; { +vector+ [ 
drop ] } { +scalar+ [ nip ] } { +nonnegative+ [ nip ] } + { +literal+ [ 2drop object ] } } case ] with with map ; @@ -87,8 +88,9 @@ H{ { vbitxor { +vector+ +vector+ -> +vector+ } } { vlshift { +vector+ +scalar+ -> +vector+ } } { vrshift { +vector+ +scalar+ -> +vector+ } } - { hlshift { +vector+ +scalar+ -> +vector+ } } - { hrshift { +vector+ +scalar+ -> +vector+ } } + { hlshift { +vector+ +literal+ -> +vector+ } } + { hrshift { +vector+ +literal+ -> +vector+ } } + { vshuffle { +vector+ +literal+ -> +vector+ } } } PREDICATE: vector-word < word vector-words key? ; @@ -102,7 +104,10 @@ M: vector-word subwords specializations values [ word? ] filter ; : add-specialization ( new-word signature word -- ) specializations set-at ; -: word-schema ( word -- schema ) vector-words at ; +ERROR: bad-vector-word word ; + +: word-schema ( word -- schema ) + vector-words ?at [ bad-vector-word ] unless ; : inputs ( schema -- seq ) { -> } split first ; @@ -129,8 +134,8 @@ M: vector-word subwords specializations values [ word? ] filter ; { [ dup complex class<= ] [ vector-words keys { vsqrt } diff ] } [ { } ] } cond - ! Don't specialize horizontal shifts at all, they're only for SIMD - { hlshift hrshift } diff + ! Don't specialize horizontal shifts or shuffles at all, they're only for SIMD + { hlshift hrshift vshuffle } diff nip ; :: specialize-vector-words ( array-type elt-type simd -- ) diff --git a/basis/math/vectors/vectors-docs.factor b/basis/math/vectors/vectors-docs.factor index 34b2c0bec6..c3f17ba6d5 100644 --- a/basis/math/vectors/vectors-docs.factor +++ b/basis/math/vectors/vectors-docs.factor @@ -41,6 +41,8 @@ $nl { $subsection vbitxor } { $subsection vlshift } { $subsection vrshift } +"Shuffling:" +{ $subsection vshuffle } "Inner product and norm:" { $subsection v. 
} { $subsection norm } @@ -231,6 +233,18 @@ HELP: hrshift { $values { "u" "a SIMD array" } { "n" "a non-negative integer" } { "w" "a SIMD array" } } { $description "Shifts the entire SIMD array to the right by " { $snippet "n" } " bytes. This word may only be used in a context where the compiler can statically infer that the input is a SIMD array." } ; +HELP: vshuffle +{ $values { "u" "a SIMD array" } { "perm" "an array of integers" } { "v" "a SIMD array" } } +{ $description "Permutes the elements of a SIMD array. Duplicate entries are allowed in the permutation." } +{ $examples + { $example + "USING: alien.c-types math.vectors math.vectors.simd" "prettyprint ;" + "SIMD: int" + "int-4{ 69 42 911 13 } { 1 3 2 3 } vshuffle ." + "int-4{ 42 13 911 13 }" + } +} ; + HELP: norm-sq { $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } } { $description "Computes the squared length of a mathematical vector." } ; diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index 5f9b7e395b..e3d4f1b342 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -77,7 +77,7 @@ PRIVATE> : vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ; : vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ; -: vshuffle ( u perm -- v ) swap nths ; +: vshuffle ( u perm -- v ) swap [ nths ] keep like ; : vlshift ( u n -- w ) '[ _ shift ] map ; : vrshift ( u n -- w ) neg '[ _ shift ] map ;