diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index 9c4447e654..3b6674efee 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -194,7 +194,8 @@ IN: compiler.cfg.intrinsics { math.vectors.simd.intrinsics:(simd-with) [ [ ^^with-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] } { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] } - { math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] } + { math.vectors.simd.intrinsics:(simd-vshuffle-elements) [ emit-shuffle-vector ] } + { math.vectors.simd.intrinsics:(simd-vshuffle-bytes) [ emit-shuffle-vector-var ] } { math.vectors.simd.intrinsics:(simd-(vmerge-head)) [ [ ^^merge-vector-head ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-(vmerge-tail)) [ [ ^^merge-vector-tail ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-(v>float)) [ [ ^^integer>float-vector ] emit-unary-vector-op ] } diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index 3f7530caca..e608cf999c 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -1,8 +1,9 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors alien byte-arrays fry cpu.architecture kernel math -sequences math.vectors math.vectors.simd.intrinsics macros -generalizations combinators combinators.short-circuit arrays locals +USING: accessors alien byte-arrays fry classes.algebra +cpu.architecture kernel math sequences math.vectors +math.vectors.simd.intrinsics macros generalizations combinators +combinators.short-circuit arrays locals compiler.tree.propagation.info compiler.cfg.builder.blocks compiler.cfg.comparisons compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats @@ -75,46 +76,43 @@ MACRO: if-literals-match ( quots -- ) ds-push ] emit-vector-op ; -: variable-shuffle? ( obj -- ? ) - ! the vshuffle intrinsic current doesn't allow variable shuffles - drop f ; +: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ; -: immediate-shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ; - -: shuffle? ( obj -- ? ) { [ variable-shuffle? ] [ immediate-shuffle? ] } 1|| ; - -: (>variable-shuffle) ( shuffle rep -- shuffle ) +: >variable-shuffle ( shuffle rep -- shuffle' ) rep-component-type heap-size [ dup >byte-array ] [ iota >byte-array ] bi '[ _ n*v _ v+ ] map concat ; -: >variable-shuffle ( shuffle rep -- shuffle' ) - over immediate-shuffle? [ (>variable-shuffle) ] [ drop ] if ; - -: generate-shuffle-vector-imm? ( shuffle rep -- ? ) - { - [ drop immediate-shuffle? ] - [ nip %shuffle-vector-imm-reps member? ] - } 2&& ; - -: generate-shuffle-vector ( src shuffle rep -- dst ) - 2dup generate-shuffle-vector-imm? +: generate-shuffle-vector-imm ( src shuffle rep -- dst ) + dup %shuffle-vector-imm-reps member? [ ^^shuffle-vector-imm ] [ [ >variable-shuffle ^^load-constant ] keep ^^shuffle-vector ] if ; -: emit-shuffle-vector ( node -- ) +: emit-shuffle-vector-imm ( node -- ) ! Pad the permutation with zeroes if it's too short, since we ! can't throw an error at this point. - [ [ rep-components 0 pad-tail ] keep generate-shuffle-vector ] [unary/param] + [ [ rep-components 0 pad-tail ] keep generate-shuffle-vector-imm ] [unary/param] { [ shuffle? ] [ representation? ] } if-literals-match ; +: emit-shuffle-vector-var ( node -- ) + [ ^^shuffle-vector ] [binary] + { [ %shuffle-vector-reps member? ] } if-literals-match ; + +: emit-shuffle-vector ( node -- ) + dup node-input-infos { + [ length 3 = ] + [ first class>> byte-array class<= ] + [ second class>> byte-array class<= ] + [ third literal>> representation? ] + } 1&& [ emit-shuffle-vector-var ] [ emit-shuffle-vector-imm ] if ; + : ^^broadcast-vector ( src n rep -- dst ) [ rep-components swap ] keep - generate-shuffle-vector ; + generate-shuffle-vector-imm ; : emit-broadcast-vector ( node -- ) [ ^^broadcast-vector ] [unary/param] diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index 1909a83488..1637148b88 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -31,7 +31,8 @@ IN: compiler.tree.propagation.simd (simd-vrshift) (simd-hlshift) (simd-hrshift) - (simd-vshuffle) + (simd-vshuffle-bytes) + (simd-vshuffle-elements) (simd-(vmerge-head)) (simd-(vmerge-tail)) (simd-(v>float)) diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index 7f28f644e1..2ddaf2b8a5 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -60,7 +60,7 @@ MACRO: simd-boa ( rep class -- simd-array ) [ rep-components ] [ new ] [ '[ _ ] ] tri* swap replicate-as ; inline : simd-with/nth-fast? ( rep -- ? ) - [ \ (simd-vshuffle) supported-simd-op? ] + [ \ (simd-vshuffle-elements) supported-simd-op? ] [ rep-component-type can-be-unboxed? ] bi and ; @@ -184,6 +184,8 @@ WHERE TUPLE: A { underlying byte-array read-only initial: $[ 16 ] } ; +INSTANCE: A simd-128 + M: A clone underlying>> clone \ A boa ; inline M: A length drop N ; inline @@ -315,7 +317,7 @@ SLOT: underlying2 class c:typedef ; : (define-simd-256) ( simd -- ) - simd-ops get { vshuffle hlshift hrshift } unique assoc-diff >>ops + simd-ops get { vshuffle-elements vshuffle-bytes hlshift hrshift } unique assoc-diff >>ops [ define-simd ] [ [ class>> ] [ rep>> ] bi define-simd-256-type ] bi ; @@ -362,6 +364,8 @@ TUPLE: A { underlying1 byte-array initial: $[ 16 ] read-only } { underlying2 byte-array initial: $[ 16 ] read-only } ; +INSTANCE: A simd-256 + M: A clone [ underlying1>> clone ] [ underlying2>> clone ] bi \ A boa ; inline diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index deb92c2944..fab55949b4 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -67,7 +67,8 @@ SIMD-OP: vlshift SIMD-OP: vrshift SIMD-OP: hlshift SIMD-OP: hrshift -SIMD-OP: vshuffle +SIMD-OP: vshuffle-elements +SIMD-OP: vshuffle-bytes SIMD-OP: (vmerge-head) SIMD-OP: (vmerge-tail) SIMD-OP: v<= @@ -148,7 +149,7 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? ) union { uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep } union ; -: (%shuffle-reps) ( -- reps ) +: (%shuffle-imm-reps) ( -- reps ) %shuffle-vector-reps %shuffle-vector-imm-reps union ; M: vector-rep supported-simd-op? @@ -182,7 +183,8 @@ M: vector-rep supported-simd-op? { \ (simd-vrshift) [ %shr-vector-reps ] } { \ (simd-hlshift) [ %horizontal-shl-vector-reps ] } { \ (simd-hrshift) [ %horizontal-shr-vector-reps ] } - { \ (simd-vshuffle) [ (%shuffle-reps) ] } + { \ (simd-vshuffle-elements) [ (%shuffle-imm-reps) ] } + { \ (simd-vshuffle-bytes) [ %shuffle-vector-reps ] } { \ (simd-(vmerge-head)) [ %merge-vector-reps ] } { \ (simd-(vmerge-tail)) [ %merge-vector-reps ] } { \ (simd-(v>float)) [ %integer>float-vector-reps ] } diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index 9e999ba9b7..460059809e 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -174,7 +174,7 @@ CONSTANT: simd-classes : remove-special-words ( alist -- alist' ) ! These have their own tests later { - hlshift hrshift vshuffle vbroadcast + hlshift hrshift vshuffle-bytes vshuffle-elements vbroadcast vany? vall? vnone? (v>float) (v>integer) (vpack-signed) (vpack-unsigned) @@ -360,6 +360,23 @@ simd-classes [ ] unit-test ] each +"== Checking variable shuffles" print + +: random-shift-vector ( class -- vec ) + new [ drop 16 random ] map ; + +:: test-shift-vector ( class -- ? ) + class random-int-vector :> src + char-16 random-shift-vector :> perm + { class char-16 } :> decl + + src perm vshuffle + src perm [ decl declare vshuffle ] compile-call + = ; inline + +{ char-16 uchar-16 short-8 ushort-8 int-4 uint-4 longlong-2 ulonglong-2 } +[ 10 swap '[ [ t ] [ _ test-shift-vector ] unit-test ] times ] each + "== Checking vector tests" print :: test-vector-tests-bool ( vector declaration -- none? any? all? ) diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor index 28c3ee82c3..e51d8c4553 100644 --- a/basis/math/vectors/specialization/specialization.factor +++ b/basis/math/vectors/specialization/specialization.factor @@ -98,7 +98,8 @@ H{ { vrshift { +vector+ +scalar+ -> +vector+ } } { hlshift { +vector+ +literal+ -> +vector+ } } { hrshift { +vector+ +literal+ -> +vector+ } } - { vshuffle { +vector+ +literal+ -> +vector+ } } + { vshuffle-elements { +vector+ +literal+ -> +vector+ } } + { vshuffle-bytes { +vector+ +vector+ -> +vector+ } } { vbroadcast { +vector+ +literal+ -> +vector+ } } { (vmerge-head) { +vector+ +vector+ -> +vector+ } } { (vmerge-tail) { +vector+ +vector+ -> +vector+ } } @@ -162,7 +163,7 @@ ERROR: bad-vector-word word ; } cond ! Don't specialize horizontal shifts, shuffles, and conversions at all, they're only for SIMD { - hlshift hrshift vshuffle vbroadcast + hlshift hrshift vshuffle-elements vshuffle-bytes vbroadcast (v>integer) (v>float) (vpack-signed) (vpack-unsigned) (vunpack-head) (vunpack-tail) diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index c65009950d..4cb03af44c 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -6,6 +6,9 @@ locals ; QUALIFIED-WITH: alien.c-types c IN: math.vectors +MIXIN: simd-128 +MIXIN: simd-256 + GENERIC: element-type ( obj -- c-type ) M: object element-type drop f ; inline @@ -83,7 +86,20 @@ PRIVATE> : vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ; :: vbroadcast ( u n -- v ) u length n u nth u like ; -: vshuffle ( u perm -- v ) swap [ '[ _ nth ] ] keep map-as ; + +: vshuffle-elements ( u perm -- v ) + swap [ '[ _ nth ] ] keep map-as ; + +: vshuffle-bytes ( u perm -- v ) + underlying>> [ + swap [ '[ _ nth ] ] keep map-as + ] curry change-underlying ; + +GENERIC: vshuffle ( u perm -- v ) +M: array vshuffle ( u perm -- v ) + vshuffle-elements ; inline +M: simd-128 vshuffle ( u perm -- v ) + vshuffle-bytes ; inline : vlshift ( u n -- w ) '[ _ shift ] map ; : vrshift ( u n -- w ) neg '[ _ shift ] map ; diff --git a/basis/specialized-arrays/specialized-arrays.factor b/basis/specialized-arrays/specialized-arrays.factor index a3d24c10c2..c5de95b5b5 100755 --- a/basis/specialized-arrays/specialized-arrays.factor +++ b/basis/specialized-arrays/specialized-arrays.factor @@ -2,7 +2,8 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors alien alien.c-types alien.data alien.parser assocs byte-arrays classes compiler.units functors kernel lexer -libc math math.vectors math.vectors.specialization namespaces +libc math math.vectors math.vectors.private +math.vectors.specialization namespaces parser prettyprint.custom sequences sequences.private strings summary vocabs vocabs.loader vocabs.parser vocabs.generated words fry combinators present ; @@ -68,6 +69,8 @@ TUPLE: A [ drop \ T bad-byte-array-length ] unless ; inline +M: A new-underlying drop byte-array>A ; + M: A clone [ underlying>> clone ] [ length>> ] bi ; inline M: A length length>> ; inline