diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index c9cbc71a8c..76dace1f28 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -175,7 +175,7 @@ IN: compiler.cfg.intrinsics { math.vectors.simd.intrinsics:(simd-vrshift) [ [ ^^shr-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-hlshift) [ [ ^^horizontal-shl-vector ] emit-horizontal-shift ] } { math.vectors.simd.intrinsics:(simd-hrshift) [ [ ^^horizontal-shr-vector ] emit-horizontal-shift ] } - { math.vectors.simd.intrinsics:(simd-broadcast) [ emit-broadcast-vector ] } + { math.vectors.simd.intrinsics:(simd-with) [ [ ^^with-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] } { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] } { math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] } diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index 07ee55fc3c..51eced4e35 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -35,16 +35,23 @@ MACRO: if-literals-match ( quots -- ) : emit-vector-op ( node quot: ( rep -- ) -- ) { [ representation? ] } if-literals-match ; inline +: [binary] ( quot -- quot' ) + '[ [ ds-drop 2inputs ] dip @ ds-push ] ; inline + : emit-binary-vector-op ( node quot -- ) - '[ [ ds-drop 2inputs ] dip @ ds-push ] - emit-vector-op ; inline + [binary] emit-vector-op ; inline + +: [unary] ( quot -- quot' ) + '[ [ ds-drop ds-pop ] dip @ ds-push ] ; inline : emit-unary-vector-op ( node quot -- ) - '[ [ ds-drop ds-pop ] dip @ ds-push ] - emit-vector-op ; inline + [unary] emit-vector-op ; inline + +: [unary/param] ( quot -- quot' ) + '[ [ -2 inc-d ds-pop ] 2dip @ ds-push ] ; inline : emit-horizontal-shift ( node quot -- ) - '[ [ -2 inc-d ds-pop ] 2dip @ ds-push ] + [unary/param] { [ integer? ] [ representation? ] } if-literals-match ; inline : emit-gather-vector-2 ( node -- ) @@ -67,24 +74,25 @@ MACRO: if-literals-match ( quots -- ) : shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ; : emit-shuffle-vector ( node -- ) - [ [ -2 inc-d ds-pop ] 2dip ^^shuffle-vector ds-push ] - { [ shuffle? ] [ representation? ] } if-literals-match ; inline + [ ^^shuffle-vector ] [unary/param] + { [ shuffle? ] [ representation? ] } if-literals-match ; -: ^^broadcast-vector ( src rep -- dst ) - [ ^^scalar>vector ] keep - [ rep-components 0 ] keep +: ^^broadcast-vector ( src n rep -- dst ) + [ rep-components swap ] keep ^^shuffle-vector ; : emit-broadcast-vector ( node -- ) - [ ^^broadcast-vector ] emit-unary-vector-op ; + [ ^^broadcast-vector ] [unary/param] + { [ integer? ] [ representation? ] } if-literals-match ; + +: ^^with-vector ( src rep -- dst ) + [ ^^scalar>vector ] keep [ 0 ] dip ^^broadcast-vector ; : ^^select-vector ( src n rep -- dst ) - [ rep-components swap ] keep - [ ^^shuffle-vector ] keep - ^^vector>scalar ; + [ ^^broadcast-vector ] keep ^^vector>scalar ; : emit-select-vector ( node -- ) - [ [ -2 inc-d ds-pop ] 2dip ^^select-vector ds-push ] + [ ^^select-vector ] [unary/param] { [ integer? ] [ representation? ] } if-literals-match ; inline : emit-alien-vector ( node -- ) diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index f29b534deb..e2c2b15f2d 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -25,7 +25,7 @@ IN: compiler.tree.propagation.simd (simd-hlshift) (simd-hrshift) (simd-vshuffle) - (simd-broadcast) + (simd-with) (simd-gather-2) (simd-gather-4) alien-vector diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index 89ab073845..5b72c544ae 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -1,12 +1,12 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors alien.c-types assocs byte-arrays classes effects fry +USING: accessors assocs byte-arrays classes effects fry functors generalizations kernel literals locals math math.functions math.vectors math.vectors.private math.vectors.simd.intrinsics math.vectors.specialization parser prettyprint.custom sequences sequences.private strings words definitions macros cpu.architecture -namespaces arrays quotations combinators sets ; -QUALIFIED-WITH: math m +namespaces arrays quotations combinators sets layouts ; +QUALIFIED-WITH: alien.c-types c IN: math.vectors.simd.functor ERROR: bad-length got expected ; @@ -14,10 +14,22 @@ ERROR: bad-length got expected ; MACRO: simd-boa ( rep class -- simd-array ) [ rep-components ] [ new ] bi* '[ _ _ nsequence ] ; +: can-be-unboxed? ( type -- ? ) + { + { c:float [ t ] } + { c:double [ t ] } + [ c:heap-size cell < ] + } case ; + +: simd-boa-fast? ( rep -- ? ) + [ dup rep-gather-word supported-simd-op? ] + [ rep-component-type can-be-unboxed? ] + bi and ; + :: define-boa-custom-inlining ( word rep class -- ) word [ drop - rep rep rep-gather-word supported-simd-op? [ + rep simd-boa-fast? [ [ rep (simd-boa) class boa ] ] [ word def>> ] if ] "custom-inlining" set-word-prop ; @@ -25,25 +37,34 @@ MACRO: simd-boa ( rep class -- simd-array ) : simd-with ( rep class x -- simd-array ) [ rep-components ] [ new ] [ '[ _ ] ] tri* swap replicate-as ; inline +: simd-with-fast? ( rep -- ? ) + [ \ (simd-vshuffle) supported-simd-op? ] + [ rep-component-type can-be-unboxed? ] + bi and ; + :: define-with-custom-inlining ( word rep class -- ) word [ drop - rep \ (simd-vshuffle) supported-simd-op? [ - [ rep rep-coerce rep (simd-broadcast) class boa ] + rep simd-with-fast? [ + [ rep rep-coerce rep (simd-with) class boa ] ] [ word def>> ] if ] "custom-inlining" set-word-prop ; +: simd-nth-fast? ( rep -- ? ) + [ \ (simd-vshuffle) supported-simd-op? ] + [ rep-component-type can-be-unboxed? ] + bi and ; + : simd-nth-fast ( rep -- quot ) [ rep-components ] keep '[ swap _ '[ _ _ (simd-select) ] 2array ] map-index '[ swap >fixnum _ case ] ; : simd-nth-slow ( rep -- quot ) - rep-component-type dup c-type-getter-boxer array-accessor ; + rep-component-type dup c:c-type-getter-boxer c:array-accessor ; MACRO: simd-nth ( rep -- x ) - dup \ (simd-vshuffle) supported-simd-op? - [ simd-nth-fast ] [ simd-nth-slow ] if ; + dup simd-nth-fast? [ simd-nth-fast ] [ simd-nth-slow ] if ; : boa-effect ( rep n -- effect ) [ rep-components ] dip * @@ -67,6 +88,7 @@ ERROR: bad-schema schema ; :: high-level-ops ( ctor elt-class -- assoc ) ! Some SIMD operations are defined in terms of others. { + { vbroadcast [ swap nth ctor execute ] } { vneg [ [ dup vbitxor ] keep v- ] } { n+v [ [ ctor execute ] dip v+ ] } { v+n [ ctor execute v+ ] } @@ -83,12 +105,12 @@ ERROR: bad-schema schema ; ! To compute dot product and distance with integer vectors, we ! have to do things less efficiently, with integer overflow checks, ! in the general case. - elt-class m:float = [ { distance [ v- norm ] } suffix ] when ; + elt-class float = [ { distance [ v- norm ] } suffix ] when ; TUPLE: simd class elt-class ops wrappers ctor rep ; : define-simd ( simd -- ) - dup rep>> rep-component-type c-type-boxed-class >>elt-class + dup rep>> rep-component-type c:c-type-boxed-class >>elt-class { [ class>> ] [ elt-class>> ] @@ -99,7 +121,7 @@ TUPLE: simd class elt-class ops wrappers ctor rep ; specialize-vector-words ; :: define-simd-128-type ( class rep -- ) - + c: byte-array >>class class >>boxed-class [ rep alien-vector class boa ] >>getter @@ -107,7 +129,7 @@ TUPLE: simd class elt-class ops wrappers ctor rep ; 16 >>size 8 >>align rep >>rep - class typedef ; + class c:typedef ; : (define-simd-128) ( simd -- ) simd-ops get >>ops @@ -116,7 +138,7 @@ TUPLE: simd class elt-class ops wrappers ctor rep ; FUNCTOR: define-simd-128 ( T -- ) -N [ 16 T heap-size /i ] +N [ 16 T c:heap-size /i ] A DEFINES-CLASS ${T}-${N} A-boa DEFINES ${A}-boa @@ -125,7 +147,7 @@ A-cast DEFINES ${A}-cast >A DEFINES >${A} A{ DEFINES ${A}{ -SET-NTH [ T dup c-setter array-accessor ] +SET-NTH [ T dup c:c-setter c:array-accessor ] A-rep [ A name>> "-rep" append "cpu.architecture" lookup ] A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op @@ -161,7 +183,7 @@ M: A new-sequence M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ; -M: A byte-length underlying>> length ; inline +M: A c:byte-length underlying>> length ; inline M: A element-type drop A-rep rep-component-type ; @@ -229,7 +251,7 @@ SLOT: underlying1 SLOT: underlying2 :: define-simd-256-type ( class rep -- ) - + c: class >>class class >>boxed-class [ @@ -245,7 +267,7 @@ SLOT: underlying2 32 >>size 8 >>align rep >>rep - class typedef ; + class c:typedef ; : (define-simd-256) ( simd -- ) simd-ops get { vshuffle hlshift hrshift } unique assoc-diff >>ops @@ -254,7 +276,7 @@ SLOT: underlying2 FUNCTOR: define-simd-256 ( T -- ) -N [ 32 T heap-size /i ] +N [ 32 T c:heap-size /i ] N/2 [ N 2 / ] A/2 IS ${T}-${N/2} @@ -311,7 +333,7 @@ M: A new-sequence M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ; -M: A byte-length drop 32 ; inline +M: A c:byte-length drop 32 ; inline M: A element-type drop A-rep rep-component-type ; diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index 522488d804..6008a20844 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -50,7 +50,7 @@ SIMD-OP: hlshift SIMD-OP: hrshift SIMD-OP: vshuffle -: (simd-broadcast) ( x rep -- v ) bad-simd-call ; +: (simd-with) ( x rep -- v ) bad-simd-call ; : (simd-gather-2) ( a b rep -- v ) bad-simd-call ; : (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ; : (simd-select) ( v n rep -- x ) bad-simd-call ; @@ -103,29 +103,29 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? ) M: vector-rep supported-simd-op? { - { \ (simd-v+) [ %add-vector-reps ] } - { \ (simd-vs+) [ %saturated-add-vector-reps ] } - { \ (simd-v+-) [ %add-sub-vector-reps ] } - { \ (simd-v-) [ %sub-vector-reps ] } - { \ (simd-vs-) [ %saturated-sub-vector-reps ] } - { \ (simd-v*) [ %mul-vector-reps ] } - { \ (simd-vs*) [ %saturated-mul-vector-reps ] } - { \ (simd-v/) [ %div-vector-reps ] } - { \ (simd-vmin) [ %min-vector-reps ] } - { \ (simd-vmax) [ %max-vector-reps ] } - { \ (simd-v.) [ %dot-vector-reps ] } - { \ (simd-vsqrt) [ %sqrt-vector-reps ] } - { \ (simd-sum) [ %horizontal-add-vector-reps ] } - { \ (simd-vabs) [ %abs-vector-reps ] } - { \ (simd-vbitand) [ %and-vector-reps ] } - { \ (simd-vbitandn) [ %andn-vector-reps ] } - { \ (simd-vbitor) [ %or-vector-reps ] } - { \ (simd-vbitxor) [ %xor-vector-reps ] } - { \ (simd-vlshift) [ %shl-vector-reps ] } - { \ (simd-vrshift) [ %shr-vector-reps ] } - { \ (simd-hlshift) [ %horizontal-shl-vector-reps ] } - { \ (simd-hrshift) [ %horizontal-shr-vector-reps ] } - { \ (simd-vshuffle) [ %shuffle-vector-reps ] } - { \ (simd-gather-2) [ %gather-vector-2-reps ] } - { \ (simd-gather-4) [ %gather-vector-4-reps ] } + { \ (simd-v+) [ %add-vector-reps ] } + { \ (simd-vs+) [ %saturated-add-vector-reps ] } + { \ (simd-v+-) [ %add-sub-vector-reps ] } + { \ (simd-v-) [ %sub-vector-reps ] } + { \ (simd-vs-) [ %saturated-sub-vector-reps ] } + { \ (simd-v*) [ %mul-vector-reps ] } + { \ (simd-vs*) [ %saturated-mul-vector-reps ] } + { \ (simd-v/) [ %div-vector-reps ] } + { \ (simd-vmin) [ %min-vector-reps ] } + { \ (simd-vmax) [ %max-vector-reps ] } + { \ (simd-v.) [ %dot-vector-reps ] } + { \ (simd-vsqrt) [ %sqrt-vector-reps ] } + { \ (simd-sum) [ %horizontal-add-vector-reps ] } + { \ (simd-vabs) [ %abs-vector-reps ] } + { \ (simd-vbitand) [ %and-vector-reps ] } + { \ (simd-vbitandn) [ %andn-vector-reps ] } + { \ (simd-vbitor) [ %or-vector-reps ] } + { \ (simd-vbitxor) [ %xor-vector-reps ] } + { \ (simd-vlshift) [ %shl-vector-reps ] } + { \ (simd-vrshift) [ %shr-vector-reps ] } + { \ (simd-hlshift) [ %horizontal-shl-vector-reps ] } + { \ (simd-hrshift) [ %horizontal-shr-vector-reps ] } + { \ (simd-vshuffle) [ %shuffle-vector-reps ] } + { \ (simd-gather-2) [ %gather-vector-2-reps ] } + { \ (simd-gather-4) [ %gather-vector-4-reps ] } } case member? ; diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index 3f43a21c10..83d29ac308 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -6,7 +6,7 @@ tools.test vocabs assocs compiler.cfg.debugger words locals math.vectors.specialization combinators cpu.architecture math.vectors.simd.intrinsics namespaces byte-arrays alien specialized-arrays classes.struct eval classes.algebra sets -quotations ; +quotations math.constants ; QUALIFIED-WITH: alien.c-types c SPECIALIZED-ARRAY: c:float SIMD: c:char @@ -124,6 +124,10 @@ CONSTANT: simd-classes ] [ = ] check-optimizer ] unit-test +[ HEX: ffffffff ] [ HEX: ffffffff uint-4-with first ] unit-test + +[ HEX: ffffffff ] [ HEX: ffffffff [ uint-4-with ] compile-call first ] unit-test + "== Checking -boa constructors" print [ { } ] [ @@ -133,6 +137,8 @@ CONSTANT: simd-classes ] [ = ] check-optimizer ] unit-test +[ HEX: ffffffff ] [ HEX: ffffffff 2 3 4 [ uint-4-boa ] compile-call first ] unit-test + "== Checking vector operations" print : random-vector ( class -- vec ) @@ -155,7 +161,7 @@ CONSTANT: simd-classes : remove-special-words ( alist -- alist' ) ! These have their own tests later - { hlshift hrshift vshuffle } unique assoc-diff ; + { hlshift hrshift vshuffle vbroadcast } unique assoc-diff ; : ops-to-check ( elt-class -- alist ) [ vector-words >alist ] dip @@ -263,6 +269,9 @@ simd-classes [ [ { } ] [ int-4{ HEX: 7fffffff 3 4 -8 } test-accesses ] unit-test [ { } ] [ uint-4{ HEX: ffffffff 2 3 4 } test-accesses ] unit-test +[ HEX: 7fffffff ] [ int-4{ HEX: 7fffffff 3 4 -8 } first ] unit-test +[ HEX: ffffffff ] [ uint-4{ HEX: ffffffff 2 3 4 } first ] unit-test + [ { } ] [ double-2{ 1.0 2.0 } test-accesses ] unit-test [ { } ] [ longlong-2{ 1 2 } test-accesses ] unit-test [ { } ] [ ulonglong-2{ 1 2 } test-accesses ] unit-test @@ -275,6 +284,27 @@ simd-classes [ [ { } ] [ longlong-4{ 1 2 3 4 } test-accesses ] unit-test [ { } ] [ ulonglong-4{ 1 2 3 4 } test-accesses ] unit-test +"== Checking broadcast" print +: test-broadcast ( seq -- failures ) + [ length >array ] keep + '[ [ _ 1quotation ] dip '[ _ vbroadcast ] ] [ = ] check-optimizer ; inline + +[ { } ] [ float-4{ 1.0 2.0 3.0 4.0 } test-broadcast ] unit-test +[ { } ] [ int-4{ HEX: 7fffffff 3 4 -8 } test-broadcast ] unit-test +[ { } ] [ uint-4{ HEX: ffffffff 2 3 4 } test-broadcast ] unit-test + +[ { } ] [ double-2{ 1.0 2.0 } test-broadcast ] unit-test +[ { } ] [ longlong-2{ 1 2 } test-broadcast ] unit-test +[ { } ] [ ulonglong-2{ 1 2 } test-broadcast ] unit-test + +[ { } ] [ float-8{ 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 } test-broadcast ] unit-test +[ { } ] [ int-8{ 1 2 3 4 5 6 7 8 } test-broadcast ] unit-test +[ { } ] [ uint-8{ 1 2 3 4 5 6 7 8 } test-broadcast ] unit-test + +[ { } ] [ double-4{ 1.0 2.0 3.0 4.0 } test-broadcast ] unit-test +[ { } ] [ longlong-4{ 1 2 3 4 } test-broadcast ] unit-test +[ { } ] [ ulonglong-4{ 1 2 3 4 } test-broadcast ] unit-test + "== Checking alien operations" print [ float-4{ 1 2 3 4 } ] [ @@ -344,8 +374,25 @@ STRUCT: simd-struct [ ] [ char-16 new 1array stack. ] unit-test -! Other regressions +! CSSA bug [ 8000000 ] [ int-8{ 1000 1000 1000 1000 1000 1000 1000 1000 } [ { int-8 } declare dup [ * ] [ + ] 2map-reduce ] compile-call ] unit-test + +! Coalescing was too aggressive +:: broken ( axis theta -- a b c ) + axis { float-4 } declare drop + theta { float } declare drop + + theta cos float-4-with :> cc + theta sin float-4-with :> ss + + axis cc v+ :> diagonal + + diagonal cc ss ; inline + +[ t ] [ + float-4{ 1.0 0.0 1.0 0.0 } pi [ broken 3array ] + [ compile-call ] [ call ] 3bi = +] unit-test diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor index 333e787086..ea9947a0c5 100644 --- a/basis/math/vectors/specialization/specialization.factor +++ b/basis/math/vectors/specialization/specialization.factor @@ -91,6 +91,7 @@ H{ { hlshift { +vector+ +literal+ -> +vector+ } } { hrshift { +vector+ +literal+ -> +vector+ } } { vshuffle { +vector+ +literal+ -> +vector+ } } + { vbroadcast { +vector+ +literal+ -> +vector+ } } } PREDICATE: vector-word < word vector-words key? ; diff --git a/basis/math/vectors/vectors-docs.factor b/basis/math/vectors/vectors-docs.factor index c3f17ba6d5..048e084188 100644 --- a/basis/math/vectors/vectors-docs.factor +++ b/basis/math/vectors/vectors-docs.factor @@ -233,6 +233,18 @@ HELP: hrshift { $values { "u" "a SIMD array" } { "n" "a non-negative integer" } { "w" "a SIMD array" } } { $description "Shifts the entire SIMD array to the right by " { $snippet "n" } " bytes. This word may only be used in a context where the compiler can statically infer that the input is a SIMD array." } ; +HELP: vbroadcast +{ $values { "u" "a SIMD array" } { "n" "a non-negative integer" } { "v" "a SIMD array" } } +{ $description "Outputs a new SIMD array of the same type as " { $snippet "u" } " where every element is equal to the " { $snippet "n" } "th element of " { $snippet "u" } "." } +{ $examples + { $example + "USING: alien.c-types math.vectors math.vectors.simd" "prettyprint ;" + "SIMD: int" + "int-4{ 69 42 911 13 } 2 vbroadcast ." + "int-4{ 911 911 911 911 }" + } +} ; + HELP: vshuffle { $values { "u" "a SIMD array" } { "perm" "an array of integers" } { "v" "a SIMD array" } } { $description "Permutes the elements of a SIMD array. Duplicate entries are allowed in the permutation." } diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index bac7183c6d..a3d51752bd 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -1,7 +1,8 @@ ! Copyright (C) 2005, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: arrays alien.c-types kernel sequences math math.functions -hints math.order math.libm fry combinators byte-arrays accessors ; +hints math.order math.libm fry combinators byte-arrays accessors +locals ; QUALIFIED-WITH: alien.c-types c IN: math.vectors @@ -77,7 +78,8 @@ PRIVATE> : vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ; : vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ; -: vshuffle ( u perm -- v ) swap [ nths ] keep like ; +:: vbroadcast ( u n -- v ) u length n u nth u like ; +: vshuffle ( u perm -- v ) swap [ '[ _ nth ] ] keep map-as ; : vlshift ( u n -- w ) '[ _ shift ] map ; : vrshift ( u n -- w ) neg '[ _ shift ] map ;