math.vectors.simd: add vshuffle intrinsic

db4
Slava Pestov 2009-09-28 23:12:13 -05:00
parent 7065dd09df
commit e40a95c1e1
9 changed files with 186 additions and 38 deletions

View File

@ -179,7 +179,7 @@ IN: compiler.cfg.intrinsics
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] } { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] } { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
{ math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] } { math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] }
{ math.vectors.simd.intrinsics:(simd-vselect) [ emit-select-vector ] } { math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] }
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] } { math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
{ math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] } { math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] }

View File

@ -1,32 +1,51 @@
! Copyright (C) 2009 Slava Pestov. ! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors byte-arrays fry cpu.architecture kernel math USING: accessors byte-arrays fry cpu.architecture kernel math
sequences compiler.tree.propagation.info sequences macros generalizations combinators
combinators.short-circuit arrays compiler.tree.propagation.info
compiler.cfg.builder.blocks compiler.cfg.stacks compiler.cfg.builder.blocks compiler.cfg.stacks
compiler.cfg.stacks.local compiler.cfg.hats compiler.cfg.stacks.local compiler.cfg.hats
compiler.cfg.instructions compiler.cfg.registers compiler.cfg.instructions compiler.cfg.registers
compiler.cfg.intrinsics.alien ; compiler.cfg.intrinsics.alien ;
IN: compiler.cfg.intrinsics.simd IN: compiler.cfg.intrinsics.simd
MACRO: check-elements ( quots -- )
[ length '[ _ firstn ] ]
[ '[ _ spread ] ]
[ length 1 - \ and <repetition> [ ] like ]
tri 3append ;
MACRO: if-literals-match ( quots -- )
[ length ] [ ] [ length ] tri
! n quots n n
'[
! node quot
[
dup node-input-infos
_ tail-slice* [ literal>> ] map
dup _ check-elements
] dip
swap [
! node literals quot
[ _ firstn ] dip call
drop
] [ 2drop emit-primitive ] if
] ;
: emit-vector-op ( node quot: ( rep -- ) -- ) : emit-vector-op ( node quot: ( rep -- ) -- )
[ dup node-input-infos last literal>> dup representation? ] dip { [ representation? ] } if-literals-match ; inline
'[ nip @ ] [ drop emit-primitive ] if ; inline
: emit-binary-vector-op ( node quot -- ) : emit-binary-vector-op ( node quot -- )
'[ [ ds-drop 2inputs ] dip @ ds-push ] emit-vector-op ; inline '[ [ ds-drop 2inputs ] dip @ ds-push ]
emit-vector-op ; inline
: emit-unary-vector-op ( node quot -- ) : emit-unary-vector-op ( node quot -- )
'[ [ ds-drop ds-pop ] dip @ ds-push ] emit-vector-op ; inline '[ [ ds-drop ds-pop ] dip @ ds-push ]
emit-vector-op ; inline
: emit-horizontal-shift ( node quot -- ) : emit-horizontal-shift ( node quot -- )
[ '[ [ -2 inc-d ds-pop ] 2dip @ ds-push ]
dup node-input-infos { [ integer? ] [ representation? ] } if-literals-match ; inline
[ second literal>> ] [ third literal>> ] bi
2dup [ integer? ] [ representation? ] bi* and
] dip
'[ [ drop ds-drop ds-drop ds-pop ] 2dip @ ds-push ]
[ 2drop emit-primitive ]
if ; inline
: emit-gather-vector-2 ( node -- ) : emit-gather-vector-2 ( node -- )
[ ^^gather-vector-2 ] emit-binary-vector-op ; [ ^^gather-vector-2 ] emit-binary-vector-op ;
@ -45,12 +64,15 @@ IN: compiler.cfg.intrinsics.simd
ds-push ds-push
] emit-vector-op ; ] emit-vector-op ;
: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
: emit-shuffle-vector ( node -- ) : emit-shuffle-vector ( node -- )
; [ [ -2 inc-d ds-pop ] 2dip ^^shuffle-vector ds-push ]
{ [ shuffle? ] [ representation? ] } if-literals-match ; inline
: emit-select-vector ( node -- ) : emit-select-vector ( node -- )
[ [ -2 inc-d ds-pop ] 2dip ^^select-vector ds-push ]
; { [ integer? ] [ representation? ] } if-literals-match ; inline
: emit-alien-vector ( node -- ) : emit-alien-vector ( node -- )
dup [ dup [

View File

@ -24,22 +24,27 @@ IN: compiler.tree.propagation.simd
(simd-vrshift) (simd-vrshift)
(simd-hlshift) (simd-hlshift)
(simd-hrshift) (simd-hrshift)
(simd-vshuffle)
(simd-broadcast) (simd-broadcast)
(simd-gather-2) (simd-gather-2)
(simd-gather-4) (simd-gather-4)
(simd-select)
alien-vector alien-vector
} [ { byte-array } "default-output-classes" set-word-prop ] each } [ { byte-array } "default-output-classes" set-word-prop ] each
\ (simd-sum) [ : scalar-output-class ( rep -- class )
nip dup literal?>> [ dup literal?>> [
literal>> scalar-rep-of { literal>> scalar-rep-of {
{ float-rep [ float ] } { float-rep [ float ] }
{ double-rep [ float ] } { double-rep [ float ] }
[ integer ] [ drop integer ]
} case } case
] [ drop real ] if ] [ drop real ] if
<class-info> <class-info> ;
] "outputs" set-word-prop
\ (simd-sum) [ nip scalar-output-class ] "outputs" set-word-prop
\ (simd-v.) [ 2nip scalar-output-class ] "outputs" set-word-prop
\ assert-positive [ \ assert-positive [
real [0,inf] <class/interval-info> value-info-intersect real [0,inf] <class/interval-info> value-info-intersect

View File

@ -602,8 +602,8 @@ M: x86 %zero-vector-reps
M:: x86 %broadcast-vector ( dst src rep -- ) M:: x86 %broadcast-vector ( dst src rep -- )
rep unsign-rep { rep unsign-rep {
{ float-4-rep [ { float-4-rep [
dst src float-4-rep %copy dst src float-4-rep %copy
dst dst { 0 0 0 0 } SHUFPS dst dst { 0 0 0 0 } SHUFPS
] } ] }
{ double-2-rep [ { double-2-rep [
@ -677,7 +677,52 @@ M: x86 %gather-vector-2-reps
{ sse2? { double-2-rep longlong-2-rep ulonglong-2-rep } } { sse2? { double-2-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ; } available-reps ;
M: x86 %shuffle-vector-reps { } ; : double-2-shuffle ( dst shuffle -- )
{
{ { 0 1 } [ drop ] }
{ { 0 0 } [ dup UNPCKLPD ] }
{ { 1 1 } [ dup UNPCKHPD ] }
[ dupd SHUFPD ]
} case ;
: float-4-shuffle ( dst shuffle -- )
{
{ { 0 1 2 3 } [ drop ] }
{ { 0 0 2 2 } [ dup MOVSLDUP ] }
{ { 1 1 3 3 } [ dup MOVSHDUP ] }
{ { 0 1 0 1 } [ dup MOVLHPS ] }
{ { 2 3 2 3 } [ dup MOVHLPS ] }
{ { 0 0 1 1 } [ dup UNPCKLPS ] }
{ { 2 2 3 3 } [ dup UNPCKHPS ] }
[ dupd SHUFPS ]
} case ;
: int-4-shuffle ( dst shuffle -- )
{
{ { 0 1 2 3 } [ drop ] }
{ { 0 0 1 1 } [ dup PUNPCKLDQ ] }
{ { 2 2 3 3 } [ dup PUNPCKHDQ ] }
{ { 0 1 0 1 } [ dup PUNPCKLQDQ ] }
{ { 2 3 2 3 } [ dup PUNPCKHQDQ ] }
[ dupd PSHUFD ]
} case ;
: longlong-2-shuffle ( dst shuffle -- )
first2 [ 2 * dup 1 + ] bi@ 4array int-4-shuffle ;
M:: x86 %shuffle-vector ( dst src shuffle rep -- )
dst src rep %copy
dst shuffle rep unsign-rep {
{ double-2-rep [ double-2-shuffle ] }
{ float-4-rep [ float-4-shuffle ] }
{ int-4-rep [ int-4-shuffle ] }
{ longlong-2-rep [ longlong-2-shuffle ] }
} case ;
M: x86 %shuffle-vector-reps
{
{ sse2? { double-2-rep float-4-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
M: x86 %select-vector-reps { } ; M: x86 %select-vector-reps { } ;

View File

@ -73,13 +73,15 @@ ERROR: bad-schema schema ;
! in the general case. ! in the general case.
elt-class m:float = [ { distance [ v- norm ] } suffix ] when ; elt-class m:float = [ { distance [ v- norm ] } suffix ] when ;
:: simd-vector-words ( class ctor rep vv->v vn->v v->v v->n -- ) :: simd-vector-words ( class ctor rep vv->v vn->v vv->n v->v v->n -- )
rep rep-component-type c-type-boxed-class :> elt-class rep rep-component-type c-type-boxed-class :> elt-class
class class
elt-class elt-class
{ {
{ { +vector+ +vector+ -> +vector+ } vv->v } { { +vector+ +vector+ -> +vector+ } vv->v }
{ { +vector+ +scalar+ -> +vector+ } vn->v } { { +vector+ +scalar+ -> +vector+ } vn->v }
{ { +vector+ +literal+ -> +vector+ } vn->v }
{ { +vector+ +vector+ -> +scalar+ } vv->n }
{ { +vector+ -> +vector+ } v->v } { { +vector+ -> +vector+ } v->v }
{ { +vector+ -> +scalar+ } v->n } { { +vector+ -> +scalar+ } v->n }
{ { +vector+ -> +nonnegative+ } v->n } { { +vector+ -> +nonnegative+ } v->n }
@ -116,6 +118,7 @@ SET-NTH [ T dup c-setter array-accessor ]
A-rep [ A name>> "-rep" append "cpu.architecture" lookup ] A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op
A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
@ -181,13 +184,16 @@ INSTANCE: A sequence
: A-vn->v-op ( v1 v2 quot -- v3 ) : A-vn->v-op ( v1 v2 quot -- v3 )
[ [ underlying>> ] dip A-rep ] dip call \ A boa ; inline [ [ underlying>> ] dip A-rep ] dip call \ A boa ; inline
: A-vv->n-op ( v1 v2 quot -- n )
[ [ underlying>> ] bi@ A-rep ] dip call ; inline
: A-v->v-op ( v1 quot -- v2 ) : A-v->v-op ( v1 quot -- v2 )
[ underlying>> A-rep ] dip call \ A boa ; inline [ underlying>> A-rep ] dip call \ A boa ; inline
: A-v->n-op ( v quot -- n ) : A-v->n-op ( v quot -- n )
[ underlying>> A-rep ] dip call ; inline [ underlying>> A-rep ] dip call ; inline
\ A \ A-with \ A-rep \ A-vv->v-op \ A-vn->v-op \ A-v->v-op \ A-v->n-op simd-vector-words \ A \ A-with \ A-rep \ A-vv->v-op \ A-vn->v-op \ A-vv->n-op \ A-v->v-op \ A-v->n-op simd-vector-words
\ A \ A-rep define-simd-128-type \ A \ A-rep define-simd-128-type
PRIVATE> PRIVATE>
@ -238,6 +244,7 @@ A-deref DEFINES-PRIVATE ${A}-deref
A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ] A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op
A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
@ -312,6 +319,11 @@ INSTANCE: A sequence
[ [ [ underlying2>> ] dip A-rep ] dip call ] 3bi [ [ [ underlying2>> ] dip A-rep ] dip call ] 3bi
\ A boa ; inline \ A boa ; inline
: A-vv->n-op ( v1 v2 quot -- v3 )
[ [ [ underlying1>> ] bi@ A-rep ] dip call ]
[ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
+ ; inline
: A-v->v-op ( v1 combine-quot -- v2 ) : A-v->v-op ( v1 combine-quot -- v2 )
[ [ underlying1>> A-rep ] dip call ] [ [ underlying1>> A-rep ] dip call ]
[ [ underlying2>> A-rep ] dip call ] 2bi [ [ underlying2>> A-rep ] dip call ] 2bi
@ -320,7 +332,7 @@ INSTANCE: A sequence
: A-v->n-op ( v1 combine-quot -- v2 ) : A-v->n-op ( v1 combine-quot -- v2 )
[ [ underlying1>> ] [ underlying2>> ] bi A-rep (simd-v+) A-rep ] dip call ; inline [ [ underlying1>> ] [ underlying2>> ] bi A-rep (simd-v+) A-rep ] dip call ; inline
\ A \ A-with \ A-rep \ A-vv->v-op \ A-vn->v-op \ A-v->v-op \ A-v->n-op simd-vector-words \ A \ A-with \ A-rep \ A-vv->v-op \ A-vn->v-op \ A-vv->n-op \ A-v->v-op \ A-v->n-op simd-vector-words
\ A \ A-rep define-simd-256-type \ A \ A-rep define-simd-256-type
;FUNCTOR ;FUNCTOR

View File

@ -148,13 +148,14 @@ CONSTANT: simd-classes
: remove-integer-words ( alist -- alist' ) : remove-integer-words ( alist -- alist' )
[ drop { vlshift vrshift } member? not ] assoc-filter ; [ drop { vlshift vrshift } member? not ] assoc-filter ;
: remove-horizontal-shifts ( alist -- alist' ) : remove-special-words ( alist -- alist' )
[ drop { hlshift hrshift } member? not ] assoc-filter ; ! These have their own tests later
[ drop { hlshift hrshift vshuffle } member? not ] assoc-filter ;
: ops-to-check ( elt-class -- alist ) : ops-to-check ( elt-class -- alist )
[ vector-words >alist ] dip [ vector-words >alist ] dip
float = [ remove-integer-words ] [ remove-float-words ] if float = [ remove-integer-words ] [ remove-float-words ] if
remove-horizontal-shifts ; remove-special-words ;
: check-vector-ops ( class elt-class compare-quot -- ) : check-vector-ops ( class elt-class compare-quot -- )
[ [
@ -271,3 +272,47 @@ STRUCT: simd-struct
[ int-4{ 1 2 4 8 } ] [ int-4{ 1 2 4 8 } ]
[ int-4{ 256 512 1024 2048 } [ { int-4 } declare 1 hrshift ] compile-call ] unit-test [ int-4{ 256 512 1024 2048 } [ { int-4 } declare 1 hrshift ] compile-call ] unit-test
! Shuffles
: test-shuffle ( input shuffle -- failures )
[ dup class 1array ] dip
'[ _ declare _ vshuffle ]
[ call ] [ compile-call ] 2bi = not ; inline
: shuffles-for ( seq -- shuffles )
length {
{ 2 [
{
{ 0 1 }
{ 1 1 }
{ 1 0 }
{ 0 0 }
}
] }
{ 4 [
{
{ 1 2 3 0 }
{ 0 1 2 3 }
{ 1 1 2 2 }
{ 0 0 1 1 }
{ 2 2 3 3 }
{ 0 1 0 1 }
{ 2 3 2 3 }
{ 0 0 2 2 }
{ 1 1 3 3 }
{ 0 1 0 1 }
{ 2 2 3 3 }
}
] }
} case ;
: test-shuffles ( input -- failures )
dup shuffles-for [ test-shuffle ] with filter ; inline
[ { } ] [ float-4{ 1.0 2.0 3.0 4.0 } test-shuffles ] unit-test
[ { } ] [ int-4{ 1 2 3 4 } test-shuffles ] unit-test
[ { } ] [ uint-4{ 1 2 3 4 } test-shuffles ] unit-test
[ { } ] [ double-2{ 1.0 2.0 } test-shuffles ] unit-test
[ { } ] [ longlong-2{ 1 2 } test-shuffles ] unit-test
[ { } ] [ ulonglong-2{ 1 2 } test-shuffles ] unit-test

View File

@ -6,7 +6,7 @@ namespaces assocs fry splitting classes.algebra generalizations
locals compiler.tree.propagation.info ; locals compiler.tree.propagation.info ;
IN: math.vectors.specialization IN: math.vectors.specialization
SYMBOLS: -> +vector+ +scalar+ +nonnegative+ ; SYMBOLS: -> +vector+ +scalar+ +nonnegative+ +literal+ ;
: signature-for-schema ( array-type elt-type schema -- signature ) : signature-for-schema ( array-type elt-type schema -- signature )
[ [
@ -14,6 +14,7 @@ SYMBOLS: -> +vector+ +scalar+ +nonnegative+ ;
{ +vector+ [ drop ] } { +vector+ [ drop ] }
{ +scalar+ [ nip ] } { +scalar+ [ nip ] }
{ +nonnegative+ [ nip ] } { +nonnegative+ [ nip ] }
{ +literal+ [ 2drop object ] }
} case } case
] with with map ; ] with with map ;
@ -87,8 +88,9 @@ H{
{ vbitxor { +vector+ +vector+ -> +vector+ } } { vbitxor { +vector+ +vector+ -> +vector+ } }
{ vlshift { +vector+ +scalar+ -> +vector+ } } { vlshift { +vector+ +scalar+ -> +vector+ } }
{ vrshift { +vector+ +scalar+ -> +vector+ } } { vrshift { +vector+ +scalar+ -> +vector+ } }
{ hlshift { +vector+ +scalar+ -> +vector+ } } { hlshift { +vector+ +literal+ -> +vector+ } }
{ hrshift { +vector+ +scalar+ -> +vector+ } } { hrshift { +vector+ +literal+ -> +vector+ } }
{ vshuffle { +vector+ +literal+ -> +vector+ } }
} }
PREDICATE: vector-word < word vector-words key? ; PREDICATE: vector-word < word vector-words key? ;
@ -102,7 +104,10 @@ M: vector-word subwords specializations values [ word? ] filter ;
: add-specialization ( new-word signature word -- ) : add-specialization ( new-word signature word -- )
specializations set-at ; specializations set-at ;
: word-schema ( word -- schema ) vector-words at ; ERROR: bad-vector-word word ;
: word-schema ( word -- schema )
vector-words ?at [ bad-vector-word ] unless ;
: inputs ( schema -- seq ) { -> } split first ; : inputs ( schema -- seq ) { -> } split first ;
@ -129,8 +134,8 @@ M: vector-word subwords specializations values [ word? ] filter ;
{ [ dup complex class<= ] [ vector-words keys { vsqrt } diff ] } { [ dup complex class<= ] [ vector-words keys { vsqrt } diff ] }
[ { } ] [ { } ]
} cond } cond
! Don't specialize horizontal shifts at all, they're only for SIMD ! Don't specialize horizontal shifts or shuffles at all, they're only for SIMD
{ hlshift hrshift } diff { hlshift hrshift vshuffle } diff
nip ; nip ;
:: specialize-vector-words ( array-type elt-type simd -- ) :: specialize-vector-words ( array-type elt-type simd -- )

View File

@ -41,6 +41,8 @@ $nl
{ $subsection vbitxor } { $subsection vbitxor }
{ $subsection vlshift } { $subsection vlshift }
{ $subsection vrshift } { $subsection vrshift }
"Shuffling:"
{ $subsection vshuffle }
"Inner product and norm:" "Inner product and norm:"
{ $subsection v. } { $subsection v. }
{ $subsection norm } { $subsection norm }
@ -231,6 +233,18 @@ HELP: hrshift
{ $values { "u" "a SIMD array" } { "n" "a non-negative integer" } { "w" "a SIMD array" } } { $values { "u" "a SIMD array" } { "n" "a non-negative integer" } { "w" "a SIMD array" } }
{ $description "Shifts the entire SIMD array to the right by " { $snippet "n" } " bytes. This word may only be used in a context where the compiler can statically infer that the input is a SIMD array." } ; { $description "Shifts the entire SIMD array to the right by " { $snippet "n" } " bytes. This word may only be used in a context where the compiler can statically infer that the input is a SIMD array." } ;
HELP: vshuffle
{ $values { "u" "a SIMD array" } { "perm" "an array of integers" } { "v" "a SIMD array" } }
{ $description "Permutes the elements of a SIMD array. Duplicate entries are allowed in the permutation." }
{ $examples
{ $example
"USING: alien.c-types math.vectors math.vectors.simd" "prettyprint ;"
"SIMD: int"
"int-4{ 69 42 911 13 } { 1 3 2 3 } vshuffle ."
"int-4{ 42 13 911 13 }"
}
} ;
HELP: norm-sq HELP: norm-sq
{ $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } } { $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } }
{ $description "Computes the squared length of a mathematical vector." } ; { $description "Computes the squared length of a mathematical vector." } ;

View File

@ -77,7 +77,7 @@ PRIVATE>
: vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ; : vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ;
: vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ; : vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ;
: vshuffle ( u perm -- v ) swap nths ; : vshuffle ( u perm -- v ) swap [ nths ] keep like ;
: vlshift ( u n -- w ) '[ _ shift ] map ; : vlshift ( u n -- w ) '[ _ shift ] map ;
: vrshift ( u n -- w ) neg '[ _ shift ] map ; : vrshift ( u n -- w ) neg '[ _ shift ] map ;