backend fixups

db4
Joe Groff 2009-11-14 20:59:03 -06:00
parent e323071c44
commit 8a8699ac98
5 changed files with 199 additions and 188 deletions

View File

@ -1,5 +1,10 @@
! (c)2009 Joe Groff bsd license
USING: accessors fry generalizations kernel locals math sequences
USING: accessors arrays classes combinators
combinators.short-circuit compiler.cfg.builder.blocks
compiler.cfg.registers compiler.cfg.stacks
compiler.cfg.stacks.local compiler.tree.propagation.info
cpu.architecture effects fry generalizations help.lint.checks
kernel locals macros math namespaces quotations sequences
splitting words ;
IN: compiler.cfg.intrinsics.simd.backend
@ -8,55 +13,51 @@ IN: compiler.cfg.intrinsics.simd.backend
: can-has? ( quot -- ? )
[ t \ can-has? ] dip '[ @ drop \ can-has? get ] with-variable ; inline
GENERIC: create-can-has-word ( word -- word' )
GENERIC: create-can-has ( word -- word' )
PREDICATE: vector-op-word
PREDICATE: vector-op-word < word
{
[ name>> { [ { [ "^" head? ] [ "##" head? ] } 1|| ] [ "-vector" swap subseq? ] } 1&& ]
[ vocabulary>> { "compiler.cfg.intrinsics.simd" "cpu.architecture" } member? ]
[ vocabulary>> { "compiler.cfg.intrinsics.simd" "compiler.cfg.hats" } member? ]
} 1&& ;
: reps-word ( word -- word' )
name>> "^^" ?head drop "##" ?head drop
"%" "-reps" surround "cpu.architecture" lookup ;
:: can-has-^^-quot ( word def effect -- def' )
:: can-has-^^-quot ( word def effect -- quot )
effect in>> { "rep" } split1 [ length ] bi@ 1 +
word reps-word
effect out>> length f <array> >quotation
'[ [ _ ndrop ] _ ndip _ execute member? \ can-has? [ and ] change @ ] ;
:: can-has-^-quot ( word def effect -- def' )
:: can-has-^-quot ( word def effect -- quot )
def create-can-has ;
M: object create-can-has ;
M: object create-can-has 1quotation ;
M: sequence create-can-has
[ create-can-has-word ] map ;
M: array create-can-has
[ create-can-has ] map concat ;
M: callable create-can-has
[ create-can-has ] map concat ;
: (create-can-has-word) ( word -- word' created? )
name>> "can-has-" prepend "compiler.cfg.intrinsics.simd.backend"
2dup lookup
[ 2nip f ] [ create t ] if* ;
: (can-has-word) ( word -- word' )
name>> "can-has-" prepend "compiler.cfg.intrinsics.simd.backend" lookup ;
: (create-can-has-quot) ( word -- def effect )
[ ] [ def>> ] [ stack-effect ] tri [
{
{ [ pick "^^" head? ] [ can-has-^^-quot ] }
{ [ pick "##" head? ] [ can-has-^^-quot ] }
{ [ pick "^" head? ] [ can-has-^-quot ] }
} cond
] keep ;
: (can-has-quot) ( word -- quot )
[ ] [ def>> ] [ stack-effect ] tri {
{ [ pick name>> "^^" head? ] [ can-has-^^-quot ] }
{ [ pick name>> "##" head? ] [ can-has-^^-quot ] }
{ [ pick name>> "^" head? ] [ can-has-^-quot ] }
} cond ;
M: vector-op-word create-can-has
[ (create-can-has-word) ] keep
'[ _ (create-can-has-quot) define-declared ]
[ nip ] if ;
dup (can-has-word) [ 1quotation ] [ (can-has-quot) ] ?if ;
GENERIC# >can-has-cond 2 ( quot #pick #dup -- quotpair )
M:: callable >can-has-cond
M:: callable >can-has-cond ( quot #pick #dup -- quotpair )
#dup quot create-can-has '[ _ ndup _ can-has? ] quot 2array ;
M:: pair >can-has-cond ( pair #pick #dup -- quotpair )
pair first2 :> ( class quot )
#pick class #dup quot create-can-has
@ -113,7 +114,7 @@ CONSTANT: [quaternary]
-4 inc-d
]
:: [emit-vector-op] ( trials params-quot op-quot literal-preds -- quot ) ;
:: [emit-vector-op] ( trials params-quot op-quot literal-preds -- quot )
params-quot trials op-quot literal-preds
'[ [ _ dip _ @ ds-push ] _ if-literals-match ] ;
@ -126,10 +127,11 @@ MACRO: emit-vv-vector-op ( trials -- )
MACRO: emit-vvvv-vector-op ( trials -- )
[quaternary] [ vvvv-vector-op ] { [ representation? ] } [emit-vector-op] ;
MACRO:: emit-vv-or-vl-vector-op ( trials literal-pred -- )
literal-pred trials literal-pred trials
MACRO:: emit-vv-or-vl-vector-op ( var-trials imm-trials literal-pred -- )
literal-pred imm-trials literal-pred var-trials
'[
dup node-input-infos 2 tail-slice* first literal>> @
[ _ _ emit-vl-vector-op ]
[ _ emit-vv-vector-op ] if
] ;

View File

@ -1,18 +1,20 @@
! Copyright (C) 2009 Slava Pestov, Joe Groff.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien byte-arrays fry classes.algebra
cpu.architecture kernel math sequences math.vectors
math.vectors.simd macros generalizations combinators
combinators.short-circuit arrays locals
compiler.tree.propagation.info compiler.cfg.builder.blocks
USING: accessors alien alien.c-types byte-arrays fry
classes.algebra cpu.architecture kernel math sequences
math.vectors math.vectors.simd math.vectors.simd.private
macros generalizations combinators combinators.short-circuit
arrays locals compiler.tree.propagation.info
compiler.cfg.builder.blocks
compiler.cfg.comparisons
compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
compiler.cfg.instructions compiler.cfg.registers
compiler.cfg.intrinsics
compiler.cfg.intrinsics.alien
compiler.cfg.intrinsics.simd.backend
specialized-arrays ;
FROM: alien.c-types => heap-size char short int longlong float double ;
SPECIALIZED-ARRAYS: char short int longlong float double ;
SPECIALIZED-ARRAYS: char uchar short ushort int uint longlong ulonglong float double ;
IN: compiler.cfg.intrinsics.simd
! compound vector ops
@ -69,8 +71,14 @@ IN: compiler.cfg.intrinsics.simd
mask false rep ^^andn-vector
rep ^^or-vector ;
: ^minmax-compare-vector ( src1 src2 rep cc -- dst )
order-cc {
: ^not-vector ( src rep -- dst )
{
[ ^^not-vector ]
[ [ ^^fill-vector ] [ ^^xor-vector ] bi ]
} v-vector-op ;
:: ^minmax-compare-vector ( src1 src2 rep cc -- dst )
cc order-cc {
{ cc< [ src1 src2 rep ^^max-vector src1 rep cc/= ^^compare-vector ] }
{ cc<= [ src1 src2 rep ^^min-vector src1 rep cc= ^^compare-vector ] }
{ cc> [ src1 src2 rep ^^min-vector src1 rep cc/= ^^compare-vector ] }
@ -96,7 +104,7 @@ IN: compiler.cfg.intrinsics.simd
[ [ src1 src2 rep ] dip ^((compare-vector)) rep ^^or-vector ]
reduce
not? [ rep generate-not-vector ] when
not? [ rep ^not-vector ] when
] if ;
: ^compare-vector ( src1 src2 rep cc -- dst )
@ -118,7 +126,7 @@ IN: compiler.cfg.intrinsics.simd
{ signed-int-vector-rep [| src rep |
src src rep ^^merge-vector-head :> merged
rep rep-component-type heap-size 8 * :> bits
merged bits rep widen-rep ^shr-vector-imm
merged bits rep widen-vector-rep ^^shr-vector-imm
] }
{ signed-int-vector-rep [| src rep |
rep ^^zero-vector :> zero
@ -135,7 +143,7 @@ IN: compiler.cfg.intrinsics.simd
{ signed-int-vector-rep [| src rep |
src src rep ^^merge-vector-tail :> merged
rep rep-component-type heap-size 8 * :> bits
merged bits rep ^widened-shr-vector-imm
merged bits rep widen-vector-rep ^^shr-vector-imm
] }
{ signed-int-vector-rep [| src rep |
rep ^^zero-vector :> zero
@ -144,7 +152,7 @@ IN: compiler.cfg.intrinsics.simd
] }
} v-vector-op ;
: ^(sum-2) ( src rep -- dst )
: ^(sum-vector-2) ( src rep -- dst )
{
[ dupd ^^horizontal-add-vector ]
[| src rep |
@ -154,7 +162,7 @@ IN: compiler.cfg.intrinsics.simd
]
} v-vector-op ;
: ^(sum-4) ( src rep -- dst )
: ^(sum-vector-4) ( src rep -- dst )
{
[
[ dupd ^^horizontal-add-vector ]
@ -165,14 +173,14 @@ IN: compiler.cfg.intrinsics.simd
src src rep ^^merge-vector-tail :> tail
head tail rep ^^add-vector :> src'
rep widen-rep :> rep'
rep widen-vector-rep :> rep'
src' src' rep' ^^merge-vector-head :> head'
src' src' rep' ^^merge-vector-tail :> tail'
head' tail' rep ^^add-vector
]
} v-vector-op ;
: ^(sum-8) ( src rep -- dst )
: ^(sum-vector-8) ( src rep -- dst )
{
[
[ dupd ^^horizontal-add-vector ]
@ -184,19 +192,19 @@ IN: compiler.cfg.intrinsics.simd
src src rep ^^merge-vector-tail :> tail
head tail rep ^^add-vector :> src'
rep widen-rep :> rep'
rep widen-vector-rep :> rep'
src' src' rep' ^^merge-vector-head :> head'
src' src' rep' ^^merge-vector-tail :> tail'
head' tail' rep ^^add-vector :> src''
rep' widen-rep :> rep''
rep' widen-vector-rep :> rep''
src'' src'' rep'' ^^merge-vector-head :> head''
src'' src'' rep'' ^^merge-vector-tail :> tail''
head'' tail'' rep ^^add-vector
]
} v-vector-op ;
: ^(sum-16) ( src rep -- dst )
: ^(sum-vector-16) ( src rep -- dst )
{
[
{
@ -211,17 +219,17 @@ IN: compiler.cfg.intrinsics.simd
src src rep ^^merge-vector-tail :> tail
head tail rep ^^add-vector :> src'
rep widen-rep :> rep'
rep widen-vector-rep :> rep'
src' src' rep' ^^merge-vector-head :> head'
src' src' rep' ^^merge-vector-tail :> tail'
head' tail' rep ^^add-vector :> src''
rep' widen-rep :> rep''
rep' widen-vector-rep :> rep''
src'' src'' rep'' ^^merge-vector-head :> head''
src'' src'' rep'' ^^merge-vector-tail :> tail''
head'' tail'' rep ^^add-vector :> src'''
rep'' widen-rep :> rep'''
rep'' widen-vector-rep :> rep'''
src''' src''' rep''' ^^merge-vector-head :> head'''
src''' src''' rep''' ^^merge-vector-tail :> tail'''
head''' tail''' rep ^^add-vector
@ -230,11 +238,11 @@ IN: compiler.cfg.intrinsics.simd
: ^(sum-vector) ( src rep -- dst )
[
rep-length {
{ 2 [ ^(sum-2) ] }
{ 4 [ ^(sum-4) ] }
{ 8 [ ^(sum-8) ] }
{ 16 [ ^(sum-16) ] }
dup rep-length {
{ 2 [ ^(sum-vector-2) ] }
{ 4 [ ^(sum-vector-4) ] }
{ 8 [ ^(sum-vector-8) ] }
{ 16 [ ^(sum-vector-16) ] }
} case
] [ ^^vector>scalar ] bi ;
@ -244,11 +252,29 @@ IN: compiler.cfg.intrinsics.simd
{ int-vector-rep [| src rep |
src rep ^unpack-vector-head :> head
src rep ^unpack-vector-tail :> tail
rep widen-rep :> wide-rep
rep widen-vector-rep :> wide-rep
head tail wide-rep ^^add-vector wide-rep ^(sum-vector)
] }
} v-vector-op ;
: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
: ^shuffle-vector-imm ( src1 src2 rep -- dst )
{
[ ^^shuffle-vector-imm ]
[ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] bi ]
} vl-vector-op ;
: ^broadcast-vector ( src n rep -- dst )
[ rep-length swap <array> ] keep
^shuffle-vector-imm ;
: ^with-vector ( src rep -- dst )
[ ^^scalar>vector ] keep [ 0 ] dip ^broadcast-vector ;
: ^select-vector ( src n rep -- dst )
[ ^broadcast-vector ] keep ^^vector>scalar ;
! intrinsic emitters
: emit-simd-v+ ( node -- )
@ -380,8 +406,7 @@ IN: compiler.cfg.intrinsics.simd
: emit-simd-vnot ( node -- )
{
[ ^^not-vector ]
[ [ ^^fill-vector ] [ ^^xor-vector ] bi ]
[ ^not-vector ]
} emit-v-vector-op ;
: emit-simd-vlshift ( node -- )
@ -408,12 +433,9 @@ IN: compiler.cfg.intrinsics.simd
[ ^^horizontal-shr-vector-imm ]
} [ integer? ] emit-vl-vector-op ;
: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
: emit-simd-vshuffle-elements ( node -- )
{
[ ^^shuffle-vector-imm ]
[ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] ]
[ ^shuffle-vector-imm ]
} [ shuffle? ] emit-vl-vector-op ;
: emit-simd-vshuffle-bytes ( node -- )
@ -458,28 +480,28 @@ IN: compiler.cfg.intrinsics.simd
: emit-simd-vany? ( node -- )
{
[ vcc-any ^test-vector ]
[ vcc-any ^^test-vector ]
} emit-vv-vector-op ;
: emit-simd-vall? ( node -- )
{
[ vcc-all ^test-vector ]
[ vcc-all ^^test-vector ]
} emit-vv-vector-op ;
: emit-simd-vnone? ( node -- )
{
[ vcc-none ^test-vector ]
[ vcc-none ^^test-vector ]
} emit-vv-vector-op ;
: emit-simd-v>float ( node -- )
{
{ float-vector-rep [ drop ] }
{ int-vector-rep [ ^^integer>float-vector ] }
} emit-vv-vector-op ;
} emit-v-vector-op ;
: emit-simd-v>integer ( node -- )
{
{ float-vector-rep [ ^^float>integer-vector ] }
{ int-vector-rep [ dup ] }
} emit-vv-vector-op ;
} emit-v-vector-op ;
: emit-simd-vpack-signed ( node -- )
{
@ -503,7 +525,7 @@ IN: compiler.cfg.intrinsics.simd
: emit-simd-with ( node -- )
{
[ ^^with-vector ]
[ ^with-vector ]
} emit-v-vector-op ;
: emit-simd-gather-2 ( node -- )
@ -518,7 +540,7 @@ IN: compiler.cfg.intrinsics.simd
: emit-simd-select ( node -- )
{
[ ^^select-vector ]
[ ^select-vector ]
} [ integer? ] emit-vl-vector-op ;
: emit-alien-vector ( node -- )
@ -540,62 +562,62 @@ IN: compiler.cfg.intrinsics.simd
inline-alien
] with { [ %alien-vector-reps member? ] } if-literals-match ;
: enable-simd ( -- )
{
{ (simd-v+) [ emit-simd-v+ ] }
{ (simd-v-) [ emit-simd-v- ] }
{ (simd-vneg) [ emit-simd-vneg ] }
{ (simd-v+-) [ emit-simd-v+- ] }
{ (simd-vs+) [ emit-simd-vs+ ] }
{ (simd-vs-) [ emit-simd-vs- ] }
{ (simd-vs*) [ emit-simd-vs* ] }
{ (simd-v*) [ emit-simd-v* ] }
{ (simd-v/) [ emit-simd-v/ ] }
{ (simd-vmin) [ emit-simd-vmin ] }
{ (simd-vmax) [ emit-simd-vmax ] }
{ (simd-v.) [ emit-simd-v. ] }
{ (simd-vsqrt) [ emit-simd-vsqrt ] }
{ (simd-sum) [ emit-simd-sum ] }
{ (simd-vabs) [ emit-simd-vabs ] }
{ (simd-vbitand) [ emit-simd-vand ] }
{ (simd-vbitandn) [ emit-simd-vandn ] }
{ (simd-vbitor) [ emit-simd-vor ] }
{ (simd-vbitxor) [ emit-simd-vxor ] }
{ (simd-vbitnot) [ emit-simd-vnot ] }
{ (simd-vand) [ emit-simd-vand ] }
{ (simd-vandn) [ emit-simd-vandn ] }
{ (simd-vor) [ emit-simd-vor ] }
{ (simd-vxor) [ emit-simd-vxor ] }
{ (simd-vnot) [ emit-simd-vnot ] }
{ (simd-vlshift) [ emit-simd-vlshift ] }
{ (simd-vrshift) [ emit-simd-vrshift ] }
{ (simd-hlshift) [ emit-simd-hlshift ] }
{ (simd-hrshift) [ emit-simd-hrshift ] }
{ (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] }
{ (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] }
{ (simd-vmerge-head) [ emit-simd-vmerge-head ] }
{ (simd-vmerge-tail) [ emit-simd-vmerge-tail ] }
{ (simd-v<=) [ emit-simd-v<= ] }
{ (simd-v<) [ emit-simd-v< ] }
{ (simd-v=) [ emit-simd-v= ] }
{ (simd-v>) [ emit-simd-v> ] }
{ (simd-v>=) [ emit-simd-v>= ] }
{ (simd-vunordered?) [ emit-simd-vunordered? ] }
{ (simd-vany?) [ emit-simd-vany? ] }
{ (simd-vall?) [ emit-simd-vall? ] }
{ (simd-vnone?) [ emit-simd-vnone? ] }
{ (simd-v>float) [ emit-simd-v>float ] }
{ (simd-v>integer) [ emit-simd-v>integer ] }
{ (simd-vpack-signed) [ emit-simd-vpack-signed ] }
{ (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] }
{ (simd-vunpack-head) [ emit-simd-vunpack-head ] }
{ (simd-vunpack-tail) [ emit-simd-vunpack-tail ] }
{ (simd-with) [ emit-simd-with ] }
{ (simd-gather-2) [ emit-simd-gather-2 ] }
{ (simd-gather-4) [ emit-simd-gather-4 ] }
{ (simd-select) [ emit-simd-select ] }
{ alien-vector [ emit-alien-vector ] }
{ set-alien-vector [ emit-set-alien-vector ] }
} enable-intrinsics ;
enable-simd
! : enable-simd ( -- )
! {
! { (simd-v+) [ emit-simd-v+ ] }
! { (simd-v-) [ emit-simd-v- ] }
! { (simd-vneg) [ emit-simd-vneg ] }
! { (simd-v+-) [ emit-simd-v+- ] }
! { (simd-vs+) [ emit-simd-vs+ ] }
! { (simd-vs-) [ emit-simd-vs- ] }
! { (simd-vs*) [ emit-simd-vs* ] }
! { (simd-v*) [ emit-simd-v* ] }
! { (simd-v/) [ emit-simd-v/ ] }
! { (simd-vmin) [ emit-simd-vmin ] }
! { (simd-vmax) [ emit-simd-vmax ] }
! { (simd-v.) [ emit-simd-v. ] }
! { (simd-vsqrt) [ emit-simd-vsqrt ] }
! { (simd-sum) [ emit-simd-sum ] }
! { (simd-vabs) [ emit-simd-vabs ] }
! { (simd-vbitand) [ emit-simd-vand ] }
! { (simd-vbitandn) [ emit-simd-vandn ] }
! { (simd-vbitor) [ emit-simd-vor ] }
! { (simd-vbitxor) [ emit-simd-vxor ] }
! { (simd-vbitnot) [ emit-simd-vnot ] }
! { (simd-vand) [ emit-simd-vand ] }
! { (simd-vandn) [ emit-simd-vandn ] }
! { (simd-vor) [ emit-simd-vor ] }
! { (simd-vxor) [ emit-simd-vxor ] }
! { (simd-vnot) [ emit-simd-vnot ] }
! { (simd-vlshift) [ emit-simd-vlshift ] }
! { (simd-vrshift) [ emit-simd-vrshift ] }
! { (simd-hlshift) [ emit-simd-hlshift ] }
! { (simd-hrshift) [ emit-simd-hrshift ] }
! { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] }
! { (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] }
! { (simd-vmerge-head) [ emit-simd-vmerge-head ] }
! { (simd-vmerge-tail) [ emit-simd-vmerge-tail ] }
! { (simd-v<=) [ emit-simd-v<= ] }
! { (simd-v<) [ emit-simd-v< ] }
! { (simd-v=) [ emit-simd-v= ] }
! { (simd-v>) [ emit-simd-v> ] }
! { (simd-v>=) [ emit-simd-v>= ] }
! { (simd-vunordered?) [ emit-simd-vunordered? ] }
! { (simd-vany?) [ emit-simd-vany? ] }
! { (simd-vall?) [ emit-simd-vall? ] }
! { (simd-vnone?) [ emit-simd-vnone? ] }
! { (simd-v>float) [ emit-simd-v>float ] }
! { (simd-v>integer) [ emit-simd-v>integer ] }
! { (simd-vpack-signed) [ emit-simd-vpack-signed ] }
! { (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] }
! { (simd-vunpack-head) [ emit-simd-vunpack-head ] }
! { (simd-vunpack-tail) [ emit-simd-vunpack-tail ] }
! { (simd-with) [ emit-simd-with ] }
! { (simd-gather-2) [ emit-simd-gather-2 ] }
! { (simd-gather-4) [ emit-simd-gather-4 ] }
! { (simd-select) [ emit-simd-select ] }
! { alien-vector [ emit-alien-vector ] }
! { set-alien-vector [ emit-set-alien-vector ] }
! } enable-intrinsics ;
!
! enable-simd

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license.
USING: accessors byte-arrays combinators fry sequences
compiler.tree.propagation.info cpu.architecture kernel words math
math.intervals math.vectors.simd ;
math.intervals math.vectors.simd math.vectors.simd.private ;
IN: compiler.tree.propagation.simd
{
@ -51,7 +51,6 @@ IN: compiler.tree.propagation.simd
(simd-gather-2)
(simd-gather-4)
alien-vector
alien-vector-aligned
} [ { byte-array } "default-output-classes" set-word-prop ] each
: scalar-output-class ( rep -- class )

View File

@ -1,6 +1,6 @@
USING: classes.tuple.private cpu.architecture help.markup
help.syntax kernel.private math math.vectors
math.vectors.simd.intrinsics sequences ;
sequences ;
IN: math.vectors.simd
ARTICLE: "math.vectors.simd.intro" "Introduction to SIMD support"
@ -23,7 +23,7 @@ $nl
$nl
"SSE2 introduces double-precision SIMD (" { $snippet "double-2" } " and " { $snippet "double-4" } ") and integer SIMD (all types). Integer SIMD is missing a few features, in particular the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "."
$nl
"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "."
"SSE3 introduces horizontal adds (summing all components of a single vector register), which are useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "."
$nl
"SSSE3 introduces " { $link vabs } " for " { $snippet "char-16" } ", " { $snippet "short-8" } " and " { $snippet "int-4" } "."
$nl
@ -36,26 +36,7 @@ $nl
ARTICLE: "math.vectors.simd.types" "SIMD vector types"
"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type and " { $snippet "count" } " is a vector dimension."
$nl
"To use a SIMD vector type, a parsing word is used to generate the relevant code and bring it into the vocabulary search path; this is the same idea as with " { $link "specialized-arrays" } ":"
{ $subsections
POSTPONE: SIMD:
POSTPONE: SIMDS:
}
"The following scalar types are supported:"
{ $code
"char"
"uchar"
"short"
"ushort"
"int"
"uint"
"longlong"
"ulonglong"
"float"
"double"
}
"The following vector types are generated from the above scalar types:"
"The following vector types are available:"
{ $code
"char-16"
"uchar-16"
@ -218,16 +199,4 @@ ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
"math.vectors.simd.intrinsics"
} ;
HELP: SIMD:
{ $syntax "SIMD: type" }
{ $values { "type" "a scalar C type" } }
{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of " { $snippet "type" } " into the vocabulary search path. The allowed scalar types, and the auto-generated type/length vector combinations that result, are listed in " { $link "math.vectors.simd.types" } ". Generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
HELP: SIMDS:
{ $syntax "SIMDS: type type type ... ;" }
{ $values { "type" "a scalar C type" } }
{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of each " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
{ POSTPONE: SIMD: POSTPONE: SIMDS: } related-words
ABOUT: "math.vectors.simd"

View File

@ -1,5 +1,9 @@
! (c)2009 Slava Pestov, Joe Groff bsd license
USING: math.vectors math.vectors.private ;
USING: accessors alien.c-types byte-arrays classes combinators
cpu.architecture fry functors generalizations generic
generic.parser kernel lexer literals macros math math.functions
math.vectors math.vectors.private namespaces parser
prettyprint.custom quotations sequences sequences.private vocabs
vocabs.loader ;
QUALIFIED-WITH: alien.c-types c
IN: math.vectors.simd
@ -8,8 +12,11 @@ DEFER: simd-with
DEFER: simd-boa
DEFER: simd-cast
<PRIVATE
ERROR: bad-simd-call word ;
ERROR: bad-simd-length got expected ;
<<
<PRIVATE
! Primitive SIMD constructors
GENERIC: new-underlying ( underlying seq -- seq' )
@ -18,6 +25,10 @@ GENERIC: new-underlying ( underlying seq -- seq' )
dip new-underlying ; inline
: change-underlying ( seq quot -- seq' )
'[ underlying>> @ ] keep new-underlying ; inline
PRIVATE>
>>
<PRIVATE
! SIMD intrinsics
@ -34,18 +45,18 @@ GENERIC: new-underlying ( underlying seq -- seq' )
: (simd-vmax) ( a b rep -- c ) \ vmax bad-simd-call ;
: (simd-v.) ( a b rep -- n ) \ v. bad-simd-call ;
: (simd-vsqrt) ( a rep -- c ) \ vsqrt bad-simd-call ;
: (simd-sum) ( a b rep -- n ) \ sum bad-simd-call ;
: (simd-sum) ( a rep -- n ) \ sum bad-simd-call ;
: (simd-vabs) ( a rep -- c ) \ vabs bad-simd-call ;
: (simd-vbitand) ( a b rep -- c ) \ vbitand bad-simd-call ;
: (simd-vbitandn) ( a b rep -- c ) \ vbitandn bad-simd-call ;
: (simd-vbitor) ( a b rep -- c ) \ vbitor bad-simd-call ;
: (simd-vbitxor) ( a b rep -- c ) \ vbitxor bad-simd-call ;
: (simd-vbitnot) ( a b rep -- c ) \ vbitnot bad-simd-call ;
: (simd-vbitnot) ( a rep -- c ) \ vbitnot bad-simd-call ;
: (simd-vand) ( a b rep -- c ) \ vand bad-simd-call ;
: (simd-vandn) ( a b rep -- c ) \ vandn bad-simd-call ;
: (simd-vor) ( a b rep -- c ) \ vor bad-simd-call ;
: (simd-vxor) ( a b rep -- c ) \ vxor bad-simd-call ;
: (simd-vnot) ( a b rep -- c ) \ vnot bad-simd-call ;
: (simd-vnot) ( a rep -- c ) \ vnot bad-simd-call ;
: (simd-vlshift) ( a n rep -- c ) \ vlshift bad-simd-call ;
: (simd-vrshift) ( a n rep -- c ) \ vrshift bad-simd-call ;
: (simd-hlshift) ( a n rep -- c ) \ hlshift bad-simd-call ;
@ -74,9 +85,13 @@ GENERIC: new-underlying ( underlying seq -- seq' )
: (simd-gather-4) ( m n o p rep -- v ) \ simd-boa bad-simd-call ;
: (simd-select) ( a n rep -- n ) \ nth bad-simd-call ;
PRIVATE>
: alien-vector ( c-ptr n rep -- value ) \ alien-vector bad-simd-call ;
: set-alien-vector ( c-ptr n rep -- value ) \ set-alien-vector bad-simd-call ;
<PRIVATE
! Helper for boolean vector literals
: vector-true-value ( class -- value )
@ -102,10 +117,11 @@ TUPLE: simd-128
GENERIC: simd-element-type ( obj -- c-type )
GENERIC: simd-rep ( simd -- rep )
<<
: rep-length ( rep -- n )
16 swap rep-component-type heap-size /i ; foldable
<< <PRIVATE
<PRIVATE
! SIMD concrete type functor
@ -161,9 +177,11 @@ c:<c-type>
;FUNCTOR
SYNTAX: SIMD-128:
scan scan-word define-simd-128 ;
scan define-simd-128 ;
PRIVATE> >>
PRIVATE>
>>
SIMD-128: char-16
SIMD-128: uchar-16
@ -176,16 +194,14 @@ SIMD-128: ulonglong-2
SIMD-128: float-4
SIMD-128: double-2
ERROR: bad-simd-call word ;
ERROR: bad-simd-length got expected ;
: assert-positive ( x -- y ) ;
! SIMD vectors as sequences
M: simd-128 hashcode* underlying>> hashcode* ; inline
M: simd-128 clone [ clone ] change-underlying ; inline
M: simd-128 length simd-rep rep-length ; inline
M: simd-128 nth-unsafe tuck simd-rep (simd-select) ; inline
M: simd-128 nth-unsafe [ nip ] 2keep simd-rep (simd-select) ; inline
M: simd-128 c:byte-length drop 16 ; inline
M: simd-128 new-sequence
@ -193,16 +209,13 @@ M: simd-128 new-sequence
[ nip [ 16 (byte-array) ] make-underlying ]
[ length bad-simd-length ] if ; inline
M: simd-128 equal?
[ v= vall? ] [ 2drop f ] if-vectors-match ; inline
M: simd-128 >pprint-sequence ;
M: simd-128 pprint* pprint-object ;
INSTANCE: simd-128 sequence
! Unboxers for SIMD operations
<<
<PRIVATE
: if-both-vectors ( a b t f -- )
@ -221,6 +234,9 @@ INSTANCE: simd-128 sequence
: simd-v->v-op ( a quot: ( (a) rep -- (c) ) -- c )
[ simd-unbox ] dip 2curry make-underlying ; inline
: simd-vn->v-op ( a n quot: ( (a) n rep -- (c) ) -- c )
[ simd-unbox ] [ swap ] [ 3curry ] tri* make-underlying ; inline
: simd-v->n-op ( a quot: ( (a) rep -- n ) -- n )
[ [ underlying>> ] [ simd-rep ] bi ] dip call ; inline
@ -241,7 +257,7 @@ INSTANCE: simd-128 sequence
[ '[ _ ((simd-vv->n-op)) ] ] dip if-both-vectors-match ; inline
: (simd-method-fallback) ( accum word -- accum )
[ current-method get \ (call-next-method) [ ] 2sequence suffix! ]
[ current-method get literalize \ (call-next-method) [ ] 2sequence suffix! ]
dip suffix! ;
SYNTAX: simd-vv->v-op
@ -252,6 +268,10 @@ SYNTAX: simd-vv->n-op
\ (simd-vv->n-op) (simd-method-fallback) ;
PRIVATE>
>>
M: simd-128 equal?
[ v= vall? ] [ 2drop f ] if-both-vectors-match ; inline
! SIMD constructors
@ -283,26 +303,26 @@ M: simd-128 vmin [ (simd-vmin) ] simd-vv->v-op ; inl
M: simd-128 vmax [ (simd-vmax) ] simd-vv->v-op ; inline
M: simd-128 v. [ (simd-v.) ] simd-vv->n-op ; inline
M: simd-128 vsqrt [ (simd-vsqrt) ] simd-v->v-op ; inline
M: simd-128 sum [ (simd-sum) ] simd-vv->n-op ; inline
M: simd-128 sum [ (simd-sum) ] simd-v->n-op ; inline
M: simd-128 vabs [ (simd-vabs) ] simd-v->v-op ; inline
M: simd-128 vbitand [ (simd-vbitand) ] simd-vv->v-op ; inline
M: simd-128 vbitandn [ (simd-vbitandn) ] simd-vv->v-op ; inline
M: simd-128 vbitor [ (simd-vbitor) ] simd-vv->v-op ; inline
M: simd-128 vbitxor [ (simd-vbitxor) ] simd-vv->v-op ; inline
M: simd-128 vbitnot [ (simd-vbitnot) ] simd-vv->v-op ; inline
M: simd-128 vbitnot [ (simd-vbitnot) ] simd-v->v-op ; inline
M: simd-128 vand [ (simd-vand) ] simd-vv->v-op ; inline
M: simd-128 vandn [ (simd-vandn) ] simd-vv->v-op ; inline
M: simd-128 vor [ (simd-vor) ] simd-vv->v-op ; inline
M: simd-128 vxor [ (simd-vxor) ] simd-vv->v-op ; inline
M: simd-128 vnot [ (simd-vnot) ] simd-vv->v-op ; inline
M: simd-128 vnot [ (simd-vnot) ] simd-v->v-op ; inline
M: simd-128 vlshift [ (simd-vlshift) ] simd-vn->v-op ; inline
M: simd-128 vrshift [ (simd-vrshift) ] simd-vn->v-op ; inline
M: simd-128 hlshift [ (simd-hlshift) ] simd-vn->v-op ; inline
M: simd-128 hrshift [ (simd-hrshift) ] simd-vn->v-op ; inline
M: simd-128 vshuffle-elements [ (simd-vshuffle-elements) ] simd-vn->v-op ; inline
M: simd-128 vshuffle-bytes [ (simd-vshuffle-bytes) ] simd-vv->v-op ; inline
M: simd-128 vmerge-head [ (simd-vmerge-head) ] simd-vv->v-op ; inline
M: simd-128 vmerge-tail [ (simd-vmerge-tail) ] simd-vv->v-op ; inline
M: simd-128 (vmerge-head) [ (simd-vmerge-head) ] simd-vv->v-op ; inline
M: simd-128 (vmerge-tail) [ (simd-vmerge-tail) ] simd-vv->v-op ; inline
M: simd-128 v<= [ (simd-v<=) ] simd-vv->v-op ; inline
M: simd-128 v< [ (simd-v<) ] simd-vv->v-op ; inline
M: simd-128 v= [ (simd-v=) ] simd-vv->v-op ; inline
@ -326,7 +346,6 @@ M: simd-128 v*n over simd-with v* ; inline
M: simd-128 v/n over simd-with v/ ; inline
M: simd-128 norm-sq dup v. assert-positive ; inline
M: simd-128 norm norm-sq sqrt ; inline
M: simd-128 normalize dup norm v/n ; inline
M: simd-128 distance v- norm ; inline
! misc