Merge branch 'master' of git://factorcode.org/git/factor
Conflicts: basis/math/vectors/simd/simd-docs.factordb4
commit
810bd63820
|
@ -1,5 +1,6 @@
|
|||
USING: alien alien.syntax alien.c-types kernel tools.test
|
||||
sequences system libc alien.strings io.encodings.utf8 ;
|
||||
sequences system libc alien.strings io.encodings.utf8
|
||||
math.constants ;
|
||||
IN: alien.c-types.tests
|
||||
|
||||
CONSTANT: xyz 123
|
||||
|
@ -52,3 +53,9 @@ TYPEDEF: uchar* MyLPBYTE
|
|||
os windows? cpu x86.64? and [
|
||||
[ -2147467259 ] [ 2147500037 <long> *long ] unit-test
|
||||
] when
|
||||
|
||||
[ 0 ] [ -10 uchar c-type-clamp ] unit-test
|
||||
[ 12 ] [ 12 uchar c-type-clamp ] unit-test
|
||||
[ -10 ] [ -10 char c-type-clamp ] unit-test
|
||||
[ 127 ] [ 230 char c-type-clamp ] unit-test
|
||||
[ t ] [ pi dup float c-type-clamp = ] unit-test
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
! Copyright (C) 2004, 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: byte-arrays arrays assocs kernel kernel.private math
|
||||
namespaces make parser sequences strings words splitting math.parser
|
||||
cpu.architecture alien alien.accessors alien.strings quotations
|
||||
layouts system compiler.units io io.files io.encodings.binary
|
||||
io.streams.memory accessors combinators effects continuations fry
|
||||
classes vocabs vocabs.loader words.symbol ;
|
||||
math.order math.parser namespaces make parser sequences strings
|
||||
words splitting cpu.architecture alien alien.accessors
|
||||
alien.strings quotations layouts system compiler.units io
|
||||
io.files io.encodings.binary io.streams.memory accessors
|
||||
combinators effects continuations fry classes vocabs
|
||||
vocabs.loader words.symbol ;
|
||||
QUALIFIED: math
|
||||
IN: alien.c-types
|
||||
|
||||
|
@ -472,3 +473,25 @@ SYMBOLS:
|
|||
\ ulong \ size_t typedef
|
||||
] with-compilation-unit
|
||||
|
||||
M: char-16-rep rep-component-type drop char ;
|
||||
M: uchar-16-rep rep-component-type drop uchar ;
|
||||
M: short-8-rep rep-component-type drop short ;
|
||||
M: ushort-8-rep rep-component-type drop ushort ;
|
||||
M: int-4-rep rep-component-type drop int ;
|
||||
M: uint-4-rep rep-component-type drop uint ;
|
||||
M: float-4-rep rep-component-type drop float ;
|
||||
M: double-2-rep rep-component-type drop double ;
|
||||
|
||||
: (unsigned-interval) ( bytes -- from to ) [ 0 ] dip 8 * 2^ 1 - ; foldable
|
||||
: unsigned-interval ( c-type -- from to ) heap-size (unsigned-interval) ; foldable
|
||||
: (signed-interval) ( bytes -- from to ) 8 * 1 - 2^ [ neg ] [ 1 - ] bi ; foldable
|
||||
: signed-interval ( c-type -- from to ) heap-size (signed-interval) ; foldable
|
||||
|
||||
: c-type-interval ( c-type -- from to )
|
||||
{
|
||||
{ [ dup { float double } memq? ] [ drop -1/0. 1/0. ] }
|
||||
{ [ dup { char short int long longlong } memq? ] [ signed-interval ] }
|
||||
{ [ dup { uchar ushort uint ulong ulonglong } memq? ] [ unsigned-interval ] }
|
||||
} cond ; foldable
|
||||
|
||||
: c-type-clamp ( value c-type -- value' ) c-type-interval clamp ; inline
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
! Copyright (C) 2008 Slava Pestov
|
||||
! copyright (C) 2008 Slava Pestov
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors byte-arrays alien.c-types alien.data kernel
|
||||
continuations destructors sequences io openssl openssl.libcrypto
|
||||
|
@ -23,10 +23,10 @@ TUPLE: evp-md-context < disposable handle ;
|
|||
|
||||
: <evp-md-context> ( -- ctx )
|
||||
evp-md-context new-disposable
|
||||
EVP_MD_CTX <struct> dup EVP_MD_CTX_init >>handle ;
|
||||
EVP_MD_CTX_create >>handle ;
|
||||
|
||||
M: evp-md-context dispose*
|
||||
handle>> EVP_MD_CTX_cleanup drop ;
|
||||
handle>> EVP_MD_CTX_destroy ;
|
||||
|
||||
: with-evp-md-context ( quot -- )
|
||||
maybe-init-ssl [ <evp-md-context> ] dip with-disposal ; inline
|
||||
|
|
|
@ -305,16 +305,36 @@ def: dst
|
|||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##saturated-add-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##add-sub-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##sub-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##saturated-sub-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##mul-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##saturated-mul-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##div-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
|
@ -330,14 +350,34 @@ def: dst
|
|||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##horizontal-add-vector
|
||||
def: dst/scalar-rep
|
||||
use: src
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##abs-vector
|
||||
def: dst
|
||||
use: src
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##sqrt-vector
|
||||
def: dst
|
||||
use: src
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##horizontal-add-vector
|
||||
def: dst/scalar-rep
|
||||
use: src
|
||||
PURE-INSN: ##and-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##or-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##xor-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
! Boxing and unboxing aliens
|
||||
|
|
|
@ -151,27 +151,31 @@ IN: compiler.cfg.intrinsics
|
|||
{ math.integers.private:fixnum-log2 [ drop emit-fixnum-log2 ] }
|
||||
} enable-intrinsics ;
|
||||
|
||||
: enable-sse2-simd ( -- )
|
||||
: enable-simd ( -- )
|
||||
{
|
||||
{ math.vectors.simd.intrinsics:assert-positive [ drop ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vs+) [ [ ^^saturated-add-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vs-) [ [ ^^saturated-sub-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vs*) [ [ ^^saturated-mul-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vabs) [ [ ^^abs-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
|
||||
{ math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] }
|
||||
} enable-intrinsics ;
|
||||
|
||||
: enable-sse3-simd ( -- )
|
||||
{
|
||||
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
|
||||
} enable-intrinsics ;
|
||||
|
||||
: emit-intrinsic ( node word -- )
|
||||
"intrinsic" word-prop call( node -- ) ;
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors assocs kernel locals fry
|
||||
USING: accessors assocs kernel locals fry sequences
|
||||
cpu.architecture
|
||||
compiler.cfg.rpo
|
||||
compiler.cfg.def-use
|
||||
compiler.cfg.utilities
|
||||
compiler.cfg.registers
|
||||
compiler.cfg.instructions
|
||||
|
@ -13,10 +14,19 @@ IN: compiler.cfg.ssa.cssa
|
|||
! selection, so it must keep track of representations when introducing
|
||||
! new values.
|
||||
|
||||
: insert-copy? ( bb vreg -- ? )
|
||||
! If the last instruction defines a value (which means it is
|
||||
! ##fixnum-add, ##fixnum-sub or ##fixnum-mul) then we don't
|
||||
! need to insert a copy since in fact doing so will result
|
||||
! in incorrect code.
|
||||
[ instructions>> last defs-vreg ] dip eq? not ;
|
||||
|
||||
:: insert-copy ( bb src rep -- bb dst )
|
||||
rep next-vreg-rep :> dst
|
||||
bb [ dst src rep src rep-of emit-conversion ] add-instructions
|
||||
bb dst ;
|
||||
bb src insert-copy? [
|
||||
rep next-vreg-rep :> dst
|
||||
bb [ dst src rep src rep-of emit-conversion ] add-instructions
|
||||
bb dst
|
||||
] [ bb src ] if ;
|
||||
|
||||
: convert-phi ( ##phi -- )
|
||||
dup dst>> rep-of '[ [ _ insert-copy ] assoc-map ] change-inputs drop ;
|
||||
|
|
|
@ -47,11 +47,18 @@ UNION: two-operand-insn
|
|||
##min-float
|
||||
##max-float
|
||||
##add-vector
|
||||
##saturated-add-vector
|
||||
##add-sub-vector
|
||||
##sub-vector
|
||||
##saturated-sub-vector
|
||||
##mul-vector
|
||||
##saturated-mul-vector
|
||||
##div-vector
|
||||
##min-vector
|
||||
##max-vector ;
|
||||
##max-vector
|
||||
##and-vector
|
||||
##or-vector
|
||||
##xor-vector ;
|
||||
|
||||
GENERIC: convert-two-operand* ( insn -- )
|
||||
|
||||
|
|
|
@ -169,13 +169,21 @@ CODEGEN: ##gather-vector-2 %gather-vector-2
|
|||
CODEGEN: ##gather-vector-4 %gather-vector-4
|
||||
CODEGEN: ##box-vector %box-vector
|
||||
CODEGEN: ##add-vector %add-vector
|
||||
CODEGEN: ##saturated-add-vector %saturated-add-vector
|
||||
CODEGEN: ##add-sub-vector %add-sub-vector
|
||||
CODEGEN: ##sub-vector %sub-vector
|
||||
CODEGEN: ##saturated-sub-vector %saturated-sub-vector
|
||||
CODEGEN: ##mul-vector %mul-vector
|
||||
CODEGEN: ##saturated-mul-vector %saturated-mul-vector
|
||||
CODEGEN: ##div-vector %div-vector
|
||||
CODEGEN: ##min-vector %min-vector
|
||||
CODEGEN: ##max-vector %max-vector
|
||||
CODEGEN: ##sqrt-vector %sqrt-vector
|
||||
CODEGEN: ##horizontal-add-vector %horizontal-add-vector
|
||||
CODEGEN: ##abs-vector %abs-vector
|
||||
CODEGEN: ##and-vector %and-vector
|
||||
CODEGEN: ##or-vector %or-vector
|
||||
CODEGEN: ##xor-vector %xor-vector
|
||||
CODEGEN: ##box-alien %box-alien
|
||||
CODEGEN: ##box-displaced-alien %box-displaced-alien
|
||||
CODEGEN: ##unbox-alien %unbox-alien
|
||||
|
|
|
@ -3,7 +3,7 @@ math hashtables.private math.private namespaces sequences tools.test
|
|||
namespaces.private slots.private sequences.private byte-arrays alien
|
||||
alien.accessors layouts words definitions compiler.units io
|
||||
combinators vectors grouping make alien.c-types combinators.short-circuit
|
||||
math.order math.libm math.parser ;
|
||||
math.order math.libm math.parser alien.c-types ;
|
||||
FROM: math => float ;
|
||||
QUALIFIED: namespaces.private
|
||||
IN: compiler.tests.codegen
|
||||
|
@ -431,3 +431,21 @@ cell 4 = [
|
|||
] curry each-integer
|
||||
] compile-call
|
||||
] unit-test
|
||||
|
||||
TUPLE: myseq { underlying1 byte-array read-only } { underlying2 byte-array read-only } ;
|
||||
|
||||
[ 2 ] [
|
||||
little-endian?
|
||||
T{ myseq f B{ 1 0 0 0 } B{ 1 0 0 0 } }
|
||||
T{ myseq f B{ 0 0 0 1 } B{ 0 0 0 1 } } ?
|
||||
[
|
||||
{ myseq } declare
|
||||
[ 0 2 ] dip dup
|
||||
[
|
||||
[
|
||||
over 1 < [ underlying1>> ] [ [ 1 - ] dip underlying2>> ] if
|
||||
swap 4 * >fixnum alien-signed-4
|
||||
] bi-curry@ bi * +
|
||||
] 2curry each-integer
|
||||
] compile-call
|
||||
] unit-test
|
||||
|
|
|
@ -18,6 +18,7 @@ compiler.tree.propagation.constraints
|
|||
compiler.tree.propagation.call-effect
|
||||
compiler.tree.propagation.transforms
|
||||
compiler.tree.propagation.simd ;
|
||||
FROM: alien.c-types => (signed-interval) (unsigned-interval) ;
|
||||
IN: compiler.tree.propagation.known-words
|
||||
|
||||
{ + - * / }
|
||||
|
@ -260,15 +261,9 @@ generic-comparison-ops [
|
|||
alien-unsigned-8
|
||||
} [
|
||||
dup name>> {
|
||||
{
|
||||
[ "alien-signed-" ?head ]
|
||||
[ string>number 8 * 1 - 2^ dup neg swap 1 - [a,b] ]
|
||||
}
|
||||
{
|
||||
[ "alien-unsigned-" ?head ]
|
||||
[ string>number 8 * 2^ 1 - 0 swap [a,b] ]
|
||||
}
|
||||
} cond
|
||||
{ [ "alien-signed-" ?head ] [ string>number (signed-interval) ] }
|
||||
{ [ "alien-unsigned-" ?head ] [ string>number (unsigned-interval) ] }
|
||||
} cond [a,b]
|
||||
[ fits-in-fixnum? fixnum integer ? ] keep <class/interval-info>
|
||||
'[ 2drop _ ] "outputs" set-word-prop
|
||||
] each
|
||||
|
|
|
@ -1,46 +1,45 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors byte-arrays combinators fry
|
||||
USING: accessors byte-arrays combinators fry sequences
|
||||
compiler.tree.propagation.info cpu.architecture kernel words math
|
||||
math.intervals math.vectors.simd.intrinsics ;
|
||||
IN: compiler.tree.propagation.simd
|
||||
|
||||
\ (simd-v+) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-v-) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-v*) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-v/) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-vmin) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-vmax) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-vsqrt) { byte-array } "default-output-classes" set-word-prop
|
||||
{
|
||||
(simd-v+)
|
||||
(simd-v-)
|
||||
(simd-v+-)
|
||||
(simd-v*)
|
||||
(simd-v/)
|
||||
(simd-vmin)
|
||||
(simd-vmax)
|
||||
(simd-sum)
|
||||
(simd-vabs)
|
||||
(simd-vsqrt)
|
||||
(simd-vbitand)
|
||||
(simd-vbitor)
|
||||
(simd-vbitxor)
|
||||
(simd-broadcast)
|
||||
(simd-gather-2)
|
||||
(simd-gather-4)
|
||||
alien-vector
|
||||
} [ { byte-array } "default-output-classes" set-word-prop ] each
|
||||
|
||||
\ (simd-sum) [
|
||||
nip dup literal?>> [
|
||||
literal>> scalar-rep-of {
|
||||
{ float-rep [ float ] }
|
||||
{ double-rep [ float ] }
|
||||
{ int-rep [ integer ] }
|
||||
} case
|
||||
] [ drop real ] if
|
||||
<class-info>
|
||||
] "outputs" set-word-prop
|
||||
|
||||
\ (simd-broadcast) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-gather-2) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-gather-4) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ assert-positive [
|
||||
real [0,inf] <class/interval-info> value-info-intersect
|
||||
] "outputs" set-word-prop
|
||||
|
||||
\ alien-vector { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
! If SIMD is not available, inline alien-vector and set-alien-vector
|
||||
! to get a speedup
|
||||
: inline-unless-intrinsic ( word -- )
|
||||
|
|
|
@ -22,8 +22,6 @@ SINGLETONS: float-rep double-rep ;
|
|||
|
||||
! On x86, floating point registers are really vector registers
|
||||
SINGLETONS:
|
||||
float-4-rep
|
||||
double-2-rep
|
||||
char-16-rep
|
||||
uchar-16-rep
|
||||
short-8-rep
|
||||
|
@ -31,9 +29,11 @@ ushort-8-rep
|
|||
int-4-rep
|
||||
uint-4-rep ;
|
||||
|
||||
UNION: vector-rep
|
||||
SINGLETONS:
|
||||
float-4-rep
|
||||
double-2-rep
|
||||
double-2-rep ;
|
||||
|
||||
UNION: int-vector-rep
|
||||
char-16-rep
|
||||
uchar-16-rep
|
||||
short-8-rep
|
||||
|
@ -41,6 +41,14 @@ ushort-8-rep
|
|||
int-4-rep
|
||||
uint-4-rep ;
|
||||
|
||||
UNION: float-vector-rep
|
||||
float-4-rep
|
||||
double-2-rep ;
|
||||
|
||||
UNION: vector-rep
|
||||
int-vector-rep
|
||||
float-vector-rep ;
|
||||
|
||||
UNION: representation
|
||||
any-rep
|
||||
tagged-rep
|
||||
|
@ -76,10 +84,15 @@ M: double-rep rep-size drop 8 ;
|
|||
M: stack-params rep-size drop cell ;
|
||||
M: vector-rep rep-size drop 16 ;
|
||||
|
||||
GENERIC: rep-component-type ( rep -- n )
|
||||
|
||||
! Methods defined in alien.c-types
|
||||
|
||||
GENERIC: scalar-rep-of ( rep -- rep' )
|
||||
|
||||
M: float-4-rep scalar-rep-of drop float-rep ;
|
||||
M: double-2-rep scalar-rep-of drop double-rep ;
|
||||
M: int-vector-rep scalar-rep-of drop int-rep ;
|
||||
|
||||
! Mapping from register class to machine registers
|
||||
HOOK: machine-registers cpu ( -- assoc )
|
||||
|
@ -167,15 +180,42 @@ HOOK: %unbox-vector cpu ( dst src rep -- )
|
|||
HOOK: %broadcast-vector cpu ( dst src rep -- )
|
||||
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
|
||||
|
||||
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %saturated-add-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %saturated-sub-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %saturated-mul-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %div-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %min-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %sqrt-vector cpu ( dst src rep -- )
|
||||
HOOK: %horizontal-add-vector cpu ( dst src rep -- )
|
||||
HOOK: %abs-vector cpu ( dst src rep -- )
|
||||
HOOK: %and-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %or-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %xor-vector cpu ( dst src1 src2 rep -- )
|
||||
|
||||
HOOK: %broadcast-vector-reps cpu ( -- reps )
|
||||
HOOK: %gather-vector-2-reps cpu ( -- reps )
|
||||
HOOK: %gather-vector-4-reps cpu ( -- reps )
|
||||
HOOK: %add-vector-reps cpu ( -- reps )
|
||||
HOOK: %saturated-add-vector-reps cpu ( -- reps )
|
||||
HOOK: %add-sub-vector-reps cpu ( -- reps )
|
||||
HOOK: %sub-vector-reps cpu ( -- reps )
|
||||
HOOK: %saturated-sub-vector-reps cpu ( -- reps )
|
||||
HOOK: %mul-vector-reps cpu ( -- reps )
|
||||
HOOK: %saturated-mul-vector-reps cpu ( -- reps )
|
||||
HOOK: %div-vector-reps cpu ( -- reps )
|
||||
HOOK: %min-vector-reps cpu ( -- reps )
|
||||
HOOK: %max-vector-reps cpu ( -- reps )
|
||||
HOOK: %sqrt-vector-reps cpu ( -- reps )
|
||||
HOOK: %horizontal-add-vector-reps cpu ( -- reps )
|
||||
HOOK: %abs-vector-reps cpu ( -- reps )
|
||||
HOOK: %and-vector-reps cpu ( -- reps )
|
||||
HOOK: %or-vector-reps cpu ( -- reps )
|
||||
HOOK: %xor-vector-reps cpu ( -- reps )
|
||||
|
||||
HOOK: %unbox-alien cpu ( dst src -- )
|
||||
HOOK: %unbox-any-c-ptr cpu ( dst src temp -- )
|
||||
|
|
|
@ -322,4 +322,4 @@ os windows? [
|
|||
4 "double" c-type (>>align)
|
||||
] unless
|
||||
|
||||
"cpu.x86.features" require
|
||||
check-sse
|
||||
|
|
|
@ -249,4 +249,4 @@ USE: vocabs.loader
|
|||
{ [ os winnt? ] [ "cpu.x86.64.winnt" require ] }
|
||||
} cond
|
||||
|
||||
"cpu.x86.features" require
|
||||
check-sse
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: system kernel math math.order math.parser namespaces
|
||||
alien.c-types alien.syntax combinators locals init io cpu.x86
|
||||
USING: system kernel memoize math math.order math.parser
|
||||
namespaces alien.c-types alien.syntax combinators locals init io
|
||||
compiler compiler.units accessors ;
|
||||
IN: cpu.x86.features
|
||||
|
||||
|
@ -13,7 +13,18 @@ FUNCTION: longlong read_timestamp_counter ( ) ;
|
|||
|
||||
PRIVATE>
|
||||
|
||||
ALIAS: sse-version sse_version
|
||||
MEMO: sse-version ( -- n )
|
||||
sse_version
|
||||
"sse-version" get string>number [ min ] when* ;
|
||||
|
||||
[ \ sse-version reset-memoized ] "cpu.x86.features" add-init-hook
|
||||
|
||||
: sse? ( -- ? ) sse-version 10 >= ;
|
||||
: sse2? ( -- ? ) sse-version 20 >= ;
|
||||
: sse3? ( -- ? ) sse-version 30 >= ;
|
||||
: ssse3? ( -- ? ) sse-version 33 >= ;
|
||||
: sse4.1? ( -- ? ) sse-version 41 >= ;
|
||||
: sse4.2? ( -- ? ) sse-version 42 >= ;
|
||||
|
||||
: sse-string ( version -- string )
|
||||
{
|
||||
|
@ -32,37 +43,3 @@ M: x86 instruction-count read_timestamp_counter ;
|
|||
|
||||
: count-instructions ( quot -- n )
|
||||
instruction-count [ call ] dip instruction-count swap - ; inline
|
||||
|
||||
USING: cpu.x86.features cpu.x86.features.private ;
|
||||
|
||||
:: install-sse-check ( version -- )
|
||||
[
|
||||
sse-version version < [
|
||||
"This image was built to use " write
|
||||
version sse-string write
|
||||
" but your CPU only supports " write
|
||||
sse-version sse-string write "." print
|
||||
"You will need to bootstrap Factor again." print
|
||||
flush
|
||||
1 exit
|
||||
] when
|
||||
] "cpu.x86" add-init-hook ;
|
||||
|
||||
: enable-sse ( version -- )
|
||||
{
|
||||
{ 00 [ ] }
|
||||
{ 10 [ ] }
|
||||
{ 20 [ enable-sse2 ] }
|
||||
{ 30 [ enable-sse3 ] }
|
||||
{ 33 [ enable-sse3 ] }
|
||||
{ 41 [ enable-sse3 ] }
|
||||
{ 42 [ enable-sse3 ] }
|
||||
} case ;
|
||||
|
||||
[ { sse_version } compile ] with-optimizer
|
||||
|
||||
"Checking for multimedia extensions: " write sse-version
|
||||
"sse-version" get [ string>number min ] when*
|
||||
[ sse-string write " detected" print ]
|
||||
[ install-sse-check ]
|
||||
[ enable-sse ] tri
|
||||
|
|
|
@ -4,7 +4,8 @@ USING: accessors assocs alien alien.c-types arrays strings
|
|||
cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands
|
||||
cpu.architecture kernel kernel.private math memory namespaces make
|
||||
sequences words system layouts combinators math.order fry locals
|
||||
compiler.constants vm byte-arrays
|
||||
compiler.constants byte-arrays io macros quotations cpu.x86.features
|
||||
cpu.x86.features.private compiler compiler.units init vm
|
||||
compiler.cfg.registers
|
||||
compiler.cfg.instructions
|
||||
compiler.cfg.intrinsics
|
||||
|
@ -248,12 +249,26 @@ M:: x86 %unbox-vector ( dst src rep -- )
|
|||
dst src byte-array-offset [+]
|
||||
rep copy-register ;
|
||||
|
||||
MACRO: available-reps ( alist -- )
|
||||
! Each SSE version adds new representations and supports
|
||||
! all old ones
|
||||
unzip { } [ append ] accumulate rest swap suffix
|
||||
[ [ 1quotation ] map ] bi@ zip
|
||||
reverse [ { } ] suffix
|
||||
'[ _ cond ] ;
|
||||
|
||||
M: x86 %broadcast-vector ( dst src rep -- )
|
||||
{
|
||||
{ float-4-rep [ [ MOVSS ] [ drop dup 0 SHUFPS ] 2bi ] }
|
||||
{ double-2-rep [ [ MOVSD ] [ drop dup UNPCKLPD ] 2bi ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %broadcast-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
|
||||
rep {
|
||||
{
|
||||
|
@ -267,6 +282,11 @@ M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
|
|||
}
|
||||
} case ;
|
||||
|
||||
M: x86 %gather-vector-4-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
|
||||
rep {
|
||||
{
|
||||
|
@ -278,6 +298,11 @@ M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
|
|||
}
|
||||
} case ;
|
||||
|
||||
M: x86 %gather-vector-2-reps
|
||||
{
|
||||
{ sse2? { double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %add-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ float-4-rep [ ADDPS ] }
|
||||
|
@ -290,6 +315,36 @@ M: x86 %add-vector ( dst src1 src2 rep -- )
|
|||
{ uint-4-rep [ PADDD ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %add-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %saturated-add-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ char-16-rep [ PADDSB ] }
|
||||
{ uchar-16-rep [ PADDUSB ] }
|
||||
{ short-8-rep [ PADDSW ] }
|
||||
{ ushort-8-rep [ PADDUSW ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %saturated-add-vector-reps
|
||||
{
|
||||
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %add-sub-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ float-4-rep [ ADDSUBPS ] }
|
||||
{ double-2-rep [ ADDSUBPD ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %add-sub-vector-reps
|
||||
{
|
||||
{ sse3? { float-4-rep double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %sub-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ float-4-rep [ SUBPS ] }
|
||||
|
@ -302,42 +357,183 @@ M: x86 %sub-vector ( dst src1 src2 rep -- )
|
|||
{ uint-4-rep [ PSUBD ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %sub-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %saturated-sub-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ char-16-rep [ PSUBSB ] }
|
||||
{ uchar-16-rep [ PSUBUSB ] }
|
||||
{ short-8-rep [ PSUBSW ] }
|
||||
{ ushort-8-rep [ PSUBUSW ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %saturated-sub-vector-reps
|
||||
{
|
||||
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %mul-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ float-4-rep [ MULPS ] }
|
||||
{ double-2-rep [ MULPD ] }
|
||||
{ int-4-rep [ PMULLW ] }
|
||||
{ short-8-rep [ PMULLW ] }
|
||||
{ ushort-8-rep [ PMULLW ] }
|
||||
{ int-4-rep [ PMULLD ] }
|
||||
{ uint-4-rep [ PMULLD ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %mul-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %saturated-mul-vector-reps
|
||||
! No multiplication with saturation on x86
|
||||
{ } ;
|
||||
|
||||
M: x86 %div-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ float-4-rep [ DIVPS ] }
|
||||
{ double-2-rep [ DIVPD ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %div-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %min-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ char-16-rep [ PMINSB ] }
|
||||
{ uchar-16-rep [ PMINUB ] }
|
||||
{ short-8-rep [ PMINSW ] }
|
||||
{ ushort-8-rep [ PMINUW ] }
|
||||
{ int-4-rep [ PMINSD ] }
|
||||
{ uint-4-rep [ PMINUD ] }
|
||||
{ float-4-rep [ MINPS ] }
|
||||
{ double-2-rep [ MINPD ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %min-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
|
||||
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %max-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ char-16-rep [ PMAXSB ] }
|
||||
{ uchar-16-rep [ PMAXUB ] }
|
||||
{ short-8-rep [ PMAXSW ] }
|
||||
{ ushort-8-rep [ PMAXUW ] }
|
||||
{ int-4-rep [ PMAXSD ] }
|
||||
{ uint-4-rep [ PMAXUD ] }
|
||||
{ float-4-rep [ MAXPS ] }
|
||||
{ double-2-rep [ MAXPD ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %max-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
|
||||
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %horizontal-add-vector ( dst src rep -- )
|
||||
{
|
||||
{ float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
|
||||
{ double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %horizontal-add-vector-reps
|
||||
{
|
||||
{ sse3? { float-4-rep double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %abs-vector ( dst src rep -- )
|
||||
{
|
||||
{ char-16-rep [ PABSB ] }
|
||||
{ short-8-rep [ PABSW ] }
|
||||
{ int-4-rep [ PABSD ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %abs-vector-reps
|
||||
{
|
||||
{ ssse3? { char-16-rep short-8-rep int-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %sqrt-vector ( dst src rep -- )
|
||||
{
|
||||
{ float-4-rep [ SQRTPS ] }
|
||||
{ double-2-rep [ SQRTPD ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %horizontal-add-vector ( dst src rep -- )
|
||||
M: x86 %sqrt-vector-reps
|
||||
{
|
||||
{ float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
|
||||
{ double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
|
||||
} case ;
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %and-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ float-4-rep [ ANDPS ] }
|
||||
{ double-2-rep [ ANDPD ] }
|
||||
{ char-16-rep [ PAND ] }
|
||||
{ uchar-16-rep [ PAND ] }
|
||||
{ short-8-rep [ PAND ] }
|
||||
{ ushort-8-rep [ PAND ] }
|
||||
{ int-4-rep [ PAND ] }
|
||||
{ uint-4-rep [ PAND ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %and-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %or-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ float-4-rep [ ORPS ] }
|
||||
{ double-2-rep [ ORPD ] }
|
||||
{ char-16-rep [ POR ] }
|
||||
{ uchar-16-rep [ POR ] }
|
||||
{ short-8-rep [ POR ] }
|
||||
{ ushort-8-rep [ POR ] }
|
||||
{ int-4-rep [ POR ] }
|
||||
{ uint-4-rep [ POR ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %or-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %xor-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ float-4-rep [ XORPS ] }
|
||||
{ double-2-rep [ XORPD ] }
|
||||
{ char-16-rep [ PXOR ] }
|
||||
{ uchar-16-rep [ PXOR ] }
|
||||
{ short-8-rep [ PXOR ] }
|
||||
{ ushort-8-rep [ PXOR ] }
|
||||
{ int-4-rep [ PXOR ] }
|
||||
{ uint-4-rep [ PXOR ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %xor-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %unbox-alien ( dst src -- )
|
||||
alien-offset [+] MOV ;
|
||||
|
@ -767,15 +963,29 @@ M: x86 small-enough? ( n -- ? )
|
|||
#! set up by the caller.
|
||||
stack-frame get total-size>> + stack@ ;
|
||||
|
||||
: enable-sse2 ( -- )
|
||||
enable-float-intrinsics
|
||||
enable-fsqrt
|
||||
enable-float-min/max
|
||||
enable-sse2-simd ;
|
||||
|
||||
: enable-sse3 ( -- )
|
||||
enable-sse2
|
||||
enable-sse3-simd ;
|
||||
|
||||
enable-simd
|
||||
enable-min/max
|
||||
enable-fixnum-log2
|
||||
enable-fixnum-log2
|
||||
|
||||
:: install-sse2-check ( -- )
|
||||
[
|
||||
sse-version 20 < [
|
||||
"This image was built to use SSE2 but your CPU does not support it." print
|
||||
"You will need to bootstrap Factor again." print
|
||||
flush
|
||||
1 exit
|
||||
] when
|
||||
] "cpu.x86" add-init-hook ;
|
||||
|
||||
: enable-sse2 ( version -- )
|
||||
20 >= [
|
||||
enable-float-intrinsics
|
||||
enable-fsqrt
|
||||
enable-float-min/max
|
||||
install-sse2-check
|
||||
] when ;
|
||||
|
||||
: check-sse ( -- )
|
||||
[ { sse_version } compile ] with-optimizer
|
||||
"Checking for multimedia extensions: " write sse-version
|
||||
[ sse-string write " detected" print ] [ enable-sse2 ] bi ;
|
||||
|
|
|
@ -31,9 +31,7 @@ M: x87-env (set-fp-env-register)
|
|||
set_x87_env ;
|
||||
|
||||
M: x86 (fp-env-registers)
|
||||
sse-version 20 >=
|
||||
[ <sse-env> <x87-env> 2array ]
|
||||
[ <x87-env> 1array ] if ;
|
||||
sse2? [ <sse-env> <x87-env> 2array ] [ <x87-env> 1array ] if ;
|
||||
|
||||
CONSTANT: sse-exception-flag-bits HEX: 3f
|
||||
CONSTANT: sse-exception-flag>bit
|
||||
|
|
|
@ -1,70 +0,0 @@
|
|||
USING: cpu.architecture math.vectors.simd
|
||||
math.vectors.simd.intrinsics accessors math.vectors.simd.alien
|
||||
kernel classes.struct tools.test compiler sequences byte-arrays
|
||||
alien math kernel.private specialized-arrays combinators ;
|
||||
SPECIALIZED-ARRAY: float
|
||||
IN: math.vectors.simd.alien.tests
|
||||
|
||||
! Vector alien intrinsics
|
||||
[ float-4{ 1 2 3 4 } ] [
|
||||
[
|
||||
float-4{ 1 2 3 4 }
|
||||
underlying>> 0 float-4-rep alien-vector
|
||||
] compile-call float-4 boa
|
||||
] unit-test
|
||||
|
||||
[ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [
|
||||
16 [ 1 ] B{ } replicate-as 16 <byte-array>
|
||||
[
|
||||
0 [
|
||||
{ byte-array c-ptr fixnum } declare
|
||||
float-4-rep set-alien-vector
|
||||
] compile-call
|
||||
] keep
|
||||
] unit-test
|
||||
|
||||
[ float-array{ 1 2 3 4 } ] [
|
||||
[
|
||||
float-array{ 1 2 3 4 } underlying>>
|
||||
float-array{ 4 3 2 1 } clone
|
||||
[ underlying>> 0 float-4-rep set-alien-vector ] keep
|
||||
] compile-call
|
||||
] unit-test
|
||||
|
||||
STRUCT: simd-struct
|
||||
{ x float-4 }
|
||||
{ y double-2 }
|
||||
{ z double-4 }
|
||||
{ w float-8 } ;
|
||||
|
||||
[ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
|
||||
|
||||
[
|
||||
float-4{ 1 2 3 4 }
|
||||
double-2{ 2 1 }
|
||||
double-4{ 4 3 2 1 }
|
||||
float-8{ 1 2 3 4 5 6 7 8 }
|
||||
] [
|
||||
simd-struct <struct>
|
||||
float-4{ 1 2 3 4 } >>x
|
||||
double-2{ 2 1 } >>y
|
||||
double-4{ 4 3 2 1 } >>z
|
||||
float-8{ 1 2 3 4 5 6 7 8 } >>w
|
||||
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
|
||||
] unit-test
|
||||
|
||||
[
|
||||
float-4{ 1 2 3 4 }
|
||||
double-2{ 2 1 }
|
||||
double-4{ 4 3 2 1 }
|
||||
float-8{ 1 2 3 4 5 6 7 8 }
|
||||
] [
|
||||
[
|
||||
simd-struct <struct>
|
||||
float-4{ 1 2 3 4 } >>x
|
||||
double-2{ 2 1 } >>y
|
||||
double-4{ 4 3 2 1 } >>z
|
||||
float-8{ 1 2 3 4 5 6 7 8 } >>w
|
||||
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
|
||||
] compile-call
|
||||
] unit-test
|
|
@ -1,42 +0,0 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: alien accessors alien.c-types byte-arrays compiler.units
|
||||
cpu.architecture locals kernel math math.vectors.simd
|
||||
math.vectors.simd.intrinsics ;
|
||||
IN: math.vectors.simd.alien
|
||||
|
||||
:: define-simd-128-type ( class rep -- )
|
||||
<c-type>
|
||||
byte-array >>class
|
||||
class >>boxed-class
|
||||
[ rep alien-vector class boa ] >>getter
|
||||
[ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
|
||||
16 >>size
|
||||
8 >>align
|
||||
rep >>rep
|
||||
class name>> typedef ;
|
||||
|
||||
:: define-simd-256-type ( class rep -- )
|
||||
<c-type>
|
||||
class >>class
|
||||
class >>boxed-class
|
||||
[
|
||||
[ rep alien-vector ]
|
||||
[ 16 + >fixnum rep alien-vector ] 2bi
|
||||
class boa
|
||||
] >>getter
|
||||
[
|
||||
[ [ underlying1>> ] 2dip rep set-alien-vector ]
|
||||
[ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ]
|
||||
3bi
|
||||
] >>setter
|
||||
32 >>size
|
||||
8 >>align
|
||||
rep >>rep
|
||||
class name>> typedef ;
|
||||
[
|
||||
float-4 float-4-rep define-simd-128-type
|
||||
double-2 double-2-rep define-simd-128-type
|
||||
float-8 float-4-rep define-simd-256-type
|
||||
double-4 double-2-rep define-simd-256-type
|
||||
] with-compilation-unit
|
|
@ -1,27 +1,124 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors alien.c-types byte-arrays classes functors
|
||||
kernel math parser prettyprint.custom sequences
|
||||
sequences.private literals ;
|
||||
USING: accessors alien.c-types assocs byte-arrays classes
|
||||
effects fry functors generalizations kernel literals locals
|
||||
math math.functions math.vectors math.vectors.simd.intrinsics
|
||||
math.vectors.specialization parser prettyprint.custom sequences
|
||||
sequences.private strings words definitions macros cpu.architecture
|
||||
namespaces arrays quotations ;
|
||||
QUALIFIED-WITH: math m
|
||||
IN: math.vectors.simd.functor
|
||||
|
||||
ERROR: bad-length got expected ;
|
||||
|
||||
MACRO: simd-boa ( rep class -- simd-array )
|
||||
[ rep-components ] [ new ] bi* '[ _ _ nsequence ] ;
|
||||
|
||||
:: define-boa-custom-inlining ( word rep class -- )
|
||||
word [
|
||||
drop
|
||||
rep rep rep-gather-word supported-simd-op? [
|
||||
[ rep (simd-boa) class boa ]
|
||||
] [ word def>> ] if
|
||||
] "custom-inlining" set-word-prop ;
|
||||
|
||||
: simd-with ( rep class x -- simd-array )
|
||||
[ rep-components ] [ new ] [ '[ _ ] ] tri* swap replicate-as ; inline
|
||||
|
||||
:: define-with-custom-inlining ( word rep class -- )
|
||||
word [
|
||||
drop
|
||||
rep \ (simd-broadcast) supported-simd-op? [
|
||||
[ rep rep-coerce rep (simd-broadcast) class boa ]
|
||||
] [ word def>> ] if
|
||||
] "custom-inlining" set-word-prop ;
|
||||
|
||||
: boa-effect ( rep n -- effect )
|
||||
[ rep-components ] dip *
|
||||
[ CHAR: a + 1string ] map
|
||||
{ "simd-vector" } <effect> ;
|
||||
|
||||
: supported-simd-ops ( assoc rep -- assoc' )
|
||||
[ simd-ops get ] dip
|
||||
'[ nip _ swap supported-simd-op? ] assoc-filter
|
||||
'[ drop _ key? ] assoc-filter ;
|
||||
|
||||
ERROR: bad-schema schema ;
|
||||
|
||||
: low-level-ops ( box-quot: ( inputs... simd-op -- outputs... ) -- alist )
|
||||
[ simd-ops get ] dip '[
|
||||
1quotation
|
||||
over word-schema _ ?at [ bad-schema ] unless
|
||||
[ ] 2sequence
|
||||
] assoc-map ;
|
||||
|
||||
:: high-level-ops ( ctor elt-class -- assoc )
|
||||
! Some SIMD operations are defined in terms of others.
|
||||
{
|
||||
{ vneg [ [ dup v- ] keep v- ] }
|
||||
{ n+v [ [ ctor execute ] dip v+ ] }
|
||||
{ v+n [ ctor execute v+ ] }
|
||||
{ n-v [ [ ctor execute ] dip v- ] }
|
||||
{ v-n [ ctor execute v- ] }
|
||||
{ n*v [ [ ctor execute ] dip v* ] }
|
||||
{ v*n [ ctor execute v* ] }
|
||||
{ n/v [ [ ctor execute ] dip v/ ] }
|
||||
{ v/n [ ctor execute v/ ] }
|
||||
{ norm-sq [ dup v. assert-positive ] }
|
||||
{ norm [ norm-sq sqrt ] }
|
||||
{ normalize [ dup norm v/n ] }
|
||||
}
|
||||
! To compute dot product and distance with integer vectors, we
|
||||
! have to do things less efficiently, with integer overflow checks,
|
||||
! in the general case.
|
||||
elt-class m:float = [
|
||||
{
|
||||
{ distance [ v- norm ] }
|
||||
{ v. [ v* sum ] }
|
||||
} append
|
||||
] when ;
|
||||
|
||||
:: simd-vector-words ( class ctor rep vv->v v->v v->n -- )
|
||||
rep rep-component-type c-type-boxed-class :> elt-class
|
||||
class
|
||||
elt-class
|
||||
{
|
||||
{ { +vector+ +vector+ -> +vector+ } vv->v }
|
||||
{ { +vector+ -> +vector+ } v->v }
|
||||
{ { +vector+ -> +scalar+ } v->n }
|
||||
{ { +vector+ -> +nonnegative+ } v->n }
|
||||
} low-level-ops
|
||||
rep supported-simd-ops
|
||||
ctor elt-class high-level-ops assoc-union
|
||||
specialize-vector-words ;
|
||||
|
||||
:: define-simd-128-type ( class rep -- )
|
||||
<c-type>
|
||||
byte-array >>class
|
||||
class >>boxed-class
|
||||
[ rep alien-vector class boa ] >>getter
|
||||
[ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
|
||||
16 >>size
|
||||
8 >>align
|
||||
rep >>rep
|
||||
class typedef ;
|
||||
|
||||
FUNCTOR: define-simd-128 ( T -- )
|
||||
|
||||
T-TYPE IS ${T}
|
||||
|
||||
N [ 16 T-TYPE heap-size /i ]
|
||||
N [ 16 T heap-size /i ]
|
||||
|
||||
A DEFINES-CLASS ${T}-${N}
|
||||
A-boa DEFINES ${A}-boa
|
||||
A-with DEFINES ${A}-with
|
||||
>A DEFINES >${A}
|
||||
A{ DEFINES ${A}{
|
||||
|
||||
NTH [ T-TYPE dup c-type-getter-boxer array-accessor ]
|
||||
SET-NTH [ T-TYPE dup c-setter array-accessor ]
|
||||
NTH [ T dup c-type-getter-boxer array-accessor ]
|
||||
SET-NTH [ T dup c-setter array-accessor ]
|
||||
|
||||
A-rep IS ${A}-rep
|
||||
A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
|
||||
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
||||
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
|
||||
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
|
||||
|
||||
WHERE
|
||||
|
@ -51,6 +148,8 @@ M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ;
|
|||
|
||||
M: A byte-length underlying>> length ; inline
|
||||
|
||||
M: A element-type drop A-rep rep-component-type ;
|
||||
|
||||
M: A pprint-delims drop \ A{ \ } ;
|
||||
|
||||
M: A >pprint-sequence ;
|
||||
|
@ -59,6 +158,16 @@ M: A pprint* pprint-object ;
|
|||
|
||||
SYNTAX: A{ \ } [ >A ] parse-literal ;
|
||||
|
||||
: A-with ( x -- simd-array ) [ A-rep A ] dip simd-with ;
|
||||
|
||||
\ A-with \ A-rep \ A define-with-custom-inlining
|
||||
|
||||
\ A-boa [ \ A-rep \ A simd-boa ] \ A-rep 1 boa-effect define-declared
|
||||
|
||||
\ A-rep rep-gather-word [
|
||||
\ A-boa \ A-rep \ A define-boa-custom-inlining
|
||||
] when
|
||||
|
||||
INSTANCE: A sequence
|
||||
|
||||
<PRIVATE
|
||||
|
@ -66,31 +175,62 @@ INSTANCE: A sequence
|
|||
: A-vv->v-op ( v1 v2 quot -- v3 )
|
||||
[ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline
|
||||
|
||||
: A-v->v-op ( v1 quot -- v2 )
|
||||
[ underlying>> A-rep ] dip call \ A boa ; inline
|
||||
|
||||
: A-v->n-op ( v quot -- n )
|
||||
[ underlying>> A-rep ] dip call ; inline
|
||||
|
||||
\ A \ A-with \ A-rep \ A-vv->v-op \ A-v->v-op \ A-v->n-op simd-vector-words
|
||||
\ A \ A-rep define-simd-128-type
|
||||
|
||||
PRIVATE>
|
||||
|
||||
;FUNCTOR
|
||||
|
||||
! Synthesize 256-bit vectors from a pair of 128-bit vectors
|
||||
SLOT: underlying1
|
||||
SLOT: underlying2
|
||||
|
||||
:: define-simd-256-type ( class rep -- )
|
||||
<c-type>
|
||||
class >>class
|
||||
class >>boxed-class
|
||||
[
|
||||
[ rep alien-vector ]
|
||||
[ 16 + >fixnum rep alien-vector ] 2bi
|
||||
class boa
|
||||
] >>getter
|
||||
[
|
||||
[ [ underlying1>> ] 2dip rep set-alien-vector ]
|
||||
[ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ]
|
||||
3bi
|
||||
] >>setter
|
||||
32 >>size
|
||||
8 >>align
|
||||
rep >>rep
|
||||
class typedef ;
|
||||
|
||||
FUNCTOR: define-simd-256 ( T -- )
|
||||
|
||||
T-TYPE IS ${T}
|
||||
|
||||
N [ 32 T-TYPE heap-size /i ]
|
||||
N [ 32 T heap-size /i ]
|
||||
|
||||
N/2 [ N 2 / ]
|
||||
A/2 IS ${T}-${N/2}
|
||||
A/2-boa IS ${A/2}-boa
|
||||
A/2-with IS ${A/2}-with
|
||||
|
||||
A DEFINES-CLASS ${T}-${N}
|
||||
A-boa DEFINES ${A}-boa
|
||||
A-with DEFINES ${A}-with
|
||||
>A DEFINES >${A}
|
||||
A{ DEFINES ${A}{
|
||||
|
||||
A-deref DEFINES-PRIVATE ${A}-deref
|
||||
|
||||
A-rep IS ${A/2}-rep
|
||||
A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
|
||||
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
||||
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
|
||||
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
|
||||
|
||||
WHERE
|
||||
|
@ -129,6 +269,8 @@ M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ;
|
|||
|
||||
M: A byte-length drop 32 ; inline
|
||||
|
||||
M: A element-type drop A-rep rep-component-type ;
|
||||
|
||||
SYNTAX: A{ \ } [ >A ] parse-literal ;
|
||||
|
||||
M: A pprint-delims drop \ A{ \ } ;
|
||||
|
@ -137,6 +279,16 @@ M: A >pprint-sequence ;
|
|||
|
||||
M: A pprint* pprint-object ;
|
||||
|
||||
: A-with ( x -- simd-array )
|
||||
[ A/2-with ] [ A/2-with ] bi [ underlying>> ] bi@
|
||||
\ A boa ; inline
|
||||
|
||||
: A-boa ( ... -- simd-array )
|
||||
[ A/2-boa ] N/2 ndip A/2-boa [ underlying>> ] bi@
|
||||
\ A boa ; inline
|
||||
|
||||
\ A-rep 2 boa-effect \ A-boa set-stack-effect
|
||||
|
||||
INSTANCE: A sequence
|
||||
|
||||
: A-vv->v-op ( v1 v2 quot -- v3 )
|
||||
|
@ -144,8 +296,15 @@ INSTANCE: A sequence
|
|||
[ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
|
||||
\ A boa ; inline
|
||||
|
||||
: A-v->n-op ( v1 combine-quot reduce-quot -- v2 )
|
||||
[ [ [ underlying1>> ] [ underlying2>> ] bi A-rep ] dip call A-rep ]
|
||||
dip call ; inline
|
||||
: A-v->v-op ( v1 combine-quot -- v2 )
|
||||
[ [ underlying1>> A-rep ] dip call ]
|
||||
[ [ underlying2>> A-rep ] dip call ] 2bi
|
||||
\ A boa ; inline
|
||||
|
||||
: A-v->n-op ( v1 combine-quot -- v2 )
|
||||
[ [ underlying1>> ] [ underlying2>> ] bi A-rep (simd-v+) A-rep ] dip call ; inline
|
||||
|
||||
\ A \ A-with \ A-rep \ A-vv->v-op \ A-v->v-op \ A-v->n-op simd-vector-words
|
||||
\ A \ A-rep define-simd-256-type
|
||||
|
||||
;FUNCTOR
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
IN: math.vectors.simd.intrinsics.tests
|
||||
USING: math.vectors.simd.intrinsics cpu.architecture tools.test ;
|
||||
|
||||
[ 16 ] [ uchar-16-rep rep-components ] unit-test
|
||||
[ 16 ] [ char-16-rep rep-components ] unit-test
|
||||
[ 8 ] [ ushort-8-rep rep-components ] unit-test
|
||||
[ 8 ] [ short-8-rep rep-components ] unit-test
|
||||
[ 4 ] [ uint-4-rep rep-components ] unit-test
|
||||
[ 4 ] [ int-4-rep rep-components ] unit-test
|
||||
[ 4 ] [ float-4-rep rep-components ] unit-test
|
||||
[ 2 ] [ double-2-rep rep-components ] unit-test
|
||||
|
||||
{ 4 1 } [ uint-4-rep (simd-boa) ] must-infer-as
|
||||
{ 4 1 } [ int-4-rep (simd-boa) ] must-infer-as
|
||||
{ 4 1 } [ float-4-rep (simd-boa) ] must-infer-as
|
||||
{ 2 1 } [ double-2-rep (simd-boa) ] must-infer-as
|
||||
|
||||
|
|
@ -1,18 +1,48 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel alien alien.data cpu.architecture libc ;
|
||||
USING: alien alien.c-types alien.data assocs combinators
|
||||
cpu.architecture fry generalizations kernel libc macros math
|
||||
sequences effects accessors namespaces lexer parser vocabs.parser
|
||||
words arrays math.vectors ;
|
||||
IN: math.vectors.simd.intrinsics
|
||||
|
||||
ERROR: bad-simd-call ;
|
||||
|
||||
: (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-vmin) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-vmax) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-vsqrt) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-sum) ( v1 rep -- v2 ) bad-simd-call ;
|
||||
<<
|
||||
|
||||
: simd-effect ( word -- effect )
|
||||
stack-effect [ in>> "rep" suffix ] [ out>> ] bi <effect> ;
|
||||
|
||||
SYMBOL: simd-ops
|
||||
|
||||
V{ } clone simd-ops set-global
|
||||
|
||||
SYNTAX: SIMD-OP:
|
||||
scan-word dup name>> "(simd-" ")" surround create-in
|
||||
[ nip [ bad-simd-call ] define ]
|
||||
[ [ simd-effect ] dip set-stack-effect ]
|
||||
[ 2array simd-ops get push ]
|
||||
2tri ;
|
||||
|
||||
>>
|
||||
|
||||
SIMD-OP: v+
|
||||
SIMD-OP: v-
|
||||
SIMD-OP: v+-
|
||||
SIMD-OP: vs+
|
||||
SIMD-OP: vs-
|
||||
SIMD-OP: vs*
|
||||
SIMD-OP: v*
|
||||
SIMD-OP: v/
|
||||
SIMD-OP: vmin
|
||||
SIMD-OP: vmax
|
||||
SIMD-OP: vsqrt
|
||||
SIMD-OP: sum
|
||||
SIMD-OP: vabs
|
||||
SIMD-OP: vbitand
|
||||
SIMD-OP: vbitor
|
||||
SIMD-OP: vbitxor
|
||||
|
||||
: (simd-broadcast) ( x rep -- v ) bad-simd-call ;
|
||||
: (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
|
||||
: (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ;
|
||||
|
@ -26,3 +56,61 @@ ERROR: bad-simd-call ;
|
|||
! Inefficient version for when intrinsics are missing
|
||||
[ swap <displaced-alien> swap ] dip rep-size memcpy ;
|
||||
|
||||
<<
|
||||
|
||||
: rep-components ( rep -- n )
|
||||
16 swap rep-component-type heap-size /i ; foldable
|
||||
|
||||
: rep-coercer ( rep -- quot )
|
||||
{
|
||||
{ [ dup int-vector-rep? ] [ [ >fixnum ] ] }
|
||||
{ [ dup float-vector-rep? ] [ [ >float ] ] }
|
||||
} cond nip ; foldable
|
||||
|
||||
: rep-coerce ( value rep -- value' )
|
||||
rep-coercer call( value -- value' ) ; inline
|
||||
|
||||
CONSTANT: rep-gather-words
|
||||
{
|
||||
{ 2 (simd-gather-2) }
|
||||
{ 4 (simd-gather-4) }
|
||||
}
|
||||
|
||||
: rep-gather-word ( rep -- word )
|
||||
rep-components rep-gather-words at ;
|
||||
|
||||
>>
|
||||
|
||||
MACRO: (simd-boa) ( rep -- quot )
|
||||
{
|
||||
[ rep-coercer ]
|
||||
[ rep-components ]
|
||||
[ ]
|
||||
[ rep-gather-word ]
|
||||
} cleave
|
||||
'[ _ _ napply _ _ execute ] ;
|
||||
|
||||
GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
|
||||
|
||||
M: vector-rep supported-simd-op?
|
||||
{
|
||||
{ \ (simd-v+) [ %add-vector-reps ] }
|
||||
{ \ (simd-vs+) [ %saturated-add-vector-reps ] }
|
||||
{ \ (simd-v+-) [ %add-sub-vector-reps ] }
|
||||
{ \ (simd-v-) [ %sub-vector-reps ] }
|
||||
{ \ (simd-vs-) [ %saturated-sub-vector-reps ] }
|
||||
{ \ (simd-v*) [ %mul-vector-reps ] }
|
||||
{ \ (simd-vs*) [ %saturated-mul-vector-reps ] }
|
||||
{ \ (simd-v/) [ %div-vector-reps ] }
|
||||
{ \ (simd-vmin) [ %min-vector-reps ] }
|
||||
{ \ (simd-vmax) [ %max-vector-reps ] }
|
||||
{ \ (simd-vsqrt) [ %sqrt-vector-reps ] }
|
||||
{ \ (simd-sum) [ %horizontal-add-vector-reps ] }
|
||||
{ \ (simd-vabs) [ %abs-vector-reps ] }
|
||||
{ \ (simd-vbitand) [ %and-vector-reps ] }
|
||||
{ \ (simd-vbitor) [ %or-vector-reps ] }
|
||||
{ \ (simd-vbitxor) [ %xor-vector-reps ] }
|
||||
{ \ (simd-broadcast) [ %broadcast-vector-reps ] }
|
||||
{ \ (simd-gather-2) [ %gather-vector-2-reps ] }
|
||||
{ \ (simd-gather-4) [ %gather-vector-4-reps ] }
|
||||
} case member? ;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
USING: help.markup help.syntax sequences math math.vectors
|
||||
kernel.private classes.tuple.private
|
||||
classes.tuple.private
|
||||
math.vectors.simd.intrinsics cpu.architecture ;
|
||||
IN: math.vectors.simd
|
||||
|
||||
|
@ -17,23 +17,49 @@ $nl
|
|||
"There should never be any reason to use " { $link "math.vectors.simd.intrinsics" } " directly, but they too have a straightforward, but lower-level, interface." ;
|
||||
|
||||
ARTICLE: "math.vectors.simd.support" "Supported SIMD instruction sets and operations"
|
||||
"At present, the SIMD support makes use of SSE2 and a few SSE3 instructions on x86 CPUs."
|
||||
"At present, the SIMD support makes use of a subset of SSE up to SSE4.1. The subset used depends on the current CPU type."
|
||||
$nl
|
||||
"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } ". If SSE3 is not available, software fallbacks are used for " { $link sum } " and related words, decreasing performance."
|
||||
"SSE1 only supports single-precision SIMD (" { $snippet "float-4" } " and " { $snippet "float-8" } ")."
|
||||
$nl
|
||||
"On PowerPC, or older x86 chips without SSE2, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance."
|
||||
"SSE2 introduces double-precision SIMD (" { $snippet "double-2" } " and " { $snippet "double-4" } ") and integer SIMD (all types). Integer SIMD in missing a few features, in particular the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "."
|
||||
$nl
|
||||
"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "."
|
||||
$nl
|
||||
"SSSE3 introduces " { $link vabs } " for " { $snippet "char-16" } ", " { $snippet "short-8" } " and " { $snippet "int-4" } "."
|
||||
$nl
|
||||
"SSE4.1 introduces " { $link vmin } " and " { $link vmax } " for all remaining integer types."
|
||||
$nl
|
||||
"On PowerPC, or older x86 chips without SSE, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance."
|
||||
$nl
|
||||
"The primities in the " { $vocab-link "math.vectors.simd.intrinsics" } " vocabulary do not have software fallbacks, but they should not be called directly in any case." ;
|
||||
|
||||
ARTICLE: "math.vectors.simd.types" "SIMD vector types"
|
||||
"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type such as " { $snippet "float" } " or " { $snippet "double" } ", and " { $snippet "count" } " is a vector dimension, such as 2, 4, or 8."
|
||||
"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type and " { $snippet "count" } " is a vector dimension."
|
||||
$nl
|
||||
"The following vector types are defined:"
|
||||
{ $subsection float-4 }
|
||||
{ $subsection double-2 }
|
||||
{ $subsection float-8 }
|
||||
{ $subsection double-4 }
|
||||
"For each vector type, several words are defined:"
|
||||
"To use a SIMD vector type, a parsing word is used to generate the relevant code and bring it into the vocabulary search path; this is the same idea as with " { $link "specialized-arrays" } ":"
|
||||
{ $subsection POSTPONE: SIMD: }
|
||||
"The following vector types are supported:"
|
||||
{ $code
|
||||
"char-16"
|
||||
"uchar-16"
|
||||
"char-32"
|
||||
"uchar-32"
|
||||
"short-8"
|
||||
"ushort-8"
|
||||
"short-16"
|
||||
"ushort-16"
|
||||
"int-4"
|
||||
"uint-4"
|
||||
"int-8"
|
||||
"uint-8"
|
||||
"float-4"
|
||||
"float-8"
|
||||
"double-2"
|
||||
"double-4"
|
||||
} ;
|
||||
|
||||
ARTICLE: "math.vectors.simd.words" "SIMD vector words"
|
||||
"For each SIMD vector type, several words are defined:"
|
||||
{ $table
|
||||
{ "Word" "Stack effect" "Description" }
|
||||
{ { $snippet "type-with" } { $snippet "( x -- simd-array )" } "creates a new instance where all components are set to a single scalar" }
|
||||
|
@ -41,24 +67,6 @@ $nl
|
|||
{ { $snippet ">type" } { $snippet "( seq -- simd-array )" } "creates a new instance initialized with the elements of an existing sequence, which must have the correct length" }
|
||||
{ { $snippet "type{" } { $snippet "type{ elements... }" } "parsing word defining literal syntax for an SIMD vector; the correct number of elements must be given" }
|
||||
}
|
||||
"The " { $link float-4 } " and " { $link double-2 } " types correspond to 128-bit vector registers. The " { $link float-8 } " and " { $link double-4 } " types are not directly supported in hardware, and instead unbox to a pair of 128-bit vector registers."
|
||||
$nl
|
||||
"Operations on " { $link float-4 } " instances:"
|
||||
{ $subsection float-4-with }
|
||||
{ $subsection float-4-boa }
|
||||
{ $subsection POSTPONE: float-4{ }
|
||||
"Operations on " { $link double-2 } " instances:"
|
||||
{ $subsection double-2-with }
|
||||
{ $subsection double-2-boa }
|
||||
{ $subsection POSTPONE: double-2{ }
|
||||
"Operations on " { $link float-8 } " instances:"
|
||||
{ $subsection float-8-with }
|
||||
{ $subsection float-8-boa }
|
||||
{ $subsection POSTPONE: float-8{ }
|
||||
"Operations on " { $link double-4 } " instances:"
|
||||
{ $subsection double-4-with }
|
||||
{ $subsection double-4-boa }
|
||||
{ $subsection POSTPONE: double-4{ }
|
||||
"To actually perform vector arithmetic on SIMD vectors, use " { $link "math-vectors" } " words."
|
||||
{ $see-also "c-types-specs" } ;
|
||||
|
||||
|
@ -84,6 +92,8 @@ SYMBOLS: x y ;
|
|||
{ $code
|
||||
"""USING: compiler.tree.debugger kernel.private
|
||||
math.vectors math.vectors.simd ;
|
||||
SIMD: float-4
|
||||
IN: simd-demo
|
||||
|
||||
: interpolate ( v a b -- w )
|
||||
{ float-4 float-4 float-4 } declare
|
||||
|
@ -96,6 +106,8 @@ $nl
|
|||
{ $code
|
||||
"""USING: compiler.tree.debugger hints
|
||||
math.vectors math.vectors.simd ;
|
||||
SIMD: float-4
|
||||
IN: simd-demo
|
||||
|
||||
: interpolate ( v a b -- w )
|
||||
[ v* ] [ [ 1.0 ] dip n-v v* ] bi-curry* bi v+ ;
|
||||
|
@ -110,6 +122,7 @@ $nl
|
|||
"In the " { $snippet "interpolate" } " word, there is still a call to the " { $link <tuple-boa> } " primitive, because the return value at the end is being boxed on the heap. In the next example, no memory allocation occurs at all because the SIMD vectors are stored inside a struct class (see " { $link "classes.struct" } "); also note the use of inlining:"
|
||||
{ $code
|
||||
"""USING: compiler.tree.debugger math.vectors math.vectors.simd ;
|
||||
SIMD: float-4
|
||||
IN: simd-demo
|
||||
|
||||
STRUCT: actor
|
||||
|
@ -150,106 +163,37 @@ ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives"
|
|||
}
|
||||
"The compiler converts " { $link "math-vectors" } " into SIMD primitives automatically in cases where it is safe; this means that the input types are known to be SIMD vectors, and the CPU supports SIMD."
|
||||
$nl
|
||||
"It is best to avoid calling these primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "."
|
||||
{ $subsection (simd-v+) }
|
||||
{ $subsection (simd-v-) }
|
||||
{ $subsection (simd-v/) }
|
||||
{ $subsection (simd-vmin) }
|
||||
{ $subsection (simd-vmax) }
|
||||
{ $subsection (simd-vsqrt) }
|
||||
{ $subsection (simd-sum) }
|
||||
{ $subsection (simd-broadcast) }
|
||||
{ $subsection (simd-gather-2) }
|
||||
{ $subsection (simd-gather-4) }
|
||||
"It is best to avoid calling SIMD primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "."
|
||||
$nl
|
||||
"There are two primitives which are used to implement accessing SIMD vector fields of " { $link "classes.struct" } ":"
|
||||
{ $subsection alien-vector }
|
||||
{ $subsection set-alien-vector }
|
||||
"For the most part, the above primitives correspond directly to vector arithmetic words. They take a representation parameter, which is one of the singleton members of the " { $link vector-rep } " union in the " { $vocab-link "cpu.architecture" } " vocabulary." ;
|
||||
|
||||
ARTICLE: "math.vectors.simd.alien" "SIMD data in struct classes"
|
||||
"Struct classes may contain fields which store SIMD data; use one of the following C type names:"
|
||||
{ $code
|
||||
"""float-4
|
||||
double-2
|
||||
float-8
|
||||
double-4""" }
|
||||
"Passing SIMD data as function parameters is not yet supported." ;
|
||||
"Struct classes may contain fields which store SIMD data; for each SIMD vector type listed in " { $snippet "math.vectors.simd.types" } " there is a C type with the same name."
|
||||
$nl
|
||||
"Only SIMD struct fields are allowed at the moment; passing SIMD data as function parameters is not yet supported." ;
|
||||
|
||||
ARTICLE: "math.vectors.simd.accuracy" "Numerical accuracy of SIMD primitives"
|
||||
"No guarantees are made that " { $vocab-link "math.vectors.simd" } " words will give identical results on different SSE versions, or between the hardware intrinsics and the software fallbacks."
|
||||
$nl
|
||||
"In particular, horizontal operations on " { $snippet "float-4" } " and " { $snippet "float-8" } " are affected by this. They are computed with lower precision in intrinsics than the software fallback. Horizontal opeartions include anything involving adding together the components of a vector, such as " { $link sum } " or " { $link normalize } "." ;
|
||||
|
||||
ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
|
||||
"The " { $vocab-link "math.vectors.simd" } " vocabulary extends the " { $vocab-link "math.vectors" } " vocabulary to support efficient vector arithmetic on small, fixed-size vectors."
|
||||
{ $subsection "math.vectors.simd.intro" }
|
||||
{ $subsection "math.vectors.simd.types" }
|
||||
{ $subsection "math.vectors.simd.words" }
|
||||
{ $subsection "math.vectors.simd.support" }
|
||||
{ $subsection "math.vectors.simd.accuracy" }
|
||||
{ $subsection "math.vectors.simd.efficiency" }
|
||||
{ $subsection "math.vectors.simd.alien" }
|
||||
{ $subsection "math.vectors.simd.intrinsics" } ;
|
||||
|
||||
! ! ! float-4
|
||||
|
||||
HELP: float-4
|
||||
{ $class-description "A sequence of four single-precision floating point values. New instances can be created with " { $link float-4-with } " or " { $link float-4-boa } "." } ;
|
||||
|
||||
HELP: float-4-with
|
||||
{ $values { "x" float } { "simd-array" float-4 } }
|
||||
{ $description "Creates a new vector with all four components equal to a scalar." } ;
|
||||
|
||||
HELP: float-4-boa
|
||||
{ $values { "a" float } { "b" float } { "c" float } { "d" float } { "simd-array" float-4 } }
|
||||
{ $description "Creates a new vector from four scalar components." } ;
|
||||
|
||||
HELP: float-4{
|
||||
{ $syntax "float-4{ a b c d }" }
|
||||
{ $description "Literal syntax for a " { $link float-4 } "." } ;
|
||||
|
||||
! ! ! double-2
|
||||
|
||||
HELP: double-2
|
||||
{ $class-description "A sequence of two double-precision floating point values. New instances can be created with " { $link double-2-with } " or " { $link double-2-boa } "." } ;
|
||||
|
||||
HELP: double-2-with
|
||||
{ $values { "x" float } { "simd-array" double-2 } }
|
||||
{ $description "Creates a new vector with both components equal to a scalar." } ;
|
||||
|
||||
HELP: double-2-boa
|
||||
{ $values { "a" float } { "b" float } { "simd-array" double-2 } }
|
||||
{ $description "Creates a new vector from two scalar components." } ;
|
||||
|
||||
HELP: double-2{
|
||||
{ $syntax "double-2{ a b }" }
|
||||
{ $description "Literal syntax for a " { $link double-2 } "." } ;
|
||||
|
||||
! ! ! float-8
|
||||
|
||||
HELP: float-8
|
||||
{ $class-description "A sequence of eight single-precision floating point values. New instances can be created with " { $link float-8-with } " or " { $link float-8-boa } "." } ;
|
||||
|
||||
HELP: float-8-with
|
||||
{ $values { "x" float } { "simd-array" float-8 } }
|
||||
{ $description "Creates a new vector with all eight components equal to a scalar." } ;
|
||||
|
||||
HELP: float-8-boa
|
||||
{ $values { "a" float } { "b" float } { "c" float } { "d" float } { "e" float } { "f" float } { "g" float } { "h" float } { "simd-array" float-8 } }
|
||||
{ $description "Creates a new vector from eight scalar components." } ;
|
||||
|
||||
HELP: float-8{
|
||||
{ $syntax "float-8{ a b c d e f g h }" }
|
||||
{ $description "Literal syntax for a " { $link float-8 } "." } ;
|
||||
|
||||
! ! ! double-4
|
||||
|
||||
HELP: double-4
|
||||
{ $class-description "A sequence of four double-precision floating point values. New instances can be created with " { $link double-4-with } " or " { $link double-4-boa } "." } ;
|
||||
|
||||
HELP: double-4-with
|
||||
{ $values { "x" float } { "simd-array" double-4 } }
|
||||
{ $description "Creates a new vector with all four components equal to a scalar." } ;
|
||||
|
||||
HELP: double-4-boa
|
||||
{ $values { "a" float } { "b" float } { "c" float } { "d" float } { "simd-array" double-4 } }
|
||||
{ $description "Creates a new vector from four scalar components." } ;
|
||||
|
||||
HELP: double-4{
|
||||
{ $syntax "double-4{ a b c d }" }
|
||||
{ $description "Literal syntax for a " { $link double-4 } "." } ;
|
||||
HELP: SIMD:
|
||||
{ $syntax "SIMD: type-length" }
|
||||
{ $values { "type" "a scalar C type" } { "length" "a vector dimension" } }
|
||||
{ $description "Brings a SIMD array for holding " { $snippet "length" } " values of " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
|
||||
|
||||
ABOUT: "math.vectors.simd"
|
||||
|
|
|
@ -1,8 +1,30 @@
|
|||
USING: accessors arrays classes compiler compiler.tree.debugger
|
||||
effects fry io kernel kernel.private math math.functions
|
||||
math.private math.vectors math.vectors.simd
|
||||
math.vectors.simd.private prettyprint random sequences system
|
||||
tools.test vocabs assocs compiler.cfg.debugger words
|
||||
locals math.vectors.specialization combinators cpu.architecture
|
||||
math.vectors.simd.intrinsics namespaces byte-arrays alien
|
||||
specialized-arrays classes.struct ;
|
||||
FROM: alien.c-types => c-type-boxed-class ;
|
||||
SPECIALIZED-ARRAY: float
|
||||
SIMD: char-16
|
||||
SIMD: uchar-16
|
||||
SIMD: char-32
|
||||
SIMD: uchar-32
|
||||
SIMD: short-8
|
||||
SIMD: ushort-8
|
||||
SIMD: short-16
|
||||
SIMD: ushort-16
|
||||
SIMD: int-4
|
||||
SIMD: uint-4
|
||||
SIMD: int-8
|
||||
SIMD: uint-8
|
||||
SIMD: float-4
|
||||
SIMD: float-8
|
||||
SIMD: double-2
|
||||
SIMD: double-4
|
||||
IN: math.vectors.simd.tests
|
||||
USING: math math.vectors.simd math.vectors.simd.private
|
||||
math.vectors math.functions math.private kernel.private compiler
|
||||
sequences tools.test compiler.tree.debugger accessors kernel
|
||||
system ;
|
||||
|
||||
[ float-4{ 0 0 0 0 } ] [ float-4 new ] unit-test
|
||||
|
||||
|
@ -12,344 +34,6 @@ system ;
|
|||
|
||||
[ V{ float } ] [ [ { float-4 } declare norm ] final-classes ] unit-test
|
||||
|
||||
[ float-4{ 12 12 12 12 } ] [
|
||||
12 [ float-4-with ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ float-4{ 1 2 3 4 } ] [
|
||||
1 2 3 4 [ float-4-boa ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ float-4{ 11 22 33 44 } ] [
|
||||
float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
|
||||
[ { float-4 float-4 } declare v+ ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ float-4{ -9 -18 -27 -36 } ] [
|
||||
float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
|
||||
[ { float-4 float-4 } declare v- ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ float-4{ 10 40 90 160 } ] [
|
||||
float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
|
||||
[ { float-4 float-4 } declare v* ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ float-4{ 10 100 1000 10000 } ] [
|
||||
float-4{ 100 2000 30000 400000 } float-4{ 10 20 30 40 }
|
||||
[ { float-4 float-4 } declare v/ ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ float-4{ -10 -20 -30 -40 } ] [
|
||||
float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 }
|
||||
[ { float-4 float-4 } declare vmin ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ float-4{ 10 20 30 40 } ] [
|
||||
float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 }
|
||||
[ { float-4 float-4 } declare vmax ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 10.0 ] [
|
||||
float-4{ 1 2 3 4 }
|
||||
[ { float-4 } declare sum ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 13.0 ] [
|
||||
float-4{ 1 2 3 4 }
|
||||
[ { float-4 } declare sum 3.0 + ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 8.0 ] [
|
||||
float-4{ 1 2 3 4 } float-4{ 2 0 2 0 }
|
||||
[ { float-4 float-4 } declare v. ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ float-4{ 5 10 15 20 } ] [
|
||||
5.0 float-4{ 1 2 3 4 }
|
||||
[ { float float-4 } declare n*v ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ float-4{ 5 10 15 20 } ] [
|
||||
float-4{ 1 2 3 4 } 5.0
|
||||
[ { float float-4 } declare v*n ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ float-4{ 10 5 2 5 } ] [
|
||||
10.0 float-4{ 1 2 5 2 }
|
||||
[ { float float-4 } declare n/v ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ float-4{ 0.5 1 1.5 2 } ] [
|
||||
float-4{ 1 2 3 4 } 2
|
||||
[ { float float-4 } declare v/n ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ float-4{ 1 0 0 0 } ] [
|
||||
float-4{ 10 0 0 0 }
|
||||
[ { float-4 } declare normalize ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 30.0 ] [
|
||||
float-4{ 1 2 3 4 }
|
||||
[ { float-4 } declare norm-sq ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ t ] [
|
||||
float-4{ 1 0 0 0 }
|
||||
float-4{ 0 1 0 0 }
|
||||
[ { float-4 float-4 } declare distance ] compile-call
|
||||
2 sqrt 1.0e-6 ~
|
||||
] unit-test
|
||||
|
||||
[ double-2{ 12 12 } ] [
|
||||
12 [ double-2-with ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-2{ 1 2 } ] [
|
||||
1 2 [ double-2-boa ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-2{ 11 22 } ] [
|
||||
double-2{ 1 2 } double-2{ 10 20 }
|
||||
[ { double-2 double-2 } declare v+ ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-2{ -9 -18 } ] [
|
||||
double-2{ 1 2 } double-2{ 10 20 }
|
||||
[ { double-2 double-2 } declare v- ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-2{ 10 40 } ] [
|
||||
double-2{ 1 2 } double-2{ 10 20 }
|
||||
[ { double-2 double-2 } declare v* ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-2{ 10 100 } ] [
|
||||
double-2{ 100 2000 } double-2{ 10 20 }
|
||||
[ { double-2 double-2 } declare v/ ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-2{ -10 -20 } ] [
|
||||
double-2{ -10 20 } double-2{ 10 -20 }
|
||||
[ { double-2 double-2 } declare vmin ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-2{ 10 20 } ] [
|
||||
double-2{ -10 20 } double-2{ 10 -20 }
|
||||
[ { double-2 double-2 } declare vmax ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 3.0 ] [
|
||||
double-2{ 1 2 }
|
||||
[ { double-2 } declare sum ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 7.0 ] [
|
||||
double-2{ 1 2 }
|
||||
[ { double-2 } declare sum 4.0 + ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 16.0 ] [
|
||||
double-2{ 1 2 } double-2{ 2 7 }
|
||||
[ { double-2 double-2 } declare v. ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-2{ 5 10 } ] [
|
||||
5.0 double-2{ 1 2 }
|
||||
[ { float double-2 } declare n*v ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-2{ 5 10 } ] [
|
||||
double-2{ 1 2 } 5.0
|
||||
[ { float double-2 } declare v*n ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-2{ 10 5 } ] [
|
||||
10.0 double-2{ 1 2 }
|
||||
[ { float double-2 } declare n/v ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-2{ 0.5 1 } ] [
|
||||
double-2{ 1 2 } 2
|
||||
[ { float double-2 } declare v/n ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-2{ 0 0 } ] [ double-2 new ] unit-test
|
||||
|
||||
[ double-2{ 1 0 } ] [
|
||||
double-2{ 10 0 }
|
||||
[ { double-2 } declare normalize ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 5.0 ] [
|
||||
double-2{ 1 2 }
|
||||
[ { double-2 } declare norm-sq ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ t ] [
|
||||
double-2{ 1 0 }
|
||||
double-2{ 0 1 }
|
||||
[ { double-2 double-2 } declare distance ] compile-call
|
||||
2 sqrt 1.0e-6 ~
|
||||
] unit-test
|
||||
|
||||
[ double-4{ 0 0 0 0 } ] [ double-4 new ] unit-test
|
||||
|
||||
[ double-4{ 1 2 3 4 } ] [
|
||||
1 2 3 4 double-4-boa
|
||||
] unit-test
|
||||
|
||||
[ double-4{ 1 1 1 1 } ] [
|
||||
1 double-4-with
|
||||
] unit-test
|
||||
|
||||
[ double-4{ 0 1 2 3 } ] [
|
||||
1 double-4-with [ * ] map-index
|
||||
] unit-test
|
||||
|
||||
[ V{ float } ] [ [ { double-4 } declare norm-sq ] final-classes ] unit-test
|
||||
|
||||
[ V{ float } ] [ [ { double-4 } declare norm ] final-classes ] unit-test
|
||||
|
||||
[ double-4{ 12 12 12 12 } ] [
|
||||
12 [ double-4-with ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-4{ 1 2 3 4 } ] [
|
||||
1 2 3 4 [ double-4-boa ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-4{ 11 22 33 44 } ] [
|
||||
double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
|
||||
[ { double-4 double-4 } declare v+ ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-4{ -9 -18 -27 -36 } ] [
|
||||
double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
|
||||
[ { double-4 double-4 } declare v- ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-4{ 10 40 90 160 } ] [
|
||||
double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
|
||||
[ { double-4 double-4 } declare v* ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-4{ 10 100 1000 10000 } ] [
|
||||
double-4{ 100 2000 30000 400000 } double-4{ 10 20 30 40 }
|
||||
[ { double-4 double-4 } declare v/ ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-4{ -10 -20 -30 -40 } ] [
|
||||
double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 }
|
||||
[ { double-4 double-4 } declare vmin ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-4{ 10 20 30 40 } ] [
|
||||
double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 }
|
||||
[ { double-4 double-4 } declare vmax ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 10.0 ] [
|
||||
double-4{ 1 2 3 4 }
|
||||
[ { double-4 } declare sum ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 13.0 ] [
|
||||
double-4{ 1 2 3 4 }
|
||||
[ { double-4 } declare sum 3.0 + ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 8.0 ] [
|
||||
double-4{ 1 2 3 4 } double-4{ 2 0 2 0 }
|
||||
[ { double-4 double-4 } declare v. ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-4{ 5 10 15 20 } ] [
|
||||
5.0 double-4{ 1 2 3 4 }
|
||||
[ { float double-4 } declare n*v ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-4{ 5 10 15 20 } ] [
|
||||
double-4{ 1 2 3 4 } 5.0
|
||||
[ { float double-4 } declare v*n ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-4{ 10 5 2 5 } ] [
|
||||
10.0 double-4{ 1 2 5 2 }
|
||||
[ { float double-4 } declare n/v ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-4{ 0.5 1 1.5 2 } ] [
|
||||
double-4{ 1 2 3 4 } 2
|
||||
[ { float double-4 } declare v/n ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ double-4{ 1 0 0 0 } ] [
|
||||
double-4{ 10 0 0 0 }
|
||||
[ { double-4 } declare normalize ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 30.0 ] [
|
||||
double-4{ 1 2 3 4 }
|
||||
[ { double-4 } declare norm-sq ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ t ] [
|
||||
double-4{ 1 0 0 0 }
|
||||
double-4{ 0 1 0 0 }
|
||||
[ { double-4 double-4 } declare distance ] compile-call
|
||||
2 sqrt 1.0e-6 ~
|
||||
] unit-test
|
||||
|
||||
[ float-8{ 0 0 0 0 0 0 0 0 } ] [ float-8 new ] unit-test
|
||||
|
||||
[ float-8{ 0 0 0 0 0 0 0 0 } ] [ [ float-8 new ] compile-call ] unit-test
|
||||
|
||||
[ float-8{ 1 1 1 1 1 1 1 1 } ] [ 1 float-8-with ] unit-test
|
||||
|
||||
[ float-8{ 1 1 1 1 1 1 1 1 } ] [ [ 1 float-8-with ] compile-call ] unit-test
|
||||
|
||||
[ float-8{ 1 2 3 4 5 6 7 8 } ] [ 1 2 3 4 5 6 7 8 float-8-boa ] unit-test
|
||||
|
||||
[ float-8{ 1 2 3 4 5 6 7 8 } ] [ [ 1 2 3 4 5 6 7 8 float-8-boa ] compile-call ] unit-test
|
||||
|
||||
[ float-8{ 3 6 9 12 15 18 21 24 } ] [
|
||||
float-8{ 1 2 3 4 5 6 7 8 }
|
||||
float-8{ 2 4 6 8 10 12 14 16 }
|
||||
[ { float-8 float-8 } declare v+ ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
|
||||
float-8{ 1 2 3 4 5 6 7 8 }
|
||||
float-8{ 2 4 6 8 10 12 14 16 }
|
||||
[ { float-8 float-8 } declare v- ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
|
||||
-0.5
|
||||
float-8{ 2 4 6 8 10 12 14 16 }
|
||||
[ { float float-8 } declare n*v ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
|
||||
float-8{ 2 4 6 8 10 12 14 16 }
|
||||
-0.5
|
||||
[ { float-8 float } declare v*n ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ float-8{ 256 128 64 32 16 8 4 2 } ] [
|
||||
256.0
|
||||
float-8{ 1 2 4 8 16 32 64 128 }
|
||||
[ { float float-8 } declare n/v ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
|
||||
float-8{ 2 4 6 8 10 12 14 16 }
|
||||
-2.0
|
||||
[ { float-8 float } declare v/n ] compile-call
|
||||
] unit-test
|
||||
|
||||
! Test puns; only on x86
|
||||
cpu x86? [
|
||||
[ double-2{ 4 1024 } ] [
|
||||
|
@ -362,3 +46,201 @@ cpu x86? [
|
|||
[ { double-2 double-2 } declare v+ underlying>> 3.0 float* ] compile-call
|
||||
] unit-test
|
||||
] when
|
||||
|
||||
! Fuzz testing
|
||||
CONSTANT: simd-classes
|
||||
{
|
||||
char-16
|
||||
uchar-16
|
||||
char-32
|
||||
uchar-32
|
||||
short-8
|
||||
ushort-8
|
||||
short-16
|
||||
ushort-16
|
||||
int-4
|
||||
uint-4
|
||||
int-8
|
||||
uint-8
|
||||
float-4
|
||||
float-8
|
||||
double-2
|
||||
double-4
|
||||
}
|
||||
|
||||
: with-ctors ( -- seq )
|
||||
simd-classes [ [ name>> "-with" append ] [ vocabulary>> ] bi lookup ] map ;
|
||||
|
||||
: boa-ctors ( -- seq )
|
||||
simd-classes [ [ name>> "-boa" append ] [ vocabulary>> ] bi lookup ] map ;
|
||||
|
||||
: check-optimizer ( seq inputs quot eq-quot -- )
|
||||
'[
|
||||
@
|
||||
[ "print-mr" get [ nip test-mr mr. ] [ 2drop ] if ]
|
||||
[ [ call ] dip call ]
|
||||
[ [ call ] dip compile-call ] 2tri @ not
|
||||
] filter ; inline
|
||||
|
||||
"== Checking -new constructors" print
|
||||
|
||||
[ { } ] [
|
||||
simd-classes [ [ [ ] ] dip '[ _ new ] ] [ = ] check-optimizer
|
||||
] unit-test
|
||||
|
||||
[ { } ] [
|
||||
simd-classes [ '[ _ new ] compile-call [ zero? ] all? not ] filter
|
||||
] unit-test
|
||||
|
||||
"== Checking -with constructors" print
|
||||
|
||||
[ { } ] [
|
||||
with-ctors [
|
||||
[ 1000 random '[ _ ] ] dip '[ { fixnum } declare _ execute ]
|
||||
] [ = ] check-optimizer
|
||||
] unit-test
|
||||
|
||||
"== Checking -boa constructors" print
|
||||
|
||||
[ { } ] [
|
||||
boa-ctors [
|
||||
dup stack-effect in>> length
|
||||
[ nip [ 1000 random ] [ ] replicate-as ]
|
||||
[ fixnum <array> swap '[ _ declare _ execute ] ]
|
||||
2bi
|
||||
] [ = ] check-optimizer
|
||||
] unit-test
|
||||
|
||||
"== Checking vector operations" print
|
||||
|
||||
: random-vector ( class -- vec )
|
||||
new [ drop 1000 random ] map ;
|
||||
|
||||
:: check-vector-op ( word inputs class elt-class -- inputs quot )
|
||||
inputs [
|
||||
[
|
||||
{
|
||||
{ +vector+ [ class random-vector ] }
|
||||
{ +scalar+ [ 1000 random elt-class float = [ >float ] when ] }
|
||||
} case
|
||||
] [ ] map-as
|
||||
] [
|
||||
[
|
||||
{
|
||||
{ +vector+ [ class ] }
|
||||
{ +scalar+ [ elt-class ] }
|
||||
} case
|
||||
] map
|
||||
] bi
|
||||
word '[ _ declare _ execute ] ;
|
||||
|
||||
: remove-float-words ( alist -- alist' )
|
||||
[ drop { vsqrt n/v v/n v/ normalize } member? not ] assoc-filter ;
|
||||
|
||||
: ops-to-check ( elt-class -- alist )
|
||||
[ vector-words >alist ] dip
|
||||
float = [ remove-float-words ] unless ;
|
||||
|
||||
: check-vector-ops ( class elt-class compare-quot -- )
|
||||
[
|
||||
[ nip ops-to-check ] 2keep
|
||||
'[ first2 inputs _ _ check-vector-op ]
|
||||
] dip check-optimizer ; inline
|
||||
|
||||
: approx= ( x y -- ? )
|
||||
{
|
||||
{ [ 2dup [ float? ] both? ] [ -1.e8 ~ ] }
|
||||
{ [ 2dup [ sequence? ] both? ] [
|
||||
[
|
||||
{
|
||||
{ [ 2dup [ fp-nan? ] both? ] [ 2drop t ] }
|
||||
{ [ 2dup [ fp-nan? ] either? not ] [ -1.e8 ~ ] }
|
||||
} cond
|
||||
] 2all?
|
||||
] }
|
||||
} cond ;
|
||||
|
||||
: simd-classes&reps ( -- alist )
|
||||
simd-classes [
|
||||
{
|
||||
{ [ dup name>> "float" head? ] [ float [ approx= ] ] }
|
||||
{ [ dup name>> "double" tail? ] [ float [ = ] ] }
|
||||
[ fixnum [ = ] ]
|
||||
} cond 3array
|
||||
] map ;
|
||||
|
||||
simd-classes&reps [
|
||||
[ [ { } ] ] dip first3 '[ _ _ _ check-vector-ops ] unit-test
|
||||
] each
|
||||
|
||||
! Other regressions
|
||||
[ 8000000 ] [
|
||||
int-8{ 1000 1000 1000 1000 1000 1000 1000 1000 }
|
||||
[ { int-8 } declare dup [ * ] [ + ] 2map-reduce ] compile-call
|
||||
] unit-test
|
||||
|
||||
! Vector alien intrinsics
|
||||
[ float-4{ 1 2 3 4 } ] [
|
||||
[
|
||||
float-4{ 1 2 3 4 }
|
||||
underlying>> 0 float-4-rep alien-vector
|
||||
] compile-call float-4 boa
|
||||
] unit-test
|
||||
|
||||
[ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [
|
||||
16 [ 1 ] B{ } replicate-as 16 <byte-array>
|
||||
[
|
||||
0 [
|
||||
{ byte-array c-ptr fixnum } declare
|
||||
float-4-rep set-alien-vector
|
||||
] compile-call
|
||||
] keep
|
||||
] unit-test
|
||||
|
||||
[ float-array{ 1 2 3 4 } ] [
|
||||
[
|
||||
float-array{ 1 2 3 4 } underlying>>
|
||||
float-array{ 4 3 2 1 } clone
|
||||
[ underlying>> 0 float-4-rep set-alien-vector ] keep
|
||||
] compile-call
|
||||
] unit-test
|
||||
|
||||
STRUCT: simd-struct
|
||||
{ x float-4 }
|
||||
{ y double-2 }
|
||||
{ z double-4 }
|
||||
{ w float-8 } ;
|
||||
|
||||
[ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
|
||||
|
||||
[
|
||||
float-4{ 1 2 3 4 }
|
||||
double-2{ 2 1 }
|
||||
double-4{ 4 3 2 1 }
|
||||
float-8{ 1 2 3 4 5 6 7 8 }
|
||||
] [
|
||||
simd-struct <struct>
|
||||
float-4{ 1 2 3 4 } >>x
|
||||
double-2{ 2 1 } >>y
|
||||
double-4{ 4 3 2 1 } >>z
|
||||
float-8{ 1 2 3 4 5 6 7 8 } >>w
|
||||
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
|
||||
] unit-test
|
||||
|
||||
[
|
||||
float-4{ 1 2 3 4 }
|
||||
double-2{ 2 1 }
|
||||
double-4{ 4 3 2 1 }
|
||||
float-8{ 1 2 3 4 5 6 7 8 }
|
||||
] [
|
||||
[
|
||||
simd-struct <struct>
|
||||
float-4{ 1 2 3 4 } >>x
|
||||
double-2{ 2 1 } >>y
|
||||
double-4{ 4 3 2 1 } >>z
|
||||
float-8{ 1 2 3 4 5 6 7 8 } >>w
|
||||
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
|
||||
] compile-call
|
||||
] unit-test
|
||||
|
||||
[ ] [ char-16 new 1array stack. ] unit-test
|
||||
|
|
|
@ -1,185 +1,32 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors alien.c-types byte-arrays cpu.architecture
|
||||
kernel math math.functions math.vectors
|
||||
math.vectors.simd.functor math.vectors.simd.intrinsics
|
||||
math.vectors.specialization parser prettyprint.custom sequences
|
||||
sequences.private locals assocs words fry ;
|
||||
FROM: alien.c-types => float ;
|
||||
QUALIFIED-WITH: math m
|
||||
USING: alien.c-types combinators fry kernel lexer math math.parser
|
||||
math.vectors.simd.functor sequences splitting vocabs.generated
|
||||
vocabs.loader vocabs.parser words ;
|
||||
IN: math.vectors.simd
|
||||
|
||||
<<
|
||||
|
||||
DEFER: float-4
|
||||
DEFER: double-2
|
||||
DEFER: float-8
|
||||
DEFER: double-4
|
||||
|
||||
"double" define-simd-128
|
||||
"float" define-simd-128
|
||||
"double" define-simd-256
|
||||
"float" define-simd-256
|
||||
|
||||
>>
|
||||
|
||||
: float-4-with ( x -- simd-array )
|
||||
[ 4 ] dip >float '[ _ ] \ float-4 new replicate-as ;
|
||||
|
||||
: float-4-boa ( a b c d -- simd-array )
|
||||
\ float-4 new 4sequence ;
|
||||
|
||||
: double-2-with ( x -- simd-array )
|
||||
[ 2 ] dip >float '[ _ ] \ double-2 new replicate-as ;
|
||||
|
||||
: double-2-boa ( a b -- simd-array )
|
||||
\ double-2 new 2sequence ;
|
||||
|
||||
! More efficient expansions for the above, used when SIMD is
|
||||
! actually available.
|
||||
|
||||
<<
|
||||
|
||||
\ float-4-with [
|
||||
drop
|
||||
\ (simd-broadcast) "intrinsic" word-prop [
|
||||
[ >float float-4-rep (simd-broadcast) \ float-4 boa ]
|
||||
] [ \ float-4-with def>> ] if
|
||||
] "custom-inlining" set-word-prop
|
||||
|
||||
\ float-4-boa [
|
||||
drop
|
||||
\ (simd-gather-4) "intrinsic" word-prop [
|
||||
[| a b c d |
|
||||
a >float b >float c >float d >float
|
||||
float-4-rep (simd-gather-4) \ float-4 boa
|
||||
]
|
||||
] [ \ float-4-boa def>> ] if
|
||||
] "custom-inlining" set-word-prop
|
||||
|
||||
\ double-2-with [
|
||||
drop
|
||||
\ (simd-broadcast) "intrinsic" word-prop [
|
||||
[ >float double-2-rep (simd-broadcast) \ double-2 boa ]
|
||||
] [ \ double-2-with def>> ] if
|
||||
] "custom-inlining" set-word-prop
|
||||
|
||||
\ double-2-boa [
|
||||
drop
|
||||
\ (simd-gather-4) "intrinsic" word-prop [
|
||||
[ [ >float ] bi@ double-2-rep (simd-gather-2) \ double-2 boa ]
|
||||
] [ \ double-2-boa def>> ] if
|
||||
] "custom-inlining" set-word-prop
|
||||
|
||||
>>
|
||||
|
||||
: float-8-with ( x -- simd-array )
|
||||
[ float-4-with ] [ float-4-with ] bi [ underlying>> ] bi@
|
||||
\ float-8 boa ; inline
|
||||
|
||||
:: float-8-boa ( a b c d e f g h -- simd-array )
|
||||
a b c d float-4-boa
|
||||
e f g h float-4-boa
|
||||
[ underlying>> ] bi@
|
||||
\ float-8 boa ; inline
|
||||
|
||||
: double-4-with ( x -- simd-array )
|
||||
[ double-2-with ] [ double-2-with ] bi [ underlying>> ] bi@
|
||||
\ double-4 boa ; inline
|
||||
|
||||
:: double-4-boa ( a b c d -- simd-array )
|
||||
a b double-2-boa
|
||||
c d double-2-boa
|
||||
[ underlying>> ] bi@
|
||||
\ double-4 boa ; inline
|
||||
|
||||
<<
|
||||
ERROR: bad-vector-size bits ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
! Filter out operations that are not available, eg horizontal adds
|
||||
! on SSE2. Fallback code in math.vectors is used in that case.
|
||||
: simd-vocab ( type -- vocab )
|
||||
"math.vectors.simd.instances." prepend ;
|
||||
|
||||
: supported-simd-ops ( assoc -- assoc' )
|
||||
{
|
||||
{ v+ (simd-v+) }
|
||||
{ v- (simd-v-) }
|
||||
{ v* (simd-v*) }
|
||||
{ v/ (simd-v/) }
|
||||
{ vmin (simd-vmin) }
|
||||
{ vmax (simd-vmax) }
|
||||
{ sum (simd-sum) }
|
||||
} [ nip "intrinsic" word-prop ] assoc-filter
|
||||
'[ drop _ key? ] assoc-filter ;
|
||||
|
||||
! Some SIMD operations are defined in terms of others.
|
||||
|
||||
:: high-level-ops ( ctor -- assoc )
|
||||
{
|
||||
{ vneg [ [ dup v- ] keep v- ] }
|
||||
{ v. [ v* sum ] }
|
||||
{ n+v [ [ ctor execute ] dip v+ ] }
|
||||
{ v+n [ ctor execute v+ ] }
|
||||
{ n-v [ [ ctor execute ] dip v- ] }
|
||||
{ v-n [ ctor execute v- ] }
|
||||
{ n*v [ [ ctor execute ] dip v* ] }
|
||||
{ v*n [ ctor execute v* ] }
|
||||
{ n/v [ [ ctor execute ] dip v/ ] }
|
||||
{ v/n [ ctor execute v/ ] }
|
||||
{ norm-sq [ dup v. assert-positive ] }
|
||||
{ norm [ norm-sq sqrt ] }
|
||||
{ normalize [ dup norm v/n ] }
|
||||
{ distance [ v- norm ] }
|
||||
} ;
|
||||
|
||||
:: simd-vector-words ( class ctor elt-type assoc -- )
|
||||
class elt-type assoc supported-simd-ops ctor high-level-ops assoc-union
|
||||
specialize-vector-words ;
|
||||
: parse-simd-name ( string -- c-type quot )
|
||||
"-" split1
|
||||
[ "alien.c-types" lookup dup heap-size ] [ string>number ] bi*
|
||||
* 8 * {
|
||||
{ 128 [ [ define-simd-128 ] ] }
|
||||
{ 256 [ [ define-simd-256 ] ] }
|
||||
[ bad-vector-size ]
|
||||
} case ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
\ float-4 \ float-4-with m:float H{
|
||||
{ v+ [ [ (simd-v+) ] float-4-vv->v-op ] }
|
||||
{ v- [ [ (simd-v-) ] float-4-vv->v-op ] }
|
||||
{ v* [ [ (simd-v*) ] float-4-vv->v-op ] }
|
||||
{ v/ [ [ (simd-v/) ] float-4-vv->v-op ] }
|
||||
{ vmin [ [ (simd-vmin) ] float-4-vv->v-op ] }
|
||||
{ vmax [ [ (simd-vmax) ] float-4-vv->v-op ] }
|
||||
{ sum [ [ (simd-sum) ] float-4-v->n-op ] }
|
||||
} simd-vector-words
|
||||
: define-simd-vocab ( type -- vocab )
|
||||
[ simd-vocab ]
|
||||
[ '[ _ parse-simd-name call( type -- ) ] ] bi
|
||||
generate-vocab ;
|
||||
|
||||
\ double-2 \ double-2-with m:float H{
|
||||
{ v+ [ [ (simd-v+) ] double-2-vv->v-op ] }
|
||||
{ v- [ [ (simd-v-) ] double-2-vv->v-op ] }
|
||||
{ v* [ [ (simd-v*) ] double-2-vv->v-op ] }
|
||||
{ v/ [ [ (simd-v/) ] double-2-vv->v-op ] }
|
||||
{ vmin [ [ (simd-vmin) ] double-2-vv->v-op ] }
|
||||
{ vmax [ [ (simd-vmax) ] double-2-vv->v-op ] }
|
||||
{ sum [ [ (simd-sum) ] double-2-v->n-op ] }
|
||||
} simd-vector-words
|
||||
|
||||
\ float-8 \ float-8-with m:float H{
|
||||
{ v+ [ [ (simd-v+) ] float-8-vv->v-op ] }
|
||||
{ v- [ [ (simd-v-) ] float-8-vv->v-op ] }
|
||||
{ v* [ [ (simd-v*) ] float-8-vv->v-op ] }
|
||||
{ v/ [ [ (simd-v/) ] float-8-vv->v-op ] }
|
||||
{ vmin [ [ (simd-vmin) ] float-8-vv->v-op ] }
|
||||
{ vmax [ [ (simd-vmax) ] float-8-vv->v-op ] }
|
||||
{ sum [ [ (simd-sum) ] [ + ] float-8-v->n-op ] }
|
||||
} simd-vector-words
|
||||
|
||||
\ double-4 \ double-4-with m:float H{
|
||||
{ v+ [ [ (simd-v+) ] double-4-vv->v-op ] }
|
||||
{ v- [ [ (simd-v-) ] double-4-vv->v-op ] }
|
||||
{ v* [ [ (simd-v*) ] double-4-vv->v-op ] }
|
||||
{ v/ [ [ (simd-v/) ] double-4-vv->v-op ] }
|
||||
{ vmin [ [ (simd-vmin) ] double-4-vv->v-op ] }
|
||||
{ vmax [ [ (simd-vmax) ] double-4-vv->v-op ] }
|
||||
{ sum [ [ (simd-v+) ] [ (simd-sum) ] double-4-v->n-op ] }
|
||||
} simd-vector-words
|
||||
|
||||
>>
|
||||
|
||||
USE: vocabs.loader
|
||||
|
||||
"math.vectors.simd.alien" require
|
||||
SYNTAX: SIMD:
|
||||
scan define-simd-vocab use-vocab ;
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Single-instruction-multiple-data parallel vector operations
|
|
@ -53,10 +53,14 @@ H{
|
|||
{ norm-sq { +vector+ -> +nonnegative+ } }
|
||||
{ normalize { +vector+ -> +vector+ } }
|
||||
{ v* { +vector+ +vector+ -> +vector+ } }
|
||||
{ vs* { +vector+ +vector+ -> +vector+ } }
|
||||
{ v*n { +vector+ +scalar+ -> +vector+ } }
|
||||
{ v+ { +vector+ +vector+ -> +vector+ } }
|
||||
{ vs+ { +vector+ +vector+ -> +vector+ } }
|
||||
{ v+- { +vector+ +vector+ -> +vector+ } }
|
||||
{ v+n { +vector+ +scalar+ -> +vector+ } }
|
||||
{ v- { +vector+ +vector+ -> +vector+ } }
|
||||
{ vs- { +vector+ +vector+ -> +vector+ } }
|
||||
{ v-n { +vector+ +scalar+ -> +vector+ } }
|
||||
{ v. { +vector+ +vector+ -> +scalar+ } }
|
||||
{ v/ { +vector+ +vector+ -> +vector+ } }
|
||||
|
@ -68,6 +72,11 @@ H{
|
|||
{ vneg { +vector+ -> +vector+ } }
|
||||
{ vtruncate { +vector+ -> +vector+ } }
|
||||
{ sum { +vector+ -> +scalar+ } }
|
||||
{ vabs { +vector+ -> +vector+ } }
|
||||
{ vsqrt { +vector+ -> +vector+ } }
|
||||
{ vbitand { +vector+ +vector+ -> +vector+ } }
|
||||
{ vbitor { +vector+ +vector+ -> +vector+ } }
|
||||
{ vbitxor { +vector+ +vector+ -> +vector+ } }
|
||||
}
|
||||
|
||||
PREDICATE: vector-word < word vector-words key? ;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
USING: help.markup help.syntax math sequences ;
|
||||
USING: help.markup help.syntax math math.functions sequences ;
|
||||
IN: math.vectors
|
||||
|
||||
ARTICLE: "math-vectors" "Vector arithmetic"
|
||||
|
@ -14,18 +14,46 @@ $nl
|
|||
{ $subsection n+v }
|
||||
{ $subsection v-n }
|
||||
{ $subsection n-v }
|
||||
"Combining two vectors to form another vector with " { $link 2map } ":"
|
||||
"Vector unary operations:"
|
||||
{ $subsection vneg }
|
||||
{ $subsection vabs }
|
||||
{ $subsection vsqrt }
|
||||
{ $subsection vfloor }
|
||||
{ $subsection vceiling }
|
||||
{ $subsection vtruncate }
|
||||
"Vector/vector binary operations:"
|
||||
{ $subsection v+ }
|
||||
{ $subsection v- }
|
||||
{ $subsection v+- }
|
||||
{ $subsection v* }
|
||||
{ $subsection v/ }
|
||||
"Saturated arithmetic (only on " { $link "specialized-arrays" } "):"
|
||||
{ $subsection vs+ }
|
||||
{ $subsection vs- }
|
||||
{ $subsection vs* }
|
||||
"Comparisons:"
|
||||
{ $subsection vmax }
|
||||
{ $subsection vmin }
|
||||
"Bitwise operations:"
|
||||
{ $subsection vbitand }
|
||||
{ $subsection vbitor }
|
||||
{ $subsection vbitxor }
|
||||
"Inner product and norm:"
|
||||
{ $subsection v. }
|
||||
{ $subsection norm }
|
||||
{ $subsection norm-sq }
|
||||
{ $subsection normalize } ;
|
||||
{ $subsection normalize }
|
||||
"Comparing vectors:"
|
||||
{ $subsection distance }
|
||||
{ $subsection v~ }
|
||||
"Other functions:"
|
||||
{ $subsection vsupremum }
|
||||
{ $subsection vinfimum }
|
||||
{ $subsection trilerp }
|
||||
{ $subsection bilerp }
|
||||
{ $subsection vlerp }
|
||||
{ $subsection vnlerp }
|
||||
{ $subsection vbilerp } ;
|
||||
|
||||
ABOUT: "math-vectors"
|
||||
|
||||
|
@ -33,6 +61,43 @@ HELP: vneg
|
|||
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } }
|
||||
{ $description "Negates each element of " { $snippet "u" } "." } ;
|
||||
|
||||
HELP: vabs
|
||||
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of non-negative real numbers" } }
|
||||
{ $description "Takes the absolute value of each element of " { $snippet "u" } "." } ;
|
||||
|
||||
HELP: vsqrt
|
||||
{ $values { "u" "a sequence of non-negative real numbers" } { "v" "a sequence of non-negative real numbers" } }
|
||||
{ $description "Takes the square root of each element of " { $snippet "u" } "." }
|
||||
{ $warning "For performance reasons, this does not work with negative inputs, unlike " { $link sqrt } "." } ;
|
||||
|
||||
HELP: vfloor
|
||||
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } }
|
||||
{ $description "Takes the " { $link floor } " of each element of " { $snippet "u" } "." } ;
|
||||
|
||||
HELP: vceiling
|
||||
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } }
|
||||
{ $description "Takes the " { $link ceiling } " of each element of " { $snippet "u" } "." } ;
|
||||
|
||||
HELP: vtruncate
|
||||
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } }
|
||||
{ $description "Truncates each element of " { $snippet "u" } "." } ;
|
||||
|
||||
HELP: n+v
|
||||
{ $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } }
|
||||
{ $description "Adds " { $snippet "n" } " to each element of " { $snippet "u" } "." } ;
|
||||
|
||||
HELP: v+n
|
||||
{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "v" "a sequence of numbers" } }
|
||||
{ $description "Adds " { $snippet "n" } " to each element of " { $snippet "u" } "." } ;
|
||||
|
||||
HELP: n-v
|
||||
{ $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } }
|
||||
{ $description "Subtracts each element of " { $snippet "u" } " from " { $snippet "n" } "." } ;
|
||||
|
||||
HELP: v-n
|
||||
{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "v" "a sequence of numbers" } }
|
||||
{ $description "Subtracts " { $snippet "n" } " from each element of " { $snippet "u" } "." } ;
|
||||
|
||||
HELP: n*v
|
||||
{ $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } }
|
||||
{ $description "Multiplies each element of " { $snippet "u" } " by " { $snippet "n" } "." } ;
|
||||
|
@ -43,11 +108,13 @@ HELP: v*n
|
|||
|
||||
HELP: n/v
|
||||
{ $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } }
|
||||
{ $description "Divides " { $snippet "n" } " by each element of " { $snippet "u" } "." } ;
|
||||
{ $description "Divides " { $snippet "n" } " by each element of " { $snippet "u" } "." }
|
||||
{ $errors "May throw an error if a division by zero occurs; see " { $link "division-by-zero" } "." } ;
|
||||
|
||||
HELP: v/n
|
||||
{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "v" "a sequence of numbers" } }
|
||||
{ $description "Divides each element of " { $snippet "u" } " by " { $snippet "n" } "." } ;
|
||||
{ $description "Divides each element of " { $snippet "u" } " by " { $snippet "n" } "." }
|
||||
{ $errors "May throw an error if a division by zero occurs; see " { $link "division-by-zero" } "." } ;
|
||||
|
||||
HELP: v+
|
||||
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
|
||||
|
@ -57,6 +124,17 @@ HELP: v-
|
|||
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
|
||||
{ $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise." } ;
|
||||
|
||||
HELP: v+-
|
||||
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
|
||||
{ $description "Adds and subtracts alternate elements of " { $snippet "v" } " and " { $snippet "u" } " component-wise." }
|
||||
{ $examples
|
||||
{ $example
|
||||
"USING: math.vectors prettyprint ;"
|
||||
"{ 1 2 3 } { 2 3 2 } v+- ."
|
||||
"{ -1 5 1 }"
|
||||
}
|
||||
} ;
|
||||
|
||||
HELP: [v-]
|
||||
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } }
|
||||
{ $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise; any components which become negative are set to zero." } ;
|
||||
|
@ -68,7 +146,7 @@ HELP: v*
|
|||
HELP: v/
|
||||
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
|
||||
{ $description "Divides " { $snippet "u" } " by " { $snippet "v" } " component-wise." }
|
||||
{ $errors "Throws an error if an integer division by zero occurs." } ;
|
||||
{ $errors "May throw an error if a division by zero occurs; see " { $link "division-by-zero" } "." } ;
|
||||
|
||||
HELP: vmax
|
||||
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } }
|
||||
|
@ -85,9 +163,52 @@ HELP: v.
|
|||
{ $description "Computes the real-valued dot product." }
|
||||
{ $notes
|
||||
"This word can also take complex number sequences as input, however mathematically it will compute the wrong result. The complex-valued dot product is defined differently:"
|
||||
{ $snippet "0 [ conjugate * + ] 2reduce" }
|
||||
{ $code "0 [ conjugate * + ] 2reduce" }
|
||||
} ;
|
||||
|
||||
HELP: vs+
|
||||
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
|
||||
{ $description "Adds " { $snippet "u" } " and " { $snippet "v" } " component-wise with saturation." }
|
||||
{ $examples
|
||||
"With saturation:"
|
||||
{ $example
|
||||
"USING: math.vectors prettyprint specialized-arrays ;"
|
||||
"SPECIALIZED-ARRAY: uchar"
|
||||
"uchar-array{ 100 200 150 } uchar-array{ 70 70 70 } vs+ ."
|
||||
"uchar-array{ 170 255 220 }"
|
||||
}
|
||||
"Without saturation:"
|
||||
{ $example
|
||||
"USING: math.vectors prettyprint specialized-arrays ;"
|
||||
"SPECIALIZED-ARRAY: uchar"
|
||||
"uchar-array{ 100 200 150 } uchar-array{ 70 70 70 } v+ ."
|
||||
"uchar-array{ 170 14 220 }"
|
||||
}
|
||||
} ;
|
||||
|
||||
HELP: vs-
|
||||
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
|
||||
{ $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise with saturation." } ;
|
||||
|
||||
HELP: vs*
|
||||
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
|
||||
{ $description "Multiplies " { $snippet "u" } " and " { $snippet "v" } " component-wise with saturation." } ;
|
||||
|
||||
HELP: vbitand
|
||||
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } }
|
||||
{ $description "Takes the bitwise and of " { $snippet "u" } " and " { $snippet "v" } " component-wise." }
|
||||
{ $notes "Unlike " { $link bitand } ", this word may be used on a specialized array of floats or doubles, in which case the bitwise representation of the floating point numbers is operated upon." } ;
|
||||
|
||||
HELP: vbitor
|
||||
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } }
|
||||
{ $description "Takes the bitwise or of " { $snippet "u" } " and " { $snippet "v" } " component-wise." }
|
||||
{ $notes "Unlike " { $link bitor } ", this word may be used on a specialized array of floats or doubles, in which case the bitwise representation of the floating point numbers is operated upon." } ;
|
||||
|
||||
HELP: vbitxor
|
||||
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } }
|
||||
{ $description "Takes the bitwise exclusive or of " { $snippet "u" } " and " { $snippet "v" } " component-wise." }
|
||||
{ $notes "Unlike " { $link bitxor } ", this word may be used on a specialized array of floats or doubles, in which case the bitwise representation of the floating point numbers is operated upon." } ;
|
||||
|
||||
HELP: norm-sq
|
||||
{ $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } }
|
||||
{ $description "Computes the squared length of a mathematical vector." } ;
|
||||
|
@ -100,6 +221,10 @@ HELP: normalize
|
|||
{ $values { "u" "a sequence of numbers, not all zero" } { "v" "a sequence of numbers" } }
|
||||
{ $description "Outputs a vector with the same direction as " { $snippet "u" } " but length 1." } ;
|
||||
|
||||
HELP: distance
|
||||
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "x" "a non-negative real number" } }
|
||||
{ $description "Outputs the Euclidean distance between two vectors." } ;
|
||||
|
||||
HELP: set-axis
|
||||
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "axis" "a sequence of 0/1" } { "w" "a sequence of numbers" } }
|
||||
{ $description "Using " { $snippet "w" } " as a template, creates a new sequence containing corresponding elements from " { $snippet "u" } " in place of 0, and corresponding elements from " { $snippet "v" } " in place of 1." }
|
||||
|
@ -108,3 +233,5 @@ HELP: set-axis
|
|||
{ 2map v+ v- v* v/ } related-words
|
||||
|
||||
{ 2reduce v. } related-words
|
||||
|
||||
{ vs+ vs- vs* } related-words
|
||||
|
|
|
@ -17,4 +17,6 @@ USING: math.vectors tools.test ;
|
|||
|
||||
[ 1.125 ] [ 0.0 1.0 2.0 4.0 { 0.5 0.25 } bilerp ] unit-test
|
||||
|
||||
[ 17 ] [ 0 1 2 3 4 5 6 7 { 1 2 3 } trilerp ] unit-test
|
||||
[ 17 ] [ 0 1 2 3 4 5 6 7 { 1 2 3 } trilerp ] unit-test
|
||||
|
||||
[ { 0 3 2 5 4 } ] [ { 1 2 3 4 5 } { 1 1 1 1 1 } v+- ] unit-test
|
|
@ -1,9 +1,12 @@
|
|||
! Copyright (C) 2005, 2008 Slava Pestov.
|
||||
! Copyright (C) 2005, 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: arrays kernel sequences math math.functions hints
|
||||
math.order ;
|
||||
USING: arrays alien.c-types kernel sequences math math.functions
|
||||
hints math.order math.libm fry combinators ;
|
||||
QUALIFIED-WITH: alien.c-types c
|
||||
IN: math.vectors
|
||||
|
||||
GENERIC: element-type ( obj -- c-type )
|
||||
|
||||
: vneg ( u -- v ) [ neg ] map ;
|
||||
|
||||
: v+n ( u n -- v ) [ + ] curry map ;
|
||||
|
@ -24,9 +27,43 @@ IN: math.vectors
|
|||
: vmax ( u v -- w ) [ max ] 2map ;
|
||||
: vmin ( u v -- w ) [ min ] 2map ;
|
||||
|
||||
: vfloor ( v -- _v_ ) [ floor ] map ;
|
||||
: vceiling ( v -- ^v^ ) [ ceiling ] map ;
|
||||
: vtruncate ( v -- -v- ) [ truncate ] map ;
|
||||
: v+- ( u v -- w )
|
||||
[ t ] 2dip
|
||||
[ [ not ] 2dip pick [ + ] [ - ] if ] 2map
|
||||
nip ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: 2saturate-map ( u v quot -- w )
|
||||
pick element-type '[ @ _ c-type-clamp ] 2map ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: vs+ ( u v -- w ) [ + ] 2saturate-map ;
|
||||
: vs- ( u v -- w ) [ - ] 2saturate-map ;
|
||||
: vs* ( u v -- w ) [ * ] 2saturate-map ;
|
||||
|
||||
: vabs ( u -- v ) [ abs ] map ;
|
||||
: vsqrt ( u -- v ) [ >float fsqrt ] map ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: fp-bitwise-op ( x y seq quot -- z )
|
||||
swap element-type {
|
||||
{ c:double [ [ [ double>bits ] bi@ ] dip call bits>double ] }
|
||||
{ c:float [ [ [ float>bits ] bi@ ] dip call bits>float ] }
|
||||
[ drop call ]
|
||||
} case ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: vbitand ( u v -- w ) over '[ _ [ bitand ] fp-bitwise-op ] 2map ;
|
||||
: vbitor ( u v -- w ) over '[ _ [ bitor ] fp-bitwise-op ] 2map ;
|
||||
: vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ;
|
||||
|
||||
: vfloor ( u -- v ) [ floor ] map ;
|
||||
: vceiling ( u -- v ) [ ceiling ] map ;
|
||||
: vtruncate ( u -- v ) [ truncate ] map ;
|
||||
|
||||
: vsupremum ( seq -- vmax ) [ ] [ vmax ] map-reduce ;
|
||||
: vinfimum ( seq -- vmin ) [ ] [ vmin ] map-reduce ;
|
||||
|
|
|
@ -173,6 +173,7 @@ M: tuple pprint*
|
|||
] when ;
|
||||
|
||||
: pprint-elements ( seq -- )
|
||||
>array
|
||||
do-length-limit
|
||||
[ [ pprint* ] each ] dip
|
||||
[ "~" swap number>string " more~" 3append text ] when* ;
|
||||
|
|
|
@ -4,7 +4,7 @@ specialized-arrays.private sequences alien.c-types accessors
|
|||
kernel arrays combinators compiler compiler.units classes.struct
|
||||
combinators.smart compiler.tree.debugger math libc destructors
|
||||
sequences.private multiline eval words vocabs namespaces
|
||||
assocs prettyprint alien.data ;
|
||||
assocs prettyprint alien.data math.vectors ;
|
||||
FROM: alien.c-types => float ;
|
||||
|
||||
SPECIALIZED-ARRAY: int
|
||||
|
@ -13,6 +13,9 @@ SPECIALIZED-ARRAY: ushort
|
|||
SPECIALIZED-ARRAY: char
|
||||
SPECIALIZED-ARRAY: uint
|
||||
SPECIALIZED-ARRAY: float
|
||||
SPECIALIZED-ARRAY: ulonglong
|
||||
|
||||
[ ulonglong ] [ ulonglong-array{ } element-type ] unit-test
|
||||
|
||||
[ t ] [ { 1 2 3 } >int-array int-array? ] unit-test
|
||||
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
! Copyright (C) 2008, 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors alien alien.c-types alien.data alien.parser assocs
|
||||
byte-arrays classes compiler.units functors kernel lexer libc math
|
||||
math.vectors.specialization namespaces parser prettyprint.custom
|
||||
sequences sequences.private strings summary vocabs vocabs.loader
|
||||
vocabs.parser words fry combinators ;
|
||||
USING: accessors alien alien.c-types alien.data alien.parser
|
||||
assocs byte-arrays classes compiler.units functors kernel lexer
|
||||
libc math math.vectors math.vectors.specialization namespaces
|
||||
parser prettyprint.custom sequences sequences.private strings
|
||||
summary vocabs vocabs.loader vocabs.parser vocabs.generated
|
||||
words fry combinators present ;
|
||||
IN: specialized-arrays
|
||||
|
||||
MIXIN: specialized-array
|
||||
|
@ -53,14 +54,14 @@ TUPLE: A
|
|||
|
||||
: <direct-A> ( alien len -- specialized-array ) A boa ; inline
|
||||
|
||||
: <A> ( n -- specialized-array ) [ T <underlying> ] keep <direct-A> ; inline
|
||||
: <A> ( n -- specialized-array ) [ \ T <underlying> ] keep <direct-A> ; inline
|
||||
|
||||
: (A) ( n -- specialized-array ) [ T (underlying) ] keep <direct-A> ; inline
|
||||
: (A) ( n -- specialized-array ) [ \ T (underlying) ] keep <direct-A> ; inline
|
||||
|
||||
: malloc-A ( len -- specialized-array ) [ T heap-size calloc ] keep <direct-A> ; inline
|
||||
: malloc-A ( len -- specialized-array ) [ \ T heap-size calloc ] keep <direct-A> ; inline
|
||||
|
||||
: byte-array>A ( byte-array -- specialized-array )
|
||||
dup length T heap-size /mod 0 = [ drop T bad-byte-array-length ] unless
|
||||
dup length \ T heap-size /mod 0 = [ drop \ T bad-byte-array-length ] unless
|
||||
<direct-A> ; inline
|
||||
|
||||
M: A clone [ underlying>> clone ] [ length>> ] bi <direct-A> ; inline
|
||||
|
@ -81,12 +82,14 @@ M: A equal? over A instance? [ sequence= ] [ 2drop f ] if ;
|
|||
|
||||
M: A resize
|
||||
[
|
||||
[ T heap-size * ] [ underlying>> ] bi*
|
||||
[ \ T heap-size * ] [ underlying>> ] bi*
|
||||
resize-byte-array
|
||||
] [ drop ] 2bi
|
||||
<direct-A> ; inline
|
||||
|
||||
M: A byte-length length T heap-size * ; inline
|
||||
M: A byte-length length \ T heap-size * ; inline
|
||||
|
||||
M: A element-type drop \ T ; inline
|
||||
|
||||
M: A direct-array-syntax drop \ A@ ;
|
||||
|
||||
|
@ -116,24 +119,15 @@ M: word (underlying-type) "c-type" word-prop ;
|
|||
} cond ;
|
||||
|
||||
: underlying-type-name ( c-type -- name )
|
||||
underlying-type dup word? [ name>> ] when ;
|
||||
underlying-type present ;
|
||||
|
||||
: specialized-array-vocab ( c-type -- vocab )
|
||||
"specialized-arrays.instances." prepend ;
|
||||
present "specialized-arrays.instances." prepend ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: generate-vocab ( vocab-name quot -- vocab )
|
||||
[ dup vocab [ ] ] dip '[
|
||||
[
|
||||
[
|
||||
_ with-current-vocab
|
||||
] with-compilation-unit
|
||||
] keep
|
||||
] ?if ; inline
|
||||
|
||||
: define-array-vocab ( type -- vocab )
|
||||
underlying-type-name
|
||||
underlying-type
|
||||
[ specialized-array-vocab ] [ '[ _ define-array ] ] bi
|
||||
generate-vocab ;
|
||||
|
||||
|
|
|
@ -16,8 +16,8 @@ ARTICLE: "specialized-vector-words" "Specialized vector words"
|
|||
}
|
||||
"Behind the scenes, these words are placed in a vocabulary named " { $snippet "specialized-vectors.instances.T" } ", however this vocabulary should not be placed in a " { $link POSTPONE: USING: } " form directly. Instead, always use " { $link POSTPONE: SPECIALIZED-VECTOR: } ". This ensures that the vocabulary can get generated the first time it is needed." ;
|
||||
|
||||
ARTICLE: "specialized-vector-c" "Passing specialized arrays to C functions"
|
||||
"Each specialized array has a " { $slot "underlying" } " slot holding a specialized array, which in turn has an " { $slot "underlying" } " slot holding a " { $link byte-array } " with the raw data. Passing a specialized vector as a parameter to a C function call will automatically extract the underlying data. To get at the underlying data directly, call the " { $link >c-ptr } " word on a specialized vector." ;
|
||||
ARTICLE: "specialized-vector-c" "Passing specialized vectors to C functions"
|
||||
"Each specialized vector has a " { $slot "underlying" } " slot holding a specialized array, which in turn has an " { $slot "underlying" } " slot holding a " { $link byte-array } " with the raw data. Passing a specialized vector as a parameter to a C function call will automatically extract the underlying data. To get at the underlying data directly, call the " { $link >c-ptr } " word on a specialized vector." ;
|
||||
|
||||
ARTICLE: "specialized-vectors" "Specialized vectors"
|
||||
"The " { $vocab-link "specialized-vectors" } " vocabulary implements resizable sequence types for storing machine values in a space-efficient manner without boxing."
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
USING: accessors alien.c-types assocs compiler.units functors
|
||||
growable kernel lexer namespaces parser prettyprint.custom
|
||||
sequences specialized-arrays specialized-arrays.private strings
|
||||
vocabs vocabs.parser fry ;
|
||||
vocabs vocabs.parser vocabs.generated fry ;
|
||||
QUALIFIED: vectors.functor
|
||||
IN: specialized-vectors
|
||||
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: compiler.units fry kernel vocabs vocabs.parser ;
|
||||
IN: vocabs.generated
|
||||
|
||||
: generate-vocab ( vocab-name quot -- vocab )
|
||||
[ dup vocab [ ] ] dip '[
|
||||
[
|
||||
[
|
||||
_ with-current-vocab
|
||||
] with-compilation-unit
|
||||
] keep
|
||||
] ?if ; inline
|
|
@ -434,11 +434,15 @@ HELP: byte-array>bignum
|
|||
{ $description "Converts a byte-array, interpreted as little-endian, into a bignum integer. User code should call " { $link le> } " or " { $link be> } " instead." } ;
|
||||
|
||||
ARTICLE: "division-by-zero" "Division by zero"
|
||||
"Floating point division never raises an error if the denominator is zero. This means that if at least one of the two inputs to " { $link / } ", " { $link /f } " or " { $link mod } " is a float, the result will be a floating point infinity or not a number value."
|
||||
"Behavior of division operations when a denominator of zero is used depends on the data types in question, as well as the platform being used."
|
||||
$nl
|
||||
"Floating point division only throws an error if the appropriate traps are enabled in the floating point environment. If traps are disabled, a Not-a-number value or an infinity is output, depending on whether the numerator is zero or non-zero."
|
||||
$nl
|
||||
"Floating point traps are disabled by default and the " { $vocab-link "math.floats.env" } " vocabulary provides words to enable them. Floating point division is performed by " { $link / } ", " { $link /f } " or " { $link mod } " if at least one of the two inputs is a float. Floating point division is always performed by " { $link /f } "."
|
||||
$nl
|
||||
"The behavior of integer division is hardware specific. On x86 processors, " { $link /i } " and " { $link mod } " raise an error if both inputs are integers and the denominator is zero. On PowerPC, integer division by zero yields a result of zero."
|
||||
$nl
|
||||
"On the other hand, the " { $link / } " word, when given integer arguments, implements a much more expensive division algorithm which always yields an exact rational answer, and this word always tests for division by zero explicitly." ;
|
||||
"The " { $link / } " word, when given integer arguments, implements a much more expensive division algorithm which always yields an exact rational answer, and this word always tests for division by zero explicitly." ;
|
||||
|
||||
ARTICLE: "number-protocol" "Number protocol"
|
||||
"Math operations obey certain numerical upgrade rules. If one of the inputs is a bignum and the other is a fixnum, the latter is first coerced to a bignum; if one of the inputs is a float, the other is coerced to a float."
|
||||
|
@ -459,7 +463,8 @@ $nl
|
|||
{ $subsection > }
|
||||
{ $subsection >= }
|
||||
"Numbers can be compared for equality using " { $link = } ", or a less precise test which disregards types:"
|
||||
{ $subsection number= } ;
|
||||
{ $subsection number= }
|
||||
{ $see-also "math.floats.compare" } ;
|
||||
|
||||
ARTICLE: "modular-arithmetic" "Modular arithmetic"
|
||||
{ $subsection mod }
|
||||
|
|
|
@ -4,6 +4,7 @@ USING: accessors fry kernel locals math math.constants
|
|||
math.functions math.vectors math.vectors.simd prettyprint
|
||||
combinators.smart sequences hints classes.struct
|
||||
specialized-arrays ;
|
||||
SIMD: double-4
|
||||
IN: benchmark.nbody-simd
|
||||
|
||||
: solar-mass ( -- x ) 4 pi sq * ; inline
|
||||
|
|
|
@ -5,6 +5,7 @@ USING: arrays accessors io io.files io.files.temp
|
|||
io.encodings.binary kernel math math.constants math.functions
|
||||
math.vectors math.vectors.simd math.parser make sequences
|
||||
sequences.private words hints classes.struct ;
|
||||
SIMD: double-4
|
||||
IN: benchmark.raytracer-simd
|
||||
|
||||
! parameters
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel io math math.functions math.parser math.vectors
|
||||
math.vectors.simd sequences specialized-arrays ;
|
||||
SIMD: float-4
|
||||
SPECIALIZED-ARRAY: float-4
|
||||
IN: benchmark.simd-1
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ USING: mason.child mason.config tools.test namespaces io kernel sequences ;
|
|||
] with-scope
|
||||
] unit-test
|
||||
|
||||
[ { "./factor.com" "-i=boot.x86.32.image" "-no-user-init" "-sse-version=30" } ] [
|
||||
[ { "./factor.com" "-i=boot.x86.32.image" "-no-user-init" } ] [
|
||||
[
|
||||
"winnt" target-os set
|
||||
"x86.32" target-cpu set
|
||||
|
|
|
@ -34,7 +34,6 @@ IN: mason.child
|
|||
factor-vm ,
|
||||
"-i=" boot-image-name append ,
|
||||
"-no-user-init" ,
|
||||
target-cpu get { "x86.32" "x86.64" } member? [ "-sse-version=30" , ] when
|
||||
] { } make ;
|
||||
|
||||
: boot ( -- )
|
||||
|
|
|
@ -66,12 +66,12 @@ DEF(F_FASTCALL void,c_to_factor,(CELL quot, void *vm)):
|
|||
DEF(bool,sse_version,(void)):
|
||||
mov $0x1,RETURN_REG
|
||||
cpuid
|
||||
/* test $0x100000,%ecx
|
||||
test $0x100000,%ecx
|
||||
jnz sse_42
|
||||
test $0x80000,%ecx
|
||||
jnz sse_41
|
||||
test $0x200,%ecx
|
||||
jnz ssse_3 */
|
||||
jnz ssse_3
|
||||
test $0x1,%ecx
|
||||
jnz sse_3
|
||||
test $0x4000000,%edx
|
||||
|
|
Loading…
Reference in New Issue