Merge branch 'master' of git://factorcode.org/git/factor

Conflicts:
	basis/math/vectors/simd/simd-docs.factor
db4
Doug Coleman 2009-09-23 10:11:49 -05:00
commit 810bd63820
44 changed files with 1290 additions and 934 deletions

View File

@ -1,5 +1,6 @@
USING: alien alien.syntax alien.c-types kernel tools.test
sequences system libc alien.strings io.encodings.utf8 ;
sequences system libc alien.strings io.encodings.utf8
math.constants ;
IN: alien.c-types.tests
CONSTANT: xyz 123
@ -52,3 +53,9 @@ TYPEDEF: uchar* MyLPBYTE
os windows? cpu x86.64? and [
[ -2147467259 ] [ 2147500037 <long> *long ] unit-test
] when
[ 0 ] [ -10 uchar c-type-clamp ] unit-test
[ 12 ] [ 12 uchar c-type-clamp ] unit-test
[ -10 ] [ -10 char c-type-clamp ] unit-test
[ 127 ] [ 230 char c-type-clamp ] unit-test
[ t ] [ pi dup float c-type-clamp = ] unit-test

View File

@ -1,11 +1,12 @@
! Copyright (C) 2004, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: byte-arrays arrays assocs kernel kernel.private math
namespaces make parser sequences strings words splitting math.parser
cpu.architecture alien alien.accessors alien.strings quotations
layouts system compiler.units io io.files io.encodings.binary
io.streams.memory accessors combinators effects continuations fry
classes vocabs vocabs.loader words.symbol ;
math.order math.parser namespaces make parser sequences strings
words splitting cpu.architecture alien alien.accessors
alien.strings quotations layouts system compiler.units io
io.files io.encodings.binary io.streams.memory accessors
combinators effects continuations fry classes vocabs
vocabs.loader words.symbol ;
QUALIFIED: math
IN: alien.c-types
@ -472,3 +473,25 @@ SYMBOLS:
\ ulong \ size_t typedef
] with-compilation-unit
M: char-16-rep rep-component-type drop char ;
M: uchar-16-rep rep-component-type drop uchar ;
M: short-8-rep rep-component-type drop short ;
M: ushort-8-rep rep-component-type drop ushort ;
M: int-4-rep rep-component-type drop int ;
M: uint-4-rep rep-component-type drop uint ;
M: float-4-rep rep-component-type drop float ;
M: double-2-rep rep-component-type drop double ;
: (unsigned-interval) ( bytes -- from to ) [ 0 ] dip 8 * 2^ 1 - ; foldable
: unsigned-interval ( c-type -- from to ) heap-size (unsigned-interval) ; foldable
: (signed-interval) ( bytes -- from to ) 8 * 1 - 2^ [ neg ] [ 1 - ] bi ; foldable
: signed-interval ( c-type -- from to ) heap-size (signed-interval) ; foldable
: c-type-interval ( c-type -- from to )
{
{ [ dup { float double } memq? ] [ drop -1/0. 1/0. ] }
{ [ dup { char short int long longlong } memq? ] [ signed-interval ] }
{ [ dup { uchar ushort uint ulong ulonglong } memq? ] [ unsigned-interval ] }
} cond ; foldable
: c-type-clamp ( value c-type -- value' ) c-type-interval clamp ; inline

View File

@ -1,4 +1,4 @@
! Copyright (C) 2008 Slava Pestov
! copyright (C) 2008 Slava Pestov
! See http://factorcode.org/license.txt for BSD license.
USING: accessors byte-arrays alien.c-types alien.data kernel
continuations destructors sequences io openssl openssl.libcrypto
@ -23,10 +23,10 @@ TUPLE: evp-md-context < disposable handle ;
: <evp-md-context> ( -- ctx )
evp-md-context new-disposable
EVP_MD_CTX <struct> dup EVP_MD_CTX_init >>handle ;
EVP_MD_CTX_create >>handle ;
M: evp-md-context dispose*
handle>> EVP_MD_CTX_cleanup drop ;
handle>> EVP_MD_CTX_destroy ;
: with-evp-md-context ( quot -- )
maybe-init-ssl [ <evp-md-context> ] dip with-disposal ; inline

View File

@ -305,16 +305,36 @@ def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##saturated-add-vector
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##add-sub-vector
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##sub-vector
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##saturated-sub-vector
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##mul-vector
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##saturated-mul-vector
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##div-vector
def: dst
use: src1 src2
@ -330,14 +350,34 @@ def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##horizontal-add-vector
def: dst/scalar-rep
use: src
literal: rep ;
PURE-INSN: ##abs-vector
def: dst
use: src
literal: rep ;
PURE-INSN: ##sqrt-vector
def: dst
use: src
literal: rep ;
PURE-INSN: ##horizontal-add-vector
def: dst/scalar-rep
use: src
PURE-INSN: ##and-vector
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##or-vector
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##xor-vector
def: dst
use: src1 src2
literal: rep ;
! Boxing and unboxing aliens

View File

@ -151,27 +151,31 @@ IN: compiler.cfg.intrinsics
{ math.integers.private:fixnum-log2 [ drop emit-fixnum-log2 ] }
} enable-intrinsics ;
: enable-sse2-simd ( -- )
: enable-simd ( -- )
{
{ math.vectors.simd.intrinsics:assert-positive [ drop ] }
{ math.vectors.simd.intrinsics:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vs+) [ [ ^^saturated-add-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vs-) [ [ ^^saturated-sub-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vs*) [ [ ^^saturated-mul-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vabs) [ [ ^^abs-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
{ math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] }
} enable-intrinsics ;
: enable-sse3-simd ( -- )
{
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
} enable-intrinsics ;
: emit-intrinsic ( node word -- )
"intrinsic" word-prop call( node -- ) ;

View File

@ -1,8 +1,9 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors assocs kernel locals fry
USING: accessors assocs kernel locals fry sequences
cpu.architecture
compiler.cfg.rpo
compiler.cfg.def-use
compiler.cfg.utilities
compiler.cfg.registers
compiler.cfg.instructions
@ -13,10 +14,19 @@ IN: compiler.cfg.ssa.cssa
! selection, so it must keep track of representations when introducing
! new values.
: insert-copy? ( bb vreg -- ? )
! If the last instruction defines a value (which means it is
! ##fixnum-add, ##fixnum-sub or ##fixnum-mul) then we don't
! need to insert a copy since in fact doing so will result
! in incorrect code.
[ instructions>> last defs-vreg ] dip eq? not ;
:: insert-copy ( bb src rep -- bb dst )
rep next-vreg-rep :> dst
bb [ dst src rep src rep-of emit-conversion ] add-instructions
bb dst ;
bb src insert-copy? [
rep next-vreg-rep :> dst
bb [ dst src rep src rep-of emit-conversion ] add-instructions
bb dst
] [ bb src ] if ;
: convert-phi ( ##phi -- )
dup dst>> rep-of '[ [ _ insert-copy ] assoc-map ] change-inputs drop ;

View File

@ -47,11 +47,18 @@ UNION: two-operand-insn
##min-float
##max-float
##add-vector
##saturated-add-vector
##add-sub-vector
##sub-vector
##saturated-sub-vector
##mul-vector
##saturated-mul-vector
##div-vector
##min-vector
##max-vector ;
##max-vector
##and-vector
##or-vector
##xor-vector ;
GENERIC: convert-two-operand* ( insn -- )

View File

@ -169,13 +169,21 @@ CODEGEN: ##gather-vector-2 %gather-vector-2
CODEGEN: ##gather-vector-4 %gather-vector-4
CODEGEN: ##box-vector %box-vector
CODEGEN: ##add-vector %add-vector
CODEGEN: ##saturated-add-vector %saturated-add-vector
CODEGEN: ##add-sub-vector %add-sub-vector
CODEGEN: ##sub-vector %sub-vector
CODEGEN: ##saturated-sub-vector %saturated-sub-vector
CODEGEN: ##mul-vector %mul-vector
CODEGEN: ##saturated-mul-vector %saturated-mul-vector
CODEGEN: ##div-vector %div-vector
CODEGEN: ##min-vector %min-vector
CODEGEN: ##max-vector %max-vector
CODEGEN: ##sqrt-vector %sqrt-vector
CODEGEN: ##horizontal-add-vector %horizontal-add-vector
CODEGEN: ##abs-vector %abs-vector
CODEGEN: ##and-vector %and-vector
CODEGEN: ##or-vector %or-vector
CODEGEN: ##xor-vector %xor-vector
CODEGEN: ##box-alien %box-alien
CODEGEN: ##box-displaced-alien %box-displaced-alien
CODEGEN: ##unbox-alien %unbox-alien

View File

@ -3,7 +3,7 @@ math hashtables.private math.private namespaces sequences tools.test
namespaces.private slots.private sequences.private byte-arrays alien
alien.accessors layouts words definitions compiler.units io
combinators vectors grouping make alien.c-types combinators.short-circuit
math.order math.libm math.parser ;
math.order math.libm math.parser alien.c-types ;
FROM: math => float ;
QUALIFIED: namespaces.private
IN: compiler.tests.codegen
@ -431,3 +431,21 @@ cell 4 = [
] curry each-integer
] compile-call
] unit-test
TUPLE: myseq { underlying1 byte-array read-only } { underlying2 byte-array read-only } ;
[ 2 ] [
little-endian?
T{ myseq f B{ 1 0 0 0 } B{ 1 0 0 0 } }
T{ myseq f B{ 0 0 0 1 } B{ 0 0 0 1 } } ?
[
{ myseq } declare
[ 0 2 ] dip dup
[
[
over 1 < [ underlying1>> ] [ [ 1 - ] dip underlying2>> ] if
swap 4 * >fixnum alien-signed-4
] bi-curry@ bi * +
] 2curry each-integer
] compile-call
] unit-test

View File

@ -18,6 +18,7 @@ compiler.tree.propagation.constraints
compiler.tree.propagation.call-effect
compiler.tree.propagation.transforms
compiler.tree.propagation.simd ;
FROM: alien.c-types => (signed-interval) (unsigned-interval) ;
IN: compiler.tree.propagation.known-words
{ + - * / }
@ -260,15 +261,9 @@ generic-comparison-ops [
alien-unsigned-8
} [
dup name>> {
{
[ "alien-signed-" ?head ]
[ string>number 8 * 1 - 2^ dup neg swap 1 - [a,b] ]
}
{
[ "alien-unsigned-" ?head ]
[ string>number 8 * 2^ 1 - 0 swap [a,b] ]
}
} cond
{ [ "alien-signed-" ?head ] [ string>number (signed-interval) ] }
{ [ "alien-unsigned-" ?head ] [ string>number (unsigned-interval) ] }
} cond [a,b]
[ fits-in-fixnum? fixnum integer ? ] keep <class/interval-info>
'[ 2drop _ ] "outputs" set-word-prop
] each

View File

@ -1,46 +1,45 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors byte-arrays combinators fry
USING: accessors byte-arrays combinators fry sequences
compiler.tree.propagation.info cpu.architecture kernel words math
math.intervals math.vectors.simd.intrinsics ;
IN: compiler.tree.propagation.simd
\ (simd-v+) { byte-array } "default-output-classes" set-word-prop
\ (simd-v-) { byte-array } "default-output-classes" set-word-prop
\ (simd-v*) { byte-array } "default-output-classes" set-word-prop
\ (simd-v/) { byte-array } "default-output-classes" set-word-prop
\ (simd-vmin) { byte-array } "default-output-classes" set-word-prop
\ (simd-vmax) { byte-array } "default-output-classes" set-word-prop
\ (simd-vsqrt) { byte-array } "default-output-classes" set-word-prop
{
(simd-v+)
(simd-v-)
(simd-v+-)
(simd-v*)
(simd-v/)
(simd-vmin)
(simd-vmax)
(simd-sum)
(simd-vabs)
(simd-vsqrt)
(simd-vbitand)
(simd-vbitor)
(simd-vbitxor)
(simd-broadcast)
(simd-gather-2)
(simd-gather-4)
alien-vector
} [ { byte-array } "default-output-classes" set-word-prop ] each
\ (simd-sum) [
nip dup literal?>> [
literal>> scalar-rep-of {
{ float-rep [ float ] }
{ double-rep [ float ] }
{ int-rep [ integer ] }
} case
] [ drop real ] if
<class-info>
] "outputs" set-word-prop
\ (simd-broadcast) { byte-array } "default-output-classes" set-word-prop
\ (simd-gather-2) { byte-array } "default-output-classes" set-word-prop
\ (simd-gather-4) { byte-array } "default-output-classes" set-word-prop
\ assert-positive [
real [0,inf] <class/interval-info> value-info-intersect
] "outputs" set-word-prop
\ alien-vector { byte-array } "default-output-classes" set-word-prop
! If SIMD is not available, inline alien-vector and set-alien-vector
! to get a speedup
: inline-unless-intrinsic ( word -- )

View File

@ -22,8 +22,6 @@ SINGLETONS: float-rep double-rep ;
! On x86, floating point registers are really vector registers
SINGLETONS:
float-4-rep
double-2-rep
char-16-rep
uchar-16-rep
short-8-rep
@ -31,9 +29,11 @@ ushort-8-rep
int-4-rep
uint-4-rep ;
UNION: vector-rep
SINGLETONS:
float-4-rep
double-2-rep
double-2-rep ;
UNION: int-vector-rep
char-16-rep
uchar-16-rep
short-8-rep
@ -41,6 +41,14 @@ ushort-8-rep
int-4-rep
uint-4-rep ;
UNION: float-vector-rep
float-4-rep
double-2-rep ;
UNION: vector-rep
int-vector-rep
float-vector-rep ;
UNION: representation
any-rep
tagged-rep
@ -76,10 +84,15 @@ M: double-rep rep-size drop 8 ;
M: stack-params rep-size drop cell ;
M: vector-rep rep-size drop 16 ;
GENERIC: rep-component-type ( rep -- n )
! Methods defined in alien.c-types
GENERIC: scalar-rep-of ( rep -- rep' )
M: float-4-rep scalar-rep-of drop float-rep ;
M: double-2-rep scalar-rep-of drop double-rep ;
M: int-vector-rep scalar-rep-of drop int-rep ;
! Mapping from register class to machine registers
HOOK: machine-registers cpu ( -- assoc )
@ -167,15 +180,42 @@ HOOK: %unbox-vector cpu ( dst src rep -- )
HOOK: %broadcast-vector cpu ( dst src rep -- )
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
HOOK: %saturated-add-vector cpu ( dst src1 src2 rep -- )
HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %saturated-sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
HOOK: %saturated-mul-vector cpu ( dst src1 src2 rep -- )
HOOK: %div-vector cpu ( dst src1 src2 rep -- )
HOOK: %min-vector cpu ( dst src1 src2 rep -- )
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
HOOK: %sqrt-vector cpu ( dst src rep -- )
HOOK: %horizontal-add-vector cpu ( dst src rep -- )
HOOK: %abs-vector cpu ( dst src rep -- )
HOOK: %and-vector cpu ( dst src1 src2 rep -- )
HOOK: %or-vector cpu ( dst src1 src2 rep -- )
HOOK: %xor-vector cpu ( dst src1 src2 rep -- )
HOOK: %broadcast-vector-reps cpu ( -- reps )
HOOK: %gather-vector-2-reps cpu ( -- reps )
HOOK: %gather-vector-4-reps cpu ( -- reps )
HOOK: %add-vector-reps cpu ( -- reps )
HOOK: %saturated-add-vector-reps cpu ( -- reps )
HOOK: %add-sub-vector-reps cpu ( -- reps )
HOOK: %sub-vector-reps cpu ( -- reps )
HOOK: %saturated-sub-vector-reps cpu ( -- reps )
HOOK: %mul-vector-reps cpu ( -- reps )
HOOK: %saturated-mul-vector-reps cpu ( -- reps )
HOOK: %div-vector-reps cpu ( -- reps )
HOOK: %min-vector-reps cpu ( -- reps )
HOOK: %max-vector-reps cpu ( -- reps )
HOOK: %sqrt-vector-reps cpu ( -- reps )
HOOK: %horizontal-add-vector-reps cpu ( -- reps )
HOOK: %abs-vector-reps cpu ( -- reps )
HOOK: %and-vector-reps cpu ( -- reps )
HOOK: %or-vector-reps cpu ( -- reps )
HOOK: %xor-vector-reps cpu ( -- reps )
HOOK: %unbox-alien cpu ( dst src -- )
HOOK: %unbox-any-c-ptr cpu ( dst src temp -- )

View File

@ -322,4 +322,4 @@ os windows? [
4 "double" c-type (>>align)
] unless
"cpu.x86.features" require
check-sse

View File

@ -249,4 +249,4 @@ USE: vocabs.loader
{ [ os winnt? ] [ "cpu.x86.64.winnt" require ] }
} cond
"cpu.x86.features" require
check-sse

View File

@ -1,7 +1,7 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: system kernel math math.order math.parser namespaces
alien.c-types alien.syntax combinators locals init io cpu.x86
USING: system kernel memoize math math.order math.parser
namespaces alien.c-types alien.syntax combinators locals init io
compiler compiler.units accessors ;
IN: cpu.x86.features
@ -13,7 +13,18 @@ FUNCTION: longlong read_timestamp_counter ( ) ;
PRIVATE>
ALIAS: sse-version sse_version
MEMO: sse-version ( -- n )
sse_version
"sse-version" get string>number [ min ] when* ;
[ \ sse-version reset-memoized ] "cpu.x86.features" add-init-hook
: sse? ( -- ? ) sse-version 10 >= ;
: sse2? ( -- ? ) sse-version 20 >= ;
: sse3? ( -- ? ) sse-version 30 >= ;
: ssse3? ( -- ? ) sse-version 33 >= ;
: sse4.1? ( -- ? ) sse-version 41 >= ;
: sse4.2? ( -- ? ) sse-version 42 >= ;
: sse-string ( version -- string )
{
@ -32,37 +43,3 @@ M: x86 instruction-count read_timestamp_counter ;
: count-instructions ( quot -- n )
instruction-count [ call ] dip instruction-count swap - ; inline
USING: cpu.x86.features cpu.x86.features.private ;
:: install-sse-check ( version -- )
[
sse-version version < [
"This image was built to use " write
version sse-string write
" but your CPU only supports " write
sse-version sse-string write "." print
"You will need to bootstrap Factor again." print
flush
1 exit
] when
] "cpu.x86" add-init-hook ;
: enable-sse ( version -- )
{
{ 00 [ ] }
{ 10 [ ] }
{ 20 [ enable-sse2 ] }
{ 30 [ enable-sse3 ] }
{ 33 [ enable-sse3 ] }
{ 41 [ enable-sse3 ] }
{ 42 [ enable-sse3 ] }
} case ;
[ { sse_version } compile ] with-optimizer
"Checking for multimedia extensions: " write sse-version
"sse-version" get [ string>number min ] when*
[ sse-string write " detected" print ]
[ install-sse-check ]
[ enable-sse ] tri

View File

@ -4,7 +4,8 @@ USING: accessors assocs alien alien.c-types arrays strings
cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands
cpu.architecture kernel kernel.private math memory namespaces make
sequences words system layouts combinators math.order fry locals
compiler.constants vm byte-arrays
compiler.constants byte-arrays io macros quotations cpu.x86.features
cpu.x86.features.private compiler compiler.units init vm
compiler.cfg.registers
compiler.cfg.instructions
compiler.cfg.intrinsics
@ -248,12 +249,26 @@ M:: x86 %unbox-vector ( dst src rep -- )
dst src byte-array-offset [+]
rep copy-register ;
MACRO: available-reps ( alist -- )
! Each SSE version adds new representations and supports
! all old ones
unzip { } [ append ] accumulate rest swap suffix
[ [ 1quotation ] map ] bi@ zip
reverse [ { } ] suffix
'[ _ cond ] ;
M: x86 %broadcast-vector ( dst src rep -- )
{
{ float-4-rep [ [ MOVSS ] [ drop dup 0 SHUFPS ] 2bi ] }
{ double-2-rep [ [ MOVSD ] [ drop dup UNPCKLPD ] 2bi ] }
} case ;
M: x86 %broadcast-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep } }
} available-reps ;
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
rep {
{
@ -267,6 +282,11 @@ M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
}
} case ;
M: x86 %gather-vector-4-reps
{
{ sse? { float-4-rep } }
} available-reps ;
M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
rep {
{
@ -278,6 +298,11 @@ M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
}
} case ;
M: x86 %gather-vector-2-reps
{
{ sse2? { double-2-rep } }
} available-reps ;
M: x86 %add-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ ADDPS ] }
@ -290,6 +315,36 @@ M: x86 %add-vector ( dst src1 src2 rep -- )
{ uint-4-rep [ PADDD ] }
} case drop ;
M: x86 %add-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %saturated-add-vector ( dst src1 src2 rep -- )
{
{ char-16-rep [ PADDSB ] }
{ uchar-16-rep [ PADDUSB ] }
{ short-8-rep [ PADDSW ] }
{ ushort-8-rep [ PADDUSW ] }
} case drop ;
M: x86 %saturated-add-vector-reps
{
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
} available-reps ;
M: x86 %add-sub-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ ADDSUBPS ] }
{ double-2-rep [ ADDSUBPD ] }
} case drop ;
M: x86 %add-sub-vector-reps
{
{ sse3? { float-4-rep double-2-rep } }
} available-reps ;
M: x86 %sub-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ SUBPS ] }
@ -302,42 +357,183 @@ M: x86 %sub-vector ( dst src1 src2 rep -- )
{ uint-4-rep [ PSUBD ] }
} case drop ;
M: x86 %sub-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %saturated-sub-vector ( dst src1 src2 rep -- )
{
{ char-16-rep [ PSUBSB ] }
{ uchar-16-rep [ PSUBUSB ] }
{ short-8-rep [ PSUBSW ] }
{ ushort-8-rep [ PSUBUSW ] }
} case drop ;
M: x86 %saturated-sub-vector-reps
{
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
} available-reps ;
M: x86 %mul-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ MULPS ] }
{ double-2-rep [ MULPD ] }
{ int-4-rep [ PMULLW ] }
{ short-8-rep [ PMULLW ] }
{ ushort-8-rep [ PMULLW ] }
{ int-4-rep [ PMULLD ] }
{ uint-4-rep [ PMULLD ] }
} case drop ;
M: x86 %mul-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %saturated-mul-vector-reps
! No multiplication with saturation on x86
{ } ;
M: x86 %div-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ DIVPS ] }
{ double-2-rep [ DIVPD ] }
} case drop ;
M: x86 %div-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep } }
} available-reps ;
M: x86 %min-vector ( dst src1 src2 rep -- )
{
{ char-16-rep [ PMINSB ] }
{ uchar-16-rep [ PMINUB ] }
{ short-8-rep [ PMINSW ] }
{ ushort-8-rep [ PMINUW ] }
{ int-4-rep [ PMINSD ] }
{ uint-4-rep [ PMINUD ] }
{ float-4-rep [ MINPS ] }
{ double-2-rep [ MINPD ] }
} case drop ;
M: x86 %min-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %max-vector ( dst src1 src2 rep -- )
{
{ char-16-rep [ PMAXSB ] }
{ uchar-16-rep [ PMAXUB ] }
{ short-8-rep [ PMAXSW ] }
{ ushort-8-rep [ PMAXUW ] }
{ int-4-rep [ PMAXSD ] }
{ uint-4-rep [ PMAXUD ] }
{ float-4-rep [ MAXPS ] }
{ double-2-rep [ MAXPD ] }
} case drop ;
M: x86 %max-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %horizontal-add-vector ( dst src rep -- )
{
{ float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
{ double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
} case ;
M: x86 %horizontal-add-vector-reps
{
{ sse3? { float-4-rep double-2-rep } }
} available-reps ;
M: x86 %abs-vector ( dst src rep -- )
{
{ char-16-rep [ PABSB ] }
{ short-8-rep [ PABSW ] }
{ int-4-rep [ PABSD ] }
} case ;
M: x86 %abs-vector-reps
{
{ ssse3? { char-16-rep short-8-rep int-4-rep } }
} available-reps ;
M: x86 %sqrt-vector ( dst src rep -- )
{
{ float-4-rep [ SQRTPS ] }
{ double-2-rep [ SQRTPD ] }
} case ;
M: x86 %horizontal-add-vector ( dst src rep -- )
M: x86 %sqrt-vector-reps
{
{ float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
{ double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
} case ;
{ sse? { float-4-rep } }
{ sse2? { double-2-rep } }
} available-reps ;
M: x86 %and-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ ANDPS ] }
{ double-2-rep [ ANDPD ] }
{ char-16-rep [ PAND ] }
{ uchar-16-rep [ PAND ] }
{ short-8-rep [ PAND ] }
{ ushort-8-rep [ PAND ] }
{ int-4-rep [ PAND ] }
{ uint-4-rep [ PAND ] }
} case drop ;
M: x86 %and-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %or-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ ORPS ] }
{ double-2-rep [ ORPD ] }
{ char-16-rep [ POR ] }
{ uchar-16-rep [ POR ] }
{ short-8-rep [ POR ] }
{ ushort-8-rep [ POR ] }
{ int-4-rep [ POR ] }
{ uint-4-rep [ POR ] }
} case drop ;
M: x86 %or-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %xor-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ XORPS ] }
{ double-2-rep [ XORPD ] }
{ char-16-rep [ PXOR ] }
{ uchar-16-rep [ PXOR ] }
{ short-8-rep [ PXOR ] }
{ ushort-8-rep [ PXOR ] }
{ int-4-rep [ PXOR ] }
{ uint-4-rep [ PXOR ] }
} case drop ;
M: x86 %xor-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %unbox-alien ( dst src -- )
alien-offset [+] MOV ;
@ -767,15 +963,29 @@ M: x86 small-enough? ( n -- ? )
#! set up by the caller.
stack-frame get total-size>> + stack@ ;
: enable-sse2 ( -- )
enable-float-intrinsics
enable-fsqrt
enable-float-min/max
enable-sse2-simd ;
: enable-sse3 ( -- )
enable-sse2
enable-sse3-simd ;
enable-simd
enable-min/max
enable-fixnum-log2
enable-fixnum-log2
:: install-sse2-check ( -- )
[
sse-version 20 < [
"This image was built to use SSE2 but your CPU does not support it." print
"You will need to bootstrap Factor again." print
flush
1 exit
] when
] "cpu.x86" add-init-hook ;
: enable-sse2 ( version -- )
20 >= [
enable-float-intrinsics
enable-fsqrt
enable-float-min/max
install-sse2-check
] when ;
: check-sse ( -- )
[ { sse_version } compile ] with-optimizer
"Checking for multimedia extensions: " write sse-version
[ sse-string write " detected" print ] [ enable-sse2 ] bi ;

View File

@ -31,9 +31,7 @@ M: x87-env (set-fp-env-register)
set_x87_env ;
M: x86 (fp-env-registers)
sse-version 20 >=
[ <sse-env> <x87-env> 2array ]
[ <x87-env> 1array ] if ;
sse2? [ <sse-env> <x87-env> 2array ] [ <x87-env> 1array ] if ;
CONSTANT: sse-exception-flag-bits HEX: 3f
CONSTANT: sse-exception-flag>bit

View File

@ -1,70 +0,0 @@
USING: cpu.architecture math.vectors.simd
math.vectors.simd.intrinsics accessors math.vectors.simd.alien
kernel classes.struct tools.test compiler sequences byte-arrays
alien math kernel.private specialized-arrays combinators ;
SPECIALIZED-ARRAY: float
IN: math.vectors.simd.alien.tests
! Vector alien intrinsics
[ float-4{ 1 2 3 4 } ] [
[
float-4{ 1 2 3 4 }
underlying>> 0 float-4-rep alien-vector
] compile-call float-4 boa
] unit-test
[ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [
16 [ 1 ] B{ } replicate-as 16 <byte-array>
[
0 [
{ byte-array c-ptr fixnum } declare
float-4-rep set-alien-vector
] compile-call
] keep
] unit-test
[ float-array{ 1 2 3 4 } ] [
[
float-array{ 1 2 3 4 } underlying>>
float-array{ 4 3 2 1 } clone
[ underlying>> 0 float-4-rep set-alien-vector ] keep
] compile-call
] unit-test
STRUCT: simd-struct
{ x float-4 }
{ y double-2 }
{ z double-4 }
{ w float-8 } ;
[ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
[
float-4{ 1 2 3 4 }
double-2{ 2 1 }
double-4{ 4 3 2 1 }
float-8{ 1 2 3 4 5 6 7 8 }
] [
simd-struct <struct>
float-4{ 1 2 3 4 } >>x
double-2{ 2 1 } >>y
double-4{ 4 3 2 1 } >>z
float-8{ 1 2 3 4 5 6 7 8 } >>w
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
] unit-test
[
float-4{ 1 2 3 4 }
double-2{ 2 1 }
double-4{ 4 3 2 1 }
float-8{ 1 2 3 4 5 6 7 8 }
] [
[
simd-struct <struct>
float-4{ 1 2 3 4 } >>x
double-2{ 2 1 } >>y
double-4{ 4 3 2 1 } >>z
float-8{ 1 2 3 4 5 6 7 8 } >>w
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
] compile-call
] unit-test

View File

@ -1,42 +0,0 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: alien accessors alien.c-types byte-arrays compiler.units
cpu.architecture locals kernel math math.vectors.simd
math.vectors.simd.intrinsics ;
IN: math.vectors.simd.alien
:: define-simd-128-type ( class rep -- )
<c-type>
byte-array >>class
class >>boxed-class
[ rep alien-vector class boa ] >>getter
[ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
16 >>size
8 >>align
rep >>rep
class name>> typedef ;
:: define-simd-256-type ( class rep -- )
<c-type>
class >>class
class >>boxed-class
[
[ rep alien-vector ]
[ 16 + >fixnum rep alien-vector ] 2bi
class boa
] >>getter
[
[ [ underlying1>> ] 2dip rep set-alien-vector ]
[ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ]
3bi
] >>setter
32 >>size
8 >>align
rep >>rep
class name>> typedef ;
[
float-4 float-4-rep define-simd-128-type
double-2 double-2-rep define-simd-128-type
float-8 float-4-rep define-simd-256-type
double-4 double-2-rep define-simd-256-type
] with-compilation-unit

View File

@ -1,27 +1,124 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien.c-types byte-arrays classes functors
kernel math parser prettyprint.custom sequences
sequences.private literals ;
USING: accessors alien.c-types assocs byte-arrays classes
effects fry functors generalizations kernel literals locals
math math.functions math.vectors math.vectors.simd.intrinsics
math.vectors.specialization parser prettyprint.custom sequences
sequences.private strings words definitions macros cpu.architecture
namespaces arrays quotations ;
QUALIFIED-WITH: math m
IN: math.vectors.simd.functor
ERROR: bad-length got expected ;
MACRO: simd-boa ( rep class -- simd-array )
[ rep-components ] [ new ] bi* '[ _ _ nsequence ] ;
:: define-boa-custom-inlining ( word rep class -- )
word [
drop
rep rep rep-gather-word supported-simd-op? [
[ rep (simd-boa) class boa ]
] [ word def>> ] if
] "custom-inlining" set-word-prop ;
: simd-with ( rep class x -- simd-array )
[ rep-components ] [ new ] [ '[ _ ] ] tri* swap replicate-as ; inline
:: define-with-custom-inlining ( word rep class -- )
word [
drop
rep \ (simd-broadcast) supported-simd-op? [
[ rep rep-coerce rep (simd-broadcast) class boa ]
] [ word def>> ] if
] "custom-inlining" set-word-prop ;
: boa-effect ( rep n -- effect )
[ rep-components ] dip *
[ CHAR: a + 1string ] map
{ "simd-vector" } <effect> ;
: supported-simd-ops ( assoc rep -- assoc' )
[ simd-ops get ] dip
'[ nip _ swap supported-simd-op? ] assoc-filter
'[ drop _ key? ] assoc-filter ;
ERROR: bad-schema schema ;
: low-level-ops ( box-quot: ( inputs... simd-op -- outputs... ) -- alist )
[ simd-ops get ] dip '[
1quotation
over word-schema _ ?at [ bad-schema ] unless
[ ] 2sequence
] assoc-map ;
:: high-level-ops ( ctor elt-class -- assoc )
! Some SIMD operations are defined in terms of others.
{
{ vneg [ [ dup v- ] keep v- ] }
{ n+v [ [ ctor execute ] dip v+ ] }
{ v+n [ ctor execute v+ ] }
{ n-v [ [ ctor execute ] dip v- ] }
{ v-n [ ctor execute v- ] }
{ n*v [ [ ctor execute ] dip v* ] }
{ v*n [ ctor execute v* ] }
{ n/v [ [ ctor execute ] dip v/ ] }
{ v/n [ ctor execute v/ ] }
{ norm-sq [ dup v. assert-positive ] }
{ norm [ norm-sq sqrt ] }
{ normalize [ dup norm v/n ] }
}
! To compute dot product and distance with integer vectors, we
! have to do things less efficiently, with integer overflow checks,
! in the general case.
elt-class m:float = [
{
{ distance [ v- norm ] }
{ v. [ v* sum ] }
} append
] when ;
:: simd-vector-words ( class ctor rep vv->v v->v v->n -- )
rep rep-component-type c-type-boxed-class :> elt-class
class
elt-class
{
{ { +vector+ +vector+ -> +vector+ } vv->v }
{ { +vector+ -> +vector+ } v->v }
{ { +vector+ -> +scalar+ } v->n }
{ { +vector+ -> +nonnegative+ } v->n }
} low-level-ops
rep supported-simd-ops
ctor elt-class high-level-ops assoc-union
specialize-vector-words ;
:: define-simd-128-type ( class rep -- )
<c-type>
byte-array >>class
class >>boxed-class
[ rep alien-vector class boa ] >>getter
[ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
16 >>size
8 >>align
rep >>rep
class typedef ;
FUNCTOR: define-simd-128 ( T -- )
T-TYPE IS ${T}
N [ 16 T-TYPE heap-size /i ]
N [ 16 T heap-size /i ]
A DEFINES-CLASS ${T}-${N}
A-boa DEFINES ${A}-boa
A-with DEFINES ${A}-with
>A DEFINES >${A}
A{ DEFINES ${A}{
NTH [ T-TYPE dup c-type-getter-boxer array-accessor ]
SET-NTH [ T-TYPE dup c-setter array-accessor ]
NTH [ T dup c-type-getter-boxer array-accessor ]
SET-NTH [ T dup c-setter array-accessor ]
A-rep IS ${A}-rep
A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
WHERE
@ -51,6 +148,8 @@ M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ;
M: A byte-length underlying>> length ; inline
M: A element-type drop A-rep rep-component-type ;
M: A pprint-delims drop \ A{ \ } ;
M: A >pprint-sequence ;
@ -59,6 +158,16 @@ M: A pprint* pprint-object ;
SYNTAX: A{ \ } [ >A ] parse-literal ;
: A-with ( x -- simd-array ) [ A-rep A ] dip simd-with ;
\ A-with \ A-rep \ A define-with-custom-inlining
\ A-boa [ \ A-rep \ A simd-boa ] \ A-rep 1 boa-effect define-declared
\ A-rep rep-gather-word [
\ A-boa \ A-rep \ A define-boa-custom-inlining
] when
INSTANCE: A sequence
<PRIVATE
@ -66,31 +175,62 @@ INSTANCE: A sequence
: A-vv->v-op ( v1 v2 quot -- v3 )
[ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline
: A-v->v-op ( v1 quot -- v2 )
[ underlying>> A-rep ] dip call \ A boa ; inline
: A-v->n-op ( v quot -- n )
[ underlying>> A-rep ] dip call ; inline
\ A \ A-with \ A-rep \ A-vv->v-op \ A-v->v-op \ A-v->n-op simd-vector-words
\ A \ A-rep define-simd-128-type
PRIVATE>
;FUNCTOR
! Synthesize 256-bit vectors from a pair of 128-bit vectors
SLOT: underlying1
SLOT: underlying2
:: define-simd-256-type ( class rep -- )
<c-type>
class >>class
class >>boxed-class
[
[ rep alien-vector ]
[ 16 + >fixnum rep alien-vector ] 2bi
class boa
] >>getter
[
[ [ underlying1>> ] 2dip rep set-alien-vector ]
[ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ]
3bi
] >>setter
32 >>size
8 >>align
rep >>rep
class typedef ;
FUNCTOR: define-simd-256 ( T -- )
T-TYPE IS ${T}
N [ 32 T-TYPE heap-size /i ]
N [ 32 T heap-size /i ]
N/2 [ N 2 / ]
A/2 IS ${T}-${N/2}
A/2-boa IS ${A/2}-boa
A/2-with IS ${A/2}-with
A DEFINES-CLASS ${T}-${N}
A-boa DEFINES ${A}-boa
A-with DEFINES ${A}-with
>A DEFINES >${A}
A{ DEFINES ${A}{
A-deref DEFINES-PRIVATE ${A}-deref
A-rep IS ${A/2}-rep
A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
WHERE
@ -129,6 +269,8 @@ M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ;
M: A byte-length drop 32 ; inline
M: A element-type drop A-rep rep-component-type ;
SYNTAX: A{ \ } [ >A ] parse-literal ;
M: A pprint-delims drop \ A{ \ } ;
@ -137,6 +279,16 @@ M: A >pprint-sequence ;
M: A pprint* pprint-object ;
: A-with ( x -- simd-array )
[ A/2-with ] [ A/2-with ] bi [ underlying>> ] bi@
\ A boa ; inline
: A-boa ( ... -- simd-array )
[ A/2-boa ] N/2 ndip A/2-boa [ underlying>> ] bi@
\ A boa ; inline
\ A-rep 2 boa-effect \ A-boa set-stack-effect
INSTANCE: A sequence
: A-vv->v-op ( v1 v2 quot -- v3 )
@ -144,8 +296,15 @@ INSTANCE: A sequence
[ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
\ A boa ; inline
: A-v->n-op ( v1 combine-quot reduce-quot -- v2 )
[ [ [ underlying1>> ] [ underlying2>> ] bi A-rep ] dip call A-rep ]
dip call ; inline
: A-v->v-op ( v1 combine-quot -- v2 )
[ [ underlying1>> A-rep ] dip call ]
[ [ underlying2>> A-rep ] dip call ] 2bi
\ A boa ; inline
: A-v->n-op ( v1 combine-quot -- v2 )
[ [ underlying1>> ] [ underlying2>> ] bi A-rep (simd-v+) A-rep ] dip call ; inline
\ A \ A-with \ A-rep \ A-vv->v-op \ A-v->v-op \ A-v->n-op simd-vector-words
\ A \ A-rep define-simd-256-type
;FUNCTOR

View File

@ -0,0 +1,18 @@
IN: math.vectors.simd.intrinsics.tests
USING: math.vectors.simd.intrinsics cpu.architecture tools.test ;
[ 16 ] [ uchar-16-rep rep-components ] unit-test
[ 16 ] [ char-16-rep rep-components ] unit-test
[ 8 ] [ ushort-8-rep rep-components ] unit-test
[ 8 ] [ short-8-rep rep-components ] unit-test
[ 4 ] [ uint-4-rep rep-components ] unit-test
[ 4 ] [ int-4-rep rep-components ] unit-test
[ 4 ] [ float-4-rep rep-components ] unit-test
[ 2 ] [ double-2-rep rep-components ] unit-test
{ 4 1 } [ uint-4-rep (simd-boa) ] must-infer-as
{ 4 1 } [ int-4-rep (simd-boa) ] must-infer-as
{ 4 1 } [ float-4-rep (simd-boa) ] must-infer-as
{ 2 1 } [ double-2-rep (simd-boa) ] must-infer-as

View File

@ -1,18 +1,48 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel alien alien.data cpu.architecture libc ;
USING: alien alien.c-types alien.data assocs combinators
cpu.architecture fry generalizations kernel libc macros math
sequences effects accessors namespaces lexer parser vocabs.parser
words arrays math.vectors ;
IN: math.vectors.simd.intrinsics
ERROR: bad-simd-call ;
: (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-vmin) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-vmax) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-vsqrt) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-sum) ( v1 rep -- v2 ) bad-simd-call ;
<<
: simd-effect ( word -- effect )
stack-effect [ in>> "rep" suffix ] [ out>> ] bi <effect> ;
SYMBOL: simd-ops
V{ } clone simd-ops set-global
SYNTAX: SIMD-OP:
scan-word dup name>> "(simd-" ")" surround create-in
[ nip [ bad-simd-call ] define ]
[ [ simd-effect ] dip set-stack-effect ]
[ 2array simd-ops get push ]
2tri ;
>>
SIMD-OP: v+
SIMD-OP: v-
SIMD-OP: v+-
SIMD-OP: vs+
SIMD-OP: vs-
SIMD-OP: vs*
SIMD-OP: v*
SIMD-OP: v/
SIMD-OP: vmin
SIMD-OP: vmax
SIMD-OP: vsqrt
SIMD-OP: sum
SIMD-OP: vabs
SIMD-OP: vbitand
SIMD-OP: vbitor
SIMD-OP: vbitxor
: (simd-broadcast) ( x rep -- v ) bad-simd-call ;
: (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
: (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ;
@ -26,3 +56,61 @@ ERROR: bad-simd-call ;
! Inefficient version for when intrinsics are missing
[ swap <displaced-alien> swap ] dip rep-size memcpy ;
<<
: rep-components ( rep -- n )
16 swap rep-component-type heap-size /i ; foldable
: rep-coercer ( rep -- quot )
{
{ [ dup int-vector-rep? ] [ [ >fixnum ] ] }
{ [ dup float-vector-rep? ] [ [ >float ] ] }
} cond nip ; foldable
: rep-coerce ( value rep -- value' )
rep-coercer call( value -- value' ) ; inline
CONSTANT: rep-gather-words
{
{ 2 (simd-gather-2) }
{ 4 (simd-gather-4) }
}
: rep-gather-word ( rep -- word )
rep-components rep-gather-words at ;
>>
MACRO: (simd-boa) ( rep -- quot )
{
[ rep-coercer ]
[ rep-components ]
[ ]
[ rep-gather-word ]
} cleave
'[ _ _ napply _ _ execute ] ;
GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
M: vector-rep supported-simd-op?
{
{ \ (simd-v+) [ %add-vector-reps ] }
{ \ (simd-vs+) [ %saturated-add-vector-reps ] }
{ \ (simd-v+-) [ %add-sub-vector-reps ] }
{ \ (simd-v-) [ %sub-vector-reps ] }
{ \ (simd-vs-) [ %saturated-sub-vector-reps ] }
{ \ (simd-v*) [ %mul-vector-reps ] }
{ \ (simd-vs*) [ %saturated-mul-vector-reps ] }
{ \ (simd-v/) [ %div-vector-reps ] }
{ \ (simd-vmin) [ %min-vector-reps ] }
{ \ (simd-vmax) [ %max-vector-reps ] }
{ \ (simd-vsqrt) [ %sqrt-vector-reps ] }
{ \ (simd-sum) [ %horizontal-add-vector-reps ] }
{ \ (simd-vabs) [ %abs-vector-reps ] }
{ \ (simd-vbitand) [ %and-vector-reps ] }
{ \ (simd-vbitor) [ %or-vector-reps ] }
{ \ (simd-vbitxor) [ %xor-vector-reps ] }
{ \ (simd-broadcast) [ %broadcast-vector-reps ] }
{ \ (simd-gather-2) [ %gather-vector-2-reps ] }
{ \ (simd-gather-4) [ %gather-vector-4-reps ] }
} case member? ;

View File

@ -1,5 +1,5 @@
USING: help.markup help.syntax sequences math math.vectors
kernel.private classes.tuple.private
classes.tuple.private
math.vectors.simd.intrinsics cpu.architecture ;
IN: math.vectors.simd
@ -17,23 +17,49 @@ $nl
"There should never be any reason to use " { $link "math.vectors.simd.intrinsics" } " directly, but they too have a straightforward, but lower-level, interface." ;
ARTICLE: "math.vectors.simd.support" "Supported SIMD instruction sets and operations"
"At present, the SIMD support makes use of SSE2 and a few SSE3 instructions on x86 CPUs."
"At present, the SIMD support makes use of a subset of SSE up to SSE4.1. The subset used depends on the current CPU type."
$nl
"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } ". If SSE3 is not available, software fallbacks are used for " { $link sum } " and related words, decreasing performance."
"SSE1 only supports single-precision SIMD (" { $snippet "float-4" } " and " { $snippet "float-8" } ")."
$nl
"On PowerPC, or older x86 chips without SSE2, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance."
"SSE2 introduces double-precision SIMD (" { $snippet "double-2" } " and " { $snippet "double-4" } ") and integer SIMD (all types). Integer SIMD in missing a few features, in particular the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "."
$nl
"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "."
$nl
"SSSE3 introduces " { $link vabs } " for " { $snippet "char-16" } ", " { $snippet "short-8" } " and " { $snippet "int-4" } "."
$nl
"SSE4.1 introduces " { $link vmin } " and " { $link vmax } " for all remaining integer types."
$nl
"On PowerPC, or older x86 chips without SSE, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance."
$nl
"The primities in the " { $vocab-link "math.vectors.simd.intrinsics" } " vocabulary do not have software fallbacks, but they should not be called directly in any case." ;
ARTICLE: "math.vectors.simd.types" "SIMD vector types"
"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type such as " { $snippet "float" } " or " { $snippet "double" } ", and " { $snippet "count" } " is a vector dimension, such as 2, 4, or 8."
"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type and " { $snippet "count" } " is a vector dimension."
$nl
"The following vector types are defined:"
{ $subsection float-4 }
{ $subsection double-2 }
{ $subsection float-8 }
{ $subsection double-4 }
"For each vector type, several words are defined:"
"To use a SIMD vector type, a parsing word is used to generate the relevant code and bring it into the vocabulary search path; this is the same idea as with " { $link "specialized-arrays" } ":"
{ $subsection POSTPONE: SIMD: }
"The following vector types are supported:"
{ $code
"char-16"
"uchar-16"
"char-32"
"uchar-32"
"short-8"
"ushort-8"
"short-16"
"ushort-16"
"int-4"
"uint-4"
"int-8"
"uint-8"
"float-4"
"float-8"
"double-2"
"double-4"
} ;
ARTICLE: "math.vectors.simd.words" "SIMD vector words"
"For each SIMD vector type, several words are defined:"
{ $table
{ "Word" "Stack effect" "Description" }
{ { $snippet "type-with" } { $snippet "( x -- simd-array )" } "creates a new instance where all components are set to a single scalar" }
@ -41,24 +67,6 @@ $nl
{ { $snippet ">type" } { $snippet "( seq -- simd-array )" } "creates a new instance initialized with the elements of an existing sequence, which must have the correct length" }
{ { $snippet "type{" } { $snippet "type{ elements... }" } "parsing word defining literal syntax for an SIMD vector; the correct number of elements must be given" }
}
"The " { $link float-4 } " and " { $link double-2 } " types correspond to 128-bit vector registers. The " { $link float-8 } " and " { $link double-4 } " types are not directly supported in hardware, and instead unbox to a pair of 128-bit vector registers."
$nl
"Operations on " { $link float-4 } " instances:"
{ $subsection float-4-with }
{ $subsection float-4-boa }
{ $subsection POSTPONE: float-4{ }
"Operations on " { $link double-2 } " instances:"
{ $subsection double-2-with }
{ $subsection double-2-boa }
{ $subsection POSTPONE: double-2{ }
"Operations on " { $link float-8 } " instances:"
{ $subsection float-8-with }
{ $subsection float-8-boa }
{ $subsection POSTPONE: float-8{ }
"Operations on " { $link double-4 } " instances:"
{ $subsection double-4-with }
{ $subsection double-4-boa }
{ $subsection POSTPONE: double-4{ }
"To actually perform vector arithmetic on SIMD vectors, use " { $link "math-vectors" } " words."
{ $see-also "c-types-specs" } ;
@ -84,6 +92,8 @@ SYMBOLS: x y ;
{ $code
"""USING: compiler.tree.debugger kernel.private
math.vectors math.vectors.simd ;
SIMD: float-4
IN: simd-demo
: interpolate ( v a b -- w )
{ float-4 float-4 float-4 } declare
@ -96,6 +106,8 @@ $nl
{ $code
"""USING: compiler.tree.debugger hints
math.vectors math.vectors.simd ;
SIMD: float-4
IN: simd-demo
: interpolate ( v a b -- w )
[ v* ] [ [ 1.0 ] dip n-v v* ] bi-curry* bi v+ ;
@ -110,6 +122,7 @@ $nl
"In the " { $snippet "interpolate" } " word, there is still a call to the " { $link <tuple-boa> } " primitive, because the return value at the end is being boxed on the heap. In the next example, no memory allocation occurs at all because the SIMD vectors are stored inside a struct class (see " { $link "classes.struct" } "); also note the use of inlining:"
{ $code
"""USING: compiler.tree.debugger math.vectors math.vectors.simd ;
SIMD: float-4
IN: simd-demo
STRUCT: actor
@ -150,106 +163,37 @@ ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives"
}
"The compiler converts " { $link "math-vectors" } " into SIMD primitives automatically in cases where it is safe; this means that the input types are known to be SIMD vectors, and the CPU supports SIMD."
$nl
"It is best to avoid calling these primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "."
{ $subsection (simd-v+) }
{ $subsection (simd-v-) }
{ $subsection (simd-v/) }
{ $subsection (simd-vmin) }
{ $subsection (simd-vmax) }
{ $subsection (simd-vsqrt) }
{ $subsection (simd-sum) }
{ $subsection (simd-broadcast) }
{ $subsection (simd-gather-2) }
{ $subsection (simd-gather-4) }
"It is best to avoid calling SIMD primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "."
$nl
"There are two primitives which are used to implement accessing SIMD vector fields of " { $link "classes.struct" } ":"
{ $subsection alien-vector }
{ $subsection set-alien-vector }
"For the most part, the above primitives correspond directly to vector arithmetic words. They take a representation parameter, which is one of the singleton members of the " { $link vector-rep } " union in the " { $vocab-link "cpu.architecture" } " vocabulary." ;
ARTICLE: "math.vectors.simd.alien" "SIMD data in struct classes"
"Struct classes may contain fields which store SIMD data; use one of the following C type names:"
{ $code
"""float-4
double-2
float-8
double-4""" }
"Passing SIMD data as function parameters is not yet supported." ;
"Struct classes may contain fields which store SIMD data; for each SIMD vector type listed in " { $snippet "math.vectors.simd.types" } " there is a C type with the same name."
$nl
"Only SIMD struct fields are allowed at the moment; passing SIMD data as function parameters is not yet supported." ;
ARTICLE: "math.vectors.simd.accuracy" "Numerical accuracy of SIMD primitives"
"No guarantees are made that " { $vocab-link "math.vectors.simd" } " words will give identical results on different SSE versions, or between the hardware intrinsics and the software fallbacks."
$nl
"In particular, horizontal operations on " { $snippet "float-4" } " and " { $snippet "float-8" } " are affected by this. They are computed with lower precision in intrinsics than the software fallback. Horizontal opeartions include anything involving adding together the components of a vector, such as " { $link sum } " or " { $link normalize } "." ;
ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
"The " { $vocab-link "math.vectors.simd" } " vocabulary extends the " { $vocab-link "math.vectors" } " vocabulary to support efficient vector arithmetic on small, fixed-size vectors."
{ $subsection "math.vectors.simd.intro" }
{ $subsection "math.vectors.simd.types" }
{ $subsection "math.vectors.simd.words" }
{ $subsection "math.vectors.simd.support" }
{ $subsection "math.vectors.simd.accuracy" }
{ $subsection "math.vectors.simd.efficiency" }
{ $subsection "math.vectors.simd.alien" }
{ $subsection "math.vectors.simd.intrinsics" } ;
! ! ! float-4
HELP: float-4
{ $class-description "A sequence of four single-precision floating point values. New instances can be created with " { $link float-4-with } " or " { $link float-4-boa } "." } ;
HELP: float-4-with
{ $values { "x" float } { "simd-array" float-4 } }
{ $description "Creates a new vector with all four components equal to a scalar." } ;
HELP: float-4-boa
{ $values { "a" float } { "b" float } { "c" float } { "d" float } { "simd-array" float-4 } }
{ $description "Creates a new vector from four scalar components." } ;
HELP: float-4{
{ $syntax "float-4{ a b c d }" }
{ $description "Literal syntax for a " { $link float-4 } "." } ;
! ! ! double-2
HELP: double-2
{ $class-description "A sequence of two double-precision floating point values. New instances can be created with " { $link double-2-with } " or " { $link double-2-boa } "." } ;
HELP: double-2-with
{ $values { "x" float } { "simd-array" double-2 } }
{ $description "Creates a new vector with both components equal to a scalar." } ;
HELP: double-2-boa
{ $values { "a" float } { "b" float } { "simd-array" double-2 } }
{ $description "Creates a new vector from two scalar components." } ;
HELP: double-2{
{ $syntax "double-2{ a b }" }
{ $description "Literal syntax for a " { $link double-2 } "." } ;
! ! ! float-8
HELP: float-8
{ $class-description "A sequence of eight single-precision floating point values. New instances can be created with " { $link float-8-with } " or " { $link float-8-boa } "." } ;
HELP: float-8-with
{ $values { "x" float } { "simd-array" float-8 } }
{ $description "Creates a new vector with all eight components equal to a scalar." } ;
HELP: float-8-boa
{ $values { "a" float } { "b" float } { "c" float } { "d" float } { "e" float } { "f" float } { "g" float } { "h" float } { "simd-array" float-8 } }
{ $description "Creates a new vector from eight scalar components." } ;
HELP: float-8{
{ $syntax "float-8{ a b c d e f g h }" }
{ $description "Literal syntax for a " { $link float-8 } "." } ;
! ! ! double-4
HELP: double-4
{ $class-description "A sequence of four double-precision floating point values. New instances can be created with " { $link double-4-with } " or " { $link double-4-boa } "." } ;
HELP: double-4-with
{ $values { "x" float } { "simd-array" double-4 } }
{ $description "Creates a new vector with all four components equal to a scalar." } ;
HELP: double-4-boa
{ $values { "a" float } { "b" float } { "c" float } { "d" float } { "simd-array" double-4 } }
{ $description "Creates a new vector from four scalar components." } ;
HELP: double-4{
{ $syntax "double-4{ a b c d }" }
{ $description "Literal syntax for a " { $link double-4 } "." } ;
HELP: SIMD:
{ $syntax "SIMD: type-length" }
{ $values { "type" "a scalar C type" } { "length" "a vector dimension" } }
{ $description "Brings a SIMD array for holding " { $snippet "length" } " values of " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
ABOUT: "math.vectors.simd"

View File

@ -1,8 +1,30 @@
USING: accessors arrays classes compiler compiler.tree.debugger
effects fry io kernel kernel.private math math.functions
math.private math.vectors math.vectors.simd
math.vectors.simd.private prettyprint random sequences system
tools.test vocabs assocs compiler.cfg.debugger words
locals math.vectors.specialization combinators cpu.architecture
math.vectors.simd.intrinsics namespaces byte-arrays alien
specialized-arrays classes.struct ;
FROM: alien.c-types => c-type-boxed-class ;
SPECIALIZED-ARRAY: float
SIMD: char-16
SIMD: uchar-16
SIMD: char-32
SIMD: uchar-32
SIMD: short-8
SIMD: ushort-8
SIMD: short-16
SIMD: ushort-16
SIMD: int-4
SIMD: uint-4
SIMD: int-8
SIMD: uint-8
SIMD: float-4
SIMD: float-8
SIMD: double-2
SIMD: double-4
IN: math.vectors.simd.tests
USING: math math.vectors.simd math.vectors.simd.private
math.vectors math.functions math.private kernel.private compiler
sequences tools.test compiler.tree.debugger accessors kernel
system ;
[ float-4{ 0 0 0 0 } ] [ float-4 new ] unit-test
@ -12,344 +34,6 @@ system ;
[ V{ float } ] [ [ { float-4 } declare norm ] final-classes ] unit-test
[ float-4{ 12 12 12 12 } ] [
12 [ float-4-with ] compile-call
] unit-test
[ float-4{ 1 2 3 4 } ] [
1 2 3 4 [ float-4-boa ] compile-call
] unit-test
[ float-4{ 11 22 33 44 } ] [
float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
[ { float-4 float-4 } declare v+ ] compile-call
] unit-test
[ float-4{ -9 -18 -27 -36 } ] [
float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
[ { float-4 float-4 } declare v- ] compile-call
] unit-test
[ float-4{ 10 40 90 160 } ] [
float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
[ { float-4 float-4 } declare v* ] compile-call
] unit-test
[ float-4{ 10 100 1000 10000 } ] [
float-4{ 100 2000 30000 400000 } float-4{ 10 20 30 40 }
[ { float-4 float-4 } declare v/ ] compile-call
] unit-test
[ float-4{ -10 -20 -30 -40 } ] [
float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 }
[ { float-4 float-4 } declare vmin ] compile-call
] unit-test
[ float-4{ 10 20 30 40 } ] [
float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 }
[ { float-4 float-4 } declare vmax ] compile-call
] unit-test
[ 10.0 ] [
float-4{ 1 2 3 4 }
[ { float-4 } declare sum ] compile-call
] unit-test
[ 13.0 ] [
float-4{ 1 2 3 4 }
[ { float-4 } declare sum 3.0 + ] compile-call
] unit-test
[ 8.0 ] [
float-4{ 1 2 3 4 } float-4{ 2 0 2 0 }
[ { float-4 float-4 } declare v. ] compile-call
] unit-test
[ float-4{ 5 10 15 20 } ] [
5.0 float-4{ 1 2 3 4 }
[ { float float-4 } declare n*v ] compile-call
] unit-test
[ float-4{ 5 10 15 20 } ] [
float-4{ 1 2 3 4 } 5.0
[ { float float-4 } declare v*n ] compile-call
] unit-test
[ float-4{ 10 5 2 5 } ] [
10.0 float-4{ 1 2 5 2 }
[ { float float-4 } declare n/v ] compile-call
] unit-test
[ float-4{ 0.5 1 1.5 2 } ] [
float-4{ 1 2 3 4 } 2
[ { float float-4 } declare v/n ] compile-call
] unit-test
[ float-4{ 1 0 0 0 } ] [
float-4{ 10 0 0 0 }
[ { float-4 } declare normalize ] compile-call
] unit-test
[ 30.0 ] [
float-4{ 1 2 3 4 }
[ { float-4 } declare norm-sq ] compile-call
] unit-test
[ t ] [
float-4{ 1 0 0 0 }
float-4{ 0 1 0 0 }
[ { float-4 float-4 } declare distance ] compile-call
2 sqrt 1.0e-6 ~
] unit-test
[ double-2{ 12 12 } ] [
12 [ double-2-with ] compile-call
] unit-test
[ double-2{ 1 2 } ] [
1 2 [ double-2-boa ] compile-call
] unit-test
[ double-2{ 11 22 } ] [
double-2{ 1 2 } double-2{ 10 20 }
[ { double-2 double-2 } declare v+ ] compile-call
] unit-test
[ double-2{ -9 -18 } ] [
double-2{ 1 2 } double-2{ 10 20 }
[ { double-2 double-2 } declare v- ] compile-call
] unit-test
[ double-2{ 10 40 } ] [
double-2{ 1 2 } double-2{ 10 20 }
[ { double-2 double-2 } declare v* ] compile-call
] unit-test
[ double-2{ 10 100 } ] [
double-2{ 100 2000 } double-2{ 10 20 }
[ { double-2 double-2 } declare v/ ] compile-call
] unit-test
[ double-2{ -10 -20 } ] [
double-2{ -10 20 } double-2{ 10 -20 }
[ { double-2 double-2 } declare vmin ] compile-call
] unit-test
[ double-2{ 10 20 } ] [
double-2{ -10 20 } double-2{ 10 -20 }
[ { double-2 double-2 } declare vmax ] compile-call
] unit-test
[ 3.0 ] [
double-2{ 1 2 }
[ { double-2 } declare sum ] compile-call
] unit-test
[ 7.0 ] [
double-2{ 1 2 }
[ { double-2 } declare sum 4.0 + ] compile-call
] unit-test
[ 16.0 ] [
double-2{ 1 2 } double-2{ 2 7 }
[ { double-2 double-2 } declare v. ] compile-call
] unit-test
[ double-2{ 5 10 } ] [
5.0 double-2{ 1 2 }
[ { float double-2 } declare n*v ] compile-call
] unit-test
[ double-2{ 5 10 } ] [
double-2{ 1 2 } 5.0
[ { float double-2 } declare v*n ] compile-call
] unit-test
[ double-2{ 10 5 } ] [
10.0 double-2{ 1 2 }
[ { float double-2 } declare n/v ] compile-call
] unit-test
[ double-2{ 0.5 1 } ] [
double-2{ 1 2 } 2
[ { float double-2 } declare v/n ] compile-call
] unit-test
[ double-2{ 0 0 } ] [ double-2 new ] unit-test
[ double-2{ 1 0 } ] [
double-2{ 10 0 }
[ { double-2 } declare normalize ] compile-call
] unit-test
[ 5.0 ] [
double-2{ 1 2 }
[ { double-2 } declare norm-sq ] compile-call
] unit-test
[ t ] [
double-2{ 1 0 }
double-2{ 0 1 }
[ { double-2 double-2 } declare distance ] compile-call
2 sqrt 1.0e-6 ~
] unit-test
[ double-4{ 0 0 0 0 } ] [ double-4 new ] unit-test
[ double-4{ 1 2 3 4 } ] [
1 2 3 4 double-4-boa
] unit-test
[ double-4{ 1 1 1 1 } ] [
1 double-4-with
] unit-test
[ double-4{ 0 1 2 3 } ] [
1 double-4-with [ * ] map-index
] unit-test
[ V{ float } ] [ [ { double-4 } declare norm-sq ] final-classes ] unit-test
[ V{ float } ] [ [ { double-4 } declare norm ] final-classes ] unit-test
[ double-4{ 12 12 12 12 } ] [
12 [ double-4-with ] compile-call
] unit-test
[ double-4{ 1 2 3 4 } ] [
1 2 3 4 [ double-4-boa ] compile-call
] unit-test
[ double-4{ 11 22 33 44 } ] [
double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
[ { double-4 double-4 } declare v+ ] compile-call
] unit-test
[ double-4{ -9 -18 -27 -36 } ] [
double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
[ { double-4 double-4 } declare v- ] compile-call
] unit-test
[ double-4{ 10 40 90 160 } ] [
double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
[ { double-4 double-4 } declare v* ] compile-call
] unit-test
[ double-4{ 10 100 1000 10000 } ] [
double-4{ 100 2000 30000 400000 } double-4{ 10 20 30 40 }
[ { double-4 double-4 } declare v/ ] compile-call
] unit-test
[ double-4{ -10 -20 -30 -40 } ] [
double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 }
[ { double-4 double-4 } declare vmin ] compile-call
] unit-test
[ double-4{ 10 20 30 40 } ] [
double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 }
[ { double-4 double-4 } declare vmax ] compile-call
] unit-test
[ 10.0 ] [
double-4{ 1 2 3 4 }
[ { double-4 } declare sum ] compile-call
] unit-test
[ 13.0 ] [
double-4{ 1 2 3 4 }
[ { double-4 } declare sum 3.0 + ] compile-call
] unit-test
[ 8.0 ] [
double-4{ 1 2 3 4 } double-4{ 2 0 2 0 }
[ { double-4 double-4 } declare v. ] compile-call
] unit-test
[ double-4{ 5 10 15 20 } ] [
5.0 double-4{ 1 2 3 4 }
[ { float double-4 } declare n*v ] compile-call
] unit-test
[ double-4{ 5 10 15 20 } ] [
double-4{ 1 2 3 4 } 5.0
[ { float double-4 } declare v*n ] compile-call
] unit-test
[ double-4{ 10 5 2 5 } ] [
10.0 double-4{ 1 2 5 2 }
[ { float double-4 } declare n/v ] compile-call
] unit-test
[ double-4{ 0.5 1 1.5 2 } ] [
double-4{ 1 2 3 4 } 2
[ { float double-4 } declare v/n ] compile-call
] unit-test
[ double-4{ 1 0 0 0 } ] [
double-4{ 10 0 0 0 }
[ { double-4 } declare normalize ] compile-call
] unit-test
[ 30.0 ] [
double-4{ 1 2 3 4 }
[ { double-4 } declare norm-sq ] compile-call
] unit-test
[ t ] [
double-4{ 1 0 0 0 }
double-4{ 0 1 0 0 }
[ { double-4 double-4 } declare distance ] compile-call
2 sqrt 1.0e-6 ~
] unit-test
[ float-8{ 0 0 0 0 0 0 0 0 } ] [ float-8 new ] unit-test
[ float-8{ 0 0 0 0 0 0 0 0 } ] [ [ float-8 new ] compile-call ] unit-test
[ float-8{ 1 1 1 1 1 1 1 1 } ] [ 1 float-8-with ] unit-test
[ float-8{ 1 1 1 1 1 1 1 1 } ] [ [ 1 float-8-with ] compile-call ] unit-test
[ float-8{ 1 2 3 4 5 6 7 8 } ] [ 1 2 3 4 5 6 7 8 float-8-boa ] unit-test
[ float-8{ 1 2 3 4 5 6 7 8 } ] [ [ 1 2 3 4 5 6 7 8 float-8-boa ] compile-call ] unit-test
[ float-8{ 3 6 9 12 15 18 21 24 } ] [
float-8{ 1 2 3 4 5 6 7 8 }
float-8{ 2 4 6 8 10 12 14 16 }
[ { float-8 float-8 } declare v+ ] compile-call
] unit-test
[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
float-8{ 1 2 3 4 5 6 7 8 }
float-8{ 2 4 6 8 10 12 14 16 }
[ { float-8 float-8 } declare v- ] compile-call
] unit-test
[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
-0.5
float-8{ 2 4 6 8 10 12 14 16 }
[ { float float-8 } declare n*v ] compile-call
] unit-test
[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
float-8{ 2 4 6 8 10 12 14 16 }
-0.5
[ { float-8 float } declare v*n ] compile-call
] unit-test
[ float-8{ 256 128 64 32 16 8 4 2 } ] [
256.0
float-8{ 1 2 4 8 16 32 64 128 }
[ { float float-8 } declare n/v ] compile-call
] unit-test
[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
float-8{ 2 4 6 8 10 12 14 16 }
-2.0
[ { float-8 float } declare v/n ] compile-call
] unit-test
! Test puns; only on x86
cpu x86? [
[ double-2{ 4 1024 } ] [
@ -362,3 +46,201 @@ cpu x86? [
[ { double-2 double-2 } declare v+ underlying>> 3.0 float* ] compile-call
] unit-test
] when
! Fuzz testing
CONSTANT: simd-classes
{
char-16
uchar-16
char-32
uchar-32
short-8
ushort-8
short-16
ushort-16
int-4
uint-4
int-8
uint-8
float-4
float-8
double-2
double-4
}
: with-ctors ( -- seq )
simd-classes [ [ name>> "-with" append ] [ vocabulary>> ] bi lookup ] map ;
: boa-ctors ( -- seq )
simd-classes [ [ name>> "-boa" append ] [ vocabulary>> ] bi lookup ] map ;
: check-optimizer ( seq inputs quot eq-quot -- )
'[
@
[ "print-mr" get [ nip test-mr mr. ] [ 2drop ] if ]
[ [ call ] dip call ]
[ [ call ] dip compile-call ] 2tri @ not
] filter ; inline
"== Checking -new constructors" print
[ { } ] [
simd-classes [ [ [ ] ] dip '[ _ new ] ] [ = ] check-optimizer
] unit-test
[ { } ] [
simd-classes [ '[ _ new ] compile-call [ zero? ] all? not ] filter
] unit-test
"== Checking -with constructors" print
[ { } ] [
with-ctors [
[ 1000 random '[ _ ] ] dip '[ { fixnum } declare _ execute ]
] [ = ] check-optimizer
] unit-test
"== Checking -boa constructors" print
[ { } ] [
boa-ctors [
dup stack-effect in>> length
[ nip [ 1000 random ] [ ] replicate-as ]
[ fixnum <array> swap '[ _ declare _ execute ] ]
2bi
] [ = ] check-optimizer
] unit-test
"== Checking vector operations" print
: random-vector ( class -- vec )
new [ drop 1000 random ] map ;
:: check-vector-op ( word inputs class elt-class -- inputs quot )
inputs [
[
{
{ +vector+ [ class random-vector ] }
{ +scalar+ [ 1000 random elt-class float = [ >float ] when ] }
} case
] [ ] map-as
] [
[
{
{ +vector+ [ class ] }
{ +scalar+ [ elt-class ] }
} case
] map
] bi
word '[ _ declare _ execute ] ;
: remove-float-words ( alist -- alist' )
[ drop { vsqrt n/v v/n v/ normalize } member? not ] assoc-filter ;
: ops-to-check ( elt-class -- alist )
[ vector-words >alist ] dip
float = [ remove-float-words ] unless ;
: check-vector-ops ( class elt-class compare-quot -- )
[
[ nip ops-to-check ] 2keep
'[ first2 inputs _ _ check-vector-op ]
] dip check-optimizer ; inline
: approx= ( x y -- ? )
{
{ [ 2dup [ float? ] both? ] [ -1.e8 ~ ] }
{ [ 2dup [ sequence? ] both? ] [
[
{
{ [ 2dup [ fp-nan? ] both? ] [ 2drop t ] }
{ [ 2dup [ fp-nan? ] either? not ] [ -1.e8 ~ ] }
} cond
] 2all?
] }
} cond ;
: simd-classes&reps ( -- alist )
simd-classes [
{
{ [ dup name>> "float" head? ] [ float [ approx= ] ] }
{ [ dup name>> "double" tail? ] [ float [ = ] ] }
[ fixnum [ = ] ]
} cond 3array
] map ;
simd-classes&reps [
[ [ { } ] ] dip first3 '[ _ _ _ check-vector-ops ] unit-test
] each
! Other regressions
[ 8000000 ] [
int-8{ 1000 1000 1000 1000 1000 1000 1000 1000 }
[ { int-8 } declare dup [ * ] [ + ] 2map-reduce ] compile-call
] unit-test
! Vector alien intrinsics
[ float-4{ 1 2 3 4 } ] [
[
float-4{ 1 2 3 4 }
underlying>> 0 float-4-rep alien-vector
] compile-call float-4 boa
] unit-test
[ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [
16 [ 1 ] B{ } replicate-as 16 <byte-array>
[
0 [
{ byte-array c-ptr fixnum } declare
float-4-rep set-alien-vector
] compile-call
] keep
] unit-test
[ float-array{ 1 2 3 4 } ] [
[
float-array{ 1 2 3 4 } underlying>>
float-array{ 4 3 2 1 } clone
[ underlying>> 0 float-4-rep set-alien-vector ] keep
] compile-call
] unit-test
STRUCT: simd-struct
{ x float-4 }
{ y double-2 }
{ z double-4 }
{ w float-8 } ;
[ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
[
float-4{ 1 2 3 4 }
double-2{ 2 1 }
double-4{ 4 3 2 1 }
float-8{ 1 2 3 4 5 6 7 8 }
] [
simd-struct <struct>
float-4{ 1 2 3 4 } >>x
double-2{ 2 1 } >>y
double-4{ 4 3 2 1 } >>z
float-8{ 1 2 3 4 5 6 7 8 } >>w
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
] unit-test
[
float-4{ 1 2 3 4 }
double-2{ 2 1 }
double-4{ 4 3 2 1 }
float-8{ 1 2 3 4 5 6 7 8 }
] [
[
simd-struct <struct>
float-4{ 1 2 3 4 } >>x
double-2{ 2 1 } >>y
double-4{ 4 3 2 1 } >>z
float-8{ 1 2 3 4 5 6 7 8 } >>w
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
] compile-call
] unit-test
[ ] [ char-16 new 1array stack. ] unit-test

View File

@ -1,185 +1,32 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien.c-types byte-arrays cpu.architecture
kernel math math.functions math.vectors
math.vectors.simd.functor math.vectors.simd.intrinsics
math.vectors.specialization parser prettyprint.custom sequences
sequences.private locals assocs words fry ;
FROM: alien.c-types => float ;
QUALIFIED-WITH: math m
USING: alien.c-types combinators fry kernel lexer math math.parser
math.vectors.simd.functor sequences splitting vocabs.generated
vocabs.loader vocabs.parser words ;
IN: math.vectors.simd
<<
DEFER: float-4
DEFER: double-2
DEFER: float-8
DEFER: double-4
"double" define-simd-128
"float" define-simd-128
"double" define-simd-256
"float" define-simd-256
>>
: float-4-with ( x -- simd-array )
[ 4 ] dip >float '[ _ ] \ float-4 new replicate-as ;
: float-4-boa ( a b c d -- simd-array )
\ float-4 new 4sequence ;
: double-2-with ( x -- simd-array )
[ 2 ] dip >float '[ _ ] \ double-2 new replicate-as ;
: double-2-boa ( a b -- simd-array )
\ double-2 new 2sequence ;
! More efficient expansions for the above, used when SIMD is
! actually available.
<<
\ float-4-with [
drop
\ (simd-broadcast) "intrinsic" word-prop [
[ >float float-4-rep (simd-broadcast) \ float-4 boa ]
] [ \ float-4-with def>> ] if
] "custom-inlining" set-word-prop
\ float-4-boa [
drop
\ (simd-gather-4) "intrinsic" word-prop [
[| a b c d |
a >float b >float c >float d >float
float-4-rep (simd-gather-4) \ float-4 boa
]
] [ \ float-4-boa def>> ] if
] "custom-inlining" set-word-prop
\ double-2-with [
drop
\ (simd-broadcast) "intrinsic" word-prop [
[ >float double-2-rep (simd-broadcast) \ double-2 boa ]
] [ \ double-2-with def>> ] if
] "custom-inlining" set-word-prop
\ double-2-boa [
drop
\ (simd-gather-4) "intrinsic" word-prop [
[ [ >float ] bi@ double-2-rep (simd-gather-2) \ double-2 boa ]
] [ \ double-2-boa def>> ] if
] "custom-inlining" set-word-prop
>>
: float-8-with ( x -- simd-array )
[ float-4-with ] [ float-4-with ] bi [ underlying>> ] bi@
\ float-8 boa ; inline
:: float-8-boa ( a b c d e f g h -- simd-array )
a b c d float-4-boa
e f g h float-4-boa
[ underlying>> ] bi@
\ float-8 boa ; inline
: double-4-with ( x -- simd-array )
[ double-2-with ] [ double-2-with ] bi [ underlying>> ] bi@
\ double-4 boa ; inline
:: double-4-boa ( a b c d -- simd-array )
a b double-2-boa
c d double-2-boa
[ underlying>> ] bi@
\ double-4 boa ; inline
<<
ERROR: bad-vector-size bits ;
<PRIVATE
! Filter out operations that are not available, eg horizontal adds
! on SSE2. Fallback code in math.vectors is used in that case.
: simd-vocab ( type -- vocab )
"math.vectors.simd.instances." prepend ;
: supported-simd-ops ( assoc -- assoc' )
{
{ v+ (simd-v+) }
{ v- (simd-v-) }
{ v* (simd-v*) }
{ v/ (simd-v/) }
{ vmin (simd-vmin) }
{ vmax (simd-vmax) }
{ sum (simd-sum) }
} [ nip "intrinsic" word-prop ] assoc-filter
'[ drop _ key? ] assoc-filter ;
! Some SIMD operations are defined in terms of others.
:: high-level-ops ( ctor -- assoc )
{
{ vneg [ [ dup v- ] keep v- ] }
{ v. [ v* sum ] }
{ n+v [ [ ctor execute ] dip v+ ] }
{ v+n [ ctor execute v+ ] }
{ n-v [ [ ctor execute ] dip v- ] }
{ v-n [ ctor execute v- ] }
{ n*v [ [ ctor execute ] dip v* ] }
{ v*n [ ctor execute v* ] }
{ n/v [ [ ctor execute ] dip v/ ] }
{ v/n [ ctor execute v/ ] }
{ norm-sq [ dup v. assert-positive ] }
{ norm [ norm-sq sqrt ] }
{ normalize [ dup norm v/n ] }
{ distance [ v- norm ] }
} ;
:: simd-vector-words ( class ctor elt-type assoc -- )
class elt-type assoc supported-simd-ops ctor high-level-ops assoc-union
specialize-vector-words ;
: parse-simd-name ( string -- c-type quot )
"-" split1
[ "alien.c-types" lookup dup heap-size ] [ string>number ] bi*
* 8 * {
{ 128 [ [ define-simd-128 ] ] }
{ 256 [ [ define-simd-256 ] ] }
[ bad-vector-size ]
} case ;
PRIVATE>
\ float-4 \ float-4-with m:float H{
{ v+ [ [ (simd-v+) ] float-4-vv->v-op ] }
{ v- [ [ (simd-v-) ] float-4-vv->v-op ] }
{ v* [ [ (simd-v*) ] float-4-vv->v-op ] }
{ v/ [ [ (simd-v/) ] float-4-vv->v-op ] }
{ vmin [ [ (simd-vmin) ] float-4-vv->v-op ] }
{ vmax [ [ (simd-vmax) ] float-4-vv->v-op ] }
{ sum [ [ (simd-sum) ] float-4-v->n-op ] }
} simd-vector-words
: define-simd-vocab ( type -- vocab )
[ simd-vocab ]
[ '[ _ parse-simd-name call( type -- ) ] ] bi
generate-vocab ;
\ double-2 \ double-2-with m:float H{
{ v+ [ [ (simd-v+) ] double-2-vv->v-op ] }
{ v- [ [ (simd-v-) ] double-2-vv->v-op ] }
{ v* [ [ (simd-v*) ] double-2-vv->v-op ] }
{ v/ [ [ (simd-v/) ] double-2-vv->v-op ] }
{ vmin [ [ (simd-vmin) ] double-2-vv->v-op ] }
{ vmax [ [ (simd-vmax) ] double-2-vv->v-op ] }
{ sum [ [ (simd-sum) ] double-2-v->n-op ] }
} simd-vector-words
\ float-8 \ float-8-with m:float H{
{ v+ [ [ (simd-v+) ] float-8-vv->v-op ] }
{ v- [ [ (simd-v-) ] float-8-vv->v-op ] }
{ v* [ [ (simd-v*) ] float-8-vv->v-op ] }
{ v/ [ [ (simd-v/) ] float-8-vv->v-op ] }
{ vmin [ [ (simd-vmin) ] float-8-vv->v-op ] }
{ vmax [ [ (simd-vmax) ] float-8-vv->v-op ] }
{ sum [ [ (simd-sum) ] [ + ] float-8-v->n-op ] }
} simd-vector-words
\ double-4 \ double-4-with m:float H{
{ v+ [ [ (simd-v+) ] double-4-vv->v-op ] }
{ v- [ [ (simd-v-) ] double-4-vv->v-op ] }
{ v* [ [ (simd-v*) ] double-4-vv->v-op ] }
{ v/ [ [ (simd-v/) ] double-4-vv->v-op ] }
{ vmin [ [ (simd-vmin) ] double-4-vv->v-op ] }
{ vmax [ [ (simd-vmax) ] double-4-vv->v-op ] }
{ sum [ [ (simd-v+) ] [ (simd-sum) ] double-4-v->n-op ] }
} simd-vector-words
>>
USE: vocabs.loader
"math.vectors.simd.alien" require
SYNTAX: SIMD:
scan define-simd-vocab use-vocab ;

View File

@ -0,0 +1 @@
Single-instruction-multiple-data parallel vector operations

View File

@ -53,10 +53,14 @@ H{
{ norm-sq { +vector+ -> +nonnegative+ } }
{ normalize { +vector+ -> +vector+ } }
{ v* { +vector+ +vector+ -> +vector+ } }
{ vs* { +vector+ +vector+ -> +vector+ } }
{ v*n { +vector+ +scalar+ -> +vector+ } }
{ v+ { +vector+ +vector+ -> +vector+ } }
{ vs+ { +vector+ +vector+ -> +vector+ } }
{ v+- { +vector+ +vector+ -> +vector+ } }
{ v+n { +vector+ +scalar+ -> +vector+ } }
{ v- { +vector+ +vector+ -> +vector+ } }
{ vs- { +vector+ +vector+ -> +vector+ } }
{ v-n { +vector+ +scalar+ -> +vector+ } }
{ v. { +vector+ +vector+ -> +scalar+ } }
{ v/ { +vector+ +vector+ -> +vector+ } }
@ -68,6 +72,11 @@ H{
{ vneg { +vector+ -> +vector+ } }
{ vtruncate { +vector+ -> +vector+ } }
{ sum { +vector+ -> +scalar+ } }
{ vabs { +vector+ -> +vector+ } }
{ vsqrt { +vector+ -> +vector+ } }
{ vbitand { +vector+ +vector+ -> +vector+ } }
{ vbitor { +vector+ +vector+ -> +vector+ } }
{ vbitxor { +vector+ +vector+ -> +vector+ } }
}
PREDICATE: vector-word < word vector-words key? ;

View File

@ -1,4 +1,4 @@
USING: help.markup help.syntax math sequences ;
USING: help.markup help.syntax math math.functions sequences ;
IN: math.vectors
ARTICLE: "math-vectors" "Vector arithmetic"
@ -14,18 +14,46 @@ $nl
{ $subsection n+v }
{ $subsection v-n }
{ $subsection n-v }
"Combining two vectors to form another vector with " { $link 2map } ":"
"Vector unary operations:"
{ $subsection vneg }
{ $subsection vabs }
{ $subsection vsqrt }
{ $subsection vfloor }
{ $subsection vceiling }
{ $subsection vtruncate }
"Vector/vector binary operations:"
{ $subsection v+ }
{ $subsection v- }
{ $subsection v+- }
{ $subsection v* }
{ $subsection v/ }
"Saturated arithmetic (only on " { $link "specialized-arrays" } "):"
{ $subsection vs+ }
{ $subsection vs- }
{ $subsection vs* }
"Comparisons:"
{ $subsection vmax }
{ $subsection vmin }
"Bitwise operations:"
{ $subsection vbitand }
{ $subsection vbitor }
{ $subsection vbitxor }
"Inner product and norm:"
{ $subsection v. }
{ $subsection norm }
{ $subsection norm-sq }
{ $subsection normalize } ;
{ $subsection normalize }
"Comparing vectors:"
{ $subsection distance }
{ $subsection v~ }
"Other functions:"
{ $subsection vsupremum }
{ $subsection vinfimum }
{ $subsection trilerp }
{ $subsection bilerp }
{ $subsection vlerp }
{ $subsection vnlerp }
{ $subsection vbilerp } ;
ABOUT: "math-vectors"
@ -33,6 +61,43 @@ HELP: vneg
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } }
{ $description "Negates each element of " { $snippet "u" } "." } ;
HELP: vabs
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of non-negative real numbers" } }
{ $description "Takes the absolute value of each element of " { $snippet "u" } "." } ;
HELP: vsqrt
{ $values { "u" "a sequence of non-negative real numbers" } { "v" "a sequence of non-negative real numbers" } }
{ $description "Takes the square root of each element of " { $snippet "u" } "." }
{ $warning "For performance reasons, this does not work with negative inputs, unlike " { $link sqrt } "." } ;
HELP: vfloor
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } }
{ $description "Takes the " { $link floor } " of each element of " { $snippet "u" } "." } ;
HELP: vceiling
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } }
{ $description "Takes the " { $link ceiling } " of each element of " { $snippet "u" } "." } ;
HELP: vtruncate
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } }
{ $description "Truncates each element of " { $snippet "u" } "." } ;
HELP: n+v
{ $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } }
{ $description "Adds " { $snippet "n" } " to each element of " { $snippet "u" } "." } ;
HELP: v+n
{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "v" "a sequence of numbers" } }
{ $description "Adds " { $snippet "n" } " to each element of " { $snippet "u" } "." } ;
HELP: n-v
{ $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } }
{ $description "Subtracts each element of " { $snippet "u" } " from " { $snippet "n" } "." } ;
HELP: v-n
{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "v" "a sequence of numbers" } }
{ $description "Subtracts " { $snippet "n" } " from each element of " { $snippet "u" } "." } ;
HELP: n*v
{ $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } }
{ $description "Multiplies each element of " { $snippet "u" } " by " { $snippet "n" } "." } ;
@ -43,11 +108,13 @@ HELP: v*n
HELP: n/v
{ $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } }
{ $description "Divides " { $snippet "n" } " by each element of " { $snippet "u" } "." } ;
{ $description "Divides " { $snippet "n" } " by each element of " { $snippet "u" } "." }
{ $errors "May throw an error if a division by zero occurs; see " { $link "division-by-zero" } "." } ;
HELP: v/n
{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "v" "a sequence of numbers" } }
{ $description "Divides each element of " { $snippet "u" } " by " { $snippet "n" } "." } ;
{ $description "Divides each element of " { $snippet "u" } " by " { $snippet "n" } "." }
{ $errors "May throw an error if a division by zero occurs; see " { $link "division-by-zero" } "." } ;
HELP: v+
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
@ -57,6 +124,17 @@ HELP: v-
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
{ $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise." } ;
HELP: v+-
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
{ $description "Adds and subtracts alternate elements of " { $snippet "v" } " and " { $snippet "u" } " component-wise." }
{ $examples
{ $example
"USING: math.vectors prettyprint ;"
"{ 1 2 3 } { 2 3 2 } v+- ."
"{ -1 5 1 }"
}
} ;
HELP: [v-]
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } }
{ $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise; any components which become negative are set to zero." } ;
@ -68,7 +146,7 @@ HELP: v*
HELP: v/
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
{ $description "Divides " { $snippet "u" } " by " { $snippet "v" } " component-wise." }
{ $errors "Throws an error if an integer division by zero occurs." } ;
{ $errors "May throw an error if a division by zero occurs; see " { $link "division-by-zero" } "." } ;
HELP: vmax
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } }
@ -85,9 +163,52 @@ HELP: v.
{ $description "Computes the real-valued dot product." }
{ $notes
"This word can also take complex number sequences as input, however mathematically it will compute the wrong result. The complex-valued dot product is defined differently:"
{ $snippet "0 [ conjugate * + ] 2reduce" }
{ $code "0 [ conjugate * + ] 2reduce" }
} ;
HELP: vs+
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
{ $description "Adds " { $snippet "u" } " and " { $snippet "v" } " component-wise with saturation." }
{ $examples
"With saturation:"
{ $example
"USING: math.vectors prettyprint specialized-arrays ;"
"SPECIALIZED-ARRAY: uchar"
"uchar-array{ 100 200 150 } uchar-array{ 70 70 70 } vs+ ."
"uchar-array{ 170 255 220 }"
}
"Without saturation:"
{ $example
"USING: math.vectors prettyprint specialized-arrays ;"
"SPECIALIZED-ARRAY: uchar"
"uchar-array{ 100 200 150 } uchar-array{ 70 70 70 } v+ ."
"uchar-array{ 170 14 220 }"
}
} ;
HELP: vs-
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
{ $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise with saturation." } ;
HELP: vs*
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
{ $description "Multiplies " { $snippet "u" } " and " { $snippet "v" } " component-wise with saturation." } ;
HELP: vbitand
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } }
{ $description "Takes the bitwise and of " { $snippet "u" } " and " { $snippet "v" } " component-wise." }
{ $notes "Unlike " { $link bitand } ", this word may be used on a specialized array of floats or doubles, in which case the bitwise representation of the floating point numbers is operated upon." } ;
HELP: vbitor
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } }
{ $description "Takes the bitwise or of " { $snippet "u" } " and " { $snippet "v" } " component-wise." }
{ $notes "Unlike " { $link bitor } ", this word may be used on a specialized array of floats or doubles, in which case the bitwise representation of the floating point numbers is operated upon." } ;
HELP: vbitxor
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } }
{ $description "Takes the bitwise exclusive or of " { $snippet "u" } " and " { $snippet "v" } " component-wise." }
{ $notes "Unlike " { $link bitxor } ", this word may be used on a specialized array of floats or doubles, in which case the bitwise representation of the floating point numbers is operated upon." } ;
HELP: norm-sq
{ $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } }
{ $description "Computes the squared length of a mathematical vector." } ;
@ -100,6 +221,10 @@ HELP: normalize
{ $values { "u" "a sequence of numbers, not all zero" } { "v" "a sequence of numbers" } }
{ $description "Outputs a vector with the same direction as " { $snippet "u" } " but length 1." } ;
HELP: distance
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "x" "a non-negative real number" } }
{ $description "Outputs the Euclidean distance between two vectors." } ;
HELP: set-axis
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "axis" "a sequence of 0/1" } { "w" "a sequence of numbers" } }
{ $description "Using " { $snippet "w" } " as a template, creates a new sequence containing corresponding elements from " { $snippet "u" } " in place of 0, and corresponding elements from " { $snippet "v" } " in place of 1." }
@ -108,3 +233,5 @@ HELP: set-axis
{ 2map v+ v- v* v/ } related-words
{ 2reduce v. } related-words
{ vs+ vs- vs* } related-words

View File

@ -17,4 +17,6 @@ USING: math.vectors tools.test ;
[ 1.125 ] [ 0.0 1.0 2.0 4.0 { 0.5 0.25 } bilerp ] unit-test
[ 17 ] [ 0 1 2 3 4 5 6 7 { 1 2 3 } trilerp ] unit-test
[ 17 ] [ 0 1 2 3 4 5 6 7 { 1 2 3 } trilerp ] unit-test
[ { 0 3 2 5 4 } ] [ { 1 2 3 4 5 } { 1 1 1 1 1 } v+- ] unit-test

View File

@ -1,9 +1,12 @@
! Copyright (C) 2005, 2008 Slava Pestov.
! Copyright (C) 2005, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: arrays kernel sequences math math.functions hints
math.order ;
USING: arrays alien.c-types kernel sequences math math.functions
hints math.order math.libm fry combinators ;
QUALIFIED-WITH: alien.c-types c
IN: math.vectors
GENERIC: element-type ( obj -- c-type )
: vneg ( u -- v ) [ neg ] map ;
: v+n ( u n -- v ) [ + ] curry map ;
@ -24,9 +27,43 @@ IN: math.vectors
: vmax ( u v -- w ) [ max ] 2map ;
: vmin ( u v -- w ) [ min ] 2map ;
: vfloor ( v -- _v_ ) [ floor ] map ;
: vceiling ( v -- ^v^ ) [ ceiling ] map ;
: vtruncate ( v -- -v- ) [ truncate ] map ;
: v+- ( u v -- w )
[ t ] 2dip
[ [ not ] 2dip pick [ + ] [ - ] if ] 2map
nip ;
<PRIVATE
: 2saturate-map ( u v quot -- w )
pick element-type '[ @ _ c-type-clamp ] 2map ; inline
PRIVATE>
: vs+ ( u v -- w ) [ + ] 2saturate-map ;
: vs- ( u v -- w ) [ - ] 2saturate-map ;
: vs* ( u v -- w ) [ * ] 2saturate-map ;
: vabs ( u -- v ) [ abs ] map ;
: vsqrt ( u -- v ) [ >float fsqrt ] map ;
<PRIVATE
: fp-bitwise-op ( x y seq quot -- z )
swap element-type {
{ c:double [ [ [ double>bits ] bi@ ] dip call bits>double ] }
{ c:float [ [ [ float>bits ] bi@ ] dip call bits>float ] }
[ drop call ]
} case ; inline
PRIVATE>
: vbitand ( u v -- w ) over '[ _ [ bitand ] fp-bitwise-op ] 2map ;
: vbitor ( u v -- w ) over '[ _ [ bitor ] fp-bitwise-op ] 2map ;
: vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ;
: vfloor ( u -- v ) [ floor ] map ;
: vceiling ( u -- v ) [ ceiling ] map ;
: vtruncate ( u -- v ) [ truncate ] map ;
: vsupremum ( seq -- vmax ) [ ] [ vmax ] map-reduce ;
: vinfimum ( seq -- vmin ) [ ] [ vmin ] map-reduce ;

View File

@ -173,6 +173,7 @@ M: tuple pprint*
] when ;
: pprint-elements ( seq -- )
>array
do-length-limit
[ [ pprint* ] each ] dip
[ "~" swap number>string " more~" 3append text ] when* ;

View File

@ -4,7 +4,7 @@ specialized-arrays.private sequences alien.c-types accessors
kernel arrays combinators compiler compiler.units classes.struct
combinators.smart compiler.tree.debugger math libc destructors
sequences.private multiline eval words vocabs namespaces
assocs prettyprint alien.data ;
assocs prettyprint alien.data math.vectors ;
FROM: alien.c-types => float ;
SPECIALIZED-ARRAY: int
@ -13,6 +13,9 @@ SPECIALIZED-ARRAY: ushort
SPECIALIZED-ARRAY: char
SPECIALIZED-ARRAY: uint
SPECIALIZED-ARRAY: float
SPECIALIZED-ARRAY: ulonglong
[ ulonglong ] [ ulonglong-array{ } element-type ] unit-test
[ t ] [ { 1 2 3 } >int-array int-array? ] unit-test

View File

@ -1,10 +1,11 @@
! Copyright (C) 2008, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien alien.c-types alien.data alien.parser assocs
byte-arrays classes compiler.units functors kernel lexer libc math
math.vectors.specialization namespaces parser prettyprint.custom
sequences sequences.private strings summary vocabs vocabs.loader
vocabs.parser words fry combinators ;
USING: accessors alien alien.c-types alien.data alien.parser
assocs byte-arrays classes compiler.units functors kernel lexer
libc math math.vectors math.vectors.specialization namespaces
parser prettyprint.custom sequences sequences.private strings
summary vocabs vocabs.loader vocabs.parser vocabs.generated
words fry combinators present ;
IN: specialized-arrays
MIXIN: specialized-array
@ -53,14 +54,14 @@ TUPLE: A
: <direct-A> ( alien len -- specialized-array ) A boa ; inline
: <A> ( n -- specialized-array ) [ T <underlying> ] keep <direct-A> ; inline
: <A> ( n -- specialized-array ) [ \ T <underlying> ] keep <direct-A> ; inline
: (A) ( n -- specialized-array ) [ T (underlying) ] keep <direct-A> ; inline
: (A) ( n -- specialized-array ) [ \ T (underlying) ] keep <direct-A> ; inline
: malloc-A ( len -- specialized-array ) [ T heap-size calloc ] keep <direct-A> ; inline
: malloc-A ( len -- specialized-array ) [ \ T heap-size calloc ] keep <direct-A> ; inline
: byte-array>A ( byte-array -- specialized-array )
dup length T heap-size /mod 0 = [ drop T bad-byte-array-length ] unless
dup length \ T heap-size /mod 0 = [ drop \ T bad-byte-array-length ] unless
<direct-A> ; inline
M: A clone [ underlying>> clone ] [ length>> ] bi <direct-A> ; inline
@ -81,12 +82,14 @@ M: A equal? over A instance? [ sequence= ] [ 2drop f ] if ;
M: A resize
[
[ T heap-size * ] [ underlying>> ] bi*
[ \ T heap-size * ] [ underlying>> ] bi*
resize-byte-array
] [ drop ] 2bi
<direct-A> ; inline
M: A byte-length length T heap-size * ; inline
M: A byte-length length \ T heap-size * ; inline
M: A element-type drop \ T ; inline
M: A direct-array-syntax drop \ A@ ;
@ -116,24 +119,15 @@ M: word (underlying-type) "c-type" word-prop ;
} cond ;
: underlying-type-name ( c-type -- name )
underlying-type dup word? [ name>> ] when ;
underlying-type present ;
: specialized-array-vocab ( c-type -- vocab )
"specialized-arrays.instances." prepend ;
present "specialized-arrays.instances." prepend ;
PRIVATE>
: generate-vocab ( vocab-name quot -- vocab )
[ dup vocab [ ] ] dip '[
[
[
_ with-current-vocab
] with-compilation-unit
] keep
] ?if ; inline
: define-array-vocab ( type -- vocab )
underlying-type-name
underlying-type
[ specialized-array-vocab ] [ '[ _ define-array ] ] bi
generate-vocab ;

View File

@ -16,8 +16,8 @@ ARTICLE: "specialized-vector-words" "Specialized vector words"
}
"Behind the scenes, these words are placed in a vocabulary named " { $snippet "specialized-vectors.instances.T" } ", however this vocabulary should not be placed in a " { $link POSTPONE: USING: } " form directly. Instead, always use " { $link POSTPONE: SPECIALIZED-VECTOR: } ". This ensures that the vocabulary can get generated the first time it is needed." ;
ARTICLE: "specialized-vector-c" "Passing specialized arrays to C functions"
"Each specialized array has a " { $slot "underlying" } " slot holding a specialized array, which in turn has an " { $slot "underlying" } " slot holding a " { $link byte-array } " with the raw data. Passing a specialized vector as a parameter to a C function call will automatically extract the underlying data. To get at the underlying data directly, call the " { $link >c-ptr } " word on a specialized vector." ;
ARTICLE: "specialized-vector-c" "Passing specialized vectors to C functions"
"Each specialized vector has a " { $slot "underlying" } " slot holding a specialized array, which in turn has an " { $slot "underlying" } " slot holding a " { $link byte-array } " with the raw data. Passing a specialized vector as a parameter to a C function call will automatically extract the underlying data. To get at the underlying data directly, call the " { $link >c-ptr } " word on a specialized vector." ;
ARTICLE: "specialized-vectors" "Specialized vectors"
"The " { $vocab-link "specialized-vectors" } " vocabulary implements resizable sequence types for storing machine values in a space-efficient manner without boxing."

View File

@ -3,7 +3,7 @@
USING: accessors alien.c-types assocs compiler.units functors
growable kernel lexer namespaces parser prettyprint.custom
sequences specialized-arrays specialized-arrays.private strings
vocabs vocabs.parser fry ;
vocabs vocabs.parser vocabs.generated fry ;
QUALIFIED: vectors.functor
IN: specialized-vectors

View File

@ -0,0 +1,13 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: compiler.units fry kernel vocabs vocabs.parser ;
IN: vocabs.generated
: generate-vocab ( vocab-name quot -- vocab )
[ dup vocab [ ] ] dip '[
[
[
_ with-current-vocab
] with-compilation-unit
] keep
] ?if ; inline

View File

@ -434,11 +434,15 @@ HELP: byte-array>bignum
{ $description "Converts a byte-array, interpreted as little-endian, into a bignum integer. User code should call " { $link le> } " or " { $link be> } " instead." } ;
ARTICLE: "division-by-zero" "Division by zero"
"Floating point division never raises an error if the denominator is zero. This means that if at least one of the two inputs to " { $link / } ", " { $link /f } " or " { $link mod } " is a float, the result will be a floating point infinity or not a number value."
"Behavior of division operations when a denominator of zero is used depends on the data types in question, as well as the platform being used."
$nl
"Floating point division only throws an error if the appropriate traps are enabled in the floating point environment. If traps are disabled, a Not-a-number value or an infinity is output, depending on whether the numerator is zero or non-zero."
$nl
"Floating point traps are disabled by default and the " { $vocab-link "math.floats.env" } " vocabulary provides words to enable them. Floating point division is performed by " { $link / } ", " { $link /f } " or " { $link mod } " if at least one of the two inputs is a float. Floating point division is always performed by " { $link /f } "."
$nl
"The behavior of integer division is hardware specific. On x86 processors, " { $link /i } " and " { $link mod } " raise an error if both inputs are integers and the denominator is zero. On PowerPC, integer division by zero yields a result of zero."
$nl
"On the other hand, the " { $link / } " word, when given integer arguments, implements a much more expensive division algorithm which always yields an exact rational answer, and this word always tests for division by zero explicitly." ;
"The " { $link / } " word, when given integer arguments, implements a much more expensive division algorithm which always yields an exact rational answer, and this word always tests for division by zero explicitly." ;
ARTICLE: "number-protocol" "Number protocol"
"Math operations obey certain numerical upgrade rules. If one of the inputs is a bignum and the other is a fixnum, the latter is first coerced to a bignum; if one of the inputs is a float, the other is coerced to a float."
@ -459,7 +463,8 @@ $nl
{ $subsection > }
{ $subsection >= }
"Numbers can be compared for equality using " { $link = } ", or a less precise test which disregards types:"
{ $subsection number= } ;
{ $subsection number= }
{ $see-also "math.floats.compare" } ;
ARTICLE: "modular-arithmetic" "Modular arithmetic"
{ $subsection mod }

View File

@ -4,6 +4,7 @@ USING: accessors fry kernel locals math math.constants
math.functions math.vectors math.vectors.simd prettyprint
combinators.smart sequences hints classes.struct
specialized-arrays ;
SIMD: double-4
IN: benchmark.nbody-simd
: solar-mass ( -- x ) 4 pi sq * ; inline

View File

@ -5,6 +5,7 @@ USING: arrays accessors io io.files io.files.temp
io.encodings.binary kernel math math.constants math.functions
math.vectors math.vectors.simd math.parser make sequences
sequences.private words hints classes.struct ;
SIMD: double-4
IN: benchmark.raytracer-simd
! parameters

View File

@ -2,6 +2,7 @@
! See http://factorcode.org/license.txt for BSD license.
USING: kernel io math math.functions math.parser math.vectors
math.vectors.simd sequences specialized-arrays ;
SIMD: float-4
SPECIALIZED-ARRAY: float-4
IN: benchmark.simd-1

View File

@ -33,7 +33,7 @@ USING: mason.child mason.config tools.test namespaces io kernel sequences ;
] with-scope
] unit-test
[ { "./factor.com" "-i=boot.x86.32.image" "-no-user-init" "-sse-version=30" } ] [
[ { "./factor.com" "-i=boot.x86.32.image" "-no-user-init" } ] [
[
"winnt" target-os set
"x86.32" target-cpu set

View File

@ -34,7 +34,6 @@ IN: mason.child
factor-vm ,
"-i=" boot-image-name append ,
"-no-user-init" ,
target-cpu get { "x86.32" "x86.64" } member? [ "-sse-version=30" , ] when
] { } make ;
: boot ( -- )

View File

@ -66,12 +66,12 @@ DEF(F_FASTCALL void,c_to_factor,(CELL quot, void *vm)):
DEF(bool,sse_version,(void)):
mov $0x1,RETURN_REG
cpuid
/* test $0x100000,%ecx
test $0x100000,%ecx
jnz sse_42
test $0x80000,%ecx
jnz sse_41
test $0x200,%ecx
jnz ssse_3 */
jnz ssse_3
test $0x1,%ecx
jnz sse_3
test $0x4000000,%edx