diff --git a/basis/alien/c-types/c-types-tests.factor b/basis/alien/c-types/c-types-tests.factor index 792e7d416a..a893ffebe8 100644 --- a/basis/alien/c-types/c-types-tests.factor +++ b/basis/alien/c-types/c-types-tests.factor @@ -1,5 +1,6 @@ USING: alien alien.syntax alien.c-types kernel tools.test -sequences system libc alien.strings io.encodings.utf8 ; +sequences system libc alien.strings io.encodings.utf8 +math.constants ; IN: alien.c-types.tests CONSTANT: xyz 123 @@ -52,3 +53,9 @@ TYPEDEF: uchar* MyLPBYTE os windows? cpu x86.64? and [ [ -2147467259 ] [ 2147500037 *long ] unit-test ] when + +[ 0 ] [ -10 uchar c-type-clamp ] unit-test +[ 12 ] [ 12 uchar c-type-clamp ] unit-test +[ -10 ] [ -10 char c-type-clamp ] unit-test +[ 127 ] [ 230 char c-type-clamp ] unit-test +[ t ] [ pi dup float c-type-clamp = ] unit-test diff --git a/basis/alien/c-types/c-types.factor b/basis/alien/c-types/c-types.factor index fa27e29c04..1ad4f75a3c 100755 --- a/basis/alien/c-types/c-types.factor +++ b/basis/alien/c-types/c-types.factor @@ -1,11 +1,12 @@ ! Copyright (C) 2004, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: byte-arrays arrays assocs kernel kernel.private math -namespaces make parser sequences strings words splitting math.parser -cpu.architecture alien alien.accessors alien.strings quotations -layouts system compiler.units io io.files io.encodings.binary -io.streams.memory accessors combinators effects continuations fry -classes vocabs vocabs.loader words.symbol ; +math.order math.parser namespaces make parser sequences strings +words splitting cpu.architecture alien alien.accessors +alien.strings quotations layouts system compiler.units io +io.files io.encodings.binary io.streams.memory accessors +combinators effects continuations fry classes vocabs +vocabs.loader words.symbol ; QUALIFIED: math IN: alien.c-types @@ -472,3 +473,25 @@ SYMBOLS: \ ulong \ size_t typedef ] with-compilation-unit +M: char-16-rep rep-component-type drop char ; +M: uchar-16-rep rep-component-type drop uchar ; +M: short-8-rep rep-component-type drop short ; +M: ushort-8-rep rep-component-type drop ushort ; +M: int-4-rep rep-component-type drop int ; +M: uint-4-rep rep-component-type drop uint ; +M: float-4-rep rep-component-type drop float ; +M: double-2-rep rep-component-type drop double ; + +: (unsigned-interval) ( bytes -- from to ) [ 0 ] dip 8 * 2^ 1 - ; foldable +: unsigned-interval ( c-type -- from to ) heap-size (unsigned-interval) ; foldable +: (signed-interval) ( bytes -- from to ) 8 * 1 - 2^ [ neg ] [ 1 - ] bi ; foldable +: signed-interval ( c-type -- from to ) heap-size (signed-interval) ; foldable + +: c-type-interval ( c-type -- from to ) + { + { [ dup { float double } memq? ] [ drop -1/0. 1/0. ] } + { [ dup { char short int long longlong } memq? ] [ signed-interval ] } + { [ dup { uchar ushort uint ulong ulonglong } memq? ] [ unsigned-interval ] } + } cond ; foldable + +: c-type-clamp ( value c-type -- value' ) c-type-interval clamp ; inline diff --git a/basis/checksums/openssl/openssl.factor b/basis/checksums/openssl/openssl.factor index bc70230fd0..095ab38ace 100644 --- a/basis/checksums/openssl/openssl.factor +++ b/basis/checksums/openssl/openssl.factor @@ -1,4 +1,4 @@ -! Copyright (C) 2008 Slava Pestov +! copyright (C) 2008 Slava Pestov ! See http://factorcode.org/license.txt for BSD license. USING: accessors byte-arrays alien.c-types alien.data kernel continuations destructors sequences io openssl openssl.libcrypto @@ -23,10 +23,10 @@ TUPLE: evp-md-context < disposable handle ; : ( -- ctx ) evp-md-context new-disposable - EVP_MD_CTX dup EVP_MD_CTX_init >>handle ; + EVP_MD_CTX_create >>handle ; M: evp-md-context dispose* - handle>> EVP_MD_CTX_cleanup drop ; + handle>> EVP_MD_CTX_destroy ; : with-evp-md-context ( quot -- ) maybe-init-ssl [ ] dip with-disposal ; inline diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index 7c28198f67..874093ed40 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -305,16 +305,36 @@ def: dst use: src1 src2 literal: rep ; +PURE-INSN: ##saturated-add-vector +def: dst +use: src1 src2 +literal: rep ; + +PURE-INSN: ##add-sub-vector +def: dst +use: src1 src2 +literal: rep ; + PURE-INSN: ##sub-vector def: dst use: src1 src2 literal: rep ; +PURE-INSN: ##saturated-sub-vector +def: dst +use: src1 src2 +literal: rep ; + PURE-INSN: ##mul-vector def: dst use: src1 src2 literal: rep ; +PURE-INSN: ##saturated-mul-vector +def: dst +use: src1 src2 +literal: rep ; + PURE-INSN: ##div-vector def: dst use: src1 src2 @@ -330,14 +350,34 @@ def: dst use: src1 src2 literal: rep ; +PURE-INSN: ##horizontal-add-vector +def: dst/scalar-rep +use: src +literal: rep ; + +PURE-INSN: ##abs-vector +def: dst +use: src +literal: rep ; + PURE-INSN: ##sqrt-vector def: dst use: src literal: rep ; -PURE-INSN: ##horizontal-add-vector -def: dst/scalar-rep -use: src +PURE-INSN: ##and-vector +def: dst +use: src1 src2 +literal: rep ; + +PURE-INSN: ##or-vector +def: dst +use: src1 src2 +literal: rep ; + +PURE-INSN: ##xor-vector +def: dst +use: src1 src2 literal: rep ; ! Boxing and unboxing aliens diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index 0daab82395..d2f158f06d 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -151,27 +151,31 @@ IN: compiler.cfg.intrinsics { math.integers.private:fixnum-log2 [ drop emit-fixnum-log2 ] } } enable-intrinsics ; -: enable-sse2-simd ( -- ) +: enable-simd ( -- ) { { math.vectors.simd.intrinsics:assert-positive [ drop ] } { math.vectors.simd.intrinsics:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vs+) [ [ ^^saturated-add-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vs-) [ [ ^^saturated-sub-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vs*) [ [ ^^saturated-mul-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vabs) [ [ ^^abs-vector ] emit-unary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] } { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] } + { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] } { math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] } } enable-intrinsics ; -: enable-sse3-simd ( -- ) - { - { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] } - } enable-intrinsics ; - : emit-intrinsic ( node word -- ) "intrinsic" word-prop call( node -- ) ; diff --git a/basis/compiler/cfg/ssa/cssa/cssa.factor b/basis/compiler/cfg/ssa/cssa/cssa.factor index 14287e900f..d58cebac65 100644 --- a/basis/compiler/cfg/ssa/cssa/cssa.factor +++ b/basis/compiler/cfg/ssa/cssa/cssa.factor @@ -1,8 +1,9 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors assocs kernel locals fry +USING: accessors assocs kernel locals fry sequences cpu.architecture compiler.cfg.rpo +compiler.cfg.def-use compiler.cfg.utilities compiler.cfg.registers compiler.cfg.instructions @@ -13,10 +14,19 @@ IN: compiler.cfg.ssa.cssa ! selection, so it must keep track of representations when introducing ! new values. +: insert-copy? ( bb vreg -- ? ) + ! If the last instruction defines a value (which means it is + ! ##fixnum-add, ##fixnum-sub or ##fixnum-mul) then we don't + ! need to insert a copy since in fact doing so will result + ! in incorrect code. + [ instructions>> last defs-vreg ] dip eq? not ; + :: insert-copy ( bb src rep -- bb dst ) - rep next-vreg-rep :> dst - bb [ dst src rep src rep-of emit-conversion ] add-instructions - bb dst ; + bb src insert-copy? [ + rep next-vreg-rep :> dst + bb [ dst src rep src rep-of emit-conversion ] add-instructions + bb dst + ] [ bb src ] if ; : convert-phi ( ##phi -- ) dup dst>> rep-of '[ [ _ insert-copy ] assoc-map ] change-inputs drop ; diff --git a/basis/compiler/cfg/two-operand/two-operand.factor b/basis/compiler/cfg/two-operand/two-operand.factor index 20fa1d0b18..45d248f8f4 100644 --- a/basis/compiler/cfg/two-operand/two-operand.factor +++ b/basis/compiler/cfg/two-operand/two-operand.factor @@ -47,11 +47,18 @@ UNION: two-operand-insn ##min-float ##max-float ##add-vector + ##saturated-add-vector + ##add-sub-vector ##sub-vector + ##saturated-sub-vector ##mul-vector + ##saturated-mul-vector ##div-vector ##min-vector - ##max-vector ; + ##max-vector + ##and-vector + ##or-vector + ##xor-vector ; GENERIC: convert-two-operand* ( insn -- ) diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index e1551f54c0..43d11b5d4f 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -169,13 +169,21 @@ CODEGEN: ##gather-vector-2 %gather-vector-2 CODEGEN: ##gather-vector-4 %gather-vector-4 CODEGEN: ##box-vector %box-vector CODEGEN: ##add-vector %add-vector +CODEGEN: ##saturated-add-vector %saturated-add-vector +CODEGEN: ##add-sub-vector %add-sub-vector CODEGEN: ##sub-vector %sub-vector +CODEGEN: ##saturated-sub-vector %saturated-sub-vector CODEGEN: ##mul-vector %mul-vector +CODEGEN: ##saturated-mul-vector %saturated-mul-vector CODEGEN: ##div-vector %div-vector CODEGEN: ##min-vector %min-vector CODEGEN: ##max-vector %max-vector CODEGEN: ##sqrt-vector %sqrt-vector CODEGEN: ##horizontal-add-vector %horizontal-add-vector +CODEGEN: ##abs-vector %abs-vector +CODEGEN: ##and-vector %and-vector +CODEGEN: ##or-vector %or-vector +CODEGEN: ##xor-vector %xor-vector CODEGEN: ##box-alien %box-alien CODEGEN: ##box-displaced-alien %box-displaced-alien CODEGEN: ##unbox-alien %unbox-alien diff --git a/basis/compiler/tests/codegen.factor b/basis/compiler/tests/codegen.factor index 14ed2294c7..3dbde076a6 100644 --- a/basis/compiler/tests/codegen.factor +++ b/basis/compiler/tests/codegen.factor @@ -3,7 +3,7 @@ math hashtables.private math.private namespaces sequences tools.test namespaces.private slots.private sequences.private byte-arrays alien alien.accessors layouts words definitions compiler.units io combinators vectors grouping make alien.c-types combinators.short-circuit -math.order math.libm math.parser ; +math.order math.libm math.parser alien.c-types ; FROM: math => float ; QUALIFIED: namespaces.private IN: compiler.tests.codegen @@ -431,3 +431,21 @@ cell 4 = [ ] curry each-integer ] compile-call ] unit-test + +TUPLE: myseq { underlying1 byte-array read-only } { underlying2 byte-array read-only } ; + +[ 2 ] [ + little-endian? + T{ myseq f B{ 1 0 0 0 } B{ 1 0 0 0 } } + T{ myseq f B{ 0 0 0 1 } B{ 0 0 0 1 } } ? + [ + { myseq } declare + [ 0 2 ] dip dup + [ + [ + over 1 < [ underlying1>> ] [ [ 1 - ] dip underlying2>> ] if + swap 4 * >fixnum alien-signed-4 + ] bi-curry@ bi * + + ] 2curry each-integer + ] compile-call +] unit-test diff --git a/basis/compiler/tree/propagation/known-words/known-words.factor b/basis/compiler/tree/propagation/known-words/known-words.factor index 621b8d082b..d4780b335b 100644 --- a/basis/compiler/tree/propagation/known-words/known-words.factor +++ b/basis/compiler/tree/propagation/known-words/known-words.factor @@ -18,6 +18,7 @@ compiler.tree.propagation.constraints compiler.tree.propagation.call-effect compiler.tree.propagation.transforms compiler.tree.propagation.simd ; +FROM: alien.c-types => (signed-interval) (unsigned-interval) ; IN: compiler.tree.propagation.known-words { + - * / } @@ -260,15 +261,9 @@ generic-comparison-ops [ alien-unsigned-8 } [ dup name>> { - { - [ "alien-signed-" ?head ] - [ string>number 8 * 1 - 2^ dup neg swap 1 - [a,b] ] - } - { - [ "alien-unsigned-" ?head ] - [ string>number 8 * 2^ 1 - 0 swap [a,b] ] - } - } cond + { [ "alien-signed-" ?head ] [ string>number (signed-interval) ] } + { [ "alien-unsigned-" ?head ] [ string>number (unsigned-interval) ] } + } cond [a,b] [ fits-in-fixnum? fixnum integer ? ] keep '[ 2drop _ ] "outputs" set-word-prop ] each diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index 3baa7cdcbf..fadb382398 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -1,46 +1,45 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors byte-arrays combinators fry +USING: accessors byte-arrays combinators fry sequences compiler.tree.propagation.info cpu.architecture kernel words math math.intervals math.vectors.simd.intrinsics ; IN: compiler.tree.propagation.simd -\ (simd-v+) { byte-array } "default-output-classes" set-word-prop - -\ (simd-v-) { byte-array } "default-output-classes" set-word-prop - -\ (simd-v*) { byte-array } "default-output-classes" set-word-prop - -\ (simd-v/) { byte-array } "default-output-classes" set-word-prop - -\ (simd-vmin) { byte-array } "default-output-classes" set-word-prop - -\ (simd-vmax) { byte-array } "default-output-classes" set-word-prop - -\ (simd-vsqrt) { byte-array } "default-output-classes" set-word-prop +{ + (simd-v+) + (simd-v-) + (simd-v+-) + (simd-v*) + (simd-v/) + (simd-vmin) + (simd-vmax) + (simd-sum) + (simd-vabs) + (simd-vsqrt) + (simd-vbitand) + (simd-vbitor) + (simd-vbitxor) + (simd-broadcast) + (simd-gather-2) + (simd-gather-4) + alien-vector +} [ { byte-array } "default-output-classes" set-word-prop ] each \ (simd-sum) [ nip dup literal?>> [ literal>> scalar-rep-of { { float-rep [ float ] } { double-rep [ float ] } + { int-rep [ integer ] } } case ] [ drop real ] if ] "outputs" set-word-prop -\ (simd-broadcast) { byte-array } "default-output-classes" set-word-prop - -\ (simd-gather-2) { byte-array } "default-output-classes" set-word-prop - -\ (simd-gather-4) { byte-array } "default-output-classes" set-word-prop - \ assert-positive [ real [0,inf] value-info-intersect ] "outputs" set-word-prop -\ alien-vector { byte-array } "default-output-classes" set-word-prop - ! If SIMD is not available, inline alien-vector and set-alien-vector ! to get a speedup : inline-unless-intrinsic ( word -- ) diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index fbec9f697a..2dbe724f0a 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -22,8 +22,6 @@ SINGLETONS: float-rep double-rep ; ! On x86, floating point registers are really vector registers SINGLETONS: -float-4-rep -double-2-rep char-16-rep uchar-16-rep short-8-rep @@ -31,9 +29,11 @@ ushort-8-rep int-4-rep uint-4-rep ; -UNION: vector-rep +SINGLETONS: float-4-rep -double-2-rep +double-2-rep ; + +UNION: int-vector-rep char-16-rep uchar-16-rep short-8-rep @@ -41,6 +41,14 @@ ushort-8-rep int-4-rep uint-4-rep ; +UNION: float-vector-rep +float-4-rep +double-2-rep ; + +UNION: vector-rep +int-vector-rep +float-vector-rep ; + UNION: representation any-rep tagged-rep @@ -76,10 +84,15 @@ M: double-rep rep-size drop 8 ; M: stack-params rep-size drop cell ; M: vector-rep rep-size drop 16 ; +GENERIC: rep-component-type ( rep -- n ) + +! Methods defined in alien.c-types + GENERIC: scalar-rep-of ( rep -- rep' ) M: float-4-rep scalar-rep-of drop float-rep ; M: double-2-rep scalar-rep-of drop double-rep ; +M: int-vector-rep scalar-rep-of drop int-rep ; ! Mapping from register class to machine registers HOOK: machine-registers cpu ( -- assoc ) @@ -167,15 +180,42 @@ HOOK: %unbox-vector cpu ( dst src rep -- ) HOOK: %broadcast-vector cpu ( dst src rep -- ) HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- ) HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) - HOOK: %add-vector cpu ( dst src1 src2 rep -- ) +HOOK: %saturated-add-vector cpu ( dst src1 src2 rep -- ) +HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- ) HOOK: %sub-vector cpu ( dst src1 src2 rep -- ) +HOOK: %saturated-sub-vector cpu ( dst src1 src2 rep -- ) HOOK: %mul-vector cpu ( dst src1 src2 rep -- ) +HOOK: %saturated-mul-vector cpu ( dst src1 src2 rep -- ) HOOK: %div-vector cpu ( dst src1 src2 rep -- ) HOOK: %min-vector cpu ( dst src1 src2 rep -- ) HOOK: %max-vector cpu ( dst src1 src2 rep -- ) HOOK: %sqrt-vector cpu ( dst src rep -- ) HOOK: %horizontal-add-vector cpu ( dst src rep -- ) +HOOK: %abs-vector cpu ( dst src rep -- ) +HOOK: %and-vector cpu ( dst src1 src2 rep -- ) +HOOK: %or-vector cpu ( dst src1 src2 rep -- ) +HOOK: %xor-vector cpu ( dst src1 src2 rep -- ) + +HOOK: %broadcast-vector-reps cpu ( -- reps ) +HOOK: %gather-vector-2-reps cpu ( -- reps ) +HOOK: %gather-vector-4-reps cpu ( -- reps ) +HOOK: %add-vector-reps cpu ( -- reps ) +HOOK: %saturated-add-vector-reps cpu ( -- reps ) +HOOK: %add-sub-vector-reps cpu ( -- reps ) +HOOK: %sub-vector-reps cpu ( -- reps ) +HOOK: %saturated-sub-vector-reps cpu ( -- reps ) +HOOK: %mul-vector-reps cpu ( -- reps ) +HOOK: %saturated-mul-vector-reps cpu ( -- reps ) +HOOK: %div-vector-reps cpu ( -- reps ) +HOOK: %min-vector-reps cpu ( -- reps ) +HOOK: %max-vector-reps cpu ( -- reps ) +HOOK: %sqrt-vector-reps cpu ( -- reps ) +HOOK: %horizontal-add-vector-reps cpu ( -- reps ) +HOOK: %abs-vector-reps cpu ( -- reps ) +HOOK: %and-vector-reps cpu ( -- reps ) +HOOK: %or-vector-reps cpu ( -- reps ) +HOOK: %xor-vector-reps cpu ( -- reps ) HOOK: %unbox-alien cpu ( dst src -- ) HOOK: %unbox-any-c-ptr cpu ( dst src temp -- ) diff --git a/basis/cpu/x86/32/32.factor b/basis/cpu/x86/32/32.factor index 85db5fb09c..7a7d1befd9 100755 --- a/basis/cpu/x86/32/32.factor +++ b/basis/cpu/x86/32/32.factor @@ -322,4 +322,4 @@ os windows? [ 4 "double" c-type (>>align) ] unless -"cpu.x86.features" require +check-sse diff --git a/basis/cpu/x86/64/64.factor b/basis/cpu/x86/64/64.factor index 0528733af1..af13546657 100644 --- a/basis/cpu/x86/64/64.factor +++ b/basis/cpu/x86/64/64.factor @@ -249,4 +249,4 @@ USE: vocabs.loader { [ os winnt? ] [ "cpu.x86.64.winnt" require ] } } cond -"cpu.x86.features" require +check-sse diff --git a/basis/cpu/x86/features/features.factor b/basis/cpu/x86/features/features.factor index c5cf2d470a..b21aa762d8 100644 --- a/basis/cpu/x86/features/features.factor +++ b/basis/cpu/x86/features/features.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: system kernel math math.order math.parser namespaces -alien.c-types alien.syntax combinators locals init io cpu.x86 +USING: system kernel memoize math math.order math.parser +namespaces alien.c-types alien.syntax combinators locals init io compiler compiler.units accessors ; IN: cpu.x86.features @@ -13,7 +13,18 @@ FUNCTION: longlong read_timestamp_counter ( ) ; PRIVATE> -ALIAS: sse-version sse_version +MEMO: sse-version ( -- n ) + sse_version + "sse-version" get string>number [ min ] when* ; + +[ \ sse-version reset-memoized ] "cpu.x86.features" add-init-hook + +: sse? ( -- ? ) sse-version 10 >= ; +: sse2? ( -- ? ) sse-version 20 >= ; +: sse3? ( -- ? ) sse-version 30 >= ; +: ssse3? ( -- ? ) sse-version 33 >= ; +: sse4.1? ( -- ? ) sse-version 41 >= ; +: sse4.2? ( -- ? ) sse-version 42 >= ; : sse-string ( version -- string ) { @@ -32,37 +43,3 @@ M: x86 instruction-count read_timestamp_counter ; : count-instructions ( quot -- n ) instruction-count [ call ] dip instruction-count swap - ; inline - -USING: cpu.x86.features cpu.x86.features.private ; - -:: install-sse-check ( version -- ) - [ - sse-version version < [ - "This image was built to use " write - version sse-string write - " but your CPU only supports " write - sse-version sse-string write "." print - "You will need to bootstrap Factor again." print - flush - 1 exit - ] when - ] "cpu.x86" add-init-hook ; - -: enable-sse ( version -- ) - { - { 00 [ ] } - { 10 [ ] } - { 20 [ enable-sse2 ] } - { 30 [ enable-sse3 ] } - { 33 [ enable-sse3 ] } - { 41 [ enable-sse3 ] } - { 42 [ enable-sse3 ] } - } case ; - -[ { sse_version } compile ] with-optimizer - -"Checking for multimedia extensions: " write sse-version -"sse-version" get [ string>number min ] when* -[ sse-string write " detected" print ] -[ install-sse-check ] -[ enable-sse ] tri diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index d8e02fe516..1a96e93c63 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -4,7 +4,8 @@ USING: accessors assocs alien alien.c-types arrays strings cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands cpu.architecture kernel kernel.private math memory namespaces make sequences words system layouts combinators math.order fry locals -compiler.constants vm byte-arrays +compiler.constants byte-arrays io macros quotations cpu.x86.features +cpu.x86.features.private compiler compiler.units init vm compiler.cfg.registers compiler.cfg.instructions compiler.cfg.intrinsics @@ -248,12 +249,26 @@ M:: x86 %unbox-vector ( dst src rep -- ) dst src byte-array-offset [+] rep copy-register ; +MACRO: available-reps ( alist -- ) + ! Each SSE version adds new representations and supports + ! all old ones + unzip { } [ append ] accumulate rest swap suffix + [ [ 1quotation ] map ] bi@ zip + reverse [ { } ] suffix + '[ _ cond ] ; + M: x86 %broadcast-vector ( dst src rep -- ) { { float-4-rep [ [ MOVSS ] [ drop dup 0 SHUFPS ] 2bi ] } { double-2-rep [ [ MOVSD ] [ drop dup UNPCKLPD ] 2bi ] } } case ; +M: x86 %broadcast-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep } } + } available-reps ; + M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- ) rep { { @@ -267,6 +282,11 @@ M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- ) } } case ; +M: x86 %gather-vector-4-reps + { + { sse? { float-4-rep } } + } available-reps ; + M:: x86 %gather-vector-2 ( dst src1 src2 rep -- ) rep { { @@ -278,6 +298,11 @@ M:: x86 %gather-vector-2 ( dst src1 src2 rep -- ) } } case ; +M: x86 %gather-vector-2-reps + { + { sse2? { double-2-rep } } + } available-reps ; + M: x86 %add-vector ( dst src1 src2 rep -- ) { { float-4-rep [ ADDPS ] } @@ -290,6 +315,36 @@ M: x86 %add-vector ( dst src1 src2 rep -- ) { uint-4-rep [ PADDD ] } } case drop ; +M: x86 %add-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } + } available-reps ; + +M: x86 %saturated-add-vector ( dst src1 src2 rep -- ) + { + { char-16-rep [ PADDSB ] } + { uchar-16-rep [ PADDUSB ] } + { short-8-rep [ PADDSW ] } + { ushort-8-rep [ PADDUSW ] } + } case drop ; + +M: x86 %saturated-add-vector-reps + { + { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } } + } available-reps ; + +M: x86 %add-sub-vector ( dst src1 src2 rep -- ) + { + { float-4-rep [ ADDSUBPS ] } + { double-2-rep [ ADDSUBPD ] } + } case drop ; + +M: x86 %add-sub-vector-reps + { + { sse3? { float-4-rep double-2-rep } } + } available-reps ; + M: x86 %sub-vector ( dst src1 src2 rep -- ) { { float-4-rep [ SUBPS ] } @@ -302,42 +357,183 @@ M: x86 %sub-vector ( dst src1 src2 rep -- ) { uint-4-rep [ PSUBD ] } } case drop ; +M: x86 %sub-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } + } available-reps ; + +M: x86 %saturated-sub-vector ( dst src1 src2 rep -- ) + { + { char-16-rep [ PSUBSB ] } + { uchar-16-rep [ PSUBUSB ] } + { short-8-rep [ PSUBSW ] } + { ushort-8-rep [ PSUBUSW ] } + } case drop ; + +M: x86 %saturated-sub-vector-reps + { + { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } } + } available-reps ; + M: x86 %mul-vector ( dst src1 src2 rep -- ) { { float-4-rep [ MULPS ] } { double-2-rep [ MULPD ] } - { int-4-rep [ PMULLW ] } + { short-8-rep [ PMULLW ] } + { ushort-8-rep [ PMULLW ] } + { int-4-rep [ PMULLD ] } + { uint-4-rep [ PMULLD ] } } case drop ; +M: x86 %mul-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } + } available-reps ; + +M: x86 %saturated-mul-vector-reps + ! No multiplication with saturation on x86 + { } ; + M: x86 %div-vector ( dst src1 src2 rep -- ) { { float-4-rep [ DIVPS ] } { double-2-rep [ DIVPD ] } } case drop ; +M: x86 %div-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep } } + } available-reps ; + M: x86 %min-vector ( dst src1 src2 rep -- ) { + { char-16-rep [ PMINSB ] } + { uchar-16-rep [ PMINUB ] } + { short-8-rep [ PMINSW ] } + { ushort-8-rep [ PMINUW ] } + { int-4-rep [ PMINSD ] } + { uint-4-rep [ PMINUD ] } { float-4-rep [ MINPS ] } { double-2-rep [ MINPD ] } } case drop ; +M: x86 %min-vector-reps + { + { sse? { float-4-rep } } + { sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } } + { sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } } + } available-reps ; + M: x86 %max-vector ( dst src1 src2 rep -- ) { + { char-16-rep [ PMAXSB ] } + { uchar-16-rep [ PMAXUB ] } + { short-8-rep [ PMAXSW ] } + { ushort-8-rep [ PMAXUW ] } + { int-4-rep [ PMAXSD ] } + { uint-4-rep [ PMAXUD ] } { float-4-rep [ MAXPS ] } { double-2-rep [ MAXPD ] } } case drop ; +M: x86 %max-vector-reps + { + { sse? { float-4-rep } } + { sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } } + { sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } } + } available-reps ; + +M: x86 %horizontal-add-vector ( dst src rep -- ) + { + { float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] } + { double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] } + } case ; + +M: x86 %horizontal-add-vector-reps + { + { sse3? { float-4-rep double-2-rep } } + } available-reps ; + +M: x86 %abs-vector ( dst src rep -- ) + { + { char-16-rep [ PABSB ] } + { short-8-rep [ PABSW ] } + { int-4-rep [ PABSD ] } + } case ; + +M: x86 %abs-vector-reps + { + { ssse3? { char-16-rep short-8-rep int-4-rep } } + } available-reps ; + M: x86 %sqrt-vector ( dst src rep -- ) { { float-4-rep [ SQRTPS ] } { double-2-rep [ SQRTPD ] } } case ; -M: x86 %horizontal-add-vector ( dst src rep -- ) +M: x86 %sqrt-vector-reps { - { float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] } - { double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] } - } case ; + { sse? { float-4-rep } } + { sse2? { double-2-rep } } + } available-reps ; + +M: x86 %and-vector ( dst src1 src2 rep -- ) + { + { float-4-rep [ ANDPS ] } + { double-2-rep [ ANDPD ] } + { char-16-rep [ PAND ] } + { uchar-16-rep [ PAND ] } + { short-8-rep [ PAND ] } + { ushort-8-rep [ PAND ] } + { int-4-rep [ PAND ] } + { uint-4-rep [ PAND ] } + } case drop ; + +M: x86 %and-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } + } available-reps ; + +M: x86 %or-vector ( dst src1 src2 rep -- ) + { + { float-4-rep [ ORPS ] } + { double-2-rep [ ORPD ] } + { char-16-rep [ POR ] } + { uchar-16-rep [ POR ] } + { short-8-rep [ POR ] } + { ushort-8-rep [ POR ] } + { int-4-rep [ POR ] } + { uint-4-rep [ POR ] } + } case drop ; + +M: x86 %or-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } + } available-reps ; + +M: x86 %xor-vector ( dst src1 src2 rep -- ) + { + { float-4-rep [ XORPS ] } + { double-2-rep [ XORPD ] } + { char-16-rep [ PXOR ] } + { uchar-16-rep [ PXOR ] } + { short-8-rep [ PXOR ] } + { ushort-8-rep [ PXOR ] } + { int-4-rep [ PXOR ] } + { uint-4-rep [ PXOR ] } + } case drop ; + +M: x86 %xor-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } + } available-reps ; M: x86 %unbox-alien ( dst src -- ) alien-offset [+] MOV ; @@ -767,15 +963,29 @@ M: x86 small-enough? ( n -- ? ) #! set up by the caller. stack-frame get total-size>> + stack@ ; -: enable-sse2 ( -- ) - enable-float-intrinsics - enable-fsqrt - enable-float-min/max - enable-sse2-simd ; - -: enable-sse3 ( -- ) - enable-sse2 - enable-sse3-simd ; - +enable-simd enable-min/max -enable-fixnum-log2 \ No newline at end of file +enable-fixnum-log2 + +:: install-sse2-check ( -- ) + [ + sse-version 20 < [ + "This image was built to use SSE2 but your CPU does not support it." print + "You will need to bootstrap Factor again." print + flush + 1 exit + ] when + ] "cpu.x86" add-init-hook ; + +: enable-sse2 ( version -- ) + 20 >= [ + enable-float-intrinsics + enable-fsqrt + enable-float-min/max + install-sse2-check + ] when ; + +: check-sse ( -- ) + [ { sse_version } compile ] with-optimizer + "Checking for multimedia extensions: " write sse-version + [ sse-string write " detected" print ] [ enable-sse2 ] bi ; diff --git a/basis/math/floats/env/x86/x86.factor b/basis/math/floats/env/x86/x86.factor index e91fc4eda9..e9120567aa 100644 --- a/basis/math/floats/env/x86/x86.factor +++ b/basis/math/floats/env/x86/x86.factor @@ -31,9 +31,7 @@ M: x87-env (set-fp-env-register) set_x87_env ; M: x86 (fp-env-registers) - sse-version 20 >= - [ 2array ] - [ 1array ] if ; + sse2? [ 2array ] [ 1array ] if ; CONSTANT: sse-exception-flag-bits HEX: 3f CONSTANT: sse-exception-flag>bit diff --git a/basis/math/vectors/simd/alien/alien-tests.factor b/basis/math/vectors/simd/alien/alien-tests.factor deleted file mode 100644 index 87540dd9a5..0000000000 --- a/basis/math/vectors/simd/alien/alien-tests.factor +++ /dev/null @@ -1,70 +0,0 @@ -USING: cpu.architecture math.vectors.simd -math.vectors.simd.intrinsics accessors math.vectors.simd.alien -kernel classes.struct tools.test compiler sequences byte-arrays -alien math kernel.private specialized-arrays combinators ; -SPECIALIZED-ARRAY: float -IN: math.vectors.simd.alien.tests - -! Vector alien intrinsics -[ float-4{ 1 2 3 4 } ] [ - [ - float-4{ 1 2 3 4 } - underlying>> 0 float-4-rep alien-vector - ] compile-call float-4 boa -] unit-test - -[ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [ - 16 [ 1 ] B{ } replicate-as 16 - [ - 0 [ - { byte-array c-ptr fixnum } declare - float-4-rep set-alien-vector - ] compile-call - ] keep -] unit-test - -[ float-array{ 1 2 3 4 } ] [ - [ - float-array{ 1 2 3 4 } underlying>> - float-array{ 4 3 2 1 } clone - [ underlying>> 0 float-4-rep set-alien-vector ] keep - ] compile-call -] unit-test - -STRUCT: simd-struct -{ x float-4 } -{ y double-2 } -{ z double-4 } -{ w float-8 } ; - -[ t ] [ [ simd-struct ] compile-call >c-ptr [ 0 = ] all? ] unit-test - -[ - float-4{ 1 2 3 4 } - double-2{ 2 1 } - double-4{ 4 3 2 1 } - float-8{ 1 2 3 4 5 6 7 8 } -] [ - simd-struct - float-4{ 1 2 3 4 } >>x - double-2{ 2 1 } >>y - double-4{ 4 3 2 1 } >>z - float-8{ 1 2 3 4 5 6 7 8 } >>w - { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave -] unit-test - -[ - float-4{ 1 2 3 4 } - double-2{ 2 1 } - double-4{ 4 3 2 1 } - float-8{ 1 2 3 4 5 6 7 8 } -] [ - [ - simd-struct - float-4{ 1 2 3 4 } >>x - double-2{ 2 1 } >>y - double-4{ 4 3 2 1 } >>z - float-8{ 1 2 3 4 5 6 7 8 } >>w - { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave - ] compile-call -] unit-test diff --git a/basis/math/vectors/simd/alien/alien.factor b/basis/math/vectors/simd/alien/alien.factor deleted file mode 100644 index 1486f6d0af..0000000000 --- a/basis/math/vectors/simd/alien/alien.factor +++ /dev/null @@ -1,42 +0,0 @@ -! Copyright (C) 2009 Slava Pestov. -! See http://factorcode.org/license.txt for BSD license. -USING: alien accessors alien.c-types byte-arrays compiler.units -cpu.architecture locals kernel math math.vectors.simd -math.vectors.simd.intrinsics ; -IN: math.vectors.simd.alien - -:: define-simd-128-type ( class rep -- ) - - byte-array >>class - class >>boxed-class - [ rep alien-vector class boa ] >>getter - [ [ underlying>> ] 2dip rep set-alien-vector ] >>setter - 16 >>size - 8 >>align - rep >>rep - class name>> typedef ; - -:: define-simd-256-type ( class rep -- ) - - class >>class - class >>boxed-class - [ - [ rep alien-vector ] - [ 16 + >fixnum rep alien-vector ] 2bi - class boa - ] >>getter - [ - [ [ underlying1>> ] 2dip rep set-alien-vector ] - [ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ] - 3bi - ] >>setter - 32 >>size - 8 >>align - rep >>rep - class name>> typedef ; -[ - float-4 float-4-rep define-simd-128-type - double-2 double-2-rep define-simd-128-type - float-8 float-4-rep define-simd-256-type - double-4 double-2-rep define-simd-256-type -] with-compilation-unit diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index 641585a5d7..e934a641c4 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -1,27 +1,124 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors alien.c-types byte-arrays classes functors -kernel math parser prettyprint.custom sequences -sequences.private literals ; +USING: accessors alien.c-types assocs byte-arrays classes +effects fry functors generalizations kernel literals locals +math math.functions math.vectors math.vectors.simd.intrinsics +math.vectors.specialization parser prettyprint.custom sequences +sequences.private strings words definitions macros cpu.architecture +namespaces arrays quotations ; +QUALIFIED-WITH: math m IN: math.vectors.simd.functor ERROR: bad-length got expected ; +MACRO: simd-boa ( rep class -- simd-array ) + [ rep-components ] [ new ] bi* '[ _ _ nsequence ] ; + +:: define-boa-custom-inlining ( word rep class -- ) + word [ + drop + rep rep rep-gather-word supported-simd-op? [ + [ rep (simd-boa) class boa ] + ] [ word def>> ] if + ] "custom-inlining" set-word-prop ; + +: simd-with ( rep class x -- simd-array ) + [ rep-components ] [ new ] [ '[ _ ] ] tri* swap replicate-as ; inline + +:: define-with-custom-inlining ( word rep class -- ) + word [ + drop + rep \ (simd-broadcast) supported-simd-op? [ + [ rep rep-coerce rep (simd-broadcast) class boa ] + ] [ word def>> ] if + ] "custom-inlining" set-word-prop ; + +: boa-effect ( rep n -- effect ) + [ rep-components ] dip * + [ CHAR: a + 1string ] map + { "simd-vector" } ; + +: supported-simd-ops ( assoc rep -- assoc' ) + [ simd-ops get ] dip + '[ nip _ swap supported-simd-op? ] assoc-filter + '[ drop _ key? ] assoc-filter ; + +ERROR: bad-schema schema ; + +: low-level-ops ( box-quot: ( inputs... simd-op -- outputs... ) -- alist ) + [ simd-ops get ] dip '[ + 1quotation + over word-schema _ ?at [ bad-schema ] unless + [ ] 2sequence + ] assoc-map ; + +:: high-level-ops ( ctor elt-class -- assoc ) + ! Some SIMD operations are defined in terms of others. + { + { vneg [ [ dup v- ] keep v- ] } + { n+v [ [ ctor execute ] dip v+ ] } + { v+n [ ctor execute v+ ] } + { n-v [ [ ctor execute ] dip v- ] } + { v-n [ ctor execute v- ] } + { n*v [ [ ctor execute ] dip v* ] } + { v*n [ ctor execute v* ] } + { n/v [ [ ctor execute ] dip v/ ] } + { v/n [ ctor execute v/ ] } + { norm-sq [ dup v. assert-positive ] } + { norm [ norm-sq sqrt ] } + { normalize [ dup norm v/n ] } + } + ! To compute dot product and distance with integer vectors, we + ! have to do things less efficiently, with integer overflow checks, + ! in the general case. + elt-class m:float = [ + { + { distance [ v- norm ] } + { v. [ v* sum ] } + } append + ] when ; + +:: simd-vector-words ( class ctor rep vv->v v->v v->n -- ) + rep rep-component-type c-type-boxed-class :> elt-class + class + elt-class + { + { { +vector+ +vector+ -> +vector+ } vv->v } + { { +vector+ -> +vector+ } v->v } + { { +vector+ -> +scalar+ } v->n } + { { +vector+ -> +nonnegative+ } v->n } + } low-level-ops + rep supported-simd-ops + ctor elt-class high-level-ops assoc-union + specialize-vector-words ; + +:: define-simd-128-type ( class rep -- ) + + byte-array >>class + class >>boxed-class + [ rep alien-vector class boa ] >>getter + [ [ underlying>> ] 2dip rep set-alien-vector ] >>setter + 16 >>size + 8 >>align + rep >>rep + class typedef ; + FUNCTOR: define-simd-128 ( T -- ) -T-TYPE IS ${T} - -N [ 16 T-TYPE heap-size /i ] +N [ 16 T heap-size /i ] A DEFINES-CLASS ${T}-${N} +A-boa DEFINES ${A}-boa +A-with DEFINES ${A}-with >A DEFINES >${A} A{ DEFINES ${A}{ -NTH [ T-TYPE dup c-type-getter-boxer array-accessor ] -SET-NTH [ T-TYPE dup c-setter array-accessor ] +NTH [ T dup c-type-getter-boxer array-accessor ] +SET-NTH [ T dup c-setter array-accessor ] -A-rep IS ${A}-rep +A-rep [ A name>> "-rep" append "cpu.architecture" lookup ] A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op +A-v->v-op DEFINES-PRIVATE ${A}-v->v-op A-v->n-op DEFINES-PRIVATE ${A}-v->n-op WHERE @@ -51,6 +148,8 @@ M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ; M: A byte-length underlying>> length ; inline +M: A element-type drop A-rep rep-component-type ; + M: A pprint-delims drop \ A{ \ } ; M: A >pprint-sequence ; @@ -59,6 +158,16 @@ M: A pprint* pprint-object ; SYNTAX: A{ \ } [ >A ] parse-literal ; +: A-with ( x -- simd-array ) [ A-rep A ] dip simd-with ; + +\ A-with \ A-rep \ A define-with-custom-inlining + +\ A-boa [ \ A-rep \ A simd-boa ] \ A-rep 1 boa-effect define-declared + +\ A-rep rep-gather-word [ + \ A-boa \ A-rep \ A define-boa-custom-inlining +] when + INSTANCE: A sequence v-op ( v1 v2 quot -- v3 ) [ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline +: A-v->v-op ( v1 quot -- v2 ) + [ underlying>> A-rep ] dip call \ A boa ; inline + : A-v->n-op ( v quot -- n ) [ underlying>> A-rep ] dip call ; inline +\ A \ A-with \ A-rep \ A-vv->v-op \ A-v->v-op \ A-v->n-op simd-vector-words +\ A \ A-rep define-simd-128-type + PRIVATE> ;FUNCTOR ! Synthesize 256-bit vectors from a pair of 128-bit vectors +SLOT: underlying1 +SLOT: underlying2 + +:: define-simd-256-type ( class rep -- ) + + class >>class + class >>boxed-class + [ + [ rep alien-vector ] + [ 16 + >fixnum rep alien-vector ] 2bi + class boa + ] >>getter + [ + [ [ underlying1>> ] 2dip rep set-alien-vector ] + [ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ] + 3bi + ] >>setter + 32 >>size + 8 >>align + rep >>rep + class typedef ; + FUNCTOR: define-simd-256 ( T -- ) -T-TYPE IS ${T} - -N [ 32 T-TYPE heap-size /i ] +N [ 32 T heap-size /i ] N/2 [ N 2 / ] A/2 IS ${T}-${N/2} +A/2-boa IS ${A/2}-boa +A/2-with IS ${A/2}-with A DEFINES-CLASS ${T}-${N} +A-boa DEFINES ${A}-boa +A-with DEFINES ${A}-with >A DEFINES >${A} A{ DEFINES ${A}{ A-deref DEFINES-PRIVATE ${A}-deref -A-rep IS ${A/2}-rep +A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ] A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op +A-v->v-op DEFINES-PRIVATE ${A}-v->v-op A-v->n-op DEFINES-PRIVATE ${A}-v->n-op WHERE @@ -129,6 +269,8 @@ M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ; M: A byte-length drop 32 ; inline +M: A element-type drop A-rep rep-component-type ; + SYNTAX: A{ \ } [ >A ] parse-literal ; M: A pprint-delims drop \ A{ \ } ; @@ -137,6 +279,16 @@ M: A >pprint-sequence ; M: A pprint* pprint-object ; +: A-with ( x -- simd-array ) + [ A/2-with ] [ A/2-with ] bi [ underlying>> ] bi@ + \ A boa ; inline + +: A-boa ( ... -- simd-array ) + [ A/2-boa ] N/2 ndip A/2-boa [ underlying>> ] bi@ + \ A boa ; inline + +\ A-rep 2 boa-effect \ A-boa set-stack-effect + INSTANCE: A sequence : A-vv->v-op ( v1 v2 quot -- v3 ) @@ -144,8 +296,15 @@ INSTANCE: A sequence [ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi \ A boa ; inline -: A-v->n-op ( v1 combine-quot reduce-quot -- v2 ) - [ [ [ underlying1>> ] [ underlying2>> ] bi A-rep ] dip call A-rep ] - dip call ; inline +: A-v->v-op ( v1 combine-quot -- v2 ) + [ [ underlying1>> A-rep ] dip call ] + [ [ underlying2>> A-rep ] dip call ] 2bi + \ A boa ; inline + +: A-v->n-op ( v1 combine-quot -- v2 ) + [ [ underlying1>> ] [ underlying2>> ] bi A-rep (simd-v+) A-rep ] dip call ; inline + +\ A \ A-with \ A-rep \ A-vv->v-op \ A-v->v-op \ A-v->n-op simd-vector-words +\ A \ A-rep define-simd-256-type ;FUNCTOR diff --git a/basis/math/vectors/simd/intrinsics/intrinsics-tests.factor b/basis/math/vectors/simd/intrinsics/intrinsics-tests.factor new file mode 100644 index 0000000000..84eee935a0 --- /dev/null +++ b/basis/math/vectors/simd/intrinsics/intrinsics-tests.factor @@ -0,0 +1,18 @@ +IN: math.vectors.simd.intrinsics.tests +USING: math.vectors.simd.intrinsics cpu.architecture tools.test ; + +[ 16 ] [ uchar-16-rep rep-components ] unit-test +[ 16 ] [ char-16-rep rep-components ] unit-test +[ 8 ] [ ushort-8-rep rep-components ] unit-test +[ 8 ] [ short-8-rep rep-components ] unit-test +[ 4 ] [ uint-4-rep rep-components ] unit-test +[ 4 ] [ int-4-rep rep-components ] unit-test +[ 4 ] [ float-4-rep rep-components ] unit-test +[ 2 ] [ double-2-rep rep-components ] unit-test + +{ 4 1 } [ uint-4-rep (simd-boa) ] must-infer-as +{ 4 1 } [ int-4-rep (simd-boa) ] must-infer-as +{ 4 1 } [ float-4-rep (simd-boa) ] must-infer-as +{ 2 1 } [ double-2-rep (simd-boa) ] must-infer-as + + diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index 914d1ef169..2c1f76cfe1 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -1,18 +1,48 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel alien alien.data cpu.architecture libc ; +USING: alien alien.c-types alien.data assocs combinators +cpu.architecture fry generalizations kernel libc macros math +sequences effects accessors namespaces lexer parser vocabs.parser +words arrays math.vectors ; IN: math.vectors.simd.intrinsics ERROR: bad-simd-call ; -: (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ; -: (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ; -: (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ; -: (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ; -: (simd-vmin) ( v1 v2 rep -- v3 ) bad-simd-call ; -: (simd-vmax) ( v1 v2 rep -- v3 ) bad-simd-call ; -: (simd-vsqrt) ( v1 v2 rep -- v3 ) bad-simd-call ; -: (simd-sum) ( v1 rep -- v2 ) bad-simd-call ; +<< + +: simd-effect ( word -- effect ) + stack-effect [ in>> "rep" suffix ] [ out>> ] bi ; + +SYMBOL: simd-ops + +V{ } clone simd-ops set-global + +SYNTAX: SIMD-OP: + scan-word dup name>> "(simd-" ")" surround create-in + [ nip [ bad-simd-call ] define ] + [ [ simd-effect ] dip set-stack-effect ] + [ 2array simd-ops get push ] + 2tri ; + +>> + +SIMD-OP: v+ +SIMD-OP: v- +SIMD-OP: v+- +SIMD-OP: vs+ +SIMD-OP: vs- +SIMD-OP: vs* +SIMD-OP: v* +SIMD-OP: v/ +SIMD-OP: vmin +SIMD-OP: vmax +SIMD-OP: vsqrt +SIMD-OP: sum +SIMD-OP: vabs +SIMD-OP: vbitand +SIMD-OP: vbitor +SIMD-OP: vbitxor + : (simd-broadcast) ( x rep -- v ) bad-simd-call ; : (simd-gather-2) ( a b rep -- v ) bad-simd-call ; : (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ; @@ -26,3 +56,61 @@ ERROR: bad-simd-call ; ! Inefficient version for when intrinsics are missing [ swap swap ] dip rep-size memcpy ; +<< + +: rep-components ( rep -- n ) + 16 swap rep-component-type heap-size /i ; foldable + +: rep-coercer ( rep -- quot ) + { + { [ dup int-vector-rep? ] [ [ >fixnum ] ] } + { [ dup float-vector-rep? ] [ [ >float ] ] } + } cond nip ; foldable + +: rep-coerce ( value rep -- value' ) + rep-coercer call( value -- value' ) ; inline + +CONSTANT: rep-gather-words + { + { 2 (simd-gather-2) } + { 4 (simd-gather-4) } + } + +: rep-gather-word ( rep -- word ) + rep-components rep-gather-words at ; + +>> + +MACRO: (simd-boa) ( rep -- quot ) + { + [ rep-coercer ] + [ rep-components ] + [ ] + [ rep-gather-word ] + } cleave + '[ _ _ napply _ _ execute ] ; + +GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? ) + +M: vector-rep supported-simd-op? + { + { \ (simd-v+) [ %add-vector-reps ] } + { \ (simd-vs+) [ %saturated-add-vector-reps ] } + { \ (simd-v+-) [ %add-sub-vector-reps ] } + { \ (simd-v-) [ %sub-vector-reps ] } + { \ (simd-vs-) [ %saturated-sub-vector-reps ] } + { \ (simd-v*) [ %mul-vector-reps ] } + { \ (simd-vs*) [ %saturated-mul-vector-reps ] } + { \ (simd-v/) [ %div-vector-reps ] } + { \ (simd-vmin) [ %min-vector-reps ] } + { \ (simd-vmax) [ %max-vector-reps ] } + { \ (simd-vsqrt) [ %sqrt-vector-reps ] } + { \ (simd-sum) [ %horizontal-add-vector-reps ] } + { \ (simd-vabs) [ %abs-vector-reps ] } + { \ (simd-vbitand) [ %and-vector-reps ] } + { \ (simd-vbitor) [ %or-vector-reps ] } + { \ (simd-vbitxor) [ %xor-vector-reps ] } + { \ (simd-broadcast) [ %broadcast-vector-reps ] } + { \ (simd-gather-2) [ %gather-vector-2-reps ] } + { \ (simd-gather-4) [ %gather-vector-4-reps ] } + } case member? ; diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor index d35f8589b6..5153b0c6f4 100644 --- a/basis/math/vectors/simd/simd-docs.factor +++ b/basis/math/vectors/simd/simd-docs.factor @@ -1,5 +1,5 @@ USING: help.markup help.syntax sequences math math.vectors -kernel.private classes.tuple.private +classes.tuple.private math.vectors.simd.intrinsics cpu.architecture ; IN: math.vectors.simd @@ -17,23 +17,49 @@ $nl "There should never be any reason to use " { $link "math.vectors.simd.intrinsics" } " directly, but they too have a straightforward, but lower-level, interface." ; ARTICLE: "math.vectors.simd.support" "Supported SIMD instruction sets and operations" -"At present, the SIMD support makes use of SSE2 and a few SSE3 instructions on x86 CPUs." +"At present, the SIMD support makes use of a subset of SSE up to SSE4.1. The subset used depends on the current CPU type." $nl -"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } ". If SSE3 is not available, software fallbacks are used for " { $link sum } " and related words, decreasing performance." +"SSE1 only supports single-precision SIMD (" { $snippet "float-4" } " and " { $snippet "float-8" } ")." $nl -"On PowerPC, or older x86 chips without SSE2, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance." +"SSE2 introduces double-precision SIMD (" { $snippet "double-2" } " and " { $snippet "double-4" } ") and integer SIMD (all types). Integer SIMD in missing a few features, in particular the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "." +$nl +"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "." +$nl +"SSSE3 introduces " { $link vabs } " for " { $snippet "char-16" } ", " { $snippet "short-8" } " and " { $snippet "int-4" } "." +$nl +"SSE4.1 introduces " { $link vmin } " and " { $link vmax } " for all remaining integer types." +$nl +"On PowerPC, or older x86 chips without SSE, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance." $nl "The primities in the " { $vocab-link "math.vectors.simd.intrinsics" } " vocabulary do not have software fallbacks, but they should not be called directly in any case." ; ARTICLE: "math.vectors.simd.types" "SIMD vector types" -"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type such as " { $snippet "float" } " or " { $snippet "double" } ", and " { $snippet "count" } " is a vector dimension, such as 2, 4, or 8." +"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type and " { $snippet "count" } " is a vector dimension." $nl -"The following vector types are defined:" -{ $subsection float-4 } -{ $subsection double-2 } -{ $subsection float-8 } -{ $subsection double-4 } -"For each vector type, several words are defined:" +"To use a SIMD vector type, a parsing word is used to generate the relevant code and bring it into the vocabulary search path; this is the same idea as with " { $link "specialized-arrays" } ":" +{ $subsection POSTPONE: SIMD: } +"The following vector types are supported:" +{ $code + "char-16" + "uchar-16" + "char-32" + "uchar-32" + "short-8" + "ushort-8" + "short-16" + "ushort-16" + "int-4" + "uint-4" + "int-8" + "uint-8" + "float-4" + "float-8" + "double-2" + "double-4" +} ; + +ARTICLE: "math.vectors.simd.words" "SIMD vector words" +"For each SIMD vector type, several words are defined:" { $table { "Word" "Stack effect" "Description" } { { $snippet "type-with" } { $snippet "( x -- simd-array )" } "creates a new instance where all components are set to a single scalar" } @@ -41,24 +67,6 @@ $nl { { $snippet ">type" } { $snippet "( seq -- simd-array )" } "creates a new instance initialized with the elements of an existing sequence, which must have the correct length" } { { $snippet "type{" } { $snippet "type{ elements... }" } "parsing word defining literal syntax for an SIMD vector; the correct number of elements must be given" } } -"The " { $link float-4 } " and " { $link double-2 } " types correspond to 128-bit vector registers. The " { $link float-8 } " and " { $link double-4 } " types are not directly supported in hardware, and instead unbox to a pair of 128-bit vector registers." -$nl -"Operations on " { $link float-4 } " instances:" -{ $subsection float-4-with } -{ $subsection float-4-boa } -{ $subsection POSTPONE: float-4{ } -"Operations on " { $link double-2 } " instances:" -{ $subsection double-2-with } -{ $subsection double-2-boa } -{ $subsection POSTPONE: double-2{ } -"Operations on " { $link float-8 } " instances:" -{ $subsection float-8-with } -{ $subsection float-8-boa } -{ $subsection POSTPONE: float-8{ } -"Operations on " { $link double-4 } " instances:" -{ $subsection double-4-with } -{ $subsection double-4-boa } -{ $subsection POSTPONE: double-4{ } "To actually perform vector arithmetic on SIMD vectors, use " { $link "math-vectors" } " words." { $see-also "c-types-specs" } ; @@ -84,6 +92,8 @@ SYMBOLS: x y ; { $code """USING: compiler.tree.debugger kernel.private math.vectors math.vectors.simd ; +SIMD: float-4 +IN: simd-demo : interpolate ( v a b -- w ) { float-4 float-4 float-4 } declare @@ -96,6 +106,8 @@ $nl { $code """USING: compiler.tree.debugger hints math.vectors math.vectors.simd ; +SIMD: float-4 +IN: simd-demo : interpolate ( v a b -- w ) [ v* ] [ [ 1.0 ] dip n-v v* ] bi-curry* bi v+ ; @@ -110,6 +122,7 @@ $nl "In the " { $snippet "interpolate" } " word, there is still a call to the " { $link } " primitive, because the return value at the end is being boxed on the heap. In the next example, no memory allocation occurs at all because the SIMD vectors are stored inside a struct class (see " { $link "classes.struct" } "); also note the use of inlining:" { $code """USING: compiler.tree.debugger math.vectors math.vectors.simd ; +SIMD: float-4 IN: simd-demo STRUCT: actor @@ -150,106 +163,37 @@ ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives" } "The compiler converts " { $link "math-vectors" } " into SIMD primitives automatically in cases where it is safe; this means that the input types are known to be SIMD vectors, and the CPU supports SIMD." $nl -"It is best to avoid calling these primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "." -{ $subsection (simd-v+) } -{ $subsection (simd-v-) } -{ $subsection (simd-v/) } -{ $subsection (simd-vmin) } -{ $subsection (simd-vmax) } -{ $subsection (simd-vsqrt) } -{ $subsection (simd-sum) } -{ $subsection (simd-broadcast) } -{ $subsection (simd-gather-2) } -{ $subsection (simd-gather-4) } +"It is best to avoid calling SIMD primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "." +$nl "There are two primitives which are used to implement accessing SIMD vector fields of " { $link "classes.struct" } ":" { $subsection alien-vector } { $subsection set-alien-vector } "For the most part, the above primitives correspond directly to vector arithmetic words. They take a representation parameter, which is one of the singleton members of the " { $link vector-rep } " union in the " { $vocab-link "cpu.architecture" } " vocabulary." ; ARTICLE: "math.vectors.simd.alien" "SIMD data in struct classes" -"Struct classes may contain fields which store SIMD data; use one of the following C type names:" -{ $code -"""float-4 -double-2 -float-8 -double-4""" } -"Passing SIMD data as function parameters is not yet supported." ; +"Struct classes may contain fields which store SIMD data; for each SIMD vector type listed in " { $snippet "math.vectors.simd.types" } " there is a C type with the same name." +$nl +"Only SIMD struct fields are allowed at the moment; passing SIMD data as function parameters is not yet supported." ; + +ARTICLE: "math.vectors.simd.accuracy" "Numerical accuracy of SIMD primitives" +"No guarantees are made that " { $vocab-link "math.vectors.simd" } " words will give identical results on different SSE versions, or between the hardware intrinsics and the software fallbacks." +$nl +"In particular, horizontal operations on " { $snippet "float-4" } " and " { $snippet "float-8" } " are affected by this. They are computed with lower precision in intrinsics than the software fallback. Horizontal opeartions include anything involving adding together the components of a vector, such as " { $link sum } " or " { $link normalize } "." ; ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)" "The " { $vocab-link "math.vectors.simd" } " vocabulary extends the " { $vocab-link "math.vectors" } " vocabulary to support efficient vector arithmetic on small, fixed-size vectors." { $subsection "math.vectors.simd.intro" } { $subsection "math.vectors.simd.types" } +{ $subsection "math.vectors.simd.words" } { $subsection "math.vectors.simd.support" } +{ $subsection "math.vectors.simd.accuracy" } { $subsection "math.vectors.simd.efficiency" } { $subsection "math.vectors.simd.alien" } { $subsection "math.vectors.simd.intrinsics" } ; -! ! ! float-4 - -HELP: float-4 -{ $class-description "A sequence of four single-precision floating point values. New instances can be created with " { $link float-4-with } " or " { $link float-4-boa } "." } ; - -HELP: float-4-with -{ $values { "x" float } { "simd-array" float-4 } } -{ $description "Creates a new vector with all four components equal to a scalar." } ; - -HELP: float-4-boa -{ $values { "a" float } { "b" float } { "c" float } { "d" float } { "simd-array" float-4 } } -{ $description "Creates a new vector from four scalar components." } ; - -HELP: float-4{ -{ $syntax "float-4{ a b c d }" } -{ $description "Literal syntax for a " { $link float-4 } "." } ; - -! ! ! double-2 - -HELP: double-2 -{ $class-description "A sequence of two double-precision floating point values. New instances can be created with " { $link double-2-with } " or " { $link double-2-boa } "." } ; - -HELP: double-2-with -{ $values { "x" float } { "simd-array" double-2 } } -{ $description "Creates a new vector with both components equal to a scalar." } ; - -HELP: double-2-boa -{ $values { "a" float } { "b" float } { "simd-array" double-2 } } -{ $description "Creates a new vector from two scalar components." } ; - -HELP: double-2{ -{ $syntax "double-2{ a b }" } -{ $description "Literal syntax for a " { $link double-2 } "." } ; - -! ! ! float-8 - -HELP: float-8 -{ $class-description "A sequence of eight single-precision floating point values. New instances can be created with " { $link float-8-with } " or " { $link float-8-boa } "." } ; - -HELP: float-8-with -{ $values { "x" float } { "simd-array" float-8 } } -{ $description "Creates a new vector with all eight components equal to a scalar." } ; - -HELP: float-8-boa -{ $values { "a" float } { "b" float } { "c" float } { "d" float } { "e" float } { "f" float } { "g" float } { "h" float } { "simd-array" float-8 } } -{ $description "Creates a new vector from eight scalar components." } ; - -HELP: float-8{ -{ $syntax "float-8{ a b c d e f g h }" } -{ $description "Literal syntax for a " { $link float-8 } "." } ; - -! ! ! double-4 - -HELP: double-4 -{ $class-description "A sequence of four double-precision floating point values. New instances can be created with " { $link double-4-with } " or " { $link double-4-boa } "." } ; - -HELP: double-4-with -{ $values { "x" float } { "simd-array" double-4 } } -{ $description "Creates a new vector with all four components equal to a scalar." } ; - -HELP: double-4-boa -{ $values { "a" float } { "b" float } { "c" float } { "d" float } { "simd-array" double-4 } } -{ $description "Creates a new vector from four scalar components." } ; - -HELP: double-4{ -{ $syntax "double-4{ a b c d }" } -{ $description "Literal syntax for a " { $link double-4 } "." } ; +HELP: SIMD: +{ $syntax "SIMD: type-length" } +{ $values { "type" "a scalar C type" } { "length" "a vector dimension" } } +{ $description "Brings a SIMD array for holding " { $snippet "length" } " values of " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ; ABOUT: "math.vectors.simd" diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index f5318c341f..db8597fc9d 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -1,8 +1,30 @@ +USING: accessors arrays classes compiler compiler.tree.debugger +effects fry io kernel kernel.private math math.functions +math.private math.vectors math.vectors.simd +math.vectors.simd.private prettyprint random sequences system +tools.test vocabs assocs compiler.cfg.debugger words +locals math.vectors.specialization combinators cpu.architecture +math.vectors.simd.intrinsics namespaces byte-arrays alien +specialized-arrays classes.struct ; +FROM: alien.c-types => c-type-boxed-class ; +SPECIALIZED-ARRAY: float +SIMD: char-16 +SIMD: uchar-16 +SIMD: char-32 +SIMD: uchar-32 +SIMD: short-8 +SIMD: ushort-8 +SIMD: short-16 +SIMD: ushort-16 +SIMD: int-4 +SIMD: uint-4 +SIMD: int-8 +SIMD: uint-8 +SIMD: float-4 +SIMD: float-8 +SIMD: double-2 +SIMD: double-4 IN: math.vectors.simd.tests -USING: math math.vectors.simd math.vectors.simd.private -math.vectors math.functions math.private kernel.private compiler -sequences tools.test compiler.tree.debugger accessors kernel -system ; [ float-4{ 0 0 0 0 } ] [ float-4 new ] unit-test @@ -12,344 +34,6 @@ system ; [ V{ float } ] [ [ { float-4 } declare norm ] final-classes ] unit-test -[ float-4{ 12 12 12 12 } ] [ - 12 [ float-4-with ] compile-call -] unit-test - -[ float-4{ 1 2 3 4 } ] [ - 1 2 3 4 [ float-4-boa ] compile-call -] unit-test - -[ float-4{ 11 22 33 44 } ] [ - float-4{ 1 2 3 4 } float-4{ 10 20 30 40 } - [ { float-4 float-4 } declare v+ ] compile-call -] unit-test - -[ float-4{ -9 -18 -27 -36 } ] [ - float-4{ 1 2 3 4 } float-4{ 10 20 30 40 } - [ { float-4 float-4 } declare v- ] compile-call -] unit-test - -[ float-4{ 10 40 90 160 } ] [ - float-4{ 1 2 3 4 } float-4{ 10 20 30 40 } - [ { float-4 float-4 } declare v* ] compile-call -] unit-test - -[ float-4{ 10 100 1000 10000 } ] [ - float-4{ 100 2000 30000 400000 } float-4{ 10 20 30 40 } - [ { float-4 float-4 } declare v/ ] compile-call -] unit-test - -[ float-4{ -10 -20 -30 -40 } ] [ - float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 } - [ { float-4 float-4 } declare vmin ] compile-call -] unit-test - -[ float-4{ 10 20 30 40 } ] [ - float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 } - [ { float-4 float-4 } declare vmax ] compile-call -] unit-test - -[ 10.0 ] [ - float-4{ 1 2 3 4 } - [ { float-4 } declare sum ] compile-call -] unit-test - -[ 13.0 ] [ - float-4{ 1 2 3 4 } - [ { float-4 } declare sum 3.0 + ] compile-call -] unit-test - -[ 8.0 ] [ - float-4{ 1 2 3 4 } float-4{ 2 0 2 0 } - [ { float-4 float-4 } declare v. ] compile-call -] unit-test - -[ float-4{ 5 10 15 20 } ] [ - 5.0 float-4{ 1 2 3 4 } - [ { float float-4 } declare n*v ] compile-call -] unit-test - -[ float-4{ 5 10 15 20 } ] [ - float-4{ 1 2 3 4 } 5.0 - [ { float float-4 } declare v*n ] compile-call -] unit-test - -[ float-4{ 10 5 2 5 } ] [ - 10.0 float-4{ 1 2 5 2 } - [ { float float-4 } declare n/v ] compile-call -] unit-test - -[ float-4{ 0.5 1 1.5 2 } ] [ - float-4{ 1 2 3 4 } 2 - [ { float float-4 } declare v/n ] compile-call -] unit-test - -[ float-4{ 1 0 0 0 } ] [ - float-4{ 10 0 0 0 } - [ { float-4 } declare normalize ] compile-call -] unit-test - -[ 30.0 ] [ - float-4{ 1 2 3 4 } - [ { float-4 } declare norm-sq ] compile-call -] unit-test - -[ t ] [ - float-4{ 1 0 0 0 } - float-4{ 0 1 0 0 } - [ { float-4 float-4 } declare distance ] compile-call - 2 sqrt 1.0e-6 ~ -] unit-test - -[ double-2{ 12 12 } ] [ - 12 [ double-2-with ] compile-call -] unit-test - -[ double-2{ 1 2 } ] [ - 1 2 [ double-2-boa ] compile-call -] unit-test - -[ double-2{ 11 22 } ] [ - double-2{ 1 2 } double-2{ 10 20 } - [ { double-2 double-2 } declare v+ ] compile-call -] unit-test - -[ double-2{ -9 -18 } ] [ - double-2{ 1 2 } double-2{ 10 20 } - [ { double-2 double-2 } declare v- ] compile-call -] unit-test - -[ double-2{ 10 40 } ] [ - double-2{ 1 2 } double-2{ 10 20 } - [ { double-2 double-2 } declare v* ] compile-call -] unit-test - -[ double-2{ 10 100 } ] [ - double-2{ 100 2000 } double-2{ 10 20 } - [ { double-2 double-2 } declare v/ ] compile-call -] unit-test - -[ double-2{ -10 -20 } ] [ - double-2{ -10 20 } double-2{ 10 -20 } - [ { double-2 double-2 } declare vmin ] compile-call -] unit-test - -[ double-2{ 10 20 } ] [ - double-2{ -10 20 } double-2{ 10 -20 } - [ { double-2 double-2 } declare vmax ] compile-call -] unit-test - -[ 3.0 ] [ - double-2{ 1 2 } - [ { double-2 } declare sum ] compile-call -] unit-test - -[ 7.0 ] [ - double-2{ 1 2 } - [ { double-2 } declare sum 4.0 + ] compile-call -] unit-test - -[ 16.0 ] [ - double-2{ 1 2 } double-2{ 2 7 } - [ { double-2 double-2 } declare v. ] compile-call -] unit-test - -[ double-2{ 5 10 } ] [ - 5.0 double-2{ 1 2 } - [ { float double-2 } declare n*v ] compile-call -] unit-test - -[ double-2{ 5 10 } ] [ - double-2{ 1 2 } 5.0 - [ { float double-2 } declare v*n ] compile-call -] unit-test - -[ double-2{ 10 5 } ] [ - 10.0 double-2{ 1 2 } - [ { float double-2 } declare n/v ] compile-call -] unit-test - -[ double-2{ 0.5 1 } ] [ - double-2{ 1 2 } 2 - [ { float double-2 } declare v/n ] compile-call -] unit-test - -[ double-2{ 0 0 } ] [ double-2 new ] unit-test - -[ double-2{ 1 0 } ] [ - double-2{ 10 0 } - [ { double-2 } declare normalize ] compile-call -] unit-test - -[ 5.0 ] [ - double-2{ 1 2 } - [ { double-2 } declare norm-sq ] compile-call -] unit-test - -[ t ] [ - double-2{ 1 0 } - double-2{ 0 1 } - [ { double-2 double-2 } declare distance ] compile-call - 2 sqrt 1.0e-6 ~ -] unit-test - -[ double-4{ 0 0 0 0 } ] [ double-4 new ] unit-test - -[ double-4{ 1 2 3 4 } ] [ - 1 2 3 4 double-4-boa -] unit-test - -[ double-4{ 1 1 1 1 } ] [ - 1 double-4-with -] unit-test - -[ double-4{ 0 1 2 3 } ] [ - 1 double-4-with [ * ] map-index -] unit-test - -[ V{ float } ] [ [ { double-4 } declare norm-sq ] final-classes ] unit-test - -[ V{ float } ] [ [ { double-4 } declare norm ] final-classes ] unit-test - -[ double-4{ 12 12 12 12 } ] [ - 12 [ double-4-with ] compile-call -] unit-test - -[ double-4{ 1 2 3 4 } ] [ - 1 2 3 4 [ double-4-boa ] compile-call -] unit-test - -[ double-4{ 11 22 33 44 } ] [ - double-4{ 1 2 3 4 } double-4{ 10 20 30 40 } - [ { double-4 double-4 } declare v+ ] compile-call -] unit-test - -[ double-4{ -9 -18 -27 -36 } ] [ - double-4{ 1 2 3 4 } double-4{ 10 20 30 40 } - [ { double-4 double-4 } declare v- ] compile-call -] unit-test - -[ double-4{ 10 40 90 160 } ] [ - double-4{ 1 2 3 4 } double-4{ 10 20 30 40 } - [ { double-4 double-4 } declare v* ] compile-call -] unit-test - -[ double-4{ 10 100 1000 10000 } ] [ - double-4{ 100 2000 30000 400000 } double-4{ 10 20 30 40 } - [ { double-4 double-4 } declare v/ ] compile-call -] unit-test - -[ double-4{ -10 -20 -30 -40 } ] [ - double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 } - [ { double-4 double-4 } declare vmin ] compile-call -] unit-test - -[ double-4{ 10 20 30 40 } ] [ - double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 } - [ { double-4 double-4 } declare vmax ] compile-call -] unit-test - -[ 10.0 ] [ - double-4{ 1 2 3 4 } - [ { double-4 } declare sum ] compile-call -] unit-test - -[ 13.0 ] [ - double-4{ 1 2 3 4 } - [ { double-4 } declare sum 3.0 + ] compile-call -] unit-test - -[ 8.0 ] [ - double-4{ 1 2 3 4 } double-4{ 2 0 2 0 } - [ { double-4 double-4 } declare v. ] compile-call -] unit-test - -[ double-4{ 5 10 15 20 } ] [ - 5.0 double-4{ 1 2 3 4 } - [ { float double-4 } declare n*v ] compile-call -] unit-test - -[ double-4{ 5 10 15 20 } ] [ - double-4{ 1 2 3 4 } 5.0 - [ { float double-4 } declare v*n ] compile-call -] unit-test - -[ double-4{ 10 5 2 5 } ] [ - 10.0 double-4{ 1 2 5 2 } - [ { float double-4 } declare n/v ] compile-call -] unit-test - -[ double-4{ 0.5 1 1.5 2 } ] [ - double-4{ 1 2 3 4 } 2 - [ { float double-4 } declare v/n ] compile-call -] unit-test - -[ double-4{ 1 0 0 0 } ] [ - double-4{ 10 0 0 0 } - [ { double-4 } declare normalize ] compile-call -] unit-test - -[ 30.0 ] [ - double-4{ 1 2 3 4 } - [ { double-4 } declare norm-sq ] compile-call -] unit-test - -[ t ] [ - double-4{ 1 0 0 0 } - double-4{ 0 1 0 0 } - [ { double-4 double-4 } declare distance ] compile-call - 2 sqrt 1.0e-6 ~ -] unit-test - -[ float-8{ 0 0 0 0 0 0 0 0 } ] [ float-8 new ] unit-test - -[ float-8{ 0 0 0 0 0 0 0 0 } ] [ [ float-8 new ] compile-call ] unit-test - -[ float-8{ 1 1 1 1 1 1 1 1 } ] [ 1 float-8-with ] unit-test - -[ float-8{ 1 1 1 1 1 1 1 1 } ] [ [ 1 float-8-with ] compile-call ] unit-test - -[ float-8{ 1 2 3 4 5 6 7 8 } ] [ 1 2 3 4 5 6 7 8 float-8-boa ] unit-test - -[ float-8{ 1 2 3 4 5 6 7 8 } ] [ [ 1 2 3 4 5 6 7 8 float-8-boa ] compile-call ] unit-test - -[ float-8{ 3 6 9 12 15 18 21 24 } ] [ - float-8{ 1 2 3 4 5 6 7 8 } - float-8{ 2 4 6 8 10 12 14 16 } - [ { float-8 float-8 } declare v+ ] compile-call -] unit-test - -[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [ - float-8{ 1 2 3 4 5 6 7 8 } - float-8{ 2 4 6 8 10 12 14 16 } - [ { float-8 float-8 } declare v- ] compile-call -] unit-test - -[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [ - -0.5 - float-8{ 2 4 6 8 10 12 14 16 } - [ { float float-8 } declare n*v ] compile-call -] unit-test - -[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [ - float-8{ 2 4 6 8 10 12 14 16 } - -0.5 - [ { float-8 float } declare v*n ] compile-call -] unit-test - -[ float-8{ 256 128 64 32 16 8 4 2 } ] [ - 256.0 - float-8{ 1 2 4 8 16 32 64 128 } - [ { float float-8 } declare n/v ] compile-call -] unit-test - -[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [ - float-8{ 2 4 6 8 10 12 14 16 } - -2.0 - [ { float-8 float } declare v/n ] compile-call -] unit-test - ! Test puns; only on x86 cpu x86? [ [ double-2{ 4 1024 } ] [ @@ -362,3 +46,201 @@ cpu x86? [ [ { double-2 double-2 } declare v+ underlying>> 3.0 float* ] compile-call ] unit-test ] when + +! Fuzz testing +CONSTANT: simd-classes + { + char-16 + uchar-16 + char-32 + uchar-32 + short-8 + ushort-8 + short-16 + ushort-16 + int-4 + uint-4 + int-8 + uint-8 + float-4 + float-8 + double-2 + double-4 + } + +: with-ctors ( -- seq ) + simd-classes [ [ name>> "-with" append ] [ vocabulary>> ] bi lookup ] map ; + +: boa-ctors ( -- seq ) + simd-classes [ [ name>> "-boa" append ] [ vocabulary>> ] bi lookup ] map ; + +: check-optimizer ( seq inputs quot eq-quot -- ) + '[ + @ + [ "print-mr" get [ nip test-mr mr. ] [ 2drop ] if ] + [ [ call ] dip call ] + [ [ call ] dip compile-call ] 2tri @ not + ] filter ; inline + +"== Checking -new constructors" print + +[ { } ] [ + simd-classes [ [ [ ] ] dip '[ _ new ] ] [ = ] check-optimizer +] unit-test + +[ { } ] [ + simd-classes [ '[ _ new ] compile-call [ zero? ] all? not ] filter +] unit-test + +"== Checking -with constructors" print + +[ { } ] [ + with-ctors [ + [ 1000 random '[ _ ] ] dip '[ { fixnum } declare _ execute ] + ] [ = ] check-optimizer +] unit-test + +"== Checking -boa constructors" print + +[ { } ] [ + boa-ctors [ + dup stack-effect in>> length + [ nip [ 1000 random ] [ ] replicate-as ] + [ fixnum swap '[ _ declare _ execute ] ] + 2bi + ] [ = ] check-optimizer +] unit-test + +"== Checking vector operations" print + +: random-vector ( class -- vec ) + new [ drop 1000 random ] map ; + +:: check-vector-op ( word inputs class elt-class -- inputs quot ) + inputs [ + [ + { + { +vector+ [ class random-vector ] } + { +scalar+ [ 1000 random elt-class float = [ >float ] when ] } + } case + ] [ ] map-as + ] [ + [ + { + { +vector+ [ class ] } + { +scalar+ [ elt-class ] } + } case + ] map + ] bi + word '[ _ declare _ execute ] ; + +: remove-float-words ( alist -- alist' ) + [ drop { vsqrt n/v v/n v/ normalize } member? not ] assoc-filter ; + +: ops-to-check ( elt-class -- alist ) + [ vector-words >alist ] dip + float = [ remove-float-words ] unless ; + +: check-vector-ops ( class elt-class compare-quot -- ) + [ + [ nip ops-to-check ] 2keep + '[ first2 inputs _ _ check-vector-op ] + ] dip check-optimizer ; inline + +: approx= ( x y -- ? ) + { + { [ 2dup [ float? ] both? ] [ -1.e8 ~ ] } + { [ 2dup [ sequence? ] both? ] [ + [ + { + { [ 2dup [ fp-nan? ] both? ] [ 2drop t ] } + { [ 2dup [ fp-nan? ] either? not ] [ -1.e8 ~ ] } + } cond + ] 2all? + ] } + } cond ; + +: simd-classes&reps ( -- alist ) + simd-classes [ + { + { [ dup name>> "float" head? ] [ float [ approx= ] ] } + { [ dup name>> "double" tail? ] [ float [ = ] ] } + [ fixnum [ = ] ] + } cond 3array + ] map ; + +simd-classes&reps [ + [ [ { } ] ] dip first3 '[ _ _ _ check-vector-ops ] unit-test +] each + +! Other regressions +[ 8000000 ] [ + int-8{ 1000 1000 1000 1000 1000 1000 1000 1000 } + [ { int-8 } declare dup [ * ] [ + ] 2map-reduce ] compile-call +] unit-test + +! Vector alien intrinsics +[ float-4{ 1 2 3 4 } ] [ + [ + float-4{ 1 2 3 4 } + underlying>> 0 float-4-rep alien-vector + ] compile-call float-4 boa +] unit-test + +[ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [ + 16 [ 1 ] B{ } replicate-as 16 + [ + 0 [ + { byte-array c-ptr fixnum } declare + float-4-rep set-alien-vector + ] compile-call + ] keep +] unit-test + +[ float-array{ 1 2 3 4 } ] [ + [ + float-array{ 1 2 3 4 } underlying>> + float-array{ 4 3 2 1 } clone + [ underlying>> 0 float-4-rep set-alien-vector ] keep + ] compile-call +] unit-test + +STRUCT: simd-struct +{ x float-4 } +{ y double-2 } +{ z double-4 } +{ w float-8 } ; + +[ t ] [ [ simd-struct ] compile-call >c-ptr [ 0 = ] all? ] unit-test + +[ + float-4{ 1 2 3 4 } + double-2{ 2 1 } + double-4{ 4 3 2 1 } + float-8{ 1 2 3 4 5 6 7 8 } +] [ + simd-struct + float-4{ 1 2 3 4 } >>x + double-2{ 2 1 } >>y + double-4{ 4 3 2 1 } >>z + float-8{ 1 2 3 4 5 6 7 8 } >>w + { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave +] unit-test + +[ + float-4{ 1 2 3 4 } + double-2{ 2 1 } + double-4{ 4 3 2 1 } + float-8{ 1 2 3 4 5 6 7 8 } +] [ + [ + simd-struct + float-4{ 1 2 3 4 } >>x + double-2{ 2 1 } >>y + double-4{ 4 3 2 1 } >>z + float-8{ 1 2 3 4 5 6 7 8 } >>w + { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave + ] compile-call +] unit-test + +[ ] [ char-16 new 1array stack. ] unit-test diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index a3c99ae217..fe043032b8 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -1,185 +1,32 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors alien.c-types byte-arrays cpu.architecture -kernel math math.functions math.vectors -math.vectors.simd.functor math.vectors.simd.intrinsics -math.vectors.specialization parser prettyprint.custom sequences -sequences.private locals assocs words fry ; -FROM: alien.c-types => float ; -QUALIFIED-WITH: math m +USING: alien.c-types combinators fry kernel lexer math math.parser +math.vectors.simd.functor sequences splitting vocabs.generated +vocabs.loader vocabs.parser words ; IN: math.vectors.simd -<< - -DEFER: float-4 -DEFER: double-2 -DEFER: float-8 -DEFER: double-4 - -"double" define-simd-128 -"float" define-simd-128 -"double" define-simd-256 -"float" define-simd-256 - ->> - -: float-4-with ( x -- simd-array ) - [ 4 ] dip >float '[ _ ] \ float-4 new replicate-as ; - -: float-4-boa ( a b c d -- simd-array ) - \ float-4 new 4sequence ; - -: double-2-with ( x -- simd-array ) - [ 2 ] dip >float '[ _ ] \ double-2 new replicate-as ; - -: double-2-boa ( a b -- simd-array ) - \ double-2 new 2sequence ; - -! More efficient expansions for the above, used when SIMD is -! actually available. - -<< - -\ float-4-with [ - drop - \ (simd-broadcast) "intrinsic" word-prop [ - [ >float float-4-rep (simd-broadcast) \ float-4 boa ] - ] [ \ float-4-with def>> ] if -] "custom-inlining" set-word-prop - -\ float-4-boa [ - drop - \ (simd-gather-4) "intrinsic" word-prop [ - [| a b c d | - a >float b >float c >float d >float - float-4-rep (simd-gather-4) \ float-4 boa - ] - ] [ \ float-4-boa def>> ] if -] "custom-inlining" set-word-prop - -\ double-2-with [ - drop - \ (simd-broadcast) "intrinsic" word-prop [ - [ >float double-2-rep (simd-broadcast) \ double-2 boa ] - ] [ \ double-2-with def>> ] if -] "custom-inlining" set-word-prop - -\ double-2-boa [ - drop - \ (simd-gather-4) "intrinsic" word-prop [ - [ [ >float ] bi@ double-2-rep (simd-gather-2) \ double-2 boa ] - ] [ \ double-2-boa def>> ] if -] "custom-inlining" set-word-prop - ->> - -: float-8-with ( x -- simd-array ) - [ float-4-with ] [ float-4-with ] bi [ underlying>> ] bi@ - \ float-8 boa ; inline - -:: float-8-boa ( a b c d e f g h -- simd-array ) - a b c d float-4-boa - e f g h float-4-boa - [ underlying>> ] bi@ - \ float-8 boa ; inline - -: double-4-with ( x -- simd-array ) - [ double-2-with ] [ double-2-with ] bi [ underlying>> ] bi@ - \ double-4 boa ; inline - -:: double-4-boa ( a b c d -- simd-array ) - a b double-2-boa - c d double-2-boa - [ underlying>> ] bi@ - \ double-4 boa ; inline - -<< +ERROR: bad-vector-size bits ; number ] bi* + * 8 * { + { 128 [ [ define-simd-128 ] ] } + { 256 [ [ define-simd-256 ] ] } + [ bad-vector-size ] + } case ; PRIVATE> -\ float-4 \ float-4-with m:float H{ - { v+ [ [ (simd-v+) ] float-4-vv->v-op ] } - { v- [ [ (simd-v-) ] float-4-vv->v-op ] } - { v* [ [ (simd-v*) ] float-4-vv->v-op ] } - { v/ [ [ (simd-v/) ] float-4-vv->v-op ] } - { vmin [ [ (simd-vmin) ] float-4-vv->v-op ] } - { vmax [ [ (simd-vmax) ] float-4-vv->v-op ] } - { sum [ [ (simd-sum) ] float-4-v->n-op ] } -} simd-vector-words +: define-simd-vocab ( type -- vocab ) + [ simd-vocab ] + [ '[ _ parse-simd-name call( type -- ) ] ] bi + generate-vocab ; -\ double-2 \ double-2-with m:float H{ - { v+ [ [ (simd-v+) ] double-2-vv->v-op ] } - { v- [ [ (simd-v-) ] double-2-vv->v-op ] } - { v* [ [ (simd-v*) ] double-2-vv->v-op ] } - { v/ [ [ (simd-v/) ] double-2-vv->v-op ] } - { vmin [ [ (simd-vmin) ] double-2-vv->v-op ] } - { vmax [ [ (simd-vmax) ] double-2-vv->v-op ] } - { sum [ [ (simd-sum) ] double-2-v->n-op ] } -} simd-vector-words - -\ float-8 \ float-8-with m:float H{ - { v+ [ [ (simd-v+) ] float-8-vv->v-op ] } - { v- [ [ (simd-v-) ] float-8-vv->v-op ] } - { v* [ [ (simd-v*) ] float-8-vv->v-op ] } - { v/ [ [ (simd-v/) ] float-8-vv->v-op ] } - { vmin [ [ (simd-vmin) ] float-8-vv->v-op ] } - { vmax [ [ (simd-vmax) ] float-8-vv->v-op ] } - { sum [ [ (simd-sum) ] [ + ] float-8-v->n-op ] } -} simd-vector-words - -\ double-4 \ double-4-with m:float H{ - { v+ [ [ (simd-v+) ] double-4-vv->v-op ] } - { v- [ [ (simd-v-) ] double-4-vv->v-op ] } - { v* [ [ (simd-v*) ] double-4-vv->v-op ] } - { v/ [ [ (simd-v/) ] double-4-vv->v-op ] } - { vmin [ [ (simd-vmin) ] double-4-vv->v-op ] } - { vmax [ [ (simd-vmax) ] double-4-vv->v-op ] } - { sum [ [ (simd-v+) ] [ (simd-sum) ] double-4-v->n-op ] } -} simd-vector-words - ->> - -USE: vocabs.loader - -"math.vectors.simd.alien" require +SYNTAX: SIMD: + scan define-simd-vocab use-vocab ; diff --git a/basis/math/vectors/simd/summary.txt b/basis/math/vectors/simd/summary.txt new file mode 100644 index 0000000000..22593f1286 --- /dev/null +++ b/basis/math/vectors/simd/summary.txt @@ -0,0 +1 @@ +Single-instruction-multiple-data parallel vector operations diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor index 21ec9f64f3..bf2dac29d6 100644 --- a/basis/math/vectors/specialization/specialization.factor +++ b/basis/math/vectors/specialization/specialization.factor @@ -53,10 +53,14 @@ H{ { norm-sq { +vector+ -> +nonnegative+ } } { normalize { +vector+ -> +vector+ } } { v* { +vector+ +vector+ -> +vector+ } } + { vs* { +vector+ +vector+ -> +vector+ } } { v*n { +vector+ +scalar+ -> +vector+ } } { v+ { +vector+ +vector+ -> +vector+ } } + { vs+ { +vector+ +vector+ -> +vector+ } } + { v+- { +vector+ +vector+ -> +vector+ } } { v+n { +vector+ +scalar+ -> +vector+ } } { v- { +vector+ +vector+ -> +vector+ } } + { vs- { +vector+ +vector+ -> +vector+ } } { v-n { +vector+ +scalar+ -> +vector+ } } { v. { +vector+ +vector+ -> +scalar+ } } { v/ { +vector+ +vector+ -> +vector+ } } @@ -68,6 +72,11 @@ H{ { vneg { +vector+ -> +vector+ } } { vtruncate { +vector+ -> +vector+ } } { sum { +vector+ -> +scalar+ } } + { vabs { +vector+ -> +vector+ } } + { vsqrt { +vector+ -> +vector+ } } + { vbitand { +vector+ +vector+ -> +vector+ } } + { vbitor { +vector+ +vector+ -> +vector+ } } + { vbitxor { +vector+ +vector+ -> +vector+ } } } PREDICATE: vector-word < word vector-words key? ; diff --git a/basis/math/vectors/vectors-docs.factor b/basis/math/vectors/vectors-docs.factor index 7456597278..3790e38d55 100644 --- a/basis/math/vectors/vectors-docs.factor +++ b/basis/math/vectors/vectors-docs.factor @@ -1,4 +1,4 @@ -USING: help.markup help.syntax math sequences ; +USING: help.markup help.syntax math math.functions sequences ; IN: math.vectors ARTICLE: "math-vectors" "Vector arithmetic" @@ -14,18 +14,46 @@ $nl { $subsection n+v } { $subsection v-n } { $subsection n-v } -"Combining two vectors to form another vector with " { $link 2map } ":" +"Vector unary operations:" +{ $subsection vneg } +{ $subsection vabs } +{ $subsection vsqrt } +{ $subsection vfloor } +{ $subsection vceiling } +{ $subsection vtruncate } +"Vector/vector binary operations:" { $subsection v+ } { $subsection v- } +{ $subsection v+- } { $subsection v* } { $subsection v/ } +"Saturated arithmetic (only on " { $link "specialized-arrays" } "):" +{ $subsection vs+ } +{ $subsection vs- } +{ $subsection vs* } +"Comparisons:" { $subsection vmax } { $subsection vmin } +"Bitwise operations:" +{ $subsection vbitand } +{ $subsection vbitor } +{ $subsection vbitxor } "Inner product and norm:" { $subsection v. } { $subsection norm } { $subsection norm-sq } -{ $subsection normalize } ; +{ $subsection normalize } +"Comparing vectors:" +{ $subsection distance } +{ $subsection v~ } +"Other functions:" +{ $subsection vsupremum } +{ $subsection vinfimum } +{ $subsection trilerp } +{ $subsection bilerp } +{ $subsection vlerp } +{ $subsection vnlerp } +{ $subsection vbilerp } ; ABOUT: "math-vectors" @@ -33,6 +61,43 @@ HELP: vneg { $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } } { $description "Negates each element of " { $snippet "u" } "." } ; +HELP: vabs +{ $values { "u" "a sequence of numbers" } { "v" "a sequence of non-negative real numbers" } } +{ $description "Takes the absolute value of each element of " { $snippet "u" } "." } ; + +HELP: vsqrt +{ $values { "u" "a sequence of non-negative real numbers" } { "v" "a sequence of non-negative real numbers" } } +{ $description "Takes the square root of each element of " { $snippet "u" } "." } +{ $warning "For performance reasons, this does not work with negative inputs, unlike " { $link sqrt } "." } ; + +HELP: vfloor +{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } } +{ $description "Takes the " { $link floor } " of each element of " { $snippet "u" } "." } ; + +HELP: vceiling +{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } } +{ $description "Takes the " { $link ceiling } " of each element of " { $snippet "u" } "." } ; + +HELP: vtruncate +{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } } +{ $description "Truncates each element of " { $snippet "u" } "." } ; + +HELP: n+v +{ $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } } +{ $description "Adds " { $snippet "n" } " to each element of " { $snippet "u" } "." } ; + +HELP: v+n +{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "v" "a sequence of numbers" } } +{ $description "Adds " { $snippet "n" } " to each element of " { $snippet "u" } "." } ; + +HELP: n-v +{ $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } } +{ $description "Subtracts each element of " { $snippet "u" } " from " { $snippet "n" } "." } ; + +HELP: v-n +{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "v" "a sequence of numbers" } } +{ $description "Subtracts " { $snippet "n" } " from each element of " { $snippet "u" } "." } ; + HELP: n*v { $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } } { $description "Multiplies each element of " { $snippet "u" } " by " { $snippet "n" } "." } ; @@ -43,11 +108,13 @@ HELP: v*n HELP: n/v { $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } } -{ $description "Divides " { $snippet "n" } " by each element of " { $snippet "u" } "." } ; +{ $description "Divides " { $snippet "n" } " by each element of " { $snippet "u" } "." } +{ $errors "May throw an error if a division by zero occurs; see " { $link "division-by-zero" } "." } ; HELP: v/n { $values { "u" "a sequence of numbers" } { "n" "a number" } { "v" "a sequence of numbers" } } -{ $description "Divides each element of " { $snippet "u" } " by " { $snippet "n" } "." } ; +{ $description "Divides each element of " { $snippet "u" } " by " { $snippet "n" } "." } +{ $errors "May throw an error if a division by zero occurs; see " { $link "division-by-zero" } "." } ; HELP: v+ { $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } } @@ -57,6 +124,17 @@ HELP: v- { $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } } { $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise." } ; +HELP: v+- +{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } } +{ $description "Adds and subtracts alternate elements of " { $snippet "v" } " and " { $snippet "u" } " component-wise." } +{ $examples + { $example + "USING: math.vectors prettyprint ;" + "{ 1 2 3 } { 2 3 2 } v+- ." + "{ -1 5 1 }" + } +} ; + HELP: [v-] { $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } } { $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise; any components which become negative are set to zero." } ; @@ -68,7 +146,7 @@ HELP: v* HELP: v/ { $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } } { $description "Divides " { $snippet "u" } " by " { $snippet "v" } " component-wise." } -{ $errors "Throws an error if an integer division by zero occurs." } ; +{ $errors "May throw an error if a division by zero occurs; see " { $link "division-by-zero" } "." } ; HELP: vmax { $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } } @@ -85,9 +163,52 @@ HELP: v. { $description "Computes the real-valued dot product." } { $notes "This word can also take complex number sequences as input, however mathematically it will compute the wrong result. The complex-valued dot product is defined differently:" - { $snippet "0 [ conjugate * + ] 2reduce" } + { $code "0 [ conjugate * + ] 2reduce" } } ; +HELP: vs+ +{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } } +{ $description "Adds " { $snippet "u" } " and " { $snippet "v" } " component-wise with saturation." } +{ $examples + "With saturation:" + { $example + "USING: math.vectors prettyprint specialized-arrays ;" + "SPECIALIZED-ARRAY: uchar" + "uchar-array{ 100 200 150 } uchar-array{ 70 70 70 } vs+ ." + "uchar-array{ 170 255 220 }" + } + "Without saturation:" + { $example + "USING: math.vectors prettyprint specialized-arrays ;" + "SPECIALIZED-ARRAY: uchar" + "uchar-array{ 100 200 150 } uchar-array{ 70 70 70 } v+ ." + "uchar-array{ 170 14 220 }" + } +} ; + +HELP: vs- +{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } } +{ $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise with saturation." } ; + +HELP: vs* +{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } } +{ $description "Multiplies " { $snippet "u" } " and " { $snippet "v" } " component-wise with saturation." } ; + +HELP: vbitand +{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } } +{ $description "Takes the bitwise and of " { $snippet "u" } " and " { $snippet "v" } " component-wise." } +{ $notes "Unlike " { $link bitand } ", this word may be used on a specialized array of floats or doubles, in which case the bitwise representation of the floating point numbers is operated upon." } ; + +HELP: vbitor +{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } } +{ $description "Takes the bitwise or of " { $snippet "u" } " and " { $snippet "v" } " component-wise." } +{ $notes "Unlike " { $link bitor } ", this word may be used on a specialized array of floats or doubles, in which case the bitwise representation of the floating point numbers is operated upon." } ; + +HELP: vbitxor +{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } } +{ $description "Takes the bitwise exclusive or of " { $snippet "u" } " and " { $snippet "v" } " component-wise." } +{ $notes "Unlike " { $link bitxor } ", this word may be used on a specialized array of floats or doubles, in which case the bitwise representation of the floating point numbers is operated upon." } ; + HELP: norm-sq { $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } } { $description "Computes the squared length of a mathematical vector." } ; @@ -100,6 +221,10 @@ HELP: normalize { $values { "u" "a sequence of numbers, not all zero" } { "v" "a sequence of numbers" } } { $description "Outputs a vector with the same direction as " { $snippet "u" } " but length 1." } ; +HELP: distance +{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "x" "a non-negative real number" } } +{ $description "Outputs the Euclidean distance between two vectors." } ; + HELP: set-axis { $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "axis" "a sequence of 0/1" } { "w" "a sequence of numbers" } } { $description "Using " { $snippet "w" } " as a template, creates a new sequence containing corresponding elements from " { $snippet "u" } " in place of 0, and corresponding elements from " { $snippet "v" } " in place of 1." } @@ -108,3 +233,5 @@ HELP: set-axis { 2map v+ v- v* v/ } related-words { 2reduce v. } related-words + +{ vs+ vs- vs* } related-words diff --git a/basis/math/vectors/vectors-tests.factor b/basis/math/vectors/vectors-tests.factor index 3e56644d3e..fc482815a9 100644 --- a/basis/math/vectors/vectors-tests.factor +++ b/basis/math/vectors/vectors-tests.factor @@ -17,4 +17,6 @@ USING: math.vectors tools.test ; [ 1.125 ] [ 0.0 1.0 2.0 4.0 { 0.5 0.25 } bilerp ] unit-test -[ 17 ] [ 0 1 2 3 4 5 6 7 { 1 2 3 } trilerp ] unit-test \ No newline at end of file +[ 17 ] [ 0 1 2 3 4 5 6 7 { 1 2 3 } trilerp ] unit-test + +[ { 0 3 2 5 4 } ] [ { 1 2 3 4 5 } { 1 1 1 1 1 } v+- ] unit-test \ No newline at end of file diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index dd48525b53..4b6f67544a 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -1,9 +1,12 @@ -! Copyright (C) 2005, 2008 Slava Pestov. +! Copyright (C) 2005, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: arrays kernel sequences math math.functions hints -math.order ; +USING: arrays alien.c-types kernel sequences math math.functions +hints math.order math.libm fry combinators ; +QUALIFIED-WITH: alien.c-types c IN: math.vectors +GENERIC: element-type ( obj -- c-type ) + : vneg ( u -- v ) [ neg ] map ; : v+n ( u n -- v ) [ + ] curry map ; @@ -24,9 +27,43 @@ IN: math.vectors : vmax ( u v -- w ) [ max ] 2map ; : vmin ( u v -- w ) [ min ] 2map ; -: vfloor ( v -- _v_ ) [ floor ] map ; -: vceiling ( v -- ^v^ ) [ ceiling ] map ; -: vtruncate ( v -- -v- ) [ truncate ] map ; +: v+- ( u v -- w ) + [ t ] 2dip + [ [ not ] 2dip pick [ + ] [ - ] if ] 2map + nip ; + + + +: vs+ ( u v -- w ) [ + ] 2saturate-map ; +: vs- ( u v -- w ) [ - ] 2saturate-map ; +: vs* ( u v -- w ) [ * ] 2saturate-map ; + +: vabs ( u -- v ) [ abs ] map ; +: vsqrt ( u -- v ) [ >float fsqrt ] map ; + +bits ] bi@ ] dip call bits>double ] } + { c:float [ [ [ float>bits ] bi@ ] dip call bits>float ] } + [ drop call ] + } case ; inline + +PRIVATE> + +: vbitand ( u v -- w ) over '[ _ [ bitand ] fp-bitwise-op ] 2map ; +: vbitor ( u v -- w ) over '[ _ [ bitor ] fp-bitwise-op ] 2map ; +: vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ; + +: vfloor ( u -- v ) [ floor ] map ; +: vceiling ( u -- v ) [ ceiling ] map ; +: vtruncate ( u -- v ) [ truncate ] map ; : vsupremum ( seq -- vmax ) [ ] [ vmax ] map-reduce ; : vinfimum ( seq -- vmin ) [ ] [ vmin ] map-reduce ; diff --git a/basis/prettyprint/backend/backend.factor b/basis/prettyprint/backend/backend.factor index cba40bbff1..fb47c50fbe 100644 --- a/basis/prettyprint/backend/backend.factor +++ b/basis/prettyprint/backend/backend.factor @@ -173,6 +173,7 @@ M: tuple pprint* ] when ; : pprint-elements ( seq -- ) + >array do-length-limit [ [ pprint* ] each ] dip [ "~" swap number>string " more~" 3append text ] when* ; diff --git a/basis/specialized-arrays/specialized-arrays-tests.factor b/basis/specialized-arrays/specialized-arrays-tests.factor index 56f7f8a1cd..070323a5d6 100755 --- a/basis/specialized-arrays/specialized-arrays-tests.factor +++ b/basis/specialized-arrays/specialized-arrays-tests.factor @@ -4,7 +4,7 @@ specialized-arrays.private sequences alien.c-types accessors kernel arrays combinators compiler compiler.units classes.struct combinators.smart compiler.tree.debugger math libc destructors sequences.private multiline eval words vocabs namespaces -assocs prettyprint alien.data ; +assocs prettyprint alien.data math.vectors ; FROM: alien.c-types => float ; SPECIALIZED-ARRAY: int @@ -13,6 +13,9 @@ SPECIALIZED-ARRAY: ushort SPECIALIZED-ARRAY: char SPECIALIZED-ARRAY: uint SPECIALIZED-ARRAY: float +SPECIALIZED-ARRAY: ulonglong + +[ ulonglong ] [ ulonglong-array{ } element-type ] unit-test [ t ] [ { 1 2 3 } >int-array int-array? ] unit-test diff --git a/basis/specialized-arrays/specialized-arrays.factor b/basis/specialized-arrays/specialized-arrays.factor index 6931c83677..9692980858 100755 --- a/basis/specialized-arrays/specialized-arrays.factor +++ b/basis/specialized-arrays/specialized-arrays.factor @@ -1,10 +1,11 @@ ! Copyright (C) 2008, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors alien alien.c-types alien.data alien.parser assocs -byte-arrays classes compiler.units functors kernel lexer libc math -math.vectors.specialization namespaces parser prettyprint.custom -sequences sequences.private strings summary vocabs vocabs.loader -vocabs.parser words fry combinators ; +USING: accessors alien alien.c-types alien.data alien.parser +assocs byte-arrays classes compiler.units functors kernel lexer +libc math math.vectors math.vectors.specialization namespaces +parser prettyprint.custom sequences sequences.private strings +summary vocabs vocabs.loader vocabs.parser vocabs.generated +words fry combinators present ; IN: specialized-arrays MIXIN: specialized-array @@ -53,14 +54,14 @@ TUPLE: A : ( alien len -- specialized-array ) A boa ; inline -: ( n -- specialized-array ) [ T ] keep ; inline +: ( n -- specialized-array ) [ \ T ] keep ; inline -: (A) ( n -- specialized-array ) [ T (underlying) ] keep ; inline +: (A) ( n -- specialized-array ) [ \ T (underlying) ] keep ; inline -: malloc-A ( len -- specialized-array ) [ T heap-size calloc ] keep ; inline +: malloc-A ( len -- specialized-array ) [ \ T heap-size calloc ] keep ; inline : byte-array>A ( byte-array -- specialized-array ) - dup length T heap-size /mod 0 = [ drop T bad-byte-array-length ] unless + dup length \ T heap-size /mod 0 = [ drop \ T bad-byte-array-length ] unless ; inline M: A clone [ underlying>> clone ] [ length>> ] bi ; inline @@ -81,12 +82,14 @@ M: A equal? over A instance? [ sequence= ] [ 2drop f ] if ; M: A resize [ - [ T heap-size * ] [ underlying>> ] bi* + [ \ T heap-size * ] [ underlying>> ] bi* resize-byte-array ] [ drop ] 2bi ; inline -M: A byte-length length T heap-size * ; inline +M: A byte-length length \ T heap-size * ; inline + +M: A element-type drop \ T ; inline M: A direct-array-syntax drop \ A@ ; @@ -116,24 +119,15 @@ M: word (underlying-type) "c-type" word-prop ; } cond ; : underlying-type-name ( c-type -- name ) - underlying-type dup word? [ name>> ] when ; + underlying-type present ; : specialized-array-vocab ( c-type -- vocab ) - "specialized-arrays.instances." prepend ; + present "specialized-arrays.instances." prepend ; PRIVATE> -: generate-vocab ( vocab-name quot -- vocab ) - [ dup vocab [ ] ] dip '[ - [ - [ - _ with-current-vocab - ] with-compilation-unit - ] keep - ] ?if ; inline - : define-array-vocab ( type -- vocab ) - underlying-type-name + underlying-type [ specialized-array-vocab ] [ '[ _ define-array ] ] bi generate-vocab ; diff --git a/basis/specialized-vectors/specialized-vectors-docs.factor b/basis/specialized-vectors/specialized-vectors-docs.factor index 9c575fe73a..c773356a64 100644 --- a/basis/specialized-vectors/specialized-vectors-docs.factor +++ b/basis/specialized-vectors/specialized-vectors-docs.factor @@ -16,8 +16,8 @@ ARTICLE: "specialized-vector-words" "Specialized vector words" } "Behind the scenes, these words are placed in a vocabulary named " { $snippet "specialized-vectors.instances.T" } ", however this vocabulary should not be placed in a " { $link POSTPONE: USING: } " form directly. Instead, always use " { $link POSTPONE: SPECIALIZED-VECTOR: } ". This ensures that the vocabulary can get generated the first time it is needed." ; -ARTICLE: "specialized-vector-c" "Passing specialized arrays to C functions" -"Each specialized array has a " { $slot "underlying" } " slot holding a specialized array, which in turn has an " { $slot "underlying" } " slot holding a " { $link byte-array } " with the raw data. Passing a specialized vector as a parameter to a C function call will automatically extract the underlying data. To get at the underlying data directly, call the " { $link >c-ptr } " word on a specialized vector." ; +ARTICLE: "specialized-vector-c" "Passing specialized vectors to C functions" +"Each specialized vector has a " { $slot "underlying" } " slot holding a specialized array, which in turn has an " { $slot "underlying" } " slot holding a " { $link byte-array } " with the raw data. Passing a specialized vector as a parameter to a C function call will automatically extract the underlying data. To get at the underlying data directly, call the " { $link >c-ptr } " word on a specialized vector." ; ARTICLE: "specialized-vectors" "Specialized vectors" "The " { $vocab-link "specialized-vectors" } " vocabulary implements resizable sequence types for storing machine values in a space-efficient manner without boxing." diff --git a/basis/specialized-vectors/specialized-vectors.factor b/basis/specialized-vectors/specialized-vectors.factor index 58fb97764b..7cda026cb3 100644 --- a/basis/specialized-vectors/specialized-vectors.factor +++ b/basis/specialized-vectors/specialized-vectors.factor @@ -3,7 +3,7 @@ USING: accessors alien.c-types assocs compiler.units functors growable kernel lexer namespaces parser prettyprint.custom sequences specialized-arrays specialized-arrays.private strings -vocabs vocabs.parser fry ; +vocabs vocabs.parser vocabs.generated fry ; QUALIFIED: vectors.functor IN: specialized-vectors diff --git a/basis/math/vectors/simd/alien/authors.txt b/basis/vocabs/generated/authors.txt similarity index 100% rename from basis/math/vectors/simd/alien/authors.txt rename to basis/vocabs/generated/authors.txt diff --git a/basis/vocabs/generated/generated.factor b/basis/vocabs/generated/generated.factor new file mode 100644 index 0000000000..1ddcc73db2 --- /dev/null +++ b/basis/vocabs/generated/generated.factor @@ -0,0 +1,13 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: compiler.units fry kernel vocabs vocabs.parser ; +IN: vocabs.generated + +: generate-vocab ( vocab-name quot -- vocab ) + [ dup vocab [ ] ] dip '[ + [ + [ + _ with-current-vocab + ] with-compilation-unit + ] keep + ] ?if ; inline diff --git a/core/math/math-docs.factor b/core/math/math-docs.factor index e5de106bbb..e6805d693b 100644 --- a/core/math/math-docs.factor +++ b/core/math/math-docs.factor @@ -434,11 +434,15 @@ HELP: byte-array>bignum { $description "Converts a byte-array, interpreted as little-endian, into a bignum integer. User code should call " { $link le> } " or " { $link be> } " instead." } ; ARTICLE: "division-by-zero" "Division by zero" -"Floating point division never raises an error if the denominator is zero. This means that if at least one of the two inputs to " { $link / } ", " { $link /f } " or " { $link mod } " is a float, the result will be a floating point infinity or not a number value." +"Behavior of division operations when a denominator of zero is used depends on the data types in question, as well as the platform being used." +$nl +"Floating point division only throws an error if the appropriate traps are enabled in the floating point environment. If traps are disabled, a Not-a-number value or an infinity is output, depending on whether the numerator is zero or non-zero." +$nl +"Floating point traps are disabled by default and the " { $vocab-link "math.floats.env" } " vocabulary provides words to enable them. Floating point division is performed by " { $link / } ", " { $link /f } " or " { $link mod } " if at least one of the two inputs is a float. Floating point division is always performed by " { $link /f } "." $nl "The behavior of integer division is hardware specific. On x86 processors, " { $link /i } " and " { $link mod } " raise an error if both inputs are integers and the denominator is zero. On PowerPC, integer division by zero yields a result of zero." $nl -"On the other hand, the " { $link / } " word, when given integer arguments, implements a much more expensive division algorithm which always yields an exact rational answer, and this word always tests for division by zero explicitly." ; +"The " { $link / } " word, when given integer arguments, implements a much more expensive division algorithm which always yields an exact rational answer, and this word always tests for division by zero explicitly." ; ARTICLE: "number-protocol" "Number protocol" "Math operations obey certain numerical upgrade rules. If one of the inputs is a bignum and the other is a fixnum, the latter is first coerced to a bignum; if one of the inputs is a float, the other is coerced to a float." @@ -459,7 +463,8 @@ $nl { $subsection > } { $subsection >= } "Numbers can be compared for equality using " { $link = } ", or a less precise test which disregards types:" -{ $subsection number= } ; +{ $subsection number= } +{ $see-also "math.floats.compare" } ; ARTICLE: "modular-arithmetic" "Modular arithmetic" { $subsection mod } diff --git a/extra/benchmark/nbody-simd/nbody-simd.factor b/extra/benchmark/nbody-simd/nbody-simd.factor index e8bef58923..3aedffed91 100644 --- a/extra/benchmark/nbody-simd/nbody-simd.factor +++ b/extra/benchmark/nbody-simd/nbody-simd.factor @@ -4,6 +4,7 @@ USING: accessors fry kernel locals math math.constants math.functions math.vectors math.vectors.simd prettyprint combinators.smart sequences hints classes.struct specialized-arrays ; +SIMD: double-4 IN: benchmark.nbody-simd : solar-mass ( -- x ) 4 pi sq * ; inline diff --git a/extra/benchmark/raytracer-simd/raytracer-simd.factor b/extra/benchmark/raytracer-simd/raytracer-simd.factor index 3712972862..2d16c8cd1f 100644 --- a/extra/benchmark/raytracer-simd/raytracer-simd.factor +++ b/extra/benchmark/raytracer-simd/raytracer-simd.factor @@ -5,6 +5,7 @@ USING: arrays accessors io io.files io.files.temp io.encodings.binary kernel math math.constants math.functions math.vectors math.vectors.simd math.parser make sequences sequences.private words hints classes.struct ; +SIMD: double-4 IN: benchmark.raytracer-simd ! parameters diff --git a/extra/benchmark/simd-1/simd-1.factor b/extra/benchmark/simd-1/simd-1.factor index 4f57cca0bb..1e753a331d 100644 --- a/extra/benchmark/simd-1/simd-1.factor +++ b/extra/benchmark/simd-1/simd-1.factor @@ -2,6 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: kernel io math math.functions math.parser math.vectors math.vectors.simd sequences specialized-arrays ; +SIMD: float-4 SPECIALIZED-ARRAY: float-4 IN: benchmark.simd-1 diff --git a/extra/mason/child/child-tests.factor b/extra/mason/child/child-tests.factor index e8e1a9e0e9..2d5a7c6635 100644 --- a/extra/mason/child/child-tests.factor +++ b/extra/mason/child/child-tests.factor @@ -33,7 +33,7 @@ USING: mason.child mason.config tools.test namespaces io kernel sequences ; ] with-scope ] unit-test -[ { "./factor.com" "-i=boot.x86.32.image" "-no-user-init" "-sse-version=30" } ] [ +[ { "./factor.com" "-i=boot.x86.32.image" "-no-user-init" } ] [ [ "winnt" target-os set "x86.32" target-cpu set diff --git a/extra/mason/child/child.factor b/extra/mason/child/child.factor index b3ee6c2c76..193ac1e212 100755 --- a/extra/mason/child/child.factor +++ b/extra/mason/child/child.factor @@ -34,7 +34,6 @@ IN: mason.child factor-vm , "-i=" boot-image-name append , "-no-user-init" , - target-cpu get { "x86.32" "x86.64" } member? [ "-sse-version=30" , ] when ] { } make ; : boot ( -- ) diff --git a/vm/cpu-x86.S b/vm/cpu-x86.S index 5360d6c227..52022e55cc 100644 --- a/vm/cpu-x86.S +++ b/vm/cpu-x86.S @@ -66,12 +66,12 @@ DEF(F_FASTCALL void,c_to_factor,(CELL quot, void *vm)): DEF(bool,sse_version,(void)): mov $0x1,RETURN_REG cpuid - /* test $0x100000,%ecx + test $0x100000,%ecx jnz sse_42 test $0x80000,%ecx jnz sse_41 test $0x200,%ecx - jnz ssse_3 */ + jnz ssse_3 test $0x1,%ecx jnz sse_3 test $0x4000000,%edx