diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index aac76c835a..a9c2b4e2c0 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -91,6 +91,11 @@ INSN: ##set-string-nth-fast use: src/int-rep obj/int-rep index/int-rep temp: temp/int-rep ; +PURE-INSN: ##copy +def: dst +use: src +literal: rep ; + ! Integer arithmetic PURE-INSN: ##add def: dst/int-rep @@ -201,6 +206,15 @@ use: src/int-rep temp: temp/int-rep ; ! Float arithmetic +PURE-INSN: ##unbox-float +def: dst/double-float-rep +use: src/int-rep ; + +PURE-INSN: ##box-float +def: dst/int-rep +use: src/double-float-rep +temp: temp/int-rep ; + PURE-INSN: ##add-float def: dst/double-float-rep use: src1/double-float-rep src2/double-float-rep ; @@ -240,6 +254,15 @@ def: dst/double-float-rep use: src1/double-float-rep src2/double-float-rep literal: func ; +! Single/double float conversion +PURE-INSN: ##single>double-float +def: dst/double-float-rep +use: src/single-float-rep ; + +PURE-INSN: ##double>single-float +def: dst/single-float-rep +use: src/double-float-rep ; + ! Float/integer conversion PURE-INSN: ##float>integer def: dst/int-rep @@ -249,26 +272,80 @@ PURE-INSN: ##integer>float def: dst/double-float-rep use: src/int-rep ; -! Boxing and unboxing -PURE-INSN: ##copy +! SIMD operations + +INSN: ##box-vector +def: dst/int-rep +use: src +literal: rep +temp: temp/int-rep ; + +INSN: ##unbox-vector +def: dst +use: src/int-rep +literal: rep ; + +INSN: ##broadcast-vector +def: dst +use: src/scalar-rep +literal: rep ; + +INSN: ##gather-vector-2 +def: dst +use: src1/scalar-rep src2/scalar-rep +literal: rep ; + +INSN: ##gather-vector-4 +def: dst +use: src1/scalar-rep src2/scalar-rep src3/scalar-rep src4/scalar-rep +literal: rep ; + +INSN: ##add-vector +def: dst +use: src1 src2 +literal: rep ; + +INSN: ##sub-vector +def: dst +use: src1 src2 +literal: rep ; + +INSN: ##mul-vector +def: dst +use: src1 src2 +literal: rep ; + +INSN: ##div-vector +def: dst +use: src1 src2 +literal: rep ; + +INSN: ##min-vector +def: dst +use: src1 src2 +literal: rep ; + +INSN: ##max-vector +def: dst +use: src1 src2 +literal: rep ; + +INSN: ##sqrt-vector def: dst use: src literal: rep ; -PURE-INSN: ##unbox-float -def: dst/double-float-rep -use: src/int-rep ; +INSN: ##horizontal-add-vector +def: dst/scalar-rep +use: src +literal: rep ; +! Boxing and unboxing aliens PURE-INSN: ##unbox-any-c-ptr def: dst/int-rep use: src/int-rep temp: temp/int-rep ; -PURE-INSN: ##box-float -def: dst/int-rep -use: src/double-float-rep -temp: temp/int-rep ; - PURE-INSN: ##box-alien def: dst/int-rep use: src/int-rep @@ -322,13 +399,18 @@ def: dst/int-rep use: src/int-rep ; INSN: ##alien-float -def: dst/double-float-rep +def: dst/single-float-rep use: src/int-rep ; INSN: ##alien-double def: dst/double-float-rep use: src/int-rep ; +INSN: ##alien-vector +def: dst +use: src/int-rep +literal: rep ; + INSN: ##set-alien-integer-1 use: src/int-rep value/int-rep ; @@ -342,11 +424,15 @@ INSN: ##set-alien-cell use: src/int-rep value/int-rep ; INSN: ##set-alien-float -use: src/int-rep value/double-float-rep ; +use: src/int-rep value/single-float-rep ; INSN: ##set-alien-double use: src/int-rep value/double-float-rep ; +INSN: ##set-alien-vector +use: src/int-rep value +literal: rep ; + ! Memory allocation INSN: ##allot def: dst/int-rep @@ -510,6 +596,7 @@ literal: n ; UNION: ##allocation ##allot ##box-float +##box-vector ##box-alien ##box-displaced-alien ##integer>bignum ; diff --git a/basis/compiler/cfg/instructions/syntax/syntax.factor b/basis/compiler/cfg/instructions/syntax/syntax.factor index c4876866a3..53d124ea9d 100644 --- a/basis/compiler/cfg/instructions/syntax/syntax.factor +++ b/basis/compiler/cfg/instructions/syntax/syntax.factor @@ -7,11 +7,20 @@ IN: compiler.cfg.instructions.syntax SYMBOLS: def use temp literal constant ; +SYMBOL: scalar-rep + TUPLE: insn-slot-spec type name rep ; +: parse-rep ( str/f -- rep ) + { + { [ dup not ] [ ] } + { [ dup "scalar-rep" = ] [ drop scalar-rep ] } + [ "cpu.architecture" lookup ] + } cond ; + : parse-insn-slot-spec ( type string -- spec ) over [ "Missing type" throw ] unless - "/" split1 dup [ "cpu.architecture" lookup ] when + "/" split1 parse-rep insn-slot-spec boa ; : parse-insn-slot-specs ( seq -- specs ) diff --git a/basis/compiler/cfg/intrinsics/alien/alien.factor b/basis/compiler/cfg/intrinsics/alien/alien.factor index c2faf27f03..440a453173 100644 --- a/basis/compiler/cfg/intrinsics/alien/alien.factor +++ b/basis/compiler/cfg/intrinsics/alien/alien.factor @@ -20,22 +20,14 @@ IN: compiler.cfg.intrinsics.alien ^^box-displaced-alien ds-push ] [ emit-primitive ] if ; -: (prepare-alien-accessor-imm) ( class offset -- offset-vreg ) - ds-drop [ ds-pop swap ^^unbox-c-ptr ] dip ^^add-imm ; - -: (prepare-alien-accessor) ( class -- offset-vreg ) - [ 2inputs ^^untag-fixnum swap ] dip ^^unbox-c-ptr ^^add ; - : prepare-alien-accessor ( infos -- offset-vreg ) - [ second class>> ] [ first ] bi - dup value-info-small-fixnum? [ - literal>> (prepare-alien-accessor-imm) - ] [ drop (prepare-alien-accessor) ] if ; + second class>> + [ 2inputs ^^untag-fixnum swap ] dip ^^unbox-c-ptr ^^add ; :: inline-alien ( node quot test -- ) [let | infos [ node node-input-infos ] | infos test call - [ infos prepare-alien-accessor quot call ] + [ infos quot call ] [ node emit-primitive ] if ] ; inline @@ -46,7 +38,7 @@ IN: compiler.cfg.intrinsics.alien bi and ; : inline-alien-getter ( node quot -- ) - '[ @ ds-push ] + '[ prepare-alien-accessor @ ds-push ] [ inline-alien-getter? ] inline-alien ; inline : inline-alien-setter? ( infos class -- ? ) @@ -56,18 +48,17 @@ IN: compiler.cfg.intrinsics.alien tri and and ; : inline-alien-integer-setter ( node quot -- ) - '[ ds-pop ^^untag-fixnum @ ] + '[ prepare-alien-accessor ds-pop ^^untag-fixnum @ ] [ fixnum inline-alien-setter? ] inline-alien ; inline : inline-alien-cell-setter ( node quot -- ) - [ dup node-input-infos first class>> ] dip - '[ ds-pop _ ^^unbox-c-ptr @ ] + '[ [ prepare-alien-accessor ds-pop ] [ first class>> ] bi ^^unbox-c-ptr @ ] [ pinned-c-ptr inline-alien-setter? ] inline-alien ; inline : inline-alien-float-setter ( node quot -- ) - '[ ds-pop @ ] + '[ prepare-alien-accessor ds-pop @ ] [ float inline-alien-setter? ] inline-alien ; inline diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index 920def14c1..739bcdb366 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -7,6 +7,7 @@ compiler.cfg.intrinsics.alien compiler.cfg.intrinsics.allot compiler.cfg.intrinsics.fixnum compiler.cfg.intrinsics.float +compiler.cfg.intrinsics.simd compiler.cfg.intrinsics.slots compiler.cfg.intrinsics.misc compiler.cfg.comparisons ; @@ -22,6 +23,9 @@ QUALIFIED: classes.tuple.private QUALIFIED: math.private QUALIFIED: math.integers.private QUALIFIED: math.floats.private +QUALIFIED: math.vectors.simd +QUALIFIED: math.vectors.simd.private +QUALIFIED: math.vectors.simd.alien QUALIFIED: math.libm IN: compiler.cfg.intrinsics @@ -142,5 +146,27 @@ IN: compiler.cfg.intrinsics { math.integers.private:fixnum-log2 [ drop emit-fixnum-log2 ] } } enable-intrinsics ; +: enable-sse2-simd ( -- ) + { + { math.vectors.simd.private:assert-positive [ drop ] } + { math.vectors.simd.private:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] } + { math.vectors.simd.private:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] } + { math.vectors.simd.private:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] } + { math.vectors.simd.private:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] } + { math.vectors.simd.private:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] } + { math.vectors.simd.private:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] } + { math.vectors.simd.private:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-binary-vector-op ] } + { math.vectors.simd.private:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] } + { math.vectors.simd.private:(simd-gather-2) [ emit-gather-vector-2 ] } + { math.vectors.simd.private:(simd-gather-4) [ emit-gather-vector-4 ] } + { math.vectors.simd.alien:alien-vector [ emit-alien-vector ] } + { math.vectors.simd.alien:set-alien-vector [ emit-set-alien-vector ] } + } enable-intrinsics ; + +: enable-sse3-simd ( -- ) + { + { math.vectors.simd.private:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] } + } enable-intrinsics ; + : emit-intrinsic ( node word -- ) "intrinsic" word-prop call( node -- ) ; diff --git a/basis/compiler/cfg/intrinsics/simd/authors.txt b/basis/compiler/cfg/intrinsics/simd/authors.txt new file mode 100644 index 0000000000..d4f5d6b3ae --- /dev/null +++ b/basis/compiler/cfg/intrinsics/simd/authors.txt @@ -0,0 +1 @@ +Slava Pestov \ No newline at end of file diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor new file mode 100644 index 0000000000..25b30c95da --- /dev/null +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -0,0 +1,57 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors byte-arrays fry cpu.architecture kernel +sequences compiler.tree.propagation.info +compiler.cfg.builder.blocks compiler.cfg.stacks +compiler.cfg.stacks.local compiler.cfg.hats +compiler.cfg.instructions compiler.cfg.registers +compiler.cfg.intrinsics.alien ; +IN: compiler.cfg.intrinsics.simd + +: emit-vector-op ( node quot: ( rep -- ) -- ) + [ dup node-input-infos last literal>> ] dip over representation? + [ [ drop ] 2dip call ] [ 2drop emit-primitive ] if ; inline + +: emit-binary-vector-op ( node quot -- ) + '[ [ ds-drop 2inputs ] dip @ ds-push ] emit-vector-op ; inline + +: emit-unary-vector-op ( node quot -- ) + '[ [ ds-drop ds-pop ] dip @ ds-push ] emit-vector-op ; inline + +: emit-gather-vector-2 ( node -- ) + [ ^^gather-vector-2 ] emit-binary-vector-op ; + +: emit-gather-vector-4 ( node -- ) + [ + ds-drop + [ + D 3 peek-loc + D 2 peek-loc + D 1 peek-loc + D 0 peek-loc + -4 inc-d + ] dip + ^^gather-vector-4 + ds-push + ] emit-vector-op ; + +: inline-alien-vector-setter ( node quot -- ) + '[ ds-drop prepare-alien-accessor ds-pop @ ] + [ byte-array inline-alien-setter? ] + inline-alien ; inline + +: emit-alien-vector ( node -- ) + dup [ + '[ + ds-drop prepare-alien-accessor + _ ^^alien-vector ds-push + ] + [ inline-alien-getter? ] inline-alien + ] with emit-vector-op ; + +: emit-set-alien-vector ( node -- ) + dup [ + '[ + _ ##set-alien-vector + ] inline-alien-vector-setter + ] with emit-vector-op ; diff --git a/basis/compiler/cfg/representations/preferred/preferred.factor b/basis/compiler/cfg/representations/preferred/preferred.factor index 2e72e56584..54da7bdf69 100644 --- a/basis/compiler/cfg/representations/preferred/preferred.factor +++ b/basis/compiler/cfg/representations/preferred/preferred.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: kernel accessors sequences arrays fry namespaces generic -words sets cpu.architecture compiler.units +words sets combinators generalizations cpu.architecture compiler.units compiler.cfg.utilities compiler.cfg compiler.cfg.rpo compiler.cfg.instructions compiler.cfg.instructions.syntax compiler.cfg.def-use ; @@ -13,35 +13,41 @@ GENERIC: uses-vreg-reps ( insn -- reps ) > ] ] } + { scalar-rep [ [ rep>> scalar-rep-of ] ] } + [ '[ _ nip ] ] + } case ; + : define-defs-vreg-rep-method ( insn -- ) [ \ defs-vreg-rep create-method ] - [ insn-def-slot dup [ rep>> ] when '[ drop _ ] ] bi - define ; + [ insn-def-slot [ rep>> rep-getter-quot ] [ [ drop f ] ] if* ] + bi define ; + +: reps-getter-quot ( reps -- quot ) + [ rep>> rep-getter-quot ] map dup length '[ _ cleave _ narray ] ; : define-uses-vreg-reps-method ( insn -- ) [ \ uses-vreg-reps create-method ] - [ insn-use-slots [ rep>> ] map '[ drop _ ] ] bi - define ; + [ insn-use-slots reps-getter-quot ] + bi define ; : define-temp-vreg-reps-method ( insn -- ) [ \ temp-vreg-reps create-method ] - [ insn-temp-slots [ rep>> ] map '[ drop _ ] ] bi - define ; + [ insn-temp-slots reps-getter-quot ] + bi define ; PRIVATE> [ insn-classes get - [ { ##copy } diff [ define-defs-vreg-rep-method ] each ] - [ { ##copy ##phi } diff [ define-uses-vreg-reps-method ] each ] + [ [ define-defs-vreg-rep-method ] each ] + [ { ##phi } diff [ define-uses-vreg-reps-method ] each ] [ [ define-temp-vreg-reps-method ] each ] tri ] with-compilation-unit -M: ##copy defs-vreg-rep rep>> ; - -M: ##copy uses-vreg-reps rep>> 1array ; - : each-def-rep ( insn vreg-quot: ( vreg rep -- ) -- ) [ [ defs-vreg ] [ defs-vreg-rep ] bi ] dip with when* ; inline diff --git a/basis/compiler/cfg/representations/representations.factor b/basis/compiler/cfg/representations/representations.factor index cb98eb0ae5..4c417442ea 100644 --- a/basis/compiler/cfg/representations/representations.factor +++ b/basis/compiler/cfg/representations/representations.factor @@ -5,6 +5,7 @@ arrays combinators make locals deques dlists cpu.architecture compiler.utilities compiler.cfg compiler.cfg.rpo +compiler.cfg.hats compiler.cfg.registers compiler.cfg.instructions compiler.cfg.def-use @@ -16,13 +17,47 @@ IN: compiler.cfg.representations ! Virtual register representation selection. +ERROR: bad-conversion dst src dst-rep src-rep ; + +GENERIC: emit-box ( dst src rep -- ) +GENERIC: emit-unbox ( dst src rep -- ) + +M: single-float-rep emit-box + drop + [ double-float-rep next-vreg-rep dup ] dip ##single>double-float + int-rep next-vreg-rep ##box-float ; + +M: single-float-rep emit-unbox + drop + [ double-float-rep next-vreg-rep dup ] dip ##unbox-float + ##double>single-float ; + +M: double-float-rep emit-box + drop + int-rep next-vreg-rep ##box-float ; + +M: double-float-rep emit-unbox + drop ##unbox-float ; + +M: vector-rep emit-box + int-rep next-vreg-rep ##box-vector ; + +M: vector-rep emit-unbox + ##unbox-vector ; + : emit-conversion ( dst src dst-rep src-rep -- ) - 2array { - { { int-rep int-rep } [ int-rep ##copy ] } - { { double-float-rep double-float-rep } [ double-float-rep ##copy ] } - { { double-float-rep int-rep } [ ##unbox-float ] } - { { int-rep double-float-rep } [ int-rep next-vreg-rep ##box-float ] } - } case ; + { + { [ 2dup eq? ] [ drop ##copy ] } + { [ dup int-rep eq? ] [ drop emit-unbox ] } + { [ over int-rep eq? ] [ nip emit-box ] } + [ + 2array { + { { double-float-rep single-float-rep } [ ##single>double-float ] } + { { single-float-rep double-float-rep } [ ##double>single-float ] } + [ first2 bad-conversion ] + } case + ] + } cond ; bignum %integer>bignum CODEGEN: ##bignum>integer %bignum>integer +CODEGEN: ##unbox-float %unbox-float +CODEGEN: ##box-float %box-float CODEGEN: ##add-float %add-float CODEGEN: ##sub-float %sub-float CODEGEN: ##mul-float %mul-float @@ -155,12 +158,24 @@ CODEGEN: ##max-float %max-float CODEGEN: ##sqrt %sqrt CODEGEN: ##unary-float-function %unary-float-function CODEGEN: ##binary-float-function %binary-float-function +CODEGEN: ##single>double-float %single>double-float +CODEGEN: ##double>single-float %double>single-float CODEGEN: ##integer>float %integer>float CODEGEN: ##float>integer %float>integer -CODEGEN: ##copy %copy -CODEGEN: ##unbox-float %unbox-float +CODEGEN: ##unbox-vector %unbox-vector +CODEGEN: ##broadcast-vector %broadcast-vector +CODEGEN: ##gather-vector-2 %gather-vector-2 +CODEGEN: ##gather-vector-4 %gather-vector-4 +CODEGEN: ##box-vector %box-vector +CODEGEN: ##add-vector %add-vector +CODEGEN: ##sub-vector %sub-vector +CODEGEN: ##mul-vector %mul-vector +CODEGEN: ##div-vector %div-vector +CODEGEN: ##min-vector %min-vector +CODEGEN: ##max-vector %max-vector +CODEGEN: ##sqrt-vector %sqrt-vector +CODEGEN: ##horizontal-add-vector %horizontal-add-vector CODEGEN: ##unbox-any-c-ptr %unbox-any-c-ptr -CODEGEN: ##box-float %box-float CODEGEN: ##box-alien %box-alien CODEGEN: ##box-displaced-alien %box-displaced-alien CODEGEN: ##alien-unsigned-1 %alien-unsigned-1 @@ -172,12 +187,14 @@ CODEGEN: ##alien-signed-4 %alien-signed-4 CODEGEN: ##alien-cell %alien-cell CODEGEN: ##alien-float %alien-float CODEGEN: ##alien-double %alien-double +CODEGEN: ##alien-vector %alien-vector CODEGEN: ##set-alien-integer-1 %set-alien-integer-1 CODEGEN: ##set-alien-integer-2 %set-alien-integer-2 CODEGEN: ##set-alien-integer-4 %set-alien-integer-4 CODEGEN: ##set-alien-cell %set-alien-cell CODEGEN: ##set-alien-float %set-alien-float CODEGEN: ##set-alien-double %set-alien-double +CODEGEN: ##set-alien-vector %set-alien-vector CODEGEN: ##allot %allot CODEGEN: ##write-barrier %write-barrier CODEGEN: ##compare %compare diff --git a/basis/compiler/tests/codegen.factor b/basis/compiler/tests/codegen.factor index 0fb2dca5b9..fcbac30444 100644 --- a/basis/compiler/tests/codegen.factor +++ b/basis/compiler/tests/codegen.factor @@ -412,4 +412,6 @@ cell 4 = [ [ 1 "0.169967142900241" ] [ 1.4 [ 1 swap fcos ] compile-call number>string ] unit-test [ 1 "0.169967142900241" ] [ 1.4 1 [ swap fcos ] compile-call number>string ] unit-test [ "0.169967142900241" "0.9854497299884601" ] [ 1.4 [ [ fcos ] [ fsin ] bi ] compile-call [ number>string ] bi@ ] unit-test -[ 1 "0.169967142900241" "0.9854497299884601" ] [ 1.4 1 [ swap >float [ fcos ] [ fsin ] bi ] compile-call [ number>string ] bi@ ] unit-test \ No newline at end of file +[ 1 "0.169967142900241" "0.9854497299884601" ] [ 1.4 1 [ swap >float [ fcos ] [ fsin ] bi ] compile-call [ number>string ] bi@ ] unit-test + +[ 6.0 ] [ 1.0 [ >float 3.0 + [ B{ 0 0 0 0 } 0 set-alien-float ] [ 2.0 + ] bi ] compile-call ] unit-test \ No newline at end of file diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor new file mode 100644 index 0000000000..57c77538a9 --- /dev/null +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -0,0 +1,43 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors byte-arrays combinators +compiler.tree.propagation.info cpu.architecture kernel words math +math.intervals math.vectors.simd math.vectors.simd.private +math.vectors.simd.alien ; +IN: compiler.tree.propagation.simd + +\ (simd-v+) { byte-array } "default-output-classes" set-word-prop + +\ (simd-v-) { byte-array } "default-output-classes" set-word-prop + +\ (simd-v*) { byte-array } "default-output-classes" set-word-prop + +\ (simd-v/) { byte-array } "default-output-classes" set-word-prop + +\ (simd-vmin) { byte-array } "default-output-classes" set-word-prop + +\ (simd-vmax) { byte-array } "default-output-classes" set-word-prop + +\ (simd-vsqrt) { byte-array } "default-output-classes" set-word-prop + +\ (simd-sum) [ + nip dup literal?>> [ + literal>> scalar-rep-of { + { single-float-rep [ float ] } + { double-float-rep [ float ] } + } case + ] [ drop real ] if + +] "outputs" set-word-prop + +\ (simd-broadcast) { byte-array } "default-output-classes" set-word-prop + +\ (simd-gather-2) { byte-array } "default-output-classes" set-word-prop + +\ (simd-gather-4) { byte-array } "default-output-classes" set-word-prop + +\ assert-positive [ + real [0,inf] value-info-intersect +] "outputs" set-word-prop + +\ alien-vector { byte-array } "default-output-classes" set-word-prop diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index eaf2f4af66..eaa4bc394a 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -20,7 +20,33 @@ SINGLETONS: tagged-rep int-rep ; ! one of these representations SINGLETONS: single-float-rep double-float-rep ; -UNION: representation any-rep tagged-rep int-rep single-float-rep double-float-rep ; +SINGLETONS: +4float-array-rep +2double-array-rep +16char-array-rep +16uchar-array-rep +8short-array-rep +8ushort-array-rep +4int-array-rep +4uint-array-rep ; + +UNION: vector-rep +4float-array-rep +2double-array-rep +16char-array-rep +16uchar-array-rep +8short-array-rep +8ushort-array-rep +4int-array-rep +4uint-array-rep ; + +UNION: representation +any-rep +tagged-rep +int-rep +single-float-rep +double-float-rep +vector-rep ; ! Register classes SINGLETONS: int-regs float-regs ; @@ -31,23 +57,28 @@ CONSTANT: reg-classes { int-regs float-regs } ! A pseudo-register class for parameters spilled on the stack SINGLETON: stack-params -: reg-class-of ( rep -- reg-class ) - { - { tagged-rep [ int-regs ] } - { int-rep [ int-regs ] } - { single-float-rep [ float-regs ] } - { double-float-rep [ float-regs ] } - { stack-params [ stack-params ] } - } case ; +GENERIC: reg-class-of ( rep -- reg-class ) -: rep-size ( rep -- n ) - { - { tagged-rep [ cell ] } - { int-rep [ cell ] } - { single-float-rep [ 4 ] } - { double-float-rep [ 8 ] } - { stack-params [ cell ] } - } case ; +M: tagged-rep reg-class-of drop int-regs ; +M: int-rep reg-class-of drop int-regs ; +M: single-float-rep reg-class-of drop float-regs ; +M: double-float-rep reg-class-of drop float-regs ; +M: vector-rep reg-class-of drop float-regs ; +M: stack-params reg-class-of drop stack-params ; + +GENERIC: rep-size ( rep -- n ) + +M: tagged-rep rep-size drop cell ; +M: int-rep rep-size drop cell ; +M: single-float-rep rep-size drop 4 ; +M: double-float-rep rep-size drop 8 ; +M: stack-params rep-size drop cell ; +M: vector-rep rep-size drop 16 ; + +GENERIC: scalar-rep-of ( rep -- rep' ) + +M: 4float-array-rep scalar-rep-of drop single-float-rep ; +M: 2double-array-rep scalar-rep-of drop double-float-rep ; ! Mapping from register class to machine registers HOOK: machine-registers cpu ( -- assoc ) @@ -101,6 +132,8 @@ HOOK: %max cpu ( dst src1 src2 -- ) HOOK: %not cpu ( dst src -- ) HOOK: %log2 cpu ( dst src -- ) +HOOK: %copy cpu ( dst src rep -- ) + HOOK: %fixnum-add cpu ( label dst src1 src2 -- ) HOOK: %fixnum-sub cpu ( label dst src1 src2 -- ) HOOK: %fixnum-mul cpu ( label dst src1 src2 -- ) @@ -108,6 +141,9 @@ HOOK: %fixnum-mul cpu ( label dst src1 src2 -- ) HOOK: %integer>bignum cpu ( dst src temp -- ) HOOK: %bignum>integer cpu ( dst src temp -- ) +HOOK: %unbox-float cpu ( dst src -- ) +HOOK: %box-float cpu ( dst src temp -- ) + HOOK: %add-float cpu ( dst src1 src2 -- ) HOOK: %sub-float cpu ( dst src1 src2 -- ) HOOK: %mul-float cpu ( dst src1 src2 -- ) @@ -118,13 +154,29 @@ HOOK: %sqrt cpu ( dst src -- ) HOOK: %unary-float-function cpu ( dst src func -- ) HOOK: %binary-float-function cpu ( dst src1 src2 func -- ) +HOOK: %single>double-float cpu ( dst src -- ) +HOOK: %double>single-float cpu ( dst src -- ) + HOOK: %integer>float cpu ( dst src -- ) HOOK: %float>integer cpu ( dst src -- ) -HOOK: %copy cpu ( dst src rep -- ) -HOOK: %unbox-float cpu ( dst src -- ) +HOOK: %box-vector cpu ( dst src temp rep -- ) +HOOK: %unbox-vector cpu ( dst src rep -- ) + +HOOK: %broadcast-vector cpu ( dst src rep -- ) +HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- ) +HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) + +HOOK: %add-vector cpu ( dst src1 src2 rep -- ) +HOOK: %sub-vector cpu ( dst src1 src2 rep -- ) +HOOK: %mul-vector cpu ( dst src1 src2 rep -- ) +HOOK: %div-vector cpu ( dst src1 src2 rep -- ) +HOOK: %min-vector cpu ( dst src1 src2 rep -- ) +HOOK: %max-vector cpu ( dst src1 src2 rep -- ) +HOOK: %sqrt-vector cpu ( dst src rep -- ) +HOOK: %horizontal-add-vector cpu ( dst src rep -- ) + HOOK: %unbox-any-c-ptr cpu ( dst src temp -- ) -HOOK: %box-float cpu ( dst src temp -- ) HOOK: %box-alien cpu ( dst src temp -- ) HOOK: %box-displaced-alien cpu ( dst displacement base temp1 temp2 base-class -- ) @@ -137,6 +189,7 @@ HOOK: %alien-signed-4 cpu ( dst src -- ) HOOK: %alien-cell cpu ( dst src -- ) HOOK: %alien-float cpu ( dst src -- ) HOOK: %alien-double cpu ( dst src -- ) +HOOK: %alien-vector cpu ( dst src rep -- ) HOOK: %set-alien-integer-1 cpu ( ptr value -- ) HOOK: %set-alien-integer-2 cpu ( ptr value -- ) @@ -144,6 +197,7 @@ HOOK: %set-alien-integer-4 cpu ( ptr value -- ) HOOK: %set-alien-cell cpu ( ptr value -- ) HOOK: %set-alien-float cpu ( ptr value -- ) HOOK: %set-alien-double cpu ( ptr value -- ) +HOOK: %set-alien-vector cpu ( ptr value rep -- ) HOOK: %alien-global cpu ( dst symbol library -- ) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 24b8bf2870..af548a1f2b 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -4,7 +4,7 @@ USING: accessors assocs alien alien.c-types arrays strings cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands cpu.architecture kernel kernel.private math memory namespaces make sequences words system layouts combinators math.order fry locals -compiler.constants +compiler.constants byte-arrays compiler.cfg.registers compiler.cfg.instructions compiler.cfg.intrinsics @@ -130,6 +130,21 @@ M: x86 %max nip [ CMP ] [ CMOVL ] 2bi ; M: x86 %not drop NOT ; M: x86 %log2 BSR ; +GENERIC: copy-register* ( dst src rep -- ) + +M: int-rep copy-register* drop MOV ; +M: tagged-rep copy-register* drop MOV ; +M: single-float-rep copy-register* drop MOVSS ; +M: double-float-rep copy-register* drop MOVSD ; +M: 4float-array-rep copy-register* drop MOVUPS ; +M: 2double-array-rep copy-register* drop MOVUPD ; +M: vector-rep copy-register* drop MOVDQU ; + +: copy-register ( dst src rep -- ) + 2over eq? [ 3drop ] [ copy-register* ] if ; + +M: x86 %copy ( dst src rep -- ) copy-register ; + :: overflow-template ( label dst src1 src2 insn -- ) src1 src2 insn call label JO ; inline @@ -211,24 +226,120 @@ M: x86 %min-float nip MINSD ; M: x86 %max-float nip MAXSD ; M: x86 %sqrt SQRTSD ; +M: x86 %single>double-float CVTSS2SD ; +M: x86 %double>single-float CVTSD2SS ; + M: x86 %integer>float CVTSI2SD ; M: x86 %float>integer CVTTSD2SI ; -GENERIC: copy-register* ( dst src rep -- ) - -M: int-rep copy-register* drop MOV ; -M: tagged-rep copy-register* drop MOV ; -M: single-float-rep copy-register* drop MOVSS ; -M: double-float-rep copy-register* drop MOVSD ; - -: copy-register ( dst src rep -- ) - 2over eq? [ 3drop ] [ copy-register* ] if ; - -M: x86 %copy ( dst src rep -- ) copy-register ; - M: x86 %unbox-float ( dst src -- ) float-offset [+] MOVSD ; +M:: x86 %box-float ( dst src temp -- ) + dst 16 float temp %allot + dst float-offset [+] src MOVSD ; + +M:: x86 %box-vector ( dst src rep temp -- ) + dst rep rep-size 2 cells + byte-array temp %allot + 16 tag-fixnum dst 1 byte-array tag-number %set-slot-imm + dst byte-array-offset [+] + src rep copy-register ; + +M:: x86 %unbox-vector ( dst src rep -- ) + dst src byte-array-offset [+] + rep copy-register ; + +M: x86 %broadcast-vector ( dst src rep -- ) + { + { 4float-array-rep [ [ MOVAPS ] [ drop dup 0 SHUFPS ] 2bi ] } + { 2double-array-rep [ [ MOVAPD ] [ drop dup 0 SHUFPD ] 2bi ] } + } case ; + +M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- ) + rep { + { + 4float-array-rep + [ + dst src1 MOVSS + dst src2 UNPCKLPS + src3 src4 UNPCKLPS + dst src3 HEX: 44 SHUFPS + ] + } + } case ; + +M:: x86 %gather-vector-2 ( dst src1 src2 rep -- ) + rep { + { + 2double-array-rep + [ + dst src1 MOVAPD + dst src2 0 SHUFPD + ] + } + } case ; + +M: x86 %add-vector ( dst src1 src2 rep -- ) + { + { 4float-array-rep [ ADDPS ] } + { 2double-array-rep [ ADDPD ] } + { 16char-array-rep [ PADDB ] } + { 16uchar-array-rep [ PADDB ] } + { 8short-array-rep [ PADDW ] } + { 8ushort-array-rep [ PADDW ] } + { 4int-array-rep [ PADDD ] } + { 4uint-array-rep [ PADDD ] } + } case drop ; + +M: x86 %sub-vector ( dst src1 src2 rep -- ) + { + { 4float-array-rep [ SUBPS ] } + { 2double-array-rep [ SUBPD ] } + { 16char-array-rep [ PSUBB ] } + { 16uchar-array-rep [ PSUBB ] } + { 8short-array-rep [ PSUBW ] } + { 8ushort-array-rep [ PSUBW ] } + { 4int-array-rep [ PSUBD ] } + { 4uint-array-rep [ PSUBD ] } + } case drop ; + +M: x86 %mul-vector ( dst src1 src2 rep -- ) + { + { 4float-array-rep [ MULPS ] } + { 2double-array-rep [ MULPD ] } + { 4int-array-rep [ PMULLW ] } + } case drop ; + +M: x86 %div-vector ( dst src1 src2 rep -- ) + { + { 4float-array-rep [ DIVPS ] } + { 2double-array-rep [ DIVPD ] } + } case drop ; + +M: x86 %min-vector ( dst src1 src2 rep -- ) + { + { 4float-array-rep [ MINPS ] } + { 2double-array-rep [ MINPD ] } + } case drop ; + +M: x86 %max-vector ( dst src1 src2 rep -- ) + { + { 4float-array-rep [ MAXPS ] } + { 2double-array-rep [ MAXPD ] } + } case drop ; + +M: x86 %sqrt-vector ( dst src rep -- ) + { + { 4float-array-rep [ SQRTPS ] } + { 2double-array-rep [ SQRTPD ] } + } case ; + +M: x86 %horizontal-add-vector ( dst src rep -- ) + { + { 4float-array-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] } + { 2double-array-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] } + } case ; + M:: x86 %unbox-any-c-ptr ( dst src temp -- ) [ { "is-byte-array" "end" "start" } [ define-label ] each @@ -255,10 +366,6 @@ M:: x86 %unbox-any-c-ptr ( dst src temp -- ) "end" resolve-label ] with-scope ; -M:: x86 %box-float ( dst src temp -- ) - dst 16 float temp %allot - dst float-offset [+] src MOVSD ; - : alien@ ( reg n -- op ) cells alien tag-number - [+] ; :: %allot-alien ( dst displacement base temp -- ) @@ -405,8 +512,9 @@ M: x86 %alien-signed-2 16 %alien-signed-getter ; M: x86 %alien-signed-4 32 %alien-signed-getter ; M: x86 %alien-cell [] MOV ; -M: x86 %alien-float dupd [] MOVSS dup CVTSS2SD ; +M: x86 %alien-float [] MOVSS ; M: x86 %alien-double [] MOVSD ; +M: x86 %alien-vector [ [] ] dip copy-register ; :: %alien-integer-setter ( ptr value size -- ) value { ptr } size [| new-value | @@ -418,8 +526,9 @@ M: x86 %set-alien-integer-1 8 %alien-integer-setter ; M: x86 %set-alien-integer-2 16 %alien-integer-setter ; M: x86 %set-alien-integer-4 32 %alien-integer-setter ; M: x86 %set-alien-cell [ [] ] dip MOV ; -M: x86 %set-alien-float dup dup CVTSD2SS [ [] ] dip MOVSS ; +M: x86 %set-alien-float [ [] ] dip MOVSS ; M: x86 %set-alien-double [ [] ] dip MOVSD ; +M: x86 %set-alien-vector [ [] ] 2dip copy-register ; : shift-count? ( reg -- ? ) { ECX RCX } memq? ; diff --git a/basis/math/vectors/simd/alien/alien-tests.factor b/basis/math/vectors/simd/alien/alien-tests.factor new file mode 100644 index 0000000000..62a1d25c58 --- /dev/null +++ b/basis/math/vectors/simd/alien/alien-tests.factor @@ -0,0 +1,56 @@ +IN: math.vectors.simd.alien.tests +USING: cpu.architecture math.vectors.simd accessors +math.vectors.simd.alien kernel classes.struct tools.test +compiler sequences byte-arrays alien math kernel.private +specialized-arrays.float ; + +! Vector alien intrinsics +[ 4float-array{ 1 2 3 4 } ] [ + [ + 4float-array{ 1 2 3 4 } + underlying>> 0 4float-array-rep alien-vector + ] compile-call 4float-array boa +] unit-test + +[ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [ + 16 [ 1 ] B{ } replicate-as 16 + [ + 0 [ + { byte-array c-ptr fixnum } declare + 4float-array-rep set-alien-vector + ] compile-call + ] keep +] unit-test + +[ float-array{ 1 2 3 4 } ] [ + [ + float-array{ 1 2 3 4 } underlying>> + float-array{ 4 3 2 1 } clone + [ underlying>> 0 4float-array-rep set-alien-vector ] keep + ] compile-call +] unit-test + +STRUCT: simd-struct +{ x 4float-array } +{ y 2double-array } +{ z 4double-array } ; + +[ t ] [ [ simd-struct ] compile-call >c-ptr [ 0 = ] all? ] unit-test + +[ 4float-array{ 1 2 3 4 } 2double-array{ 2 1 } 4double-array{ 4 3 2 1 } ] [ + simd-struct + 4float-array{ 1 2 3 4 } >>x + 2double-array{ 2 1 } >>y + 4double-array{ 4 3 2 1 } >>z + [ x>> ] [ y>> ] [ z>> ] tri +] unit-test + +[ 4float-array{ 1 2 3 4 } 2double-array{ 2 1 } 4double-array{ 4 3 2 1 } ] [ + [ + simd-struct + 4float-array{ 1 2 3 4 } >>x + 2double-array{ 2 1 } >>y + 4double-array{ 4 3 2 1 } >>z + [ x>> ] [ y>> ] [ z>> ] tri + ] compile-call +] unit-test diff --git a/basis/math/vectors/simd/alien/alien.factor b/basis/math/vectors/simd/alien/alien.factor new file mode 100644 index 0000000000..c3dd25512c --- /dev/null +++ b/basis/math/vectors/simd/alien/alien.factor @@ -0,0 +1,51 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: alien accessors alien.c-types byte-arrays compiler.units +cpu.architecture libc locals kernel math math.vectors.simd +math.vectors.simd.private ; +IN: math.vectors.simd.alien + +: alien-vector ( c-ptr n rep -- value ) + ! Inefficient version for when intrinsics are missing + [ swap ] dip rep-size memory>byte-array ; + +: set-alien-vector ( value c-ptr n rep -- ) + ! Inefficient version for when intrinsics are missing + [ swap swap ] dip rep-size memcpy ; + +:: define-simd-type ( class rep -- ) + + byte-array >>class + class >>boxed-class + [ rep alien-vector ] >>getter + [ [ underlying>> ] 2dip rep set-alien-vector ] >>setter + 16 >>size + 8 >>align + rep >>rep + [ class boa ] >>boxer-quot + [ underlying>> ] >>unboxer-quot + class name>> typedef ; + +: define-4double-array-type ( -- ) + + 4double-array >>class + 4double-array >>boxed-class + [ + [ 2double-array-rep alien-vector ] + [ 16 + >fixnum 2double-array-rep alien-vector ] 2bi + 4double-array boa + ] >>getter + [ + [ [ underlying1>> ] 2dip 2double-array-rep set-alien-vector ] + [ [ underlying2>> ] 2dip 16 + >fixnum 2double-array-rep set-alien-vector ] + 3bi + ] >>setter + 32 >>size + 8 >>align + 2double-array-rep >>rep + "4double-array" typedef ; +[ + 4float-array 4float-array-rep define-simd-type + 2double-array 2double-array-rep define-simd-type + define-4double-array-type +] with-compilation-unit diff --git a/basis/math/vectors/simd/alien/authors.txt b/basis/math/vectors/simd/alien/authors.txt new file mode 100644 index 0000000000..d4f5d6b3ae --- /dev/null +++ b/basis/math/vectors/simd/alien/authors.txt @@ -0,0 +1 @@ +Slava Pestov \ No newline at end of file diff --git a/basis/math/vectors/simd/authors.txt b/basis/math/vectors/simd/authors.txt new file mode 100644 index 0000000000..d4f5d6b3ae --- /dev/null +++ b/basis/math/vectors/simd/authors.txt @@ -0,0 +1 @@ +Slava Pestov \ No newline at end of file diff --git a/basis/math/vectors/simd/functor/authors.txt b/basis/math/vectors/simd/functor/authors.txt new file mode 100644 index 0000000000..d4f5d6b3ae --- /dev/null +++ b/basis/math/vectors/simd/functor/authors.txt @@ -0,0 +1 @@ +Slava Pestov \ No newline at end of file diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor new file mode 100644 index 0000000000..9273f1108e --- /dev/null +++ b/basis/math/vectors/simd/functor/functor.factor @@ -0,0 +1,61 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors alien.c-types byte-arrays classes functors +kernel math parser prettyprint.custom sequences +sequences.private ; +IN: math.vectors.simd.functor + +ERROR: bad-length got expected ; + +FUNCTOR: define-simd-type ( T N -- ) + +A DEFINES-CLASS ${N}${T}-array + DEFINES <${A}> +(A) DEFINES (${A}) +>A DEFINES >${A} +A{ DEFINES ${A}{ + +NTH [ T dup c-type-getter-boxer array-accessor ] +SET-NTH [ T dup c-setter array-accessor ] + +BYTES [ T heap-size N * ] +INITIAL [ BYTES ] + +WHERE + +TUPLE: A +{ underlying byte-array read-only initial: INITIAL } ; + +: ( -- simd-array ) BYTES A boa ; inline + +: (A) ( -- simd-array ) BYTES (byte-array) A boa ; inline + +M: A clone underlying>> clone \ A boa ; inline + +M: A length drop N ; inline + +M: A nth-unsafe underlying>> NTH call ; inline + +M: A set-nth-unsafe underlying>> SET-NTH call ; inline + +: >A ( seq -- simd-array ) \ A new clone-like ; + +M: A like drop dup \ A instance? [ >A ] unless ; inline + +M: A new-sequence drop dup N = [ drop (A) ] [ N bad-length ] if ; inline + +M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ; + +M: A byte-length underlying>> length ; inline + +M: A pprint-delims drop \ A{ \ } ; + +M: A >pprint-sequence ; + +M: A pprint* pprint-object ; + +SYNTAX: A{ \ } [ >A ] parse-literal ; + +INSTANCE: A sequence + +;FUNCTOR diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor new file mode 100644 index 0000000000..58a5373713 --- /dev/null +++ b/basis/math/vectors/simd/simd-tests.factor @@ -0,0 +1,301 @@ +IN: math.vectors.simd.tests +USING: math math.vectors.simd math.vectors.simd.private +math.vectors math.functions kernel.private compiler sequences +tools.test compiler.tree.debugger accessors kernel ; + +[ 4float-array{ 0 0 0 0 } ] [ 4float-array new ] unit-test + +[ V{ float } ] [ [ { 4float-array } declare norm-sq ] final-classes ] unit-test + +[ V{ float } ] [ [ { 4float-array } declare norm ] final-classes ] unit-test + +[ 4float-array{ 12 12 12 12 } ] [ + 12 [ 4float-array-with ] compile-call +] unit-test + +[ 4float-array{ 1 2 3 4 } ] [ + 1 2 3 4 [ 4float-array-boa ] compile-call +] unit-test + +[ 4float-array{ 11 22 33 44 } ] [ + 4float-array{ 1 2 3 4 } 4float-array{ 10 20 30 40 } + [ { 4float-array 4float-array } declare v+ ] compile-call +] unit-test + +[ 4float-array{ -9 -18 -27 -36 } ] [ + 4float-array{ 1 2 3 4 } 4float-array{ 10 20 30 40 } + [ { 4float-array 4float-array } declare v- ] compile-call +] unit-test + +[ 4float-array{ 10 40 90 160 } ] [ + 4float-array{ 1 2 3 4 } 4float-array{ 10 20 30 40 } + [ { 4float-array 4float-array } declare v* ] compile-call +] unit-test + +[ 4float-array{ 10 100 1000 10000 } ] [ + 4float-array{ 100 2000 30000 400000 } 4float-array{ 10 20 30 40 } + [ { 4float-array 4float-array } declare v/ ] compile-call +] unit-test + +[ 4float-array{ -10 -20 -30 -40 } ] [ + 4float-array{ -10 20 -30 40 } 4float-array{ 10 -20 30 -40 } + [ { 4float-array 4float-array } declare vmin ] compile-call +] unit-test + +[ 4float-array{ 10 20 30 40 } ] [ + 4float-array{ -10 20 -30 40 } 4float-array{ 10 -20 30 -40 } + [ { 4float-array 4float-array } declare vmax ] compile-call +] unit-test + +[ 10.0 ] [ + 4float-array{ 1 2 3 4 } + [ { 4float-array } declare sum ] compile-call +] unit-test + +[ 13.0 ] [ + 4float-array{ 1 2 3 4 } + [ { 4float-array } declare sum 3.0 + ] compile-call +] unit-test + +[ 8.0 ] [ + 4float-array{ 1 2 3 4 } 4float-array{ 2 0 2 0 } + [ { 4float-array 4float-array } declare v. ] compile-call +] unit-test + +[ 4float-array{ 5 10 15 20 } ] [ + 5.0 4float-array{ 1 2 3 4 } + [ { float 4float-array } declare n*v ] compile-call +] unit-test + +[ 4float-array{ 5 10 15 20 } ] [ + 4float-array{ 1 2 3 4 } 5.0 + [ { float 4float-array } declare v*n ] compile-call +] unit-test + +[ 4float-array{ 10 5 2 5 } ] [ + 10.0 4float-array{ 1 2 5 2 } + [ { float 4float-array } declare n/v ] compile-call +] unit-test + +[ 4float-array{ 0.5 1 1.5 2 } ] [ + 4float-array{ 1 2 3 4 } 2 + [ { float 4float-array } declare v/n ] compile-call +] unit-test + +[ 4float-array{ 1 0 0 0 } ] [ + 4float-array{ 10 0 0 0 } + [ { 4float-array } declare normalize ] compile-call +] unit-test + +[ 30.0 ] [ + 4float-array{ 1 2 3 4 } + [ { 4float-array } declare norm-sq ] compile-call +] unit-test + +[ t ] [ + 4float-array{ 1 0 0 0 } + 4float-array{ 0 1 0 0 } + [ { 4float-array 4float-array } declare distance ] compile-call + 2 sqrt 1.0e-6 ~ +] unit-test + +[ 2double-array{ 12 12 } ] [ + 12 [ 2double-array-with ] compile-call +] unit-test + +[ 2double-array{ 1 2 } ] [ + 1 2 [ 2double-array-boa ] compile-call +] unit-test + +[ 2double-array{ 11 22 } ] [ + 2double-array{ 1 2 } 2double-array{ 10 20 } + [ { 2double-array 2double-array } declare v+ ] compile-call +] unit-test + +[ 2double-array{ -9 -18 } ] [ + 2double-array{ 1 2 } 2double-array{ 10 20 } + [ { 2double-array 2double-array } declare v- ] compile-call +] unit-test + +[ 2double-array{ 10 40 } ] [ + 2double-array{ 1 2 } 2double-array{ 10 20 } + [ { 2double-array 2double-array } declare v* ] compile-call +] unit-test + +[ 2double-array{ 10 100 } ] [ + 2double-array{ 100 2000 } 2double-array{ 10 20 } + [ { 2double-array 2double-array } declare v/ ] compile-call +] unit-test + +[ 2double-array{ -10 -20 } ] [ + 2double-array{ -10 20 } 2double-array{ 10 -20 } + [ { 2double-array 2double-array } declare vmin ] compile-call +] unit-test + +[ 2double-array{ 10 20 } ] [ + 2double-array{ -10 20 } 2double-array{ 10 -20 } + [ { 2double-array 2double-array } declare vmax ] compile-call +] unit-test + +[ 3.0 ] [ + 2double-array{ 1 2 } + [ { 2double-array } declare sum ] compile-call +] unit-test + +[ 7.0 ] [ + 2double-array{ 1 2 } + [ { 2double-array } declare sum 4.0 + ] compile-call +] unit-test + +[ 16.0 ] [ + 2double-array{ 1 2 } 2double-array{ 2 7 } + [ { 2double-array 2double-array } declare v. ] compile-call +] unit-test + +[ 2double-array{ 5 10 } ] [ + 5.0 2double-array{ 1 2 } + [ { float 2double-array } declare n*v ] compile-call +] unit-test + +[ 2double-array{ 5 10 } ] [ + 2double-array{ 1 2 } 5.0 + [ { float 2double-array } declare v*n ] compile-call +] unit-test + +[ 2double-array{ 10 5 } ] [ + 10.0 2double-array{ 1 2 } + [ { float 2double-array } declare n/v ] compile-call +] unit-test + +[ 2double-array{ 0.5 1 } ] [ + 2double-array{ 1 2 } 2 + [ { float 2double-array } declare v/n ] compile-call +] unit-test + +[ 2double-array{ 0 0 } ] [ 2double-array new ] unit-test + +[ 2double-array{ 1 0 } ] [ + 2double-array{ 10 0 } + [ { 2double-array } declare normalize ] compile-call +] unit-test + +[ 5.0 ] [ + 2double-array{ 1 2 } + [ { 2double-array } declare norm-sq ] compile-call +] unit-test + +[ t ] [ + 2double-array{ 1 0 } + 2double-array{ 0 1 } + [ { 2double-array 2double-array } declare distance ] compile-call + 2 sqrt 1.0e-6 ~ +] unit-test + +[ 4double-array{ 0 0 0 0 } ] [ 4double-array new ] unit-test + +[ 4double-array{ 1 2 3 4 } ] [ + 1 2 3 4 4double-array-boa +] unit-test + +[ 4double-array{ 1 1 1 1 } ] [ + 1 4double-array-with +] unit-test + +[ 4double-array{ 0 1 2 3 } ] [ + 1 4double-array-with [ * ] map-index +] unit-test + +[ V{ float } ] [ [ { 4double-array } declare norm-sq ] final-classes ] unit-test + +[ V{ float } ] [ [ { 4double-array } declare norm ] final-classes ] unit-test + +[ 4double-array{ 12 12 12 12 } ] [ + 12 [ 4double-array-with ] compile-call +] unit-test + +[ 4double-array{ 1 2 3 4 } ] [ + 1 2 3 4 [ 4double-array-boa ] compile-call +] unit-test + +[ 4double-array{ 11 22 33 44 } ] [ + 4double-array{ 1 2 3 4 } 4double-array{ 10 20 30 40 } + [ { 4double-array 4double-array } declare v+ ] compile-call +] unit-test + +[ 4double-array{ -9 -18 -27 -36 } ] [ + 4double-array{ 1 2 3 4 } 4double-array{ 10 20 30 40 } + [ { 4double-array 4double-array } declare v- ] compile-call +] unit-test + +[ 4double-array{ 10 40 90 160 } ] [ + 4double-array{ 1 2 3 4 } 4double-array{ 10 20 30 40 } + [ { 4double-array 4double-array } declare v* ] compile-call +] unit-test + +[ 4double-array{ 10 100 1000 10000 } ] [ + 4double-array{ 100 2000 30000 400000 } 4double-array{ 10 20 30 40 } + [ { 4double-array 4double-array } declare v/ ] compile-call +] unit-test + +[ 4double-array{ -10 -20 -30 -40 } ] [ + 4double-array{ -10 20 -30 40 } 4double-array{ 10 -20 30 -40 } + [ { 4double-array 4double-array } declare vmin ] compile-call +] unit-test + +[ 4double-array{ 10 20 30 40 } ] [ + 4double-array{ -10 20 -30 40 } 4double-array{ 10 -20 30 -40 } + [ { 4double-array 4double-array } declare vmax ] compile-call +] unit-test + +[ 10.0 ] [ + 4double-array{ 1 2 3 4 } + [ { 4double-array } declare sum ] compile-call +] unit-test + +[ 13.0 ] [ + 4double-array{ 1 2 3 4 } + [ { 4double-array } declare sum 3.0 + ] compile-call +] unit-test + +[ 8.0 ] [ + 4double-array{ 1 2 3 4 } 4double-array{ 2 0 2 0 } + [ { 4double-array 4double-array } declare v. ] compile-call +] unit-test + +[ 4double-array{ 5 10 15 20 } ] [ + 5.0 4double-array{ 1 2 3 4 } + [ { float 4double-array } declare n*v ] compile-call +] unit-test + +[ 4double-array{ 5 10 15 20 } ] [ + 4double-array{ 1 2 3 4 } 5.0 + [ { float 4double-array } declare v*n ] compile-call +] unit-test + +[ 4double-array{ 10 5 2 5 } ] [ + 10.0 4double-array{ 1 2 5 2 } + [ { float 4double-array } declare n/v ] compile-call +] unit-test + +[ 4double-array{ 0.5 1 1.5 2 } ] [ + 4double-array{ 1 2 3 4 } 2 + [ { float 4double-array } declare v/n ] compile-call +] unit-test + +[ 4double-array{ 1 0 0 0 } ] [ + 4double-array{ 10 0 0 0 } + [ { 4double-array } declare normalize ] compile-call +] unit-test + +[ 30.0 ] [ + 4double-array{ 1 2 3 4 } + [ { 4double-array } declare norm-sq ] compile-call +] unit-test + +[ t ] [ + 4double-array{ 1 0 0 0 } + 4double-array{ 0 1 0 0 } + [ { 4double-array 4double-array } declare distance ] compile-call + 2 sqrt 1.0e-6 ~ +] unit-test + diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor new file mode 100644 index 0000000000..39ce72356b --- /dev/null +++ b/basis/math/vectors/simd/simd.factor @@ -0,0 +1,214 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors alien.c-types byte-arrays cpu.architecture +generalizations kernel math math.functions math.vectors +math.vectors.simd.functor math.vectors.specialization parser +prettyprint.custom sequences sequences.private +specialized-arrays.double locals assocs literals ; +IN: math.vectors.simd + + + +<< + +DEFER: 4float-array +DEFER: 2double-array + +"double" 2 define-simd-type +"float" 4 define-simd-type + +>> + +! Constructors +: 4float-array-with ( x -- simd-array ) + >float 4float-array-rep (simd-broadcast) 4float-array boa ; inline + +: 4float-array-boa ( a b c d -- simd-array ) + [ >float ] 4 napply 4float-array-rep (simd-gather-4) 4float-array boa ; inline + +: 2double-array-with ( x -- simd-array ) + >float 2double-array-rep (simd-broadcast) 2double-array boa ; inline + +: 2double-array-boa ( a b -- simd-array ) + [ >float ] bi@ 2double-array-rep (simd-gather-2) 2double-array boa ; inline + +v-op ( v1 v2 quot -- v3 ) + [ [ underlying>> ] bi@ 4float-array-rep ] dip call 4float-array boa ; inline + +: 4float-array-v->n-op ( v1 quot -- v2 ) + [ underlying>> 4float-array-rep ] dip call ; inline + +: 2double-array-vv->v-op ( v1 v2 quot -- v3 ) + [ [ underlying>> ] bi@ 2double-array-rep ] dip call 2double-array boa ; inline + +: 2double-array-v->n-op ( v1 quot -- v2 ) + [ underlying>> 2double-array-rep ] dip call ; inline + +PRIVATE> + +<< + + + +\ 4float-array \ 4float-array-with float H{ + { v+ [ [ (simd-v+) ] 4float-array-vv->v-op ] } + { v- [ [ (simd-v-) ] 4float-array-vv->v-op ] } + { v* [ [ (simd-v*) ] 4float-array-vv->v-op ] } + { v/ [ [ (simd-v/) ] 4float-array-vv->v-op ] } + { vmin [ [ (simd-vmin) ] 4float-array-vv->v-op ] } + { vmax [ [ (simd-vmax) ] 4float-array-vv->v-op ] } + { sum [ [ (simd-sum) ] 4float-array-v->n-op ] } +} simd-vector-words + +\ 2double-array \ 2double-array-with float H{ + { v+ [ [ (simd-v+) ] 2double-array-vv->v-op ] } + { v- [ [ (simd-v-) ] 2double-array-vv->v-op ] } + { v* [ [ (simd-v*) ] 2double-array-vv->v-op ] } + { v/ [ [ (simd-v/) ] 2double-array-vv->v-op ] } + { vmin [ [ (simd-vmin) ] 2double-array-vv->v-op ] } + { vmax [ [ (simd-vmax) ] 2double-array-vv->v-op ] } + { sum [ [ (simd-sum) ] 2double-array-v->n-op ] } +} simd-vector-words + +>> + +! Synthesize 256-bit vectors from a pair of 128-bit vectors +! Functorize this later so that we can do it for integers, etc +TUPLE: 4double-array +{ underlying1 byte-array initial: $[ 16 ] read-only } +{ underlying2 byte-array initial: $[ 16 ] read-only } ; + +: <4double-array> ( -- simd-array ) + 16 16 4double-array boa ; inline + +: (4double-array) ( -- simd-array ) + 16 (byte-array) 16 (byte-array) 4double-array boa ; inline + +M: 4double-array clone + [ underlying1>> clone ] [ underlying2>> clone ] bi + 4double-array boa ; inline + +M: 4double-array length drop 4 ; inline + +> ] [ [ 2 - ] dip underlying2>> ] if + 2 swap double-array boa ; inline + +PRIVATE> + +M: 4double-array nth-unsafe + 4double-array-deref nth-unsafe ; inline + +M: 4double-array set-nth-unsafe + 4double-array-deref set-nth-unsafe ; inline + +: >4double-array ( seq -- simd-array ) + 4double-array new clone-like ; + +M: 4double-array like + drop dup 4double-array? [ >4double-array ] unless ; inline + +M: 4double-array new-sequence + drop dup 4 = [ drop (4double-array) ] [ 4 bad-length ] if ; inline + +M: 4double-array equal? + over 4double-array? [ sequence= ] [ 2drop f ] if ; + +M: 4double-array byte-length drop 32 ; inline + +SYNTAX: 4double-array{ + \ } [ >4double-array ] parse-literal ; + +M: 4double-array pprint-delims + drop \ 4double-array{ \ } ; + +M: 4double-array >pprint-sequence ; + +M: 4double-array pprint* pprint-object ; + +INSTANCE: 4double-array sequence + +: 4double-array-with ( x -- simd-array ) + dup [ >float 2double-array-rep (simd-broadcast) ] bi@ + 4double-array boa ; inline + +: 4double-array-boa ( a b c d -- simd-array ) + [ >float ] 4 napply [ 2double-array-rep (simd-gather-2) ] 2bi@ + 4double-array boa ; inline + +! SIMD operations on 4double-arrays + +v-op ( v1 v2 quot -- v3 ) + [ [ [ underlying1>> ] bi@ 2double-array-rep ] dip call ] + [ [ [ underlying2>> ] bi@ 2double-array-rep ] dip call ] 3bi + 4double-array boa ; inline + +: 4double-array-v->n-op ( v1 quot scalar-quot -- v2 ) + [ + [ [ underlying1>> 2double-array-rep ] dip call ] + [ [ underlying2>> 2double-array-rep ] dip call ] 2bi + ] dip call ; inline + +PRIVATE> + +<< + +\ 4double-array \ 4double-array-with float H{ + { v+ [ [ (simd-v+) ] 4double-array-vv->v-op ] } + { v- [ [ (simd-v-) ] 4double-array-vv->v-op ] } + { v* [ [ (simd-v*) ] 4double-array-vv->v-op ] } + { v/ [ [ (simd-v/) ] 4double-array-vv->v-op ] } + { vmin [ [ (simd-vmin) ] 4double-array-vv->v-op ] } + { vmax [ [ (simd-vmax) ] 4double-array-vv->v-op ] } + { sum [ [ (simd-sum) ] [ + ] 4double-array-v->n-op ] } +} simd-vector-words + +>> + +USE: vocabs.loader + +"math.vectors.simd.alien" require diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor index c9db3e02b3..485cbaeca0 100644 --- a/basis/math/vectors/specialization/specialization.factor +++ b/basis/math/vectors/specialization/specialization.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: words kernel make sequences effects kernel.private accessors combinators math math.intervals math.vectors namespaces assocs fry -splitting classes.algebra generalizations +splitting classes.algebra generalizations locals compiler.tree.propagation.info ; IN: math.vectors.specialization @@ -67,6 +67,7 @@ H{ { vmin { +vector+ +vector+ -> +vector+ } } { vneg { +vector+ -> +vector+ } } { vtruncate { +vector+ -> +vector+ } } + { sum { +vector+ -> +scalar+ } } } SYMBOL: specializations @@ -82,19 +83,23 @@ specializations [ vector-words keys [ V{ } clone ] H{ } map>assoc ] initialize : outputs ( schema -- seq ) { -> } split second ; -: specialize-vector-word ( word array-type elt-type -- word' ) +: loop-vector-op ( word array-type elt-type -- word' ) pick word-schema [ inputs (specialize-vector-word) ] [ outputs record-output-signature ] 3bi ; -: input-signature ( word -- signature ) def>> first ; +:: specialize-vector-word ( word array-type elt-type simd -- word/quot' ) + word simd key? [ word simd at ] [ word array-type elt-type loop-vector-op ] if ; -: specialize-vector-words ( array-type elt-type -- ) - [ vector-words keys ] 2dip - '[ - [ _ _ specialize-vector-word ] keep - [ dup input-signature ] dip - add-specialization +:: input-signature ( word array-type elt-type -- signature ) + array-type elt-type word word-schema inputs signature-for-schema ; + +:: specialize-vector-words ( array-type elt-type simd -- ) + vector-words keys [ + [ array-type elt-type simd specialize-vector-word ] + [ array-type elt-type input-signature ] + [ ] + tri add-specialization ] each ; : find-specialization ( classes word -- word/f )