Initial implementation of SSE vector intrinsics:
- cpu.architecture: add SSE vector representations - compiler.cfg.intrinsics.alien: remove an attempt at optimization that value numbering handles now - compiler.cfg.representations: support instructions where the representation is set in the 'rep' slot, and support conversions between single and double floats - alien-float, set-alien-float now use the single float representation, and the conversion is implicit; this fixes a long-standing bug where a register could get clobbered because of how %set-alien-float was defined on x86 - math.vectors.specialization: add support for SIMD specialization (where the vector word's body is replaced by another quotation), also specialize the 'sum' word - math.vectors.simd: 4float-array, 2double-array, 4double-array types, and specializers for the math.vectors wordsdb4
parent
c719b92363
commit
ff8c70dbe0
|
@ -91,6 +91,11 @@ INSN: ##set-string-nth-fast
|
|||
use: src/int-rep obj/int-rep index/int-rep
|
||||
temp: temp/int-rep ;
|
||||
|
||||
PURE-INSN: ##copy
|
||||
def: dst
|
||||
use: src
|
||||
literal: rep ;
|
||||
|
||||
! Integer arithmetic
|
||||
PURE-INSN: ##add
|
||||
def: dst/int-rep
|
||||
|
@ -201,6 +206,15 @@ use: src/int-rep
|
|||
temp: temp/int-rep ;
|
||||
|
||||
! Float arithmetic
|
||||
PURE-INSN: ##unbox-float
|
||||
def: dst/double-float-rep
|
||||
use: src/int-rep ;
|
||||
|
||||
PURE-INSN: ##box-float
|
||||
def: dst/int-rep
|
||||
use: src/double-float-rep
|
||||
temp: temp/int-rep ;
|
||||
|
||||
PURE-INSN: ##add-float
|
||||
def: dst/double-float-rep
|
||||
use: src1/double-float-rep src2/double-float-rep ;
|
||||
|
@ -240,6 +254,15 @@ def: dst/double-float-rep
|
|||
use: src1/double-float-rep src2/double-float-rep
|
||||
literal: func ;
|
||||
|
||||
! Single/double float conversion
|
||||
PURE-INSN: ##single>double-float
|
||||
def: dst/double-float-rep
|
||||
use: src/single-float-rep ;
|
||||
|
||||
PURE-INSN: ##double>single-float
|
||||
def: dst/single-float-rep
|
||||
use: src/double-float-rep ;
|
||||
|
||||
! Float/integer conversion
|
||||
PURE-INSN: ##float>integer
|
||||
def: dst/int-rep
|
||||
|
@ -249,26 +272,80 @@ PURE-INSN: ##integer>float
|
|||
def: dst/double-float-rep
|
||||
use: src/int-rep ;
|
||||
|
||||
! Boxing and unboxing
|
||||
PURE-INSN: ##copy
|
||||
! SIMD operations
|
||||
|
||||
INSN: ##box-vector
|
||||
def: dst/int-rep
|
||||
use: src
|
||||
literal: rep
|
||||
temp: temp/int-rep ;
|
||||
|
||||
INSN: ##unbox-vector
|
||||
def: dst
|
||||
use: src/int-rep
|
||||
literal: rep ;
|
||||
|
||||
INSN: ##broadcast-vector
|
||||
def: dst
|
||||
use: src/scalar-rep
|
||||
literal: rep ;
|
||||
|
||||
INSN: ##gather-vector-2
|
||||
def: dst
|
||||
use: src1/scalar-rep src2/scalar-rep
|
||||
literal: rep ;
|
||||
|
||||
INSN: ##gather-vector-4
|
||||
def: dst
|
||||
use: src1/scalar-rep src2/scalar-rep src3/scalar-rep src4/scalar-rep
|
||||
literal: rep ;
|
||||
|
||||
INSN: ##add-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
INSN: ##sub-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
INSN: ##mul-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
INSN: ##div-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
INSN: ##min-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
INSN: ##max-vector
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
INSN: ##sqrt-vector
|
||||
def: dst
|
||||
use: src
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##unbox-float
|
||||
def: dst/double-float-rep
|
||||
use: src/int-rep ;
|
||||
INSN: ##horizontal-add-vector
|
||||
def: dst/scalar-rep
|
||||
use: src
|
||||
literal: rep ;
|
||||
|
||||
! Boxing and unboxing aliens
|
||||
PURE-INSN: ##unbox-any-c-ptr
|
||||
def: dst/int-rep
|
||||
use: src/int-rep
|
||||
temp: temp/int-rep ;
|
||||
|
||||
PURE-INSN: ##box-float
|
||||
def: dst/int-rep
|
||||
use: src/double-float-rep
|
||||
temp: temp/int-rep ;
|
||||
|
||||
PURE-INSN: ##box-alien
|
||||
def: dst/int-rep
|
||||
use: src/int-rep
|
||||
|
@ -322,13 +399,18 @@ def: dst/int-rep
|
|||
use: src/int-rep ;
|
||||
|
||||
INSN: ##alien-float
|
||||
def: dst/double-float-rep
|
||||
def: dst/single-float-rep
|
||||
use: src/int-rep ;
|
||||
|
||||
INSN: ##alien-double
|
||||
def: dst/double-float-rep
|
||||
use: src/int-rep ;
|
||||
|
||||
INSN: ##alien-vector
|
||||
def: dst
|
||||
use: src/int-rep
|
||||
literal: rep ;
|
||||
|
||||
INSN: ##set-alien-integer-1
|
||||
use: src/int-rep value/int-rep ;
|
||||
|
||||
|
@ -342,11 +424,15 @@ INSN: ##set-alien-cell
|
|||
use: src/int-rep value/int-rep ;
|
||||
|
||||
INSN: ##set-alien-float
|
||||
use: src/int-rep value/double-float-rep ;
|
||||
use: src/int-rep value/single-float-rep ;
|
||||
|
||||
INSN: ##set-alien-double
|
||||
use: src/int-rep value/double-float-rep ;
|
||||
|
||||
INSN: ##set-alien-vector
|
||||
use: src/int-rep value
|
||||
literal: rep ;
|
||||
|
||||
! Memory allocation
|
||||
INSN: ##allot
|
||||
def: dst/int-rep
|
||||
|
@ -510,6 +596,7 @@ literal: n ;
|
|||
UNION: ##allocation
|
||||
##allot
|
||||
##box-float
|
||||
##box-vector
|
||||
##box-alien
|
||||
##box-displaced-alien
|
||||
##integer>bignum ;
|
||||
|
|
|
@ -7,11 +7,20 @@ IN: compiler.cfg.instructions.syntax
|
|||
|
||||
SYMBOLS: def use temp literal constant ;
|
||||
|
||||
SYMBOL: scalar-rep
|
||||
|
||||
TUPLE: insn-slot-spec type name rep ;
|
||||
|
||||
: parse-rep ( str/f -- rep )
|
||||
{
|
||||
{ [ dup not ] [ ] }
|
||||
{ [ dup "scalar-rep" = ] [ drop scalar-rep ] }
|
||||
[ "cpu.architecture" lookup ]
|
||||
} cond ;
|
||||
|
||||
: parse-insn-slot-spec ( type string -- spec )
|
||||
over [ "Missing type" throw ] unless
|
||||
"/" split1 dup [ "cpu.architecture" lookup ] when
|
||||
"/" split1 parse-rep
|
||||
insn-slot-spec boa ;
|
||||
|
||||
: parse-insn-slot-specs ( seq -- specs )
|
||||
|
|
|
@ -20,22 +20,14 @@ IN: compiler.cfg.intrinsics.alien
|
|||
^^box-displaced-alien ds-push
|
||||
] [ emit-primitive ] if ;
|
||||
|
||||
: (prepare-alien-accessor-imm) ( class offset -- offset-vreg )
|
||||
ds-drop [ ds-pop swap ^^unbox-c-ptr ] dip ^^add-imm ;
|
||||
|
||||
: (prepare-alien-accessor) ( class -- offset-vreg )
|
||||
[ 2inputs ^^untag-fixnum swap ] dip ^^unbox-c-ptr ^^add ;
|
||||
|
||||
: prepare-alien-accessor ( infos -- offset-vreg )
|
||||
<reversed> [ second class>> ] [ first ] bi
|
||||
dup value-info-small-fixnum? [
|
||||
literal>> (prepare-alien-accessor-imm)
|
||||
] [ drop (prepare-alien-accessor) ] if ;
|
||||
<reversed> second class>>
|
||||
[ 2inputs ^^untag-fixnum swap ] dip ^^unbox-c-ptr ^^add ;
|
||||
|
||||
:: inline-alien ( node quot test -- )
|
||||
[let | infos [ node node-input-infos ] |
|
||||
infos test call
|
||||
[ infos prepare-alien-accessor quot call ]
|
||||
[ infos quot call ]
|
||||
[ node emit-primitive ]
|
||||
if
|
||||
] ; inline
|
||||
|
@ -46,7 +38,7 @@ IN: compiler.cfg.intrinsics.alien
|
|||
bi and ;
|
||||
|
||||
: inline-alien-getter ( node quot -- )
|
||||
'[ @ ds-push ]
|
||||
'[ prepare-alien-accessor @ ds-push ]
|
||||
[ inline-alien-getter? ] inline-alien ; inline
|
||||
|
||||
: inline-alien-setter? ( infos class -- ? )
|
||||
|
@ -56,18 +48,17 @@ IN: compiler.cfg.intrinsics.alien
|
|||
tri and and ;
|
||||
|
||||
: inline-alien-integer-setter ( node quot -- )
|
||||
'[ ds-pop ^^untag-fixnum @ ]
|
||||
'[ prepare-alien-accessor ds-pop ^^untag-fixnum @ ]
|
||||
[ fixnum inline-alien-setter? ]
|
||||
inline-alien ; inline
|
||||
|
||||
: inline-alien-cell-setter ( node quot -- )
|
||||
[ dup node-input-infos first class>> ] dip
|
||||
'[ ds-pop _ ^^unbox-c-ptr @ ]
|
||||
'[ [ prepare-alien-accessor ds-pop ] [ first class>> ] bi ^^unbox-c-ptr @ ]
|
||||
[ pinned-c-ptr inline-alien-setter? ]
|
||||
inline-alien ; inline
|
||||
|
||||
: inline-alien-float-setter ( node quot -- )
|
||||
'[ ds-pop @ ]
|
||||
'[ prepare-alien-accessor ds-pop @ ]
|
||||
[ float inline-alien-setter? ]
|
||||
inline-alien ; inline
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@ compiler.cfg.intrinsics.alien
|
|||
compiler.cfg.intrinsics.allot
|
||||
compiler.cfg.intrinsics.fixnum
|
||||
compiler.cfg.intrinsics.float
|
||||
compiler.cfg.intrinsics.simd
|
||||
compiler.cfg.intrinsics.slots
|
||||
compiler.cfg.intrinsics.misc
|
||||
compiler.cfg.comparisons ;
|
||||
|
@ -22,6 +23,9 @@ QUALIFIED: classes.tuple.private
|
|||
QUALIFIED: math.private
|
||||
QUALIFIED: math.integers.private
|
||||
QUALIFIED: math.floats.private
|
||||
QUALIFIED: math.vectors.simd
|
||||
QUALIFIED: math.vectors.simd.private
|
||||
QUALIFIED: math.vectors.simd.alien
|
||||
QUALIFIED: math.libm
|
||||
IN: compiler.cfg.intrinsics
|
||||
|
||||
|
@ -142,5 +146,27 @@ IN: compiler.cfg.intrinsics
|
|||
{ math.integers.private:fixnum-log2 [ drop emit-fixnum-log2 ] }
|
||||
} enable-intrinsics ;
|
||||
|
||||
: enable-sse2-simd ( -- )
|
||||
{
|
||||
{ math.vectors.simd.private:assert-positive [ drop ] }
|
||||
{ math.vectors.simd.private:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.private:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.private:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.private:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.private:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.private:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.private:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.private:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.private:(simd-gather-2) [ emit-gather-vector-2 ] }
|
||||
{ math.vectors.simd.private:(simd-gather-4) [ emit-gather-vector-4 ] }
|
||||
{ math.vectors.simd.alien:alien-vector [ emit-alien-vector ] }
|
||||
{ math.vectors.simd.alien:set-alien-vector [ emit-set-alien-vector ] }
|
||||
} enable-intrinsics ;
|
||||
|
||||
: enable-sse3-simd ( -- )
|
||||
{
|
||||
{ math.vectors.simd.private:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
|
||||
} enable-intrinsics ;
|
||||
|
||||
: emit-intrinsic ( node word -- )
|
||||
"intrinsic" word-prop call( node -- ) ;
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Slava Pestov
|
|
@ -0,0 +1,57 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors byte-arrays fry cpu.architecture kernel
|
||||
sequences compiler.tree.propagation.info
|
||||
compiler.cfg.builder.blocks compiler.cfg.stacks
|
||||
compiler.cfg.stacks.local compiler.cfg.hats
|
||||
compiler.cfg.instructions compiler.cfg.registers
|
||||
compiler.cfg.intrinsics.alien ;
|
||||
IN: compiler.cfg.intrinsics.simd
|
||||
|
||||
: emit-vector-op ( node quot: ( rep -- ) -- )
|
||||
[ dup node-input-infos last literal>> ] dip over representation?
|
||||
[ [ drop ] 2dip call ] [ 2drop emit-primitive ] if ; inline
|
||||
|
||||
: emit-binary-vector-op ( node quot -- )
|
||||
'[ [ ds-drop 2inputs ] dip @ ds-push ] emit-vector-op ; inline
|
||||
|
||||
: emit-unary-vector-op ( node quot -- )
|
||||
'[ [ ds-drop ds-pop ] dip @ ds-push ] emit-vector-op ; inline
|
||||
|
||||
: emit-gather-vector-2 ( node -- )
|
||||
[ ^^gather-vector-2 ] emit-binary-vector-op ;
|
||||
|
||||
: emit-gather-vector-4 ( node -- )
|
||||
[
|
||||
ds-drop
|
||||
[
|
||||
D 3 peek-loc
|
||||
D 2 peek-loc
|
||||
D 1 peek-loc
|
||||
D 0 peek-loc
|
||||
-4 inc-d
|
||||
] dip
|
||||
^^gather-vector-4
|
||||
ds-push
|
||||
] emit-vector-op ;
|
||||
|
||||
: inline-alien-vector-setter ( node quot -- )
|
||||
'[ ds-drop prepare-alien-accessor ds-pop @ ]
|
||||
[ byte-array inline-alien-setter? ]
|
||||
inline-alien ; inline
|
||||
|
||||
: emit-alien-vector ( node -- )
|
||||
dup [
|
||||
'[
|
||||
ds-drop prepare-alien-accessor
|
||||
_ ^^alien-vector ds-push
|
||||
]
|
||||
[ inline-alien-getter? ] inline-alien
|
||||
] with emit-vector-op ;
|
||||
|
||||
: emit-set-alien-vector ( node -- )
|
||||
dup [
|
||||
'[
|
||||
_ ##set-alien-vector
|
||||
] inline-alien-vector-setter
|
||||
] with emit-vector-op ;
|
|
@ -1,7 +1,7 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel accessors sequences arrays fry namespaces generic
|
||||
words sets cpu.architecture compiler.units
|
||||
words sets combinators generalizations cpu.architecture compiler.units
|
||||
compiler.cfg.utilities compiler.cfg compiler.cfg.rpo
|
||||
compiler.cfg.instructions compiler.cfg.instructions.syntax
|
||||
compiler.cfg.def-use ;
|
||||
|
@ -13,35 +13,41 @@ GENERIC: uses-vreg-reps ( insn -- reps )
|
|||
|
||||
<PRIVATE
|
||||
|
||||
: rep-getter-quot ( rep -- quot )
|
||||
{
|
||||
{ f [ [ rep>> ] ] }
|
||||
{ scalar-rep [ [ rep>> scalar-rep-of ] ] }
|
||||
[ '[ _ nip ] ]
|
||||
} case ;
|
||||
|
||||
: define-defs-vreg-rep-method ( insn -- )
|
||||
[ \ defs-vreg-rep create-method ]
|
||||
[ insn-def-slot dup [ rep>> ] when '[ drop _ ] ] bi
|
||||
define ;
|
||||
[ insn-def-slot [ rep>> rep-getter-quot ] [ [ drop f ] ] if* ]
|
||||
bi define ;
|
||||
|
||||
: reps-getter-quot ( reps -- quot )
|
||||
[ rep>> rep-getter-quot ] map dup length '[ _ cleave _ narray ] ;
|
||||
|
||||
: define-uses-vreg-reps-method ( insn -- )
|
||||
[ \ uses-vreg-reps create-method ]
|
||||
[ insn-use-slots [ rep>> ] map '[ drop _ ] ] bi
|
||||
define ;
|
||||
[ insn-use-slots reps-getter-quot ]
|
||||
bi define ;
|
||||
|
||||
: define-temp-vreg-reps-method ( insn -- )
|
||||
[ \ temp-vreg-reps create-method ]
|
||||
[ insn-temp-slots [ rep>> ] map '[ drop _ ] ] bi
|
||||
define ;
|
||||
[ insn-temp-slots reps-getter-quot ]
|
||||
bi define ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
[
|
||||
insn-classes get
|
||||
[ { ##copy } diff [ define-defs-vreg-rep-method ] each ]
|
||||
[ { ##copy ##phi } diff [ define-uses-vreg-reps-method ] each ]
|
||||
[ [ define-defs-vreg-rep-method ] each ]
|
||||
[ { ##phi } diff [ define-uses-vreg-reps-method ] each ]
|
||||
[ [ define-temp-vreg-reps-method ] each ]
|
||||
tri
|
||||
] with-compilation-unit
|
||||
|
||||
M: ##copy defs-vreg-rep rep>> ;
|
||||
|
||||
M: ##copy uses-vreg-reps rep>> 1array ;
|
||||
|
||||
: each-def-rep ( insn vreg-quot: ( vreg rep -- ) -- )
|
||||
[ [ defs-vreg ] [ defs-vreg-rep ] bi ] dip with when* ; inline
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@ arrays combinators make locals deques dlists
|
|||
cpu.architecture compiler.utilities
|
||||
compiler.cfg
|
||||
compiler.cfg.rpo
|
||||
compiler.cfg.hats
|
||||
compiler.cfg.registers
|
||||
compiler.cfg.instructions
|
||||
compiler.cfg.def-use
|
||||
|
@ -16,13 +17,47 @@ IN: compiler.cfg.representations
|
|||
|
||||
! Virtual register representation selection.
|
||||
|
||||
ERROR: bad-conversion dst src dst-rep src-rep ;
|
||||
|
||||
GENERIC: emit-box ( dst src rep -- )
|
||||
GENERIC: emit-unbox ( dst src rep -- )
|
||||
|
||||
M: single-float-rep emit-box
|
||||
drop
|
||||
[ double-float-rep next-vreg-rep dup ] dip ##single>double-float
|
||||
int-rep next-vreg-rep ##box-float ;
|
||||
|
||||
M: single-float-rep emit-unbox
|
||||
drop
|
||||
[ double-float-rep next-vreg-rep dup ] dip ##unbox-float
|
||||
##double>single-float ;
|
||||
|
||||
M: double-float-rep emit-box
|
||||
drop
|
||||
int-rep next-vreg-rep ##box-float ;
|
||||
|
||||
M: double-float-rep emit-unbox
|
||||
drop ##unbox-float ;
|
||||
|
||||
M: vector-rep emit-box
|
||||
int-rep next-vreg-rep ##box-vector ;
|
||||
|
||||
M: vector-rep emit-unbox
|
||||
##unbox-vector ;
|
||||
|
||||
: emit-conversion ( dst src dst-rep src-rep -- )
|
||||
2array {
|
||||
{ { int-rep int-rep } [ int-rep ##copy ] }
|
||||
{ { double-float-rep double-float-rep } [ double-float-rep ##copy ] }
|
||||
{ { double-float-rep int-rep } [ ##unbox-float ] }
|
||||
{ { int-rep double-float-rep } [ int-rep next-vreg-rep ##box-float ] }
|
||||
} case ;
|
||||
{
|
||||
{ [ 2dup eq? ] [ drop ##copy ] }
|
||||
{ [ dup int-rep eq? ] [ drop emit-unbox ] }
|
||||
{ [ over int-rep eq? ] [ nip emit-box ] }
|
||||
[
|
||||
2array {
|
||||
{ { double-float-rep single-float-rep } [ ##single>double-float ] }
|
||||
{ { single-float-rep double-float-rep } [ ##double>single-float ] }
|
||||
[ first2 bad-conversion ]
|
||||
} case
|
||||
]
|
||||
} cond ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
|
|
|
@ -45,7 +45,13 @@ UNION: two-operand-insn
|
|||
##mul-float
|
||||
##div-float
|
||||
##min-float
|
||||
##max-float ;
|
||||
##max-float
|
||||
##add-vector
|
||||
##sub-vector
|
||||
##mul-vector
|
||||
##div-vector
|
||||
##min-vector
|
||||
##max-vector ;
|
||||
|
||||
GENERIC: convert-two-operand* ( insn -- )
|
||||
|
||||
|
|
|
@ -144,8 +144,11 @@ CODEGEN: ##min %min
|
|||
CODEGEN: ##max %max
|
||||
CODEGEN: ##not %not
|
||||
CODEGEN: ##log2 %log2
|
||||
CODEGEN: ##copy %copy
|
||||
CODEGEN: ##integer>bignum %integer>bignum
|
||||
CODEGEN: ##bignum>integer %bignum>integer
|
||||
CODEGEN: ##unbox-float %unbox-float
|
||||
CODEGEN: ##box-float %box-float
|
||||
CODEGEN: ##add-float %add-float
|
||||
CODEGEN: ##sub-float %sub-float
|
||||
CODEGEN: ##mul-float %mul-float
|
||||
|
@ -155,12 +158,24 @@ CODEGEN: ##max-float %max-float
|
|||
CODEGEN: ##sqrt %sqrt
|
||||
CODEGEN: ##unary-float-function %unary-float-function
|
||||
CODEGEN: ##binary-float-function %binary-float-function
|
||||
CODEGEN: ##single>double-float %single>double-float
|
||||
CODEGEN: ##double>single-float %double>single-float
|
||||
CODEGEN: ##integer>float %integer>float
|
||||
CODEGEN: ##float>integer %float>integer
|
||||
CODEGEN: ##copy %copy
|
||||
CODEGEN: ##unbox-float %unbox-float
|
||||
CODEGEN: ##unbox-vector %unbox-vector
|
||||
CODEGEN: ##broadcast-vector %broadcast-vector
|
||||
CODEGEN: ##gather-vector-2 %gather-vector-2
|
||||
CODEGEN: ##gather-vector-4 %gather-vector-4
|
||||
CODEGEN: ##box-vector %box-vector
|
||||
CODEGEN: ##add-vector %add-vector
|
||||
CODEGEN: ##sub-vector %sub-vector
|
||||
CODEGEN: ##mul-vector %mul-vector
|
||||
CODEGEN: ##div-vector %div-vector
|
||||
CODEGEN: ##min-vector %min-vector
|
||||
CODEGEN: ##max-vector %max-vector
|
||||
CODEGEN: ##sqrt-vector %sqrt-vector
|
||||
CODEGEN: ##horizontal-add-vector %horizontal-add-vector
|
||||
CODEGEN: ##unbox-any-c-ptr %unbox-any-c-ptr
|
||||
CODEGEN: ##box-float %box-float
|
||||
CODEGEN: ##box-alien %box-alien
|
||||
CODEGEN: ##box-displaced-alien %box-displaced-alien
|
||||
CODEGEN: ##alien-unsigned-1 %alien-unsigned-1
|
||||
|
@ -172,12 +187,14 @@ CODEGEN: ##alien-signed-4 %alien-signed-4
|
|||
CODEGEN: ##alien-cell %alien-cell
|
||||
CODEGEN: ##alien-float %alien-float
|
||||
CODEGEN: ##alien-double %alien-double
|
||||
CODEGEN: ##alien-vector %alien-vector
|
||||
CODEGEN: ##set-alien-integer-1 %set-alien-integer-1
|
||||
CODEGEN: ##set-alien-integer-2 %set-alien-integer-2
|
||||
CODEGEN: ##set-alien-integer-4 %set-alien-integer-4
|
||||
CODEGEN: ##set-alien-cell %set-alien-cell
|
||||
CODEGEN: ##set-alien-float %set-alien-float
|
||||
CODEGEN: ##set-alien-double %set-alien-double
|
||||
CODEGEN: ##set-alien-vector %set-alien-vector
|
||||
CODEGEN: ##allot %allot
|
||||
CODEGEN: ##write-barrier %write-barrier
|
||||
CODEGEN: ##compare %compare
|
||||
|
|
|
@ -412,4 +412,6 @@ cell 4 = [
|
|||
[ 1 "0.169967142900241" ] [ 1.4 [ 1 swap fcos ] compile-call number>string ] unit-test
|
||||
[ 1 "0.169967142900241" ] [ 1.4 1 [ swap fcos ] compile-call number>string ] unit-test
|
||||
[ "0.169967142900241" "0.9854497299884601" ] [ 1.4 [ [ fcos ] [ fsin ] bi ] compile-call [ number>string ] bi@ ] unit-test
|
||||
[ 1 "0.169967142900241" "0.9854497299884601" ] [ 1.4 1 [ swap >float [ fcos ] [ fsin ] bi ] compile-call [ number>string ] bi@ ] unit-test
|
||||
[ 1 "0.169967142900241" "0.9854497299884601" ] [ 1.4 1 [ swap >float [ fcos ] [ fsin ] bi ] compile-call [ number>string ] bi@ ] unit-test
|
||||
|
||||
[ 6.0 ] [ 1.0 [ >float 3.0 + [ B{ 0 0 0 0 } 0 set-alien-float ] [ 2.0 + ] bi ] compile-call ] unit-test
|
|
@ -0,0 +1,43 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors byte-arrays combinators
|
||||
compiler.tree.propagation.info cpu.architecture kernel words math
|
||||
math.intervals math.vectors.simd math.vectors.simd.private
|
||||
math.vectors.simd.alien ;
|
||||
IN: compiler.tree.propagation.simd
|
||||
|
||||
\ (simd-v+) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-v-) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-v*) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-v/) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-vmin) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-vmax) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-vsqrt) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-sum) [
|
||||
nip dup literal?>> [
|
||||
literal>> scalar-rep-of {
|
||||
{ single-float-rep [ float ] }
|
||||
{ double-float-rep [ float ] }
|
||||
} case
|
||||
] [ drop real ] if
|
||||
<class-info>
|
||||
] "outputs" set-word-prop
|
||||
|
||||
\ (simd-broadcast) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-gather-2) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ (simd-gather-4) { byte-array } "default-output-classes" set-word-prop
|
||||
|
||||
\ assert-positive [
|
||||
real [0,inf] <class/interval-info> value-info-intersect
|
||||
] "outputs" set-word-prop
|
||||
|
||||
\ alien-vector { byte-array } "default-output-classes" set-word-prop
|
|
@ -20,7 +20,33 @@ SINGLETONS: tagged-rep int-rep ;
|
|||
! one of these representations
|
||||
SINGLETONS: single-float-rep double-float-rep ;
|
||||
|
||||
UNION: representation any-rep tagged-rep int-rep single-float-rep double-float-rep ;
|
||||
SINGLETONS:
|
||||
4float-array-rep
|
||||
2double-array-rep
|
||||
16char-array-rep
|
||||
16uchar-array-rep
|
||||
8short-array-rep
|
||||
8ushort-array-rep
|
||||
4int-array-rep
|
||||
4uint-array-rep ;
|
||||
|
||||
UNION: vector-rep
|
||||
4float-array-rep
|
||||
2double-array-rep
|
||||
16char-array-rep
|
||||
16uchar-array-rep
|
||||
8short-array-rep
|
||||
8ushort-array-rep
|
||||
4int-array-rep
|
||||
4uint-array-rep ;
|
||||
|
||||
UNION: representation
|
||||
any-rep
|
||||
tagged-rep
|
||||
int-rep
|
||||
single-float-rep
|
||||
double-float-rep
|
||||
vector-rep ;
|
||||
|
||||
! Register classes
|
||||
SINGLETONS: int-regs float-regs ;
|
||||
|
@ -31,23 +57,28 @@ CONSTANT: reg-classes { int-regs float-regs }
|
|||
! A pseudo-register class for parameters spilled on the stack
|
||||
SINGLETON: stack-params
|
||||
|
||||
: reg-class-of ( rep -- reg-class )
|
||||
{
|
||||
{ tagged-rep [ int-regs ] }
|
||||
{ int-rep [ int-regs ] }
|
||||
{ single-float-rep [ float-regs ] }
|
||||
{ double-float-rep [ float-regs ] }
|
||||
{ stack-params [ stack-params ] }
|
||||
} case ;
|
||||
GENERIC: reg-class-of ( rep -- reg-class )
|
||||
|
||||
: rep-size ( rep -- n )
|
||||
{
|
||||
{ tagged-rep [ cell ] }
|
||||
{ int-rep [ cell ] }
|
||||
{ single-float-rep [ 4 ] }
|
||||
{ double-float-rep [ 8 ] }
|
||||
{ stack-params [ cell ] }
|
||||
} case ;
|
||||
M: tagged-rep reg-class-of drop int-regs ;
|
||||
M: int-rep reg-class-of drop int-regs ;
|
||||
M: single-float-rep reg-class-of drop float-regs ;
|
||||
M: double-float-rep reg-class-of drop float-regs ;
|
||||
M: vector-rep reg-class-of drop float-regs ;
|
||||
M: stack-params reg-class-of drop stack-params ;
|
||||
|
||||
GENERIC: rep-size ( rep -- n )
|
||||
|
||||
M: tagged-rep rep-size drop cell ;
|
||||
M: int-rep rep-size drop cell ;
|
||||
M: single-float-rep rep-size drop 4 ;
|
||||
M: double-float-rep rep-size drop 8 ;
|
||||
M: stack-params rep-size drop cell ;
|
||||
M: vector-rep rep-size drop 16 ;
|
||||
|
||||
GENERIC: scalar-rep-of ( rep -- rep' )
|
||||
|
||||
M: 4float-array-rep scalar-rep-of drop single-float-rep ;
|
||||
M: 2double-array-rep scalar-rep-of drop double-float-rep ;
|
||||
|
||||
! Mapping from register class to machine registers
|
||||
HOOK: machine-registers cpu ( -- assoc )
|
||||
|
@ -101,6 +132,8 @@ HOOK: %max cpu ( dst src1 src2 -- )
|
|||
HOOK: %not cpu ( dst src -- )
|
||||
HOOK: %log2 cpu ( dst src -- )
|
||||
|
||||
HOOK: %copy cpu ( dst src rep -- )
|
||||
|
||||
HOOK: %fixnum-add cpu ( label dst src1 src2 -- )
|
||||
HOOK: %fixnum-sub cpu ( label dst src1 src2 -- )
|
||||
HOOK: %fixnum-mul cpu ( label dst src1 src2 -- )
|
||||
|
@ -108,6 +141,9 @@ HOOK: %fixnum-mul cpu ( label dst src1 src2 -- )
|
|||
HOOK: %integer>bignum cpu ( dst src temp -- )
|
||||
HOOK: %bignum>integer cpu ( dst src temp -- )
|
||||
|
||||
HOOK: %unbox-float cpu ( dst src -- )
|
||||
HOOK: %box-float cpu ( dst src temp -- )
|
||||
|
||||
HOOK: %add-float cpu ( dst src1 src2 -- )
|
||||
HOOK: %sub-float cpu ( dst src1 src2 -- )
|
||||
HOOK: %mul-float cpu ( dst src1 src2 -- )
|
||||
|
@ -118,13 +154,29 @@ HOOK: %sqrt cpu ( dst src -- )
|
|||
HOOK: %unary-float-function cpu ( dst src func -- )
|
||||
HOOK: %binary-float-function cpu ( dst src1 src2 func -- )
|
||||
|
||||
HOOK: %single>double-float cpu ( dst src -- )
|
||||
HOOK: %double>single-float cpu ( dst src -- )
|
||||
|
||||
HOOK: %integer>float cpu ( dst src -- )
|
||||
HOOK: %float>integer cpu ( dst src -- )
|
||||
|
||||
HOOK: %copy cpu ( dst src rep -- )
|
||||
HOOK: %unbox-float cpu ( dst src -- )
|
||||
HOOK: %box-vector cpu ( dst src temp rep -- )
|
||||
HOOK: %unbox-vector cpu ( dst src rep -- )
|
||||
|
||||
HOOK: %broadcast-vector cpu ( dst src rep -- )
|
||||
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
|
||||
|
||||
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %div-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %min-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %sqrt-vector cpu ( dst src rep -- )
|
||||
HOOK: %horizontal-add-vector cpu ( dst src rep -- )
|
||||
|
||||
HOOK: %unbox-any-c-ptr cpu ( dst src temp -- )
|
||||
HOOK: %box-float cpu ( dst src temp -- )
|
||||
HOOK: %box-alien cpu ( dst src temp -- )
|
||||
HOOK: %box-displaced-alien cpu ( dst displacement base temp1 temp2 base-class -- )
|
||||
|
||||
|
@ -137,6 +189,7 @@ HOOK: %alien-signed-4 cpu ( dst src -- )
|
|||
HOOK: %alien-cell cpu ( dst src -- )
|
||||
HOOK: %alien-float cpu ( dst src -- )
|
||||
HOOK: %alien-double cpu ( dst src -- )
|
||||
HOOK: %alien-vector cpu ( dst src rep -- )
|
||||
|
||||
HOOK: %set-alien-integer-1 cpu ( ptr value -- )
|
||||
HOOK: %set-alien-integer-2 cpu ( ptr value -- )
|
||||
|
@ -144,6 +197,7 @@ HOOK: %set-alien-integer-4 cpu ( ptr value -- )
|
|||
HOOK: %set-alien-cell cpu ( ptr value -- )
|
||||
HOOK: %set-alien-float cpu ( ptr value -- )
|
||||
HOOK: %set-alien-double cpu ( ptr value -- )
|
||||
HOOK: %set-alien-vector cpu ( ptr value rep -- )
|
||||
|
||||
HOOK: %alien-global cpu ( dst symbol library -- )
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ USING: accessors assocs alien alien.c-types arrays strings
|
|||
cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands
|
||||
cpu.architecture kernel kernel.private math memory namespaces make
|
||||
sequences words system layouts combinators math.order fry locals
|
||||
compiler.constants
|
||||
compiler.constants byte-arrays
|
||||
compiler.cfg.registers
|
||||
compiler.cfg.instructions
|
||||
compiler.cfg.intrinsics
|
||||
|
@ -130,6 +130,21 @@ M: x86 %max nip [ CMP ] [ CMOVL ] 2bi ;
|
|||
M: x86 %not drop NOT ;
|
||||
M: x86 %log2 BSR ;
|
||||
|
||||
GENERIC: copy-register* ( dst src rep -- )
|
||||
|
||||
M: int-rep copy-register* drop MOV ;
|
||||
M: tagged-rep copy-register* drop MOV ;
|
||||
M: single-float-rep copy-register* drop MOVSS ;
|
||||
M: double-float-rep copy-register* drop MOVSD ;
|
||||
M: 4float-array-rep copy-register* drop MOVUPS ;
|
||||
M: 2double-array-rep copy-register* drop MOVUPD ;
|
||||
M: vector-rep copy-register* drop MOVDQU ;
|
||||
|
||||
: copy-register ( dst src rep -- )
|
||||
2over eq? [ 3drop ] [ copy-register* ] if ;
|
||||
|
||||
M: x86 %copy ( dst src rep -- ) copy-register ;
|
||||
|
||||
:: overflow-template ( label dst src1 src2 insn -- )
|
||||
src1 src2 insn call
|
||||
label JO ; inline
|
||||
|
@ -211,24 +226,120 @@ M: x86 %min-float nip MINSD ;
|
|||
M: x86 %max-float nip MAXSD ;
|
||||
M: x86 %sqrt SQRTSD ;
|
||||
|
||||
M: x86 %single>double-float CVTSS2SD ;
|
||||
M: x86 %double>single-float CVTSD2SS ;
|
||||
|
||||
M: x86 %integer>float CVTSI2SD ;
|
||||
M: x86 %float>integer CVTTSD2SI ;
|
||||
|
||||
GENERIC: copy-register* ( dst src rep -- )
|
||||
|
||||
M: int-rep copy-register* drop MOV ;
|
||||
M: tagged-rep copy-register* drop MOV ;
|
||||
M: single-float-rep copy-register* drop MOVSS ;
|
||||
M: double-float-rep copy-register* drop MOVSD ;
|
||||
|
||||
: copy-register ( dst src rep -- )
|
||||
2over eq? [ 3drop ] [ copy-register* ] if ;
|
||||
|
||||
M: x86 %copy ( dst src rep -- ) copy-register ;
|
||||
|
||||
M: x86 %unbox-float ( dst src -- )
|
||||
float-offset [+] MOVSD ;
|
||||
|
||||
M:: x86 %box-float ( dst src temp -- )
|
||||
dst 16 float temp %allot
|
||||
dst float-offset [+] src MOVSD ;
|
||||
|
||||
M:: x86 %box-vector ( dst src rep temp -- )
|
||||
dst rep rep-size 2 cells + byte-array temp %allot
|
||||
16 tag-fixnum dst 1 byte-array tag-number %set-slot-imm
|
||||
dst byte-array-offset [+]
|
||||
src rep copy-register ;
|
||||
|
||||
M:: x86 %unbox-vector ( dst src rep -- )
|
||||
dst src byte-array-offset [+]
|
||||
rep copy-register ;
|
||||
|
||||
M: x86 %broadcast-vector ( dst src rep -- )
|
||||
{
|
||||
{ 4float-array-rep [ [ MOVAPS ] [ drop dup 0 SHUFPS ] 2bi ] }
|
||||
{ 2double-array-rep [ [ MOVAPD ] [ drop dup 0 SHUFPD ] 2bi ] }
|
||||
} case ;
|
||||
|
||||
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
|
||||
rep {
|
||||
{
|
||||
4float-array-rep
|
||||
[
|
||||
dst src1 MOVSS
|
||||
dst src2 UNPCKLPS
|
||||
src3 src4 UNPCKLPS
|
||||
dst src3 HEX: 44 SHUFPS
|
||||
]
|
||||
}
|
||||
} case ;
|
||||
|
||||
M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
|
||||
rep {
|
||||
{
|
||||
2double-array-rep
|
||||
[
|
||||
dst src1 MOVAPD
|
||||
dst src2 0 SHUFPD
|
||||
]
|
||||
}
|
||||
} case ;
|
||||
|
||||
M: x86 %add-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ 4float-array-rep [ ADDPS ] }
|
||||
{ 2double-array-rep [ ADDPD ] }
|
||||
{ 16char-array-rep [ PADDB ] }
|
||||
{ 16uchar-array-rep [ PADDB ] }
|
||||
{ 8short-array-rep [ PADDW ] }
|
||||
{ 8ushort-array-rep [ PADDW ] }
|
||||
{ 4int-array-rep [ PADDD ] }
|
||||
{ 4uint-array-rep [ PADDD ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %sub-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ 4float-array-rep [ SUBPS ] }
|
||||
{ 2double-array-rep [ SUBPD ] }
|
||||
{ 16char-array-rep [ PSUBB ] }
|
||||
{ 16uchar-array-rep [ PSUBB ] }
|
||||
{ 8short-array-rep [ PSUBW ] }
|
||||
{ 8ushort-array-rep [ PSUBW ] }
|
||||
{ 4int-array-rep [ PSUBD ] }
|
||||
{ 4uint-array-rep [ PSUBD ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %mul-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ 4float-array-rep [ MULPS ] }
|
||||
{ 2double-array-rep [ MULPD ] }
|
||||
{ 4int-array-rep [ PMULLW ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %div-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ 4float-array-rep [ DIVPS ] }
|
||||
{ 2double-array-rep [ DIVPD ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %min-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ 4float-array-rep [ MINPS ] }
|
||||
{ 2double-array-rep [ MINPD ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %max-vector ( dst src1 src2 rep -- )
|
||||
{
|
||||
{ 4float-array-rep [ MAXPS ] }
|
||||
{ 2double-array-rep [ MAXPD ] }
|
||||
} case drop ;
|
||||
|
||||
M: x86 %sqrt-vector ( dst src rep -- )
|
||||
{
|
||||
{ 4float-array-rep [ SQRTPS ] }
|
||||
{ 2double-array-rep [ SQRTPD ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %horizontal-add-vector ( dst src rep -- )
|
||||
{
|
||||
{ 4float-array-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
|
||||
{ 2double-array-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
|
||||
} case ;
|
||||
|
||||
M:: x86 %unbox-any-c-ptr ( dst src temp -- )
|
||||
[
|
||||
{ "is-byte-array" "end" "start" } [ define-label ] each
|
||||
|
@ -255,10 +366,6 @@ M:: x86 %unbox-any-c-ptr ( dst src temp -- )
|
|||
"end" resolve-label
|
||||
] with-scope ;
|
||||
|
||||
M:: x86 %box-float ( dst src temp -- )
|
||||
dst 16 float temp %allot
|
||||
dst float-offset [+] src MOVSD ;
|
||||
|
||||
: alien@ ( reg n -- op ) cells alien tag-number - [+] ;
|
||||
|
||||
:: %allot-alien ( dst displacement base temp -- )
|
||||
|
@ -405,8 +512,9 @@ M: x86 %alien-signed-2 16 %alien-signed-getter ;
|
|||
M: x86 %alien-signed-4 32 %alien-signed-getter ;
|
||||
|
||||
M: x86 %alien-cell [] MOV ;
|
||||
M: x86 %alien-float dupd [] MOVSS dup CVTSS2SD ;
|
||||
M: x86 %alien-float [] MOVSS ;
|
||||
M: x86 %alien-double [] MOVSD ;
|
||||
M: x86 %alien-vector [ [] ] dip copy-register ;
|
||||
|
||||
:: %alien-integer-setter ( ptr value size -- )
|
||||
value { ptr } size [| new-value |
|
||||
|
@ -418,8 +526,9 @@ M: x86 %set-alien-integer-1 8 %alien-integer-setter ;
|
|||
M: x86 %set-alien-integer-2 16 %alien-integer-setter ;
|
||||
M: x86 %set-alien-integer-4 32 %alien-integer-setter ;
|
||||
M: x86 %set-alien-cell [ [] ] dip MOV ;
|
||||
M: x86 %set-alien-float dup dup CVTSD2SS [ [] ] dip MOVSS ;
|
||||
M: x86 %set-alien-float [ [] ] dip MOVSS ;
|
||||
M: x86 %set-alien-double [ [] ] dip MOVSD ;
|
||||
M: x86 %set-alien-vector [ [] ] 2dip copy-register ;
|
||||
|
||||
: shift-count? ( reg -- ? ) { ECX RCX } memq? ;
|
||||
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
IN: math.vectors.simd.alien.tests
|
||||
USING: cpu.architecture math.vectors.simd accessors
|
||||
math.vectors.simd.alien kernel classes.struct tools.test
|
||||
compiler sequences byte-arrays alien math kernel.private
|
||||
specialized-arrays.float ;
|
||||
|
||||
! Vector alien intrinsics
|
||||
[ 4float-array{ 1 2 3 4 } ] [
|
||||
[
|
||||
4float-array{ 1 2 3 4 }
|
||||
underlying>> 0 4float-array-rep alien-vector
|
||||
] compile-call 4float-array boa
|
||||
] unit-test
|
||||
|
||||
[ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [
|
||||
16 [ 1 ] B{ } replicate-as 16 <byte-array>
|
||||
[
|
||||
0 [
|
||||
{ byte-array c-ptr fixnum } declare
|
||||
4float-array-rep set-alien-vector
|
||||
] compile-call
|
||||
] keep
|
||||
] unit-test
|
||||
|
||||
[ float-array{ 1 2 3 4 } ] [
|
||||
[
|
||||
float-array{ 1 2 3 4 } underlying>>
|
||||
float-array{ 4 3 2 1 } clone
|
||||
[ underlying>> 0 4float-array-rep set-alien-vector ] keep
|
||||
] compile-call
|
||||
] unit-test
|
||||
|
||||
STRUCT: simd-struct
|
||||
{ x 4float-array }
|
||||
{ y 2double-array }
|
||||
{ z 4double-array } ;
|
||||
|
||||
[ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
|
||||
|
||||
[ 4float-array{ 1 2 3 4 } 2double-array{ 2 1 } 4double-array{ 4 3 2 1 } ] [
|
||||
simd-struct <struct>
|
||||
4float-array{ 1 2 3 4 } >>x
|
||||
2double-array{ 2 1 } >>y
|
||||
4double-array{ 4 3 2 1 } >>z
|
||||
[ x>> ] [ y>> ] [ z>> ] tri
|
||||
] unit-test
|
||||
|
||||
[ 4float-array{ 1 2 3 4 } 2double-array{ 2 1 } 4double-array{ 4 3 2 1 } ] [
|
||||
[
|
||||
simd-struct <struct>
|
||||
4float-array{ 1 2 3 4 } >>x
|
||||
2double-array{ 2 1 } >>y
|
||||
4double-array{ 4 3 2 1 } >>z
|
||||
[ x>> ] [ y>> ] [ z>> ] tri
|
||||
] compile-call
|
||||
] unit-test
|
|
@ -0,0 +1,51 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: alien accessors alien.c-types byte-arrays compiler.units
|
||||
cpu.architecture libc locals kernel math math.vectors.simd
|
||||
math.vectors.simd.private ;
|
||||
IN: math.vectors.simd.alien
|
||||
|
||||
: alien-vector ( c-ptr n rep -- value )
|
||||
! Inefficient version for when intrinsics are missing
|
||||
[ swap <displaced-alien> ] dip rep-size memory>byte-array ;
|
||||
|
||||
: set-alien-vector ( value c-ptr n rep -- )
|
||||
! Inefficient version for when intrinsics are missing
|
||||
[ swap <displaced-alien> swap ] dip rep-size memcpy ;
|
||||
|
||||
:: define-simd-type ( class rep -- )
|
||||
<c-type>
|
||||
byte-array >>class
|
||||
class >>boxed-class
|
||||
[ rep alien-vector ] >>getter
|
||||
[ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
|
||||
16 >>size
|
||||
8 >>align
|
||||
rep >>rep
|
||||
[ class boa ] >>boxer-quot
|
||||
[ underlying>> ] >>unboxer-quot
|
||||
class name>> typedef ;
|
||||
|
||||
: define-4double-array-type ( -- )
|
||||
<c-type>
|
||||
4double-array >>class
|
||||
4double-array >>boxed-class
|
||||
[
|
||||
[ 2double-array-rep alien-vector ]
|
||||
[ 16 + >fixnum 2double-array-rep alien-vector ] 2bi
|
||||
4double-array boa
|
||||
] >>getter
|
||||
[
|
||||
[ [ underlying1>> ] 2dip 2double-array-rep set-alien-vector ]
|
||||
[ [ underlying2>> ] 2dip 16 + >fixnum 2double-array-rep set-alien-vector ]
|
||||
3bi
|
||||
] >>setter
|
||||
32 >>size
|
||||
8 >>align
|
||||
2double-array-rep >>rep
|
||||
"4double-array" typedef ;
|
||||
[
|
||||
4float-array 4float-array-rep define-simd-type
|
||||
2double-array 2double-array-rep define-simd-type
|
||||
define-4double-array-type
|
||||
] with-compilation-unit
|
|
@ -0,0 +1 @@
|
|||
Slava Pestov
|
|
@ -0,0 +1 @@
|
|||
Slava Pestov
|
|
@ -0,0 +1 @@
|
|||
Slava Pestov
|
|
@ -0,0 +1,61 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors alien.c-types byte-arrays classes functors
|
||||
kernel math parser prettyprint.custom sequences
|
||||
sequences.private ;
|
||||
IN: math.vectors.simd.functor
|
||||
|
||||
ERROR: bad-length got expected ;
|
||||
|
||||
FUNCTOR: define-simd-type ( T N -- )
|
||||
|
||||
A DEFINES-CLASS ${N}${T}-array
|
||||
<A> DEFINES <${A}>
|
||||
(A) DEFINES (${A})
|
||||
>A DEFINES >${A}
|
||||
A{ DEFINES ${A}{
|
||||
|
||||
NTH [ T dup c-type-getter-boxer array-accessor ]
|
||||
SET-NTH [ T dup c-setter array-accessor ]
|
||||
|
||||
BYTES [ T heap-size N * ]
|
||||
INITIAL [ BYTES <byte-array> ]
|
||||
|
||||
WHERE
|
||||
|
||||
TUPLE: A
|
||||
{ underlying byte-array read-only initial: INITIAL } ;
|
||||
|
||||
: <A> ( -- simd-array ) BYTES <byte-array> A boa ; inline
|
||||
|
||||
: (A) ( -- simd-array ) BYTES (byte-array) A boa ; inline
|
||||
|
||||
M: A clone underlying>> clone \ A boa ; inline
|
||||
|
||||
M: A length drop N ; inline
|
||||
|
||||
M: A nth-unsafe underlying>> NTH call ; inline
|
||||
|
||||
M: A set-nth-unsafe underlying>> SET-NTH call ; inline
|
||||
|
||||
: >A ( seq -- simd-array ) \ A new clone-like ;
|
||||
|
||||
M: A like drop dup \ A instance? [ >A ] unless ; inline
|
||||
|
||||
M: A new-sequence drop dup N = [ drop (A) ] [ N bad-length ] if ; inline
|
||||
|
||||
M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ;
|
||||
|
||||
M: A byte-length underlying>> length ; inline
|
||||
|
||||
M: A pprint-delims drop \ A{ \ } ;
|
||||
|
||||
M: A >pprint-sequence ;
|
||||
|
||||
M: A pprint* pprint-object ;
|
||||
|
||||
SYNTAX: A{ \ } [ >A ] parse-literal ;
|
||||
|
||||
INSTANCE: A sequence
|
||||
|
||||
;FUNCTOR
|
|
@ -0,0 +1,301 @@
|
|||
IN: math.vectors.simd.tests
|
||||
USING: math math.vectors.simd math.vectors.simd.private
|
||||
math.vectors math.functions kernel.private compiler sequences
|
||||
tools.test compiler.tree.debugger accessors kernel ;
|
||||
|
||||
[ 4float-array{ 0 0 0 0 } ] [ 4float-array new ] unit-test
|
||||
|
||||
[ V{ float } ] [ [ { 4float-array } declare norm-sq ] final-classes ] unit-test
|
||||
|
||||
[ V{ float } ] [ [ { 4float-array } declare norm ] final-classes ] unit-test
|
||||
|
||||
[ 4float-array{ 12 12 12 12 } ] [
|
||||
12 [ 4float-array-with ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4float-array{ 1 2 3 4 } ] [
|
||||
1 2 3 4 [ 4float-array-boa ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4float-array{ 11 22 33 44 } ] [
|
||||
4float-array{ 1 2 3 4 } 4float-array{ 10 20 30 40 }
|
||||
[ { 4float-array 4float-array } declare v+ ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4float-array{ -9 -18 -27 -36 } ] [
|
||||
4float-array{ 1 2 3 4 } 4float-array{ 10 20 30 40 }
|
||||
[ { 4float-array 4float-array } declare v- ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4float-array{ 10 40 90 160 } ] [
|
||||
4float-array{ 1 2 3 4 } 4float-array{ 10 20 30 40 }
|
||||
[ { 4float-array 4float-array } declare v* ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4float-array{ 10 100 1000 10000 } ] [
|
||||
4float-array{ 100 2000 30000 400000 } 4float-array{ 10 20 30 40 }
|
||||
[ { 4float-array 4float-array } declare v/ ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4float-array{ -10 -20 -30 -40 } ] [
|
||||
4float-array{ -10 20 -30 40 } 4float-array{ 10 -20 30 -40 }
|
||||
[ { 4float-array 4float-array } declare vmin ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4float-array{ 10 20 30 40 } ] [
|
||||
4float-array{ -10 20 -30 40 } 4float-array{ 10 -20 30 -40 }
|
||||
[ { 4float-array 4float-array } declare vmax ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 10.0 ] [
|
||||
4float-array{ 1 2 3 4 }
|
||||
[ { 4float-array } declare sum ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 13.0 ] [
|
||||
4float-array{ 1 2 3 4 }
|
||||
[ { 4float-array } declare sum 3.0 + ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 8.0 ] [
|
||||
4float-array{ 1 2 3 4 } 4float-array{ 2 0 2 0 }
|
||||
[ { 4float-array 4float-array } declare v. ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4float-array{ 5 10 15 20 } ] [
|
||||
5.0 4float-array{ 1 2 3 4 }
|
||||
[ { float 4float-array } declare n*v ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4float-array{ 5 10 15 20 } ] [
|
||||
4float-array{ 1 2 3 4 } 5.0
|
||||
[ { float 4float-array } declare v*n ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4float-array{ 10 5 2 5 } ] [
|
||||
10.0 4float-array{ 1 2 5 2 }
|
||||
[ { float 4float-array } declare n/v ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4float-array{ 0.5 1 1.5 2 } ] [
|
||||
4float-array{ 1 2 3 4 } 2
|
||||
[ { float 4float-array } declare v/n ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4float-array{ 1 0 0 0 } ] [
|
||||
4float-array{ 10 0 0 0 }
|
||||
[ { 4float-array } declare normalize ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 30.0 ] [
|
||||
4float-array{ 1 2 3 4 }
|
||||
[ { 4float-array } declare norm-sq ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ t ] [
|
||||
4float-array{ 1 0 0 0 }
|
||||
4float-array{ 0 1 0 0 }
|
||||
[ { 4float-array 4float-array } declare distance ] compile-call
|
||||
2 sqrt 1.0e-6 ~
|
||||
] unit-test
|
||||
|
||||
[ 2double-array{ 12 12 } ] [
|
||||
12 [ 2double-array-with ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 2double-array{ 1 2 } ] [
|
||||
1 2 [ 2double-array-boa ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 2double-array{ 11 22 } ] [
|
||||
2double-array{ 1 2 } 2double-array{ 10 20 }
|
||||
[ { 2double-array 2double-array } declare v+ ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 2double-array{ -9 -18 } ] [
|
||||
2double-array{ 1 2 } 2double-array{ 10 20 }
|
||||
[ { 2double-array 2double-array } declare v- ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 2double-array{ 10 40 } ] [
|
||||
2double-array{ 1 2 } 2double-array{ 10 20 }
|
||||
[ { 2double-array 2double-array } declare v* ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 2double-array{ 10 100 } ] [
|
||||
2double-array{ 100 2000 } 2double-array{ 10 20 }
|
||||
[ { 2double-array 2double-array } declare v/ ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 2double-array{ -10 -20 } ] [
|
||||
2double-array{ -10 20 } 2double-array{ 10 -20 }
|
||||
[ { 2double-array 2double-array } declare vmin ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 2double-array{ 10 20 } ] [
|
||||
2double-array{ -10 20 } 2double-array{ 10 -20 }
|
||||
[ { 2double-array 2double-array } declare vmax ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 3.0 ] [
|
||||
2double-array{ 1 2 }
|
||||
[ { 2double-array } declare sum ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 7.0 ] [
|
||||
2double-array{ 1 2 }
|
||||
[ { 2double-array } declare sum 4.0 + ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 16.0 ] [
|
||||
2double-array{ 1 2 } 2double-array{ 2 7 }
|
||||
[ { 2double-array 2double-array } declare v. ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 2double-array{ 5 10 } ] [
|
||||
5.0 2double-array{ 1 2 }
|
||||
[ { float 2double-array } declare n*v ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 2double-array{ 5 10 } ] [
|
||||
2double-array{ 1 2 } 5.0
|
||||
[ { float 2double-array } declare v*n ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 2double-array{ 10 5 } ] [
|
||||
10.0 2double-array{ 1 2 }
|
||||
[ { float 2double-array } declare n/v ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 2double-array{ 0.5 1 } ] [
|
||||
2double-array{ 1 2 } 2
|
||||
[ { float 2double-array } declare v/n ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 2double-array{ 0 0 } ] [ 2double-array new ] unit-test
|
||||
|
||||
[ 2double-array{ 1 0 } ] [
|
||||
2double-array{ 10 0 }
|
||||
[ { 2double-array } declare normalize ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 5.0 ] [
|
||||
2double-array{ 1 2 }
|
||||
[ { 2double-array } declare norm-sq ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ t ] [
|
||||
2double-array{ 1 0 }
|
||||
2double-array{ 0 1 }
|
||||
[ { 2double-array 2double-array } declare distance ] compile-call
|
||||
2 sqrt 1.0e-6 ~
|
||||
] unit-test
|
||||
|
||||
[ 4double-array{ 0 0 0 0 } ] [ 4double-array new ] unit-test
|
||||
|
||||
[ 4double-array{ 1 2 3 4 } ] [
|
||||
1 2 3 4 4double-array-boa
|
||||
] unit-test
|
||||
|
||||
[ 4double-array{ 1 1 1 1 } ] [
|
||||
1 4double-array-with
|
||||
] unit-test
|
||||
|
||||
[ 4double-array{ 0 1 2 3 } ] [
|
||||
1 4double-array-with [ * ] map-index
|
||||
] unit-test
|
||||
|
||||
[ V{ float } ] [ [ { 4double-array } declare norm-sq ] final-classes ] unit-test
|
||||
|
||||
[ V{ float } ] [ [ { 4double-array } declare norm ] final-classes ] unit-test
|
||||
|
||||
[ 4double-array{ 12 12 12 12 } ] [
|
||||
12 [ 4double-array-with ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4double-array{ 1 2 3 4 } ] [
|
||||
1 2 3 4 [ 4double-array-boa ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4double-array{ 11 22 33 44 } ] [
|
||||
4double-array{ 1 2 3 4 } 4double-array{ 10 20 30 40 }
|
||||
[ { 4double-array 4double-array } declare v+ ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4double-array{ -9 -18 -27 -36 } ] [
|
||||
4double-array{ 1 2 3 4 } 4double-array{ 10 20 30 40 }
|
||||
[ { 4double-array 4double-array } declare v- ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4double-array{ 10 40 90 160 } ] [
|
||||
4double-array{ 1 2 3 4 } 4double-array{ 10 20 30 40 }
|
||||
[ { 4double-array 4double-array } declare v* ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4double-array{ 10 100 1000 10000 } ] [
|
||||
4double-array{ 100 2000 30000 400000 } 4double-array{ 10 20 30 40 }
|
||||
[ { 4double-array 4double-array } declare v/ ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4double-array{ -10 -20 -30 -40 } ] [
|
||||
4double-array{ -10 20 -30 40 } 4double-array{ 10 -20 30 -40 }
|
||||
[ { 4double-array 4double-array } declare vmin ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4double-array{ 10 20 30 40 } ] [
|
||||
4double-array{ -10 20 -30 40 } 4double-array{ 10 -20 30 -40 }
|
||||
[ { 4double-array 4double-array } declare vmax ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 10.0 ] [
|
||||
4double-array{ 1 2 3 4 }
|
||||
[ { 4double-array } declare sum ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 13.0 ] [
|
||||
4double-array{ 1 2 3 4 }
|
||||
[ { 4double-array } declare sum 3.0 + ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 8.0 ] [
|
||||
4double-array{ 1 2 3 4 } 4double-array{ 2 0 2 0 }
|
||||
[ { 4double-array 4double-array } declare v. ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4double-array{ 5 10 15 20 } ] [
|
||||
5.0 4double-array{ 1 2 3 4 }
|
||||
[ { float 4double-array } declare n*v ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4double-array{ 5 10 15 20 } ] [
|
||||
4double-array{ 1 2 3 4 } 5.0
|
||||
[ { float 4double-array } declare v*n ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4double-array{ 10 5 2 5 } ] [
|
||||
10.0 4double-array{ 1 2 5 2 }
|
||||
[ { float 4double-array } declare n/v ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4double-array{ 0.5 1 1.5 2 } ] [
|
||||
4double-array{ 1 2 3 4 } 2
|
||||
[ { float 4double-array } declare v/n ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 4double-array{ 1 0 0 0 } ] [
|
||||
4double-array{ 10 0 0 0 }
|
||||
[ { 4double-array } declare normalize ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ 30.0 ] [
|
||||
4double-array{ 1 2 3 4 }
|
||||
[ { 4double-array } declare norm-sq ] compile-call
|
||||
] unit-test
|
||||
|
||||
[ t ] [
|
||||
4double-array{ 1 0 0 0 }
|
||||
4double-array{ 0 1 0 0 }
|
||||
[ { 4double-array 4double-array } declare distance ] compile-call
|
||||
2 sqrt 1.0e-6 ~
|
||||
] unit-test
|
||||
|
|
@ -0,0 +1,214 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors alien.c-types byte-arrays cpu.architecture
|
||||
generalizations kernel math math.functions math.vectors
|
||||
math.vectors.simd.functor math.vectors.specialization parser
|
||||
prettyprint.custom sequences sequences.private
|
||||
specialized-arrays.double locals assocs literals ;
|
||||
IN: math.vectors.simd
|
||||
|
||||
<PRIVATE
|
||||
|
||||
ERROR: bad-simd-call ;
|
||||
|
||||
: (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-vmin) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-vmax) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-vsqrt) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||
: (simd-sum) ( v1 rep -- v2 ) bad-simd-call ;
|
||||
: (simd-broadcast) ( x rep -- v ) bad-simd-call ;
|
||||
: (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
|
||||
: (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ;
|
||||
: assert-positive ( x -- y ) ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
<<
|
||||
|
||||
DEFER: 4float-array
|
||||
DEFER: 2double-array
|
||||
|
||||
"double" 2 define-simd-type
|
||||
"float" 4 define-simd-type
|
||||
|
||||
>>
|
||||
|
||||
! Constructors
|
||||
: 4float-array-with ( x -- simd-array )
|
||||
>float 4float-array-rep (simd-broadcast) 4float-array boa ; inline
|
||||
|
||||
: 4float-array-boa ( a b c d -- simd-array )
|
||||
[ >float ] 4 napply 4float-array-rep (simd-gather-4) 4float-array boa ; inline
|
||||
|
||||
: 2double-array-with ( x -- simd-array )
|
||||
>float 2double-array-rep (simd-broadcast) 2double-array boa ; inline
|
||||
|
||||
: 2double-array-boa ( a b -- simd-array )
|
||||
[ >float ] bi@ 2double-array-rep (simd-gather-2) 2double-array boa ; inline
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: 4float-array-vv->v-op ( v1 v2 quot -- v3 )
|
||||
[ [ underlying>> ] bi@ 4float-array-rep ] dip call 4float-array boa ; inline
|
||||
|
||||
: 4float-array-v->n-op ( v1 quot -- v2 )
|
||||
[ underlying>> 4float-array-rep ] dip call ; inline
|
||||
|
||||
: 2double-array-vv->v-op ( v1 v2 quot -- v3 )
|
||||
[ [ underlying>> ] bi@ 2double-array-rep ] dip call 2double-array boa ; inline
|
||||
|
||||
: 2double-array-v->n-op ( v1 quot -- v2 )
|
||||
[ underlying>> 2double-array-rep ] dip call ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
<<
|
||||
|
||||
<PRIVATE
|
||||
|
||||
:: simd-vector-words ( class ctor elt-type assoc -- )
|
||||
class elt-type assoc {
|
||||
{ vneg [ [ dup v- ] keep v- ] }
|
||||
{ v. [ v* sum ] }
|
||||
{ n+v [ [ ctor execute ] dip v+ ] }
|
||||
{ v+n [ ctor execute v+ ] }
|
||||
{ n-v [ [ ctor execute ] dip v- ] }
|
||||
{ v-n [ ctor execute v- ] }
|
||||
{ n*v [ [ ctor execute ] dip v* ] }
|
||||
{ v*n [ ctor execute v* ] }
|
||||
{ n/v [ [ ctor execute ] dip v/ ] }
|
||||
{ v/n [ ctor execute v/ ] }
|
||||
{ norm-sq [ dup v. assert-positive ] }
|
||||
{ norm [ norm-sq sqrt ] }
|
||||
{ normalize [ dup norm v/n ] }
|
||||
{ distance [ v- norm ] }
|
||||
} assoc-union
|
||||
specialize-vector-words ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
\ 4float-array \ 4float-array-with float H{
|
||||
{ v+ [ [ (simd-v+) ] 4float-array-vv->v-op ] }
|
||||
{ v- [ [ (simd-v-) ] 4float-array-vv->v-op ] }
|
||||
{ v* [ [ (simd-v*) ] 4float-array-vv->v-op ] }
|
||||
{ v/ [ [ (simd-v/) ] 4float-array-vv->v-op ] }
|
||||
{ vmin [ [ (simd-vmin) ] 4float-array-vv->v-op ] }
|
||||
{ vmax [ [ (simd-vmax) ] 4float-array-vv->v-op ] }
|
||||
{ sum [ [ (simd-sum) ] 4float-array-v->n-op ] }
|
||||
} simd-vector-words
|
||||
|
||||
\ 2double-array \ 2double-array-with float H{
|
||||
{ v+ [ [ (simd-v+) ] 2double-array-vv->v-op ] }
|
||||
{ v- [ [ (simd-v-) ] 2double-array-vv->v-op ] }
|
||||
{ v* [ [ (simd-v*) ] 2double-array-vv->v-op ] }
|
||||
{ v/ [ [ (simd-v/) ] 2double-array-vv->v-op ] }
|
||||
{ vmin [ [ (simd-vmin) ] 2double-array-vv->v-op ] }
|
||||
{ vmax [ [ (simd-vmax) ] 2double-array-vv->v-op ] }
|
||||
{ sum [ [ (simd-sum) ] 2double-array-v->n-op ] }
|
||||
} simd-vector-words
|
||||
|
||||
>>
|
||||
|
||||
! Synthesize 256-bit vectors from a pair of 128-bit vectors
|
||||
! Functorize this later so that we can do it for integers, etc
|
||||
TUPLE: 4double-array
|
||||
{ underlying1 byte-array initial: $[ 16 <byte-array> ] read-only }
|
||||
{ underlying2 byte-array initial: $[ 16 <byte-array> ] read-only } ;
|
||||
|
||||
: <4double-array> ( -- simd-array )
|
||||
16 <byte-array> 16 <byte-array> 4double-array boa ; inline
|
||||
|
||||
: (4double-array) ( -- simd-array )
|
||||
16 (byte-array) 16 (byte-array) 4double-array boa ; inline
|
||||
|
||||
M: 4double-array clone
|
||||
[ underlying1>> clone ] [ underlying2>> clone ] bi
|
||||
4double-array boa ; inline
|
||||
|
||||
M: 4double-array length drop 4 ; inline
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: 4double-array-deref ( n seq -- n' seq' )
|
||||
over 2 < [ underlying1>> ] [ [ 2 - ] dip underlying2>> ] if
|
||||
2 swap double-array boa ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
M: 4double-array nth-unsafe
|
||||
4double-array-deref nth-unsafe ; inline
|
||||
|
||||
M: 4double-array set-nth-unsafe
|
||||
4double-array-deref set-nth-unsafe ; inline
|
||||
|
||||
: >4double-array ( seq -- simd-array )
|
||||
4double-array new clone-like ;
|
||||
|
||||
M: 4double-array like
|
||||
drop dup 4double-array? [ >4double-array ] unless ; inline
|
||||
|
||||
M: 4double-array new-sequence
|
||||
drop dup 4 = [ drop (4double-array) ] [ 4 bad-length ] if ; inline
|
||||
|
||||
M: 4double-array equal?
|
||||
over 4double-array? [ sequence= ] [ 2drop f ] if ;
|
||||
|
||||
M: 4double-array byte-length drop 32 ; inline
|
||||
|
||||
SYNTAX: 4double-array{
|
||||
\ } [ >4double-array ] parse-literal ;
|
||||
|
||||
M: 4double-array pprint-delims
|
||||
drop \ 4double-array{ \ } ;
|
||||
|
||||
M: 4double-array >pprint-sequence ;
|
||||
|
||||
M: 4double-array pprint* pprint-object ;
|
||||
|
||||
INSTANCE: 4double-array sequence
|
||||
|
||||
: 4double-array-with ( x -- simd-array )
|
||||
dup [ >float 2double-array-rep (simd-broadcast) ] bi@
|
||||
4double-array boa ; inline
|
||||
|
||||
: 4double-array-boa ( a b c d -- simd-array )
|
||||
[ >float ] 4 napply [ 2double-array-rep (simd-gather-2) ] 2bi@
|
||||
4double-array boa ; inline
|
||||
|
||||
! SIMD operations on 4double-arrays
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: 4double-array-vv->v-op ( v1 v2 quot -- v3 )
|
||||
[ [ [ underlying1>> ] bi@ 2double-array-rep ] dip call ]
|
||||
[ [ [ underlying2>> ] bi@ 2double-array-rep ] dip call ] 3bi
|
||||
4double-array boa ; inline
|
||||
|
||||
: 4double-array-v->n-op ( v1 quot scalar-quot -- v2 )
|
||||
[
|
||||
[ [ underlying1>> 2double-array-rep ] dip call ]
|
||||
[ [ underlying2>> 2double-array-rep ] dip call ] 2bi
|
||||
] dip call ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
<<
|
||||
|
||||
\ 4double-array \ 4double-array-with float H{
|
||||
{ v+ [ [ (simd-v+) ] 4double-array-vv->v-op ] }
|
||||
{ v- [ [ (simd-v-) ] 4double-array-vv->v-op ] }
|
||||
{ v* [ [ (simd-v*) ] 4double-array-vv->v-op ] }
|
||||
{ v/ [ [ (simd-v/) ] 4double-array-vv->v-op ] }
|
||||
{ vmin [ [ (simd-vmin) ] 4double-array-vv->v-op ] }
|
||||
{ vmax [ [ (simd-vmax) ] 4double-array-vv->v-op ] }
|
||||
{ sum [ [ (simd-sum) ] [ + ] 4double-array-v->n-op ] }
|
||||
} simd-vector-words
|
||||
|
||||
>>
|
||||
|
||||
USE: vocabs.loader
|
||||
|
||||
"math.vectors.simd.alien" require
|
|
@ -2,7 +2,7 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: words kernel make sequences effects kernel.private accessors
|
||||
combinators math math.intervals math.vectors namespaces assocs fry
|
||||
splitting classes.algebra generalizations
|
||||
splitting classes.algebra generalizations locals
|
||||
compiler.tree.propagation.info ;
|
||||
IN: math.vectors.specialization
|
||||
|
||||
|
@ -67,6 +67,7 @@ H{
|
|||
{ vmin { +vector+ +vector+ -> +vector+ } }
|
||||
{ vneg { +vector+ -> +vector+ } }
|
||||
{ vtruncate { +vector+ -> +vector+ } }
|
||||
{ sum { +vector+ -> +scalar+ } }
|
||||
}
|
||||
|
||||
SYMBOL: specializations
|
||||
|
@ -82,19 +83,23 @@ specializations [ vector-words keys [ V{ } clone ] H{ } map>assoc ] initialize
|
|||
|
||||
: outputs ( schema -- seq ) { -> } split second ;
|
||||
|
||||
: specialize-vector-word ( word array-type elt-type -- word' )
|
||||
: loop-vector-op ( word array-type elt-type -- word' )
|
||||
pick word-schema
|
||||
[ inputs (specialize-vector-word) ]
|
||||
[ outputs record-output-signature ] 3bi ;
|
||||
|
||||
: input-signature ( word -- signature ) def>> first ;
|
||||
:: specialize-vector-word ( word array-type elt-type simd -- word/quot' )
|
||||
word simd key? [ word simd at ] [ word array-type elt-type loop-vector-op ] if ;
|
||||
|
||||
: specialize-vector-words ( array-type elt-type -- )
|
||||
[ vector-words keys ] 2dip
|
||||
'[
|
||||
[ _ _ specialize-vector-word ] keep
|
||||
[ dup input-signature ] dip
|
||||
add-specialization
|
||||
:: input-signature ( word array-type elt-type -- signature )
|
||||
array-type elt-type word word-schema inputs signature-for-schema ;
|
||||
|
||||
:: specialize-vector-words ( array-type elt-type simd -- )
|
||||
vector-words keys [
|
||||
[ array-type elt-type simd specialize-vector-word ]
|
||||
[ array-type elt-type input-signature ]
|
||||
[ ]
|
||||
tri add-specialization
|
||||
] each ;
|
||||
|
||||
: find-specialization ( classes word -- word/f )
|
||||
|
|
Loading…
Reference in New Issue