Initial implementation of SSE vector intrinsics:

- cpu.architecture: add SSE vector representations
- compiler.cfg.intrinsics.alien: remove an attempt at optimization that value numbering handles now
- compiler.cfg.representations: support instructions where the representation is set in the 'rep' slot, and support conversions between single and double floats
- alien-float, set-alien-float now use the single float representation, and the conversion is implicit; this fixes a long-standing bug where a register could get clobbered because of how %set-alien-float was defined on x86
- math.vectors.specialization: add support for SIMD specialization (where the vector word's body is replaced by another quotation), also specialize the 'sum' word
- math.vectors.simd: 4float-array, 2double-array, 4double-array types, and specializers for the math.vectors words
db4
Slava Pestov 2009-09-03 02:33:07 -05:00
parent c719b92363
commit ff8c70dbe0
23 changed files with 1235 additions and 101 deletions

View File

@ -91,6 +91,11 @@ INSN: ##set-string-nth-fast
use: src/int-rep obj/int-rep index/int-rep
temp: temp/int-rep ;
PURE-INSN: ##copy
def: dst
use: src
literal: rep ;
! Integer arithmetic
PURE-INSN: ##add
def: dst/int-rep
@ -201,6 +206,15 @@ use: src/int-rep
temp: temp/int-rep ;
! Float arithmetic
PURE-INSN: ##unbox-float
def: dst/double-float-rep
use: src/int-rep ;
PURE-INSN: ##box-float
def: dst/int-rep
use: src/double-float-rep
temp: temp/int-rep ;
PURE-INSN: ##add-float
def: dst/double-float-rep
use: src1/double-float-rep src2/double-float-rep ;
@ -240,6 +254,15 @@ def: dst/double-float-rep
use: src1/double-float-rep src2/double-float-rep
literal: func ;
! Single/double float conversion
PURE-INSN: ##single>double-float
def: dst/double-float-rep
use: src/single-float-rep ;
PURE-INSN: ##double>single-float
def: dst/single-float-rep
use: src/double-float-rep ;
! Float/integer conversion
PURE-INSN: ##float>integer
def: dst/int-rep
@ -249,26 +272,80 @@ PURE-INSN: ##integer>float
def: dst/double-float-rep
use: src/int-rep ;
! Boxing and unboxing
PURE-INSN: ##copy
! SIMD operations
INSN: ##box-vector
def: dst/int-rep
use: src
literal: rep
temp: temp/int-rep ;
INSN: ##unbox-vector
def: dst
use: src/int-rep
literal: rep ;
INSN: ##broadcast-vector
def: dst
use: src/scalar-rep
literal: rep ;
INSN: ##gather-vector-2
def: dst
use: src1/scalar-rep src2/scalar-rep
literal: rep ;
INSN: ##gather-vector-4
def: dst
use: src1/scalar-rep src2/scalar-rep src3/scalar-rep src4/scalar-rep
literal: rep ;
INSN: ##add-vector
def: dst
use: src1 src2
literal: rep ;
INSN: ##sub-vector
def: dst
use: src1 src2
literal: rep ;
INSN: ##mul-vector
def: dst
use: src1 src2
literal: rep ;
INSN: ##div-vector
def: dst
use: src1 src2
literal: rep ;
INSN: ##min-vector
def: dst
use: src1 src2
literal: rep ;
INSN: ##max-vector
def: dst
use: src1 src2
literal: rep ;
INSN: ##sqrt-vector
def: dst
use: src
literal: rep ;
PURE-INSN: ##unbox-float
def: dst/double-float-rep
use: src/int-rep ;
INSN: ##horizontal-add-vector
def: dst/scalar-rep
use: src
literal: rep ;
! Boxing and unboxing aliens
PURE-INSN: ##unbox-any-c-ptr
def: dst/int-rep
use: src/int-rep
temp: temp/int-rep ;
PURE-INSN: ##box-float
def: dst/int-rep
use: src/double-float-rep
temp: temp/int-rep ;
PURE-INSN: ##box-alien
def: dst/int-rep
use: src/int-rep
@ -322,13 +399,18 @@ def: dst/int-rep
use: src/int-rep ;
INSN: ##alien-float
def: dst/double-float-rep
def: dst/single-float-rep
use: src/int-rep ;
INSN: ##alien-double
def: dst/double-float-rep
use: src/int-rep ;
INSN: ##alien-vector
def: dst
use: src/int-rep
literal: rep ;
INSN: ##set-alien-integer-1
use: src/int-rep value/int-rep ;
@ -342,11 +424,15 @@ INSN: ##set-alien-cell
use: src/int-rep value/int-rep ;
INSN: ##set-alien-float
use: src/int-rep value/double-float-rep ;
use: src/int-rep value/single-float-rep ;
INSN: ##set-alien-double
use: src/int-rep value/double-float-rep ;
INSN: ##set-alien-vector
use: src/int-rep value
literal: rep ;
! Memory allocation
INSN: ##allot
def: dst/int-rep
@ -510,6 +596,7 @@ literal: n ;
UNION: ##allocation
##allot
##box-float
##box-vector
##box-alien
##box-displaced-alien
##integer>bignum ;

View File

@ -7,11 +7,20 @@ IN: compiler.cfg.instructions.syntax
SYMBOLS: def use temp literal constant ;
SYMBOL: scalar-rep
TUPLE: insn-slot-spec type name rep ;
: parse-rep ( str/f -- rep )
{
{ [ dup not ] [ ] }
{ [ dup "scalar-rep" = ] [ drop scalar-rep ] }
[ "cpu.architecture" lookup ]
} cond ;
: parse-insn-slot-spec ( type string -- spec )
over [ "Missing type" throw ] unless
"/" split1 dup [ "cpu.architecture" lookup ] when
"/" split1 parse-rep
insn-slot-spec boa ;
: parse-insn-slot-specs ( seq -- specs )

View File

@ -20,22 +20,14 @@ IN: compiler.cfg.intrinsics.alien
^^box-displaced-alien ds-push
] [ emit-primitive ] if ;
: (prepare-alien-accessor-imm) ( class offset -- offset-vreg )
ds-drop [ ds-pop swap ^^unbox-c-ptr ] dip ^^add-imm ;
: (prepare-alien-accessor) ( class -- offset-vreg )
[ 2inputs ^^untag-fixnum swap ] dip ^^unbox-c-ptr ^^add ;
: prepare-alien-accessor ( infos -- offset-vreg )
<reversed> [ second class>> ] [ first ] bi
dup value-info-small-fixnum? [
literal>> (prepare-alien-accessor-imm)
] [ drop (prepare-alien-accessor) ] if ;
<reversed> second class>>
[ 2inputs ^^untag-fixnum swap ] dip ^^unbox-c-ptr ^^add ;
:: inline-alien ( node quot test -- )
[let | infos [ node node-input-infos ] |
infos test call
[ infos prepare-alien-accessor quot call ]
[ infos quot call ]
[ node emit-primitive ]
if
] ; inline
@ -46,7 +38,7 @@ IN: compiler.cfg.intrinsics.alien
bi and ;
: inline-alien-getter ( node quot -- )
'[ @ ds-push ]
'[ prepare-alien-accessor @ ds-push ]
[ inline-alien-getter? ] inline-alien ; inline
: inline-alien-setter? ( infos class -- ? )
@ -56,18 +48,17 @@ IN: compiler.cfg.intrinsics.alien
tri and and ;
: inline-alien-integer-setter ( node quot -- )
'[ ds-pop ^^untag-fixnum @ ]
'[ prepare-alien-accessor ds-pop ^^untag-fixnum @ ]
[ fixnum inline-alien-setter? ]
inline-alien ; inline
: inline-alien-cell-setter ( node quot -- )
[ dup node-input-infos first class>> ] dip
'[ ds-pop _ ^^unbox-c-ptr @ ]
'[ [ prepare-alien-accessor ds-pop ] [ first class>> ] bi ^^unbox-c-ptr @ ]
[ pinned-c-ptr inline-alien-setter? ]
inline-alien ; inline
: inline-alien-float-setter ( node quot -- )
'[ ds-pop @ ]
'[ prepare-alien-accessor ds-pop @ ]
[ float inline-alien-setter? ]
inline-alien ; inline

View File

@ -7,6 +7,7 @@ compiler.cfg.intrinsics.alien
compiler.cfg.intrinsics.allot
compiler.cfg.intrinsics.fixnum
compiler.cfg.intrinsics.float
compiler.cfg.intrinsics.simd
compiler.cfg.intrinsics.slots
compiler.cfg.intrinsics.misc
compiler.cfg.comparisons ;
@ -22,6 +23,9 @@ QUALIFIED: classes.tuple.private
QUALIFIED: math.private
QUALIFIED: math.integers.private
QUALIFIED: math.floats.private
QUALIFIED: math.vectors.simd
QUALIFIED: math.vectors.simd.private
QUALIFIED: math.vectors.simd.alien
QUALIFIED: math.libm
IN: compiler.cfg.intrinsics
@ -142,5 +146,27 @@ IN: compiler.cfg.intrinsics
{ math.integers.private:fixnum-log2 [ drop emit-fixnum-log2 ] }
} enable-intrinsics ;
: enable-sse2-simd ( -- )
{
{ math.vectors.simd.private:assert-positive [ drop ] }
{ math.vectors.simd.private:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.private:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.private:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.private:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.private:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.private:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.private:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.private:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.private:(simd-gather-2) [ emit-gather-vector-2 ] }
{ math.vectors.simd.private:(simd-gather-4) [ emit-gather-vector-4 ] }
{ math.vectors.simd.alien:alien-vector [ emit-alien-vector ] }
{ math.vectors.simd.alien:set-alien-vector [ emit-set-alien-vector ] }
} enable-intrinsics ;
: enable-sse3-simd ( -- )
{
{ math.vectors.simd.private:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
} enable-intrinsics ;
: emit-intrinsic ( node word -- )
"intrinsic" word-prop call( node -- ) ;

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,57 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors byte-arrays fry cpu.architecture kernel
sequences compiler.tree.propagation.info
compiler.cfg.builder.blocks compiler.cfg.stacks
compiler.cfg.stacks.local compiler.cfg.hats
compiler.cfg.instructions compiler.cfg.registers
compiler.cfg.intrinsics.alien ;
IN: compiler.cfg.intrinsics.simd
: emit-vector-op ( node quot: ( rep -- ) -- )
[ dup node-input-infos last literal>> ] dip over representation?
[ [ drop ] 2dip call ] [ 2drop emit-primitive ] if ; inline
: emit-binary-vector-op ( node quot -- )
'[ [ ds-drop 2inputs ] dip @ ds-push ] emit-vector-op ; inline
: emit-unary-vector-op ( node quot -- )
'[ [ ds-drop ds-pop ] dip @ ds-push ] emit-vector-op ; inline
: emit-gather-vector-2 ( node -- )
[ ^^gather-vector-2 ] emit-binary-vector-op ;
: emit-gather-vector-4 ( node -- )
[
ds-drop
[
D 3 peek-loc
D 2 peek-loc
D 1 peek-loc
D 0 peek-loc
-4 inc-d
] dip
^^gather-vector-4
ds-push
] emit-vector-op ;
: inline-alien-vector-setter ( node quot -- )
'[ ds-drop prepare-alien-accessor ds-pop @ ]
[ byte-array inline-alien-setter? ]
inline-alien ; inline
: emit-alien-vector ( node -- )
dup [
'[
ds-drop prepare-alien-accessor
_ ^^alien-vector ds-push
]
[ inline-alien-getter? ] inline-alien
] with emit-vector-op ;
: emit-set-alien-vector ( node -- )
dup [
'[
_ ##set-alien-vector
] inline-alien-vector-setter
] with emit-vector-op ;

View File

@ -1,7 +1,7 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel accessors sequences arrays fry namespaces generic
words sets cpu.architecture compiler.units
words sets combinators generalizations cpu.architecture compiler.units
compiler.cfg.utilities compiler.cfg compiler.cfg.rpo
compiler.cfg.instructions compiler.cfg.instructions.syntax
compiler.cfg.def-use ;
@ -13,35 +13,41 @@ GENERIC: uses-vreg-reps ( insn -- reps )
<PRIVATE
: rep-getter-quot ( rep -- quot )
{
{ f [ [ rep>> ] ] }
{ scalar-rep [ [ rep>> scalar-rep-of ] ] }
[ '[ _ nip ] ]
} case ;
: define-defs-vreg-rep-method ( insn -- )
[ \ defs-vreg-rep create-method ]
[ insn-def-slot dup [ rep>> ] when '[ drop _ ] ] bi
define ;
[ insn-def-slot [ rep>> rep-getter-quot ] [ [ drop f ] ] if* ]
bi define ;
: reps-getter-quot ( reps -- quot )
[ rep>> rep-getter-quot ] map dup length '[ _ cleave _ narray ] ;
: define-uses-vreg-reps-method ( insn -- )
[ \ uses-vreg-reps create-method ]
[ insn-use-slots [ rep>> ] map '[ drop _ ] ] bi
define ;
[ insn-use-slots reps-getter-quot ]
bi define ;
: define-temp-vreg-reps-method ( insn -- )
[ \ temp-vreg-reps create-method ]
[ insn-temp-slots [ rep>> ] map '[ drop _ ] ] bi
define ;
[ insn-temp-slots reps-getter-quot ]
bi define ;
PRIVATE>
[
insn-classes get
[ { ##copy } diff [ define-defs-vreg-rep-method ] each ]
[ { ##copy ##phi } diff [ define-uses-vreg-reps-method ] each ]
[ [ define-defs-vreg-rep-method ] each ]
[ { ##phi } diff [ define-uses-vreg-reps-method ] each ]
[ [ define-temp-vreg-reps-method ] each ]
tri
] with-compilation-unit
M: ##copy defs-vreg-rep rep>> ;
M: ##copy uses-vreg-reps rep>> 1array ;
: each-def-rep ( insn vreg-quot: ( vreg rep -- ) -- )
[ [ defs-vreg ] [ defs-vreg-rep ] bi ] dip with when* ; inline

View File

@ -5,6 +5,7 @@ arrays combinators make locals deques dlists
cpu.architecture compiler.utilities
compiler.cfg
compiler.cfg.rpo
compiler.cfg.hats
compiler.cfg.registers
compiler.cfg.instructions
compiler.cfg.def-use
@ -16,13 +17,47 @@ IN: compiler.cfg.representations
! Virtual register representation selection.
ERROR: bad-conversion dst src dst-rep src-rep ;
GENERIC: emit-box ( dst src rep -- )
GENERIC: emit-unbox ( dst src rep -- )
M: single-float-rep emit-box
drop
[ double-float-rep next-vreg-rep dup ] dip ##single>double-float
int-rep next-vreg-rep ##box-float ;
M: single-float-rep emit-unbox
drop
[ double-float-rep next-vreg-rep dup ] dip ##unbox-float
##double>single-float ;
M: double-float-rep emit-box
drop
int-rep next-vreg-rep ##box-float ;
M: double-float-rep emit-unbox
drop ##unbox-float ;
M: vector-rep emit-box
int-rep next-vreg-rep ##box-vector ;
M: vector-rep emit-unbox
##unbox-vector ;
: emit-conversion ( dst src dst-rep src-rep -- )
2array {
{ { int-rep int-rep } [ int-rep ##copy ] }
{ { double-float-rep double-float-rep } [ double-float-rep ##copy ] }
{ { double-float-rep int-rep } [ ##unbox-float ] }
{ { int-rep double-float-rep } [ int-rep next-vreg-rep ##box-float ] }
} case ;
{
{ [ 2dup eq? ] [ drop ##copy ] }
{ [ dup int-rep eq? ] [ drop emit-unbox ] }
{ [ over int-rep eq? ] [ nip emit-box ] }
[
2array {
{ { double-float-rep single-float-rep } [ ##single>double-float ] }
{ { single-float-rep double-float-rep } [ ##double>single-float ] }
[ first2 bad-conversion ]
} case
]
} cond ;
<PRIVATE

View File

@ -45,7 +45,13 @@ UNION: two-operand-insn
##mul-float
##div-float
##min-float
##max-float ;
##max-float
##add-vector
##sub-vector
##mul-vector
##div-vector
##min-vector
##max-vector ;
GENERIC: convert-two-operand* ( insn -- )

View File

@ -144,8 +144,11 @@ CODEGEN: ##min %min
CODEGEN: ##max %max
CODEGEN: ##not %not
CODEGEN: ##log2 %log2
CODEGEN: ##copy %copy
CODEGEN: ##integer>bignum %integer>bignum
CODEGEN: ##bignum>integer %bignum>integer
CODEGEN: ##unbox-float %unbox-float
CODEGEN: ##box-float %box-float
CODEGEN: ##add-float %add-float
CODEGEN: ##sub-float %sub-float
CODEGEN: ##mul-float %mul-float
@ -155,12 +158,24 @@ CODEGEN: ##max-float %max-float
CODEGEN: ##sqrt %sqrt
CODEGEN: ##unary-float-function %unary-float-function
CODEGEN: ##binary-float-function %binary-float-function
CODEGEN: ##single>double-float %single>double-float
CODEGEN: ##double>single-float %double>single-float
CODEGEN: ##integer>float %integer>float
CODEGEN: ##float>integer %float>integer
CODEGEN: ##copy %copy
CODEGEN: ##unbox-float %unbox-float
CODEGEN: ##unbox-vector %unbox-vector
CODEGEN: ##broadcast-vector %broadcast-vector
CODEGEN: ##gather-vector-2 %gather-vector-2
CODEGEN: ##gather-vector-4 %gather-vector-4
CODEGEN: ##box-vector %box-vector
CODEGEN: ##add-vector %add-vector
CODEGEN: ##sub-vector %sub-vector
CODEGEN: ##mul-vector %mul-vector
CODEGEN: ##div-vector %div-vector
CODEGEN: ##min-vector %min-vector
CODEGEN: ##max-vector %max-vector
CODEGEN: ##sqrt-vector %sqrt-vector
CODEGEN: ##horizontal-add-vector %horizontal-add-vector
CODEGEN: ##unbox-any-c-ptr %unbox-any-c-ptr
CODEGEN: ##box-float %box-float
CODEGEN: ##box-alien %box-alien
CODEGEN: ##box-displaced-alien %box-displaced-alien
CODEGEN: ##alien-unsigned-1 %alien-unsigned-1
@ -172,12 +187,14 @@ CODEGEN: ##alien-signed-4 %alien-signed-4
CODEGEN: ##alien-cell %alien-cell
CODEGEN: ##alien-float %alien-float
CODEGEN: ##alien-double %alien-double
CODEGEN: ##alien-vector %alien-vector
CODEGEN: ##set-alien-integer-1 %set-alien-integer-1
CODEGEN: ##set-alien-integer-2 %set-alien-integer-2
CODEGEN: ##set-alien-integer-4 %set-alien-integer-4
CODEGEN: ##set-alien-cell %set-alien-cell
CODEGEN: ##set-alien-float %set-alien-float
CODEGEN: ##set-alien-double %set-alien-double
CODEGEN: ##set-alien-vector %set-alien-vector
CODEGEN: ##allot %allot
CODEGEN: ##write-barrier %write-barrier
CODEGEN: ##compare %compare

View File

@ -412,4 +412,6 @@ cell 4 = [
[ 1 "0.169967142900241" ] [ 1.4 [ 1 swap fcos ] compile-call number>string ] unit-test
[ 1 "0.169967142900241" ] [ 1.4 1 [ swap fcos ] compile-call number>string ] unit-test
[ "0.169967142900241" "0.9854497299884601" ] [ 1.4 [ [ fcos ] [ fsin ] bi ] compile-call [ number>string ] bi@ ] unit-test
[ 1 "0.169967142900241" "0.9854497299884601" ] [ 1.4 1 [ swap >float [ fcos ] [ fsin ] bi ] compile-call [ number>string ] bi@ ] unit-test
[ 1 "0.169967142900241" "0.9854497299884601" ] [ 1.4 1 [ swap >float [ fcos ] [ fsin ] bi ] compile-call [ number>string ] bi@ ] unit-test
[ 6.0 ] [ 1.0 [ >float 3.0 + [ B{ 0 0 0 0 } 0 set-alien-float ] [ 2.0 + ] bi ] compile-call ] unit-test

View File

@ -0,0 +1,43 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors byte-arrays combinators
compiler.tree.propagation.info cpu.architecture kernel words math
math.intervals math.vectors.simd math.vectors.simd.private
math.vectors.simd.alien ;
IN: compiler.tree.propagation.simd
\ (simd-v+) { byte-array } "default-output-classes" set-word-prop
\ (simd-v-) { byte-array } "default-output-classes" set-word-prop
\ (simd-v*) { byte-array } "default-output-classes" set-word-prop
\ (simd-v/) { byte-array } "default-output-classes" set-word-prop
\ (simd-vmin) { byte-array } "default-output-classes" set-word-prop
\ (simd-vmax) { byte-array } "default-output-classes" set-word-prop
\ (simd-vsqrt) { byte-array } "default-output-classes" set-word-prop
\ (simd-sum) [
nip dup literal?>> [
literal>> scalar-rep-of {
{ single-float-rep [ float ] }
{ double-float-rep [ float ] }
} case
] [ drop real ] if
<class-info>
] "outputs" set-word-prop
\ (simd-broadcast) { byte-array } "default-output-classes" set-word-prop
\ (simd-gather-2) { byte-array } "default-output-classes" set-word-prop
\ (simd-gather-4) { byte-array } "default-output-classes" set-word-prop
\ assert-positive [
real [0,inf] <class/interval-info> value-info-intersect
] "outputs" set-word-prop
\ alien-vector { byte-array } "default-output-classes" set-word-prop

View File

@ -20,7 +20,33 @@ SINGLETONS: tagged-rep int-rep ;
! one of these representations
SINGLETONS: single-float-rep double-float-rep ;
UNION: representation any-rep tagged-rep int-rep single-float-rep double-float-rep ;
SINGLETONS:
4float-array-rep
2double-array-rep
16char-array-rep
16uchar-array-rep
8short-array-rep
8ushort-array-rep
4int-array-rep
4uint-array-rep ;
UNION: vector-rep
4float-array-rep
2double-array-rep
16char-array-rep
16uchar-array-rep
8short-array-rep
8ushort-array-rep
4int-array-rep
4uint-array-rep ;
UNION: representation
any-rep
tagged-rep
int-rep
single-float-rep
double-float-rep
vector-rep ;
! Register classes
SINGLETONS: int-regs float-regs ;
@ -31,23 +57,28 @@ CONSTANT: reg-classes { int-regs float-regs }
! A pseudo-register class for parameters spilled on the stack
SINGLETON: stack-params
: reg-class-of ( rep -- reg-class )
{
{ tagged-rep [ int-regs ] }
{ int-rep [ int-regs ] }
{ single-float-rep [ float-regs ] }
{ double-float-rep [ float-regs ] }
{ stack-params [ stack-params ] }
} case ;
GENERIC: reg-class-of ( rep -- reg-class )
: rep-size ( rep -- n )
{
{ tagged-rep [ cell ] }
{ int-rep [ cell ] }
{ single-float-rep [ 4 ] }
{ double-float-rep [ 8 ] }
{ stack-params [ cell ] }
} case ;
M: tagged-rep reg-class-of drop int-regs ;
M: int-rep reg-class-of drop int-regs ;
M: single-float-rep reg-class-of drop float-regs ;
M: double-float-rep reg-class-of drop float-regs ;
M: vector-rep reg-class-of drop float-regs ;
M: stack-params reg-class-of drop stack-params ;
GENERIC: rep-size ( rep -- n )
M: tagged-rep rep-size drop cell ;
M: int-rep rep-size drop cell ;
M: single-float-rep rep-size drop 4 ;
M: double-float-rep rep-size drop 8 ;
M: stack-params rep-size drop cell ;
M: vector-rep rep-size drop 16 ;
GENERIC: scalar-rep-of ( rep -- rep' )
M: 4float-array-rep scalar-rep-of drop single-float-rep ;
M: 2double-array-rep scalar-rep-of drop double-float-rep ;
! Mapping from register class to machine registers
HOOK: machine-registers cpu ( -- assoc )
@ -101,6 +132,8 @@ HOOK: %max cpu ( dst src1 src2 -- )
HOOK: %not cpu ( dst src -- )
HOOK: %log2 cpu ( dst src -- )
HOOK: %copy cpu ( dst src rep -- )
HOOK: %fixnum-add cpu ( label dst src1 src2 -- )
HOOK: %fixnum-sub cpu ( label dst src1 src2 -- )
HOOK: %fixnum-mul cpu ( label dst src1 src2 -- )
@ -108,6 +141,9 @@ HOOK: %fixnum-mul cpu ( label dst src1 src2 -- )
HOOK: %integer>bignum cpu ( dst src temp -- )
HOOK: %bignum>integer cpu ( dst src temp -- )
HOOK: %unbox-float cpu ( dst src -- )
HOOK: %box-float cpu ( dst src temp -- )
HOOK: %add-float cpu ( dst src1 src2 -- )
HOOK: %sub-float cpu ( dst src1 src2 -- )
HOOK: %mul-float cpu ( dst src1 src2 -- )
@ -118,13 +154,29 @@ HOOK: %sqrt cpu ( dst src -- )
HOOK: %unary-float-function cpu ( dst src func -- )
HOOK: %binary-float-function cpu ( dst src1 src2 func -- )
HOOK: %single>double-float cpu ( dst src -- )
HOOK: %double>single-float cpu ( dst src -- )
HOOK: %integer>float cpu ( dst src -- )
HOOK: %float>integer cpu ( dst src -- )
HOOK: %copy cpu ( dst src rep -- )
HOOK: %unbox-float cpu ( dst src -- )
HOOK: %box-vector cpu ( dst src temp rep -- )
HOOK: %unbox-vector cpu ( dst src rep -- )
HOOK: %broadcast-vector cpu ( dst src rep -- )
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
HOOK: %div-vector cpu ( dst src1 src2 rep -- )
HOOK: %min-vector cpu ( dst src1 src2 rep -- )
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
HOOK: %sqrt-vector cpu ( dst src rep -- )
HOOK: %horizontal-add-vector cpu ( dst src rep -- )
HOOK: %unbox-any-c-ptr cpu ( dst src temp -- )
HOOK: %box-float cpu ( dst src temp -- )
HOOK: %box-alien cpu ( dst src temp -- )
HOOK: %box-displaced-alien cpu ( dst displacement base temp1 temp2 base-class -- )
@ -137,6 +189,7 @@ HOOK: %alien-signed-4 cpu ( dst src -- )
HOOK: %alien-cell cpu ( dst src -- )
HOOK: %alien-float cpu ( dst src -- )
HOOK: %alien-double cpu ( dst src -- )
HOOK: %alien-vector cpu ( dst src rep -- )
HOOK: %set-alien-integer-1 cpu ( ptr value -- )
HOOK: %set-alien-integer-2 cpu ( ptr value -- )
@ -144,6 +197,7 @@ HOOK: %set-alien-integer-4 cpu ( ptr value -- )
HOOK: %set-alien-cell cpu ( ptr value -- )
HOOK: %set-alien-float cpu ( ptr value -- )
HOOK: %set-alien-double cpu ( ptr value -- )
HOOK: %set-alien-vector cpu ( ptr value rep -- )
HOOK: %alien-global cpu ( dst symbol library -- )

View File

@ -4,7 +4,7 @@ USING: accessors assocs alien alien.c-types arrays strings
cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands
cpu.architecture kernel kernel.private math memory namespaces make
sequences words system layouts combinators math.order fry locals
compiler.constants
compiler.constants byte-arrays
compiler.cfg.registers
compiler.cfg.instructions
compiler.cfg.intrinsics
@ -130,6 +130,21 @@ M: x86 %max nip [ CMP ] [ CMOVL ] 2bi ;
M: x86 %not drop NOT ;
M: x86 %log2 BSR ;
GENERIC: copy-register* ( dst src rep -- )
M: int-rep copy-register* drop MOV ;
M: tagged-rep copy-register* drop MOV ;
M: single-float-rep copy-register* drop MOVSS ;
M: double-float-rep copy-register* drop MOVSD ;
M: 4float-array-rep copy-register* drop MOVUPS ;
M: 2double-array-rep copy-register* drop MOVUPD ;
M: vector-rep copy-register* drop MOVDQU ;
: copy-register ( dst src rep -- )
2over eq? [ 3drop ] [ copy-register* ] if ;
M: x86 %copy ( dst src rep -- ) copy-register ;
:: overflow-template ( label dst src1 src2 insn -- )
src1 src2 insn call
label JO ; inline
@ -211,24 +226,120 @@ M: x86 %min-float nip MINSD ;
M: x86 %max-float nip MAXSD ;
M: x86 %sqrt SQRTSD ;
M: x86 %single>double-float CVTSS2SD ;
M: x86 %double>single-float CVTSD2SS ;
M: x86 %integer>float CVTSI2SD ;
M: x86 %float>integer CVTTSD2SI ;
GENERIC: copy-register* ( dst src rep -- )
M: int-rep copy-register* drop MOV ;
M: tagged-rep copy-register* drop MOV ;
M: single-float-rep copy-register* drop MOVSS ;
M: double-float-rep copy-register* drop MOVSD ;
: copy-register ( dst src rep -- )
2over eq? [ 3drop ] [ copy-register* ] if ;
M: x86 %copy ( dst src rep -- ) copy-register ;
M: x86 %unbox-float ( dst src -- )
float-offset [+] MOVSD ;
M:: x86 %box-float ( dst src temp -- )
dst 16 float temp %allot
dst float-offset [+] src MOVSD ;
M:: x86 %box-vector ( dst src rep temp -- )
dst rep rep-size 2 cells + byte-array temp %allot
16 tag-fixnum dst 1 byte-array tag-number %set-slot-imm
dst byte-array-offset [+]
src rep copy-register ;
M:: x86 %unbox-vector ( dst src rep -- )
dst src byte-array-offset [+]
rep copy-register ;
M: x86 %broadcast-vector ( dst src rep -- )
{
{ 4float-array-rep [ [ MOVAPS ] [ drop dup 0 SHUFPS ] 2bi ] }
{ 2double-array-rep [ [ MOVAPD ] [ drop dup 0 SHUFPD ] 2bi ] }
} case ;
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
rep {
{
4float-array-rep
[
dst src1 MOVSS
dst src2 UNPCKLPS
src3 src4 UNPCKLPS
dst src3 HEX: 44 SHUFPS
]
}
} case ;
M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
rep {
{
2double-array-rep
[
dst src1 MOVAPD
dst src2 0 SHUFPD
]
}
} case ;
M: x86 %add-vector ( dst src1 src2 rep -- )
{
{ 4float-array-rep [ ADDPS ] }
{ 2double-array-rep [ ADDPD ] }
{ 16char-array-rep [ PADDB ] }
{ 16uchar-array-rep [ PADDB ] }
{ 8short-array-rep [ PADDW ] }
{ 8ushort-array-rep [ PADDW ] }
{ 4int-array-rep [ PADDD ] }
{ 4uint-array-rep [ PADDD ] }
} case drop ;
M: x86 %sub-vector ( dst src1 src2 rep -- )
{
{ 4float-array-rep [ SUBPS ] }
{ 2double-array-rep [ SUBPD ] }
{ 16char-array-rep [ PSUBB ] }
{ 16uchar-array-rep [ PSUBB ] }
{ 8short-array-rep [ PSUBW ] }
{ 8ushort-array-rep [ PSUBW ] }
{ 4int-array-rep [ PSUBD ] }
{ 4uint-array-rep [ PSUBD ] }
} case drop ;
M: x86 %mul-vector ( dst src1 src2 rep -- )
{
{ 4float-array-rep [ MULPS ] }
{ 2double-array-rep [ MULPD ] }
{ 4int-array-rep [ PMULLW ] }
} case drop ;
M: x86 %div-vector ( dst src1 src2 rep -- )
{
{ 4float-array-rep [ DIVPS ] }
{ 2double-array-rep [ DIVPD ] }
} case drop ;
M: x86 %min-vector ( dst src1 src2 rep -- )
{
{ 4float-array-rep [ MINPS ] }
{ 2double-array-rep [ MINPD ] }
} case drop ;
M: x86 %max-vector ( dst src1 src2 rep -- )
{
{ 4float-array-rep [ MAXPS ] }
{ 2double-array-rep [ MAXPD ] }
} case drop ;
M: x86 %sqrt-vector ( dst src rep -- )
{
{ 4float-array-rep [ SQRTPS ] }
{ 2double-array-rep [ SQRTPD ] }
} case ;
M: x86 %horizontal-add-vector ( dst src rep -- )
{
{ 4float-array-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
{ 2double-array-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
} case ;
M:: x86 %unbox-any-c-ptr ( dst src temp -- )
[
{ "is-byte-array" "end" "start" } [ define-label ] each
@ -255,10 +366,6 @@ M:: x86 %unbox-any-c-ptr ( dst src temp -- )
"end" resolve-label
] with-scope ;
M:: x86 %box-float ( dst src temp -- )
dst 16 float temp %allot
dst float-offset [+] src MOVSD ;
: alien@ ( reg n -- op ) cells alien tag-number - [+] ;
:: %allot-alien ( dst displacement base temp -- )
@ -405,8 +512,9 @@ M: x86 %alien-signed-2 16 %alien-signed-getter ;
M: x86 %alien-signed-4 32 %alien-signed-getter ;
M: x86 %alien-cell [] MOV ;
M: x86 %alien-float dupd [] MOVSS dup CVTSS2SD ;
M: x86 %alien-float [] MOVSS ;
M: x86 %alien-double [] MOVSD ;
M: x86 %alien-vector [ [] ] dip copy-register ;
:: %alien-integer-setter ( ptr value size -- )
value { ptr } size [| new-value |
@ -418,8 +526,9 @@ M: x86 %set-alien-integer-1 8 %alien-integer-setter ;
M: x86 %set-alien-integer-2 16 %alien-integer-setter ;
M: x86 %set-alien-integer-4 32 %alien-integer-setter ;
M: x86 %set-alien-cell [ [] ] dip MOV ;
M: x86 %set-alien-float dup dup CVTSD2SS [ [] ] dip MOVSS ;
M: x86 %set-alien-float [ [] ] dip MOVSS ;
M: x86 %set-alien-double [ [] ] dip MOVSD ;
M: x86 %set-alien-vector [ [] ] 2dip copy-register ;
: shift-count? ( reg -- ? ) { ECX RCX } memq? ;

View File

@ -0,0 +1,56 @@
IN: math.vectors.simd.alien.tests
USING: cpu.architecture math.vectors.simd accessors
math.vectors.simd.alien kernel classes.struct tools.test
compiler sequences byte-arrays alien math kernel.private
specialized-arrays.float ;
! Vector alien intrinsics
[ 4float-array{ 1 2 3 4 } ] [
[
4float-array{ 1 2 3 4 }
underlying>> 0 4float-array-rep alien-vector
] compile-call 4float-array boa
] unit-test
[ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [
16 [ 1 ] B{ } replicate-as 16 <byte-array>
[
0 [
{ byte-array c-ptr fixnum } declare
4float-array-rep set-alien-vector
] compile-call
] keep
] unit-test
[ float-array{ 1 2 3 4 } ] [
[
float-array{ 1 2 3 4 } underlying>>
float-array{ 4 3 2 1 } clone
[ underlying>> 0 4float-array-rep set-alien-vector ] keep
] compile-call
] unit-test
STRUCT: simd-struct
{ x 4float-array }
{ y 2double-array }
{ z 4double-array } ;
[ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
[ 4float-array{ 1 2 3 4 } 2double-array{ 2 1 } 4double-array{ 4 3 2 1 } ] [
simd-struct <struct>
4float-array{ 1 2 3 4 } >>x
2double-array{ 2 1 } >>y
4double-array{ 4 3 2 1 } >>z
[ x>> ] [ y>> ] [ z>> ] tri
] unit-test
[ 4float-array{ 1 2 3 4 } 2double-array{ 2 1 } 4double-array{ 4 3 2 1 } ] [
[
simd-struct <struct>
4float-array{ 1 2 3 4 } >>x
2double-array{ 2 1 } >>y
4double-array{ 4 3 2 1 } >>z
[ x>> ] [ y>> ] [ z>> ] tri
] compile-call
] unit-test

View File

@ -0,0 +1,51 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: alien accessors alien.c-types byte-arrays compiler.units
cpu.architecture libc locals kernel math math.vectors.simd
math.vectors.simd.private ;
IN: math.vectors.simd.alien
: alien-vector ( c-ptr n rep -- value )
! Inefficient version for when intrinsics are missing
[ swap <displaced-alien> ] dip rep-size memory>byte-array ;
: set-alien-vector ( value c-ptr n rep -- )
! Inefficient version for when intrinsics are missing
[ swap <displaced-alien> swap ] dip rep-size memcpy ;
:: define-simd-type ( class rep -- )
<c-type>
byte-array >>class
class >>boxed-class
[ rep alien-vector ] >>getter
[ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
16 >>size
8 >>align
rep >>rep
[ class boa ] >>boxer-quot
[ underlying>> ] >>unboxer-quot
class name>> typedef ;
: define-4double-array-type ( -- )
<c-type>
4double-array >>class
4double-array >>boxed-class
[
[ 2double-array-rep alien-vector ]
[ 16 + >fixnum 2double-array-rep alien-vector ] 2bi
4double-array boa
] >>getter
[
[ [ underlying1>> ] 2dip 2double-array-rep set-alien-vector ]
[ [ underlying2>> ] 2dip 16 + >fixnum 2double-array-rep set-alien-vector ]
3bi
] >>setter
32 >>size
8 >>align
2double-array-rep >>rep
"4double-array" typedef ;
[
4float-array 4float-array-rep define-simd-type
2double-array 2double-array-rep define-simd-type
define-4double-array-type
] with-compilation-unit

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,61 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien.c-types byte-arrays classes functors
kernel math parser prettyprint.custom sequences
sequences.private ;
IN: math.vectors.simd.functor
ERROR: bad-length got expected ;
FUNCTOR: define-simd-type ( T N -- )
A DEFINES-CLASS ${N}${T}-array
<A> DEFINES <${A}>
(A) DEFINES (${A})
>A DEFINES >${A}
A{ DEFINES ${A}{
NTH [ T dup c-type-getter-boxer array-accessor ]
SET-NTH [ T dup c-setter array-accessor ]
BYTES [ T heap-size N * ]
INITIAL [ BYTES <byte-array> ]
WHERE
TUPLE: A
{ underlying byte-array read-only initial: INITIAL } ;
: <A> ( -- simd-array ) BYTES <byte-array> A boa ; inline
: (A) ( -- simd-array ) BYTES (byte-array) A boa ; inline
M: A clone underlying>> clone \ A boa ; inline
M: A length drop N ; inline
M: A nth-unsafe underlying>> NTH call ; inline
M: A set-nth-unsafe underlying>> SET-NTH call ; inline
: >A ( seq -- simd-array ) \ A new clone-like ;
M: A like drop dup \ A instance? [ >A ] unless ; inline
M: A new-sequence drop dup N = [ drop (A) ] [ N bad-length ] if ; inline
M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ;
M: A byte-length underlying>> length ; inline
M: A pprint-delims drop \ A{ \ } ;
M: A >pprint-sequence ;
M: A pprint* pprint-object ;
SYNTAX: A{ \ } [ >A ] parse-literal ;
INSTANCE: A sequence
;FUNCTOR

View File

@ -0,0 +1,301 @@
IN: math.vectors.simd.tests
USING: math math.vectors.simd math.vectors.simd.private
math.vectors math.functions kernel.private compiler sequences
tools.test compiler.tree.debugger accessors kernel ;
[ 4float-array{ 0 0 0 0 } ] [ 4float-array new ] unit-test
[ V{ float } ] [ [ { 4float-array } declare norm-sq ] final-classes ] unit-test
[ V{ float } ] [ [ { 4float-array } declare norm ] final-classes ] unit-test
[ 4float-array{ 12 12 12 12 } ] [
12 [ 4float-array-with ] compile-call
] unit-test
[ 4float-array{ 1 2 3 4 } ] [
1 2 3 4 [ 4float-array-boa ] compile-call
] unit-test
[ 4float-array{ 11 22 33 44 } ] [
4float-array{ 1 2 3 4 } 4float-array{ 10 20 30 40 }
[ { 4float-array 4float-array } declare v+ ] compile-call
] unit-test
[ 4float-array{ -9 -18 -27 -36 } ] [
4float-array{ 1 2 3 4 } 4float-array{ 10 20 30 40 }
[ { 4float-array 4float-array } declare v- ] compile-call
] unit-test
[ 4float-array{ 10 40 90 160 } ] [
4float-array{ 1 2 3 4 } 4float-array{ 10 20 30 40 }
[ { 4float-array 4float-array } declare v* ] compile-call
] unit-test
[ 4float-array{ 10 100 1000 10000 } ] [
4float-array{ 100 2000 30000 400000 } 4float-array{ 10 20 30 40 }
[ { 4float-array 4float-array } declare v/ ] compile-call
] unit-test
[ 4float-array{ -10 -20 -30 -40 } ] [
4float-array{ -10 20 -30 40 } 4float-array{ 10 -20 30 -40 }
[ { 4float-array 4float-array } declare vmin ] compile-call
] unit-test
[ 4float-array{ 10 20 30 40 } ] [
4float-array{ -10 20 -30 40 } 4float-array{ 10 -20 30 -40 }
[ { 4float-array 4float-array } declare vmax ] compile-call
] unit-test
[ 10.0 ] [
4float-array{ 1 2 3 4 }
[ { 4float-array } declare sum ] compile-call
] unit-test
[ 13.0 ] [
4float-array{ 1 2 3 4 }
[ { 4float-array } declare sum 3.0 + ] compile-call
] unit-test
[ 8.0 ] [
4float-array{ 1 2 3 4 } 4float-array{ 2 0 2 0 }
[ { 4float-array 4float-array } declare v. ] compile-call
] unit-test
[ 4float-array{ 5 10 15 20 } ] [
5.0 4float-array{ 1 2 3 4 }
[ { float 4float-array } declare n*v ] compile-call
] unit-test
[ 4float-array{ 5 10 15 20 } ] [
4float-array{ 1 2 3 4 } 5.0
[ { float 4float-array } declare v*n ] compile-call
] unit-test
[ 4float-array{ 10 5 2 5 } ] [
10.0 4float-array{ 1 2 5 2 }
[ { float 4float-array } declare n/v ] compile-call
] unit-test
[ 4float-array{ 0.5 1 1.5 2 } ] [
4float-array{ 1 2 3 4 } 2
[ { float 4float-array } declare v/n ] compile-call
] unit-test
[ 4float-array{ 1 0 0 0 } ] [
4float-array{ 10 0 0 0 }
[ { 4float-array } declare normalize ] compile-call
] unit-test
[ 30.0 ] [
4float-array{ 1 2 3 4 }
[ { 4float-array } declare norm-sq ] compile-call
] unit-test
[ t ] [
4float-array{ 1 0 0 0 }
4float-array{ 0 1 0 0 }
[ { 4float-array 4float-array } declare distance ] compile-call
2 sqrt 1.0e-6 ~
] unit-test
[ 2double-array{ 12 12 } ] [
12 [ 2double-array-with ] compile-call
] unit-test
[ 2double-array{ 1 2 } ] [
1 2 [ 2double-array-boa ] compile-call
] unit-test
[ 2double-array{ 11 22 } ] [
2double-array{ 1 2 } 2double-array{ 10 20 }
[ { 2double-array 2double-array } declare v+ ] compile-call
] unit-test
[ 2double-array{ -9 -18 } ] [
2double-array{ 1 2 } 2double-array{ 10 20 }
[ { 2double-array 2double-array } declare v- ] compile-call
] unit-test
[ 2double-array{ 10 40 } ] [
2double-array{ 1 2 } 2double-array{ 10 20 }
[ { 2double-array 2double-array } declare v* ] compile-call
] unit-test
[ 2double-array{ 10 100 } ] [
2double-array{ 100 2000 } 2double-array{ 10 20 }
[ { 2double-array 2double-array } declare v/ ] compile-call
] unit-test
[ 2double-array{ -10 -20 } ] [
2double-array{ -10 20 } 2double-array{ 10 -20 }
[ { 2double-array 2double-array } declare vmin ] compile-call
] unit-test
[ 2double-array{ 10 20 } ] [
2double-array{ -10 20 } 2double-array{ 10 -20 }
[ { 2double-array 2double-array } declare vmax ] compile-call
] unit-test
[ 3.0 ] [
2double-array{ 1 2 }
[ { 2double-array } declare sum ] compile-call
] unit-test
[ 7.0 ] [
2double-array{ 1 2 }
[ { 2double-array } declare sum 4.0 + ] compile-call
] unit-test
[ 16.0 ] [
2double-array{ 1 2 } 2double-array{ 2 7 }
[ { 2double-array 2double-array } declare v. ] compile-call
] unit-test
[ 2double-array{ 5 10 } ] [
5.0 2double-array{ 1 2 }
[ { float 2double-array } declare n*v ] compile-call
] unit-test
[ 2double-array{ 5 10 } ] [
2double-array{ 1 2 } 5.0
[ { float 2double-array } declare v*n ] compile-call
] unit-test
[ 2double-array{ 10 5 } ] [
10.0 2double-array{ 1 2 }
[ { float 2double-array } declare n/v ] compile-call
] unit-test
[ 2double-array{ 0.5 1 } ] [
2double-array{ 1 2 } 2
[ { float 2double-array } declare v/n ] compile-call
] unit-test
[ 2double-array{ 0 0 } ] [ 2double-array new ] unit-test
[ 2double-array{ 1 0 } ] [
2double-array{ 10 0 }
[ { 2double-array } declare normalize ] compile-call
] unit-test
[ 5.0 ] [
2double-array{ 1 2 }
[ { 2double-array } declare norm-sq ] compile-call
] unit-test
[ t ] [
2double-array{ 1 0 }
2double-array{ 0 1 }
[ { 2double-array 2double-array } declare distance ] compile-call
2 sqrt 1.0e-6 ~
] unit-test
[ 4double-array{ 0 0 0 0 } ] [ 4double-array new ] unit-test
[ 4double-array{ 1 2 3 4 } ] [
1 2 3 4 4double-array-boa
] unit-test
[ 4double-array{ 1 1 1 1 } ] [
1 4double-array-with
] unit-test
[ 4double-array{ 0 1 2 3 } ] [
1 4double-array-with [ * ] map-index
] unit-test
[ V{ float } ] [ [ { 4double-array } declare norm-sq ] final-classes ] unit-test
[ V{ float } ] [ [ { 4double-array } declare norm ] final-classes ] unit-test
[ 4double-array{ 12 12 12 12 } ] [
12 [ 4double-array-with ] compile-call
] unit-test
[ 4double-array{ 1 2 3 4 } ] [
1 2 3 4 [ 4double-array-boa ] compile-call
] unit-test
[ 4double-array{ 11 22 33 44 } ] [
4double-array{ 1 2 3 4 } 4double-array{ 10 20 30 40 }
[ { 4double-array 4double-array } declare v+ ] compile-call
] unit-test
[ 4double-array{ -9 -18 -27 -36 } ] [
4double-array{ 1 2 3 4 } 4double-array{ 10 20 30 40 }
[ { 4double-array 4double-array } declare v- ] compile-call
] unit-test
[ 4double-array{ 10 40 90 160 } ] [
4double-array{ 1 2 3 4 } 4double-array{ 10 20 30 40 }
[ { 4double-array 4double-array } declare v* ] compile-call
] unit-test
[ 4double-array{ 10 100 1000 10000 } ] [
4double-array{ 100 2000 30000 400000 } 4double-array{ 10 20 30 40 }
[ { 4double-array 4double-array } declare v/ ] compile-call
] unit-test
[ 4double-array{ -10 -20 -30 -40 } ] [
4double-array{ -10 20 -30 40 } 4double-array{ 10 -20 30 -40 }
[ { 4double-array 4double-array } declare vmin ] compile-call
] unit-test
[ 4double-array{ 10 20 30 40 } ] [
4double-array{ -10 20 -30 40 } 4double-array{ 10 -20 30 -40 }
[ { 4double-array 4double-array } declare vmax ] compile-call
] unit-test
[ 10.0 ] [
4double-array{ 1 2 3 4 }
[ { 4double-array } declare sum ] compile-call
] unit-test
[ 13.0 ] [
4double-array{ 1 2 3 4 }
[ { 4double-array } declare sum 3.0 + ] compile-call
] unit-test
[ 8.0 ] [
4double-array{ 1 2 3 4 } 4double-array{ 2 0 2 0 }
[ { 4double-array 4double-array } declare v. ] compile-call
] unit-test
[ 4double-array{ 5 10 15 20 } ] [
5.0 4double-array{ 1 2 3 4 }
[ { float 4double-array } declare n*v ] compile-call
] unit-test
[ 4double-array{ 5 10 15 20 } ] [
4double-array{ 1 2 3 4 } 5.0
[ { float 4double-array } declare v*n ] compile-call
] unit-test
[ 4double-array{ 10 5 2 5 } ] [
10.0 4double-array{ 1 2 5 2 }
[ { float 4double-array } declare n/v ] compile-call
] unit-test
[ 4double-array{ 0.5 1 1.5 2 } ] [
4double-array{ 1 2 3 4 } 2
[ { float 4double-array } declare v/n ] compile-call
] unit-test
[ 4double-array{ 1 0 0 0 } ] [
4double-array{ 10 0 0 0 }
[ { 4double-array } declare normalize ] compile-call
] unit-test
[ 30.0 ] [
4double-array{ 1 2 3 4 }
[ { 4double-array } declare norm-sq ] compile-call
] unit-test
[ t ] [
4double-array{ 1 0 0 0 }
4double-array{ 0 1 0 0 }
[ { 4double-array 4double-array } declare distance ] compile-call
2 sqrt 1.0e-6 ~
] unit-test

View File

@ -0,0 +1,214 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien.c-types byte-arrays cpu.architecture
generalizations kernel math math.functions math.vectors
math.vectors.simd.functor math.vectors.specialization parser
prettyprint.custom sequences sequences.private
specialized-arrays.double locals assocs literals ;
IN: math.vectors.simd
<PRIVATE
ERROR: bad-simd-call ;
: (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-vmin) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-vmax) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-vsqrt) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-sum) ( v1 rep -- v2 ) bad-simd-call ;
: (simd-broadcast) ( x rep -- v ) bad-simd-call ;
: (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
: (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ;
: assert-positive ( x -- y ) ;
PRIVATE>
<<
DEFER: 4float-array
DEFER: 2double-array
"double" 2 define-simd-type
"float" 4 define-simd-type
>>
! Constructors
: 4float-array-with ( x -- simd-array )
>float 4float-array-rep (simd-broadcast) 4float-array boa ; inline
: 4float-array-boa ( a b c d -- simd-array )
[ >float ] 4 napply 4float-array-rep (simd-gather-4) 4float-array boa ; inline
: 2double-array-with ( x -- simd-array )
>float 2double-array-rep (simd-broadcast) 2double-array boa ; inline
: 2double-array-boa ( a b -- simd-array )
[ >float ] bi@ 2double-array-rep (simd-gather-2) 2double-array boa ; inline
<PRIVATE
: 4float-array-vv->v-op ( v1 v2 quot -- v3 )
[ [ underlying>> ] bi@ 4float-array-rep ] dip call 4float-array boa ; inline
: 4float-array-v->n-op ( v1 quot -- v2 )
[ underlying>> 4float-array-rep ] dip call ; inline
: 2double-array-vv->v-op ( v1 v2 quot -- v3 )
[ [ underlying>> ] bi@ 2double-array-rep ] dip call 2double-array boa ; inline
: 2double-array-v->n-op ( v1 quot -- v2 )
[ underlying>> 2double-array-rep ] dip call ; inline
PRIVATE>
<<
<PRIVATE
:: simd-vector-words ( class ctor elt-type assoc -- )
class elt-type assoc {
{ vneg [ [ dup v- ] keep v- ] }
{ v. [ v* sum ] }
{ n+v [ [ ctor execute ] dip v+ ] }
{ v+n [ ctor execute v+ ] }
{ n-v [ [ ctor execute ] dip v- ] }
{ v-n [ ctor execute v- ] }
{ n*v [ [ ctor execute ] dip v* ] }
{ v*n [ ctor execute v* ] }
{ n/v [ [ ctor execute ] dip v/ ] }
{ v/n [ ctor execute v/ ] }
{ norm-sq [ dup v. assert-positive ] }
{ norm [ norm-sq sqrt ] }
{ normalize [ dup norm v/n ] }
{ distance [ v- norm ] }
} assoc-union
specialize-vector-words ;
PRIVATE>
\ 4float-array \ 4float-array-with float H{
{ v+ [ [ (simd-v+) ] 4float-array-vv->v-op ] }
{ v- [ [ (simd-v-) ] 4float-array-vv->v-op ] }
{ v* [ [ (simd-v*) ] 4float-array-vv->v-op ] }
{ v/ [ [ (simd-v/) ] 4float-array-vv->v-op ] }
{ vmin [ [ (simd-vmin) ] 4float-array-vv->v-op ] }
{ vmax [ [ (simd-vmax) ] 4float-array-vv->v-op ] }
{ sum [ [ (simd-sum) ] 4float-array-v->n-op ] }
} simd-vector-words
\ 2double-array \ 2double-array-with float H{
{ v+ [ [ (simd-v+) ] 2double-array-vv->v-op ] }
{ v- [ [ (simd-v-) ] 2double-array-vv->v-op ] }
{ v* [ [ (simd-v*) ] 2double-array-vv->v-op ] }
{ v/ [ [ (simd-v/) ] 2double-array-vv->v-op ] }
{ vmin [ [ (simd-vmin) ] 2double-array-vv->v-op ] }
{ vmax [ [ (simd-vmax) ] 2double-array-vv->v-op ] }
{ sum [ [ (simd-sum) ] 2double-array-v->n-op ] }
} simd-vector-words
>>
! Synthesize 256-bit vectors from a pair of 128-bit vectors
! Functorize this later so that we can do it for integers, etc
TUPLE: 4double-array
{ underlying1 byte-array initial: $[ 16 <byte-array> ] read-only }
{ underlying2 byte-array initial: $[ 16 <byte-array> ] read-only } ;
: <4double-array> ( -- simd-array )
16 <byte-array> 16 <byte-array> 4double-array boa ; inline
: (4double-array) ( -- simd-array )
16 (byte-array) 16 (byte-array) 4double-array boa ; inline
M: 4double-array clone
[ underlying1>> clone ] [ underlying2>> clone ] bi
4double-array boa ; inline
M: 4double-array length drop 4 ; inline
<PRIVATE
: 4double-array-deref ( n seq -- n' seq' )
over 2 < [ underlying1>> ] [ [ 2 - ] dip underlying2>> ] if
2 swap double-array boa ; inline
PRIVATE>
M: 4double-array nth-unsafe
4double-array-deref nth-unsafe ; inline
M: 4double-array set-nth-unsafe
4double-array-deref set-nth-unsafe ; inline
: >4double-array ( seq -- simd-array )
4double-array new clone-like ;
M: 4double-array like
drop dup 4double-array? [ >4double-array ] unless ; inline
M: 4double-array new-sequence
drop dup 4 = [ drop (4double-array) ] [ 4 bad-length ] if ; inline
M: 4double-array equal?
over 4double-array? [ sequence= ] [ 2drop f ] if ;
M: 4double-array byte-length drop 32 ; inline
SYNTAX: 4double-array{
\ } [ >4double-array ] parse-literal ;
M: 4double-array pprint-delims
drop \ 4double-array{ \ } ;
M: 4double-array >pprint-sequence ;
M: 4double-array pprint* pprint-object ;
INSTANCE: 4double-array sequence
: 4double-array-with ( x -- simd-array )
dup [ >float 2double-array-rep (simd-broadcast) ] bi@
4double-array boa ; inline
: 4double-array-boa ( a b c d -- simd-array )
[ >float ] 4 napply [ 2double-array-rep (simd-gather-2) ] 2bi@
4double-array boa ; inline
! SIMD operations on 4double-arrays
<PRIVATE
: 4double-array-vv->v-op ( v1 v2 quot -- v3 )
[ [ [ underlying1>> ] bi@ 2double-array-rep ] dip call ]
[ [ [ underlying2>> ] bi@ 2double-array-rep ] dip call ] 3bi
4double-array boa ; inline
: 4double-array-v->n-op ( v1 quot scalar-quot -- v2 )
[
[ [ underlying1>> 2double-array-rep ] dip call ]
[ [ underlying2>> 2double-array-rep ] dip call ] 2bi
] dip call ; inline
PRIVATE>
<<
\ 4double-array \ 4double-array-with float H{
{ v+ [ [ (simd-v+) ] 4double-array-vv->v-op ] }
{ v- [ [ (simd-v-) ] 4double-array-vv->v-op ] }
{ v* [ [ (simd-v*) ] 4double-array-vv->v-op ] }
{ v/ [ [ (simd-v/) ] 4double-array-vv->v-op ] }
{ vmin [ [ (simd-vmin) ] 4double-array-vv->v-op ] }
{ vmax [ [ (simd-vmax) ] 4double-array-vv->v-op ] }
{ sum [ [ (simd-sum) ] [ + ] 4double-array-v->n-op ] }
} simd-vector-words
>>
USE: vocabs.loader
"math.vectors.simd.alien" require

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license.
USING: words kernel make sequences effects kernel.private accessors
combinators math math.intervals math.vectors namespaces assocs fry
splitting classes.algebra generalizations
splitting classes.algebra generalizations locals
compiler.tree.propagation.info ;
IN: math.vectors.specialization
@ -67,6 +67,7 @@ H{
{ vmin { +vector+ +vector+ -> +vector+ } }
{ vneg { +vector+ -> +vector+ } }
{ vtruncate { +vector+ -> +vector+ } }
{ sum { +vector+ -> +scalar+ } }
}
SYMBOL: specializations
@ -82,19 +83,23 @@ specializations [ vector-words keys [ V{ } clone ] H{ } map>assoc ] initialize
: outputs ( schema -- seq ) { -> } split second ;
: specialize-vector-word ( word array-type elt-type -- word' )
: loop-vector-op ( word array-type elt-type -- word' )
pick word-schema
[ inputs (specialize-vector-word) ]
[ outputs record-output-signature ] 3bi ;
: input-signature ( word -- signature ) def>> first ;
:: specialize-vector-word ( word array-type elt-type simd -- word/quot' )
word simd key? [ word simd at ] [ word array-type elt-type loop-vector-op ] if ;
: specialize-vector-words ( array-type elt-type -- )
[ vector-words keys ] 2dip
'[
[ _ _ specialize-vector-word ] keep
[ dup input-signature ] dip
add-specialization
:: input-signature ( word array-type elt-type -- signature )
array-type elt-type word word-schema inputs signature-for-schema ;
:: specialize-vector-words ( array-type elt-type simd -- )
vector-words keys [
[ array-type elt-type simd specialize-vector-word ]
[ array-type elt-type input-signature ]
[ ]
tri add-specialization
] each ;
: find-specialization ( classes word -- word/f )