Initial implementation of SSE vector intrinsics:

- cpu.architecture: add SSE vector representations
- compiler.cfg.intrinsics.alien: remove an attempt at optimization that value numbering handles now
- compiler.cfg.representations: support instructions where the representation is set in the 'rep' slot, and support conversions between single and double floats
- alien-float, set-alien-float now use the single float representation, and the conversion is implicit; this fixes a long-standing bug where a register could get clobbered because of how %set-alien-float was defined on x86
- math.vectors.specialization: add support for SIMD specialization (where the vector word's body is replaced by another quotation), also specialize the 'sum' word
- math.vectors.simd: 4float-array, 2double-array, 4double-array types, and specializers for the math.vectors words
db4
Slava Pestov 2009-09-03 02:33:07 -05:00
parent c719b92363
commit ff8c70dbe0
23 changed files with 1235 additions and 101 deletions

View File

@ -91,6 +91,11 @@ INSN: ##set-string-nth-fast
use: src/int-rep obj/int-rep index/int-rep use: src/int-rep obj/int-rep index/int-rep
temp: temp/int-rep ; temp: temp/int-rep ;
PURE-INSN: ##copy
def: dst
use: src
literal: rep ;
! Integer arithmetic ! Integer arithmetic
PURE-INSN: ##add PURE-INSN: ##add
def: dst/int-rep def: dst/int-rep
@ -201,6 +206,15 @@ use: src/int-rep
temp: temp/int-rep ; temp: temp/int-rep ;
! Float arithmetic ! Float arithmetic
PURE-INSN: ##unbox-float
def: dst/double-float-rep
use: src/int-rep ;
PURE-INSN: ##box-float
def: dst/int-rep
use: src/double-float-rep
temp: temp/int-rep ;
PURE-INSN: ##add-float PURE-INSN: ##add-float
def: dst/double-float-rep def: dst/double-float-rep
use: src1/double-float-rep src2/double-float-rep ; use: src1/double-float-rep src2/double-float-rep ;
@ -240,6 +254,15 @@ def: dst/double-float-rep
use: src1/double-float-rep src2/double-float-rep use: src1/double-float-rep src2/double-float-rep
literal: func ; literal: func ;
! Single/double float conversion
PURE-INSN: ##single>double-float
def: dst/double-float-rep
use: src/single-float-rep ;
PURE-INSN: ##double>single-float
def: dst/single-float-rep
use: src/double-float-rep ;
! Float/integer conversion ! Float/integer conversion
PURE-INSN: ##float>integer PURE-INSN: ##float>integer
def: dst/int-rep def: dst/int-rep
@ -249,26 +272,80 @@ PURE-INSN: ##integer>float
def: dst/double-float-rep def: dst/double-float-rep
use: src/int-rep ; use: src/int-rep ;
! Boxing and unboxing ! SIMD operations
PURE-INSN: ##copy
INSN: ##box-vector
def: dst/int-rep
use: src
literal: rep
temp: temp/int-rep ;
INSN: ##unbox-vector
def: dst
use: src/int-rep
literal: rep ;
INSN: ##broadcast-vector
def: dst
use: src/scalar-rep
literal: rep ;
INSN: ##gather-vector-2
def: dst
use: src1/scalar-rep src2/scalar-rep
literal: rep ;
INSN: ##gather-vector-4
def: dst
use: src1/scalar-rep src2/scalar-rep src3/scalar-rep src4/scalar-rep
literal: rep ;
INSN: ##add-vector
def: dst
use: src1 src2
literal: rep ;
INSN: ##sub-vector
def: dst
use: src1 src2
literal: rep ;
INSN: ##mul-vector
def: dst
use: src1 src2
literal: rep ;
INSN: ##div-vector
def: dst
use: src1 src2
literal: rep ;
INSN: ##min-vector
def: dst
use: src1 src2
literal: rep ;
INSN: ##max-vector
def: dst
use: src1 src2
literal: rep ;
INSN: ##sqrt-vector
def: dst def: dst
use: src use: src
literal: rep ; literal: rep ;
PURE-INSN: ##unbox-float INSN: ##horizontal-add-vector
def: dst/double-float-rep def: dst/scalar-rep
use: src/int-rep ; use: src
literal: rep ;
! Boxing and unboxing aliens
PURE-INSN: ##unbox-any-c-ptr PURE-INSN: ##unbox-any-c-ptr
def: dst/int-rep def: dst/int-rep
use: src/int-rep use: src/int-rep
temp: temp/int-rep ; temp: temp/int-rep ;
PURE-INSN: ##box-float
def: dst/int-rep
use: src/double-float-rep
temp: temp/int-rep ;
PURE-INSN: ##box-alien PURE-INSN: ##box-alien
def: dst/int-rep def: dst/int-rep
use: src/int-rep use: src/int-rep
@ -322,13 +399,18 @@ def: dst/int-rep
use: src/int-rep ; use: src/int-rep ;
INSN: ##alien-float INSN: ##alien-float
def: dst/double-float-rep def: dst/single-float-rep
use: src/int-rep ; use: src/int-rep ;
INSN: ##alien-double INSN: ##alien-double
def: dst/double-float-rep def: dst/double-float-rep
use: src/int-rep ; use: src/int-rep ;
INSN: ##alien-vector
def: dst
use: src/int-rep
literal: rep ;
INSN: ##set-alien-integer-1 INSN: ##set-alien-integer-1
use: src/int-rep value/int-rep ; use: src/int-rep value/int-rep ;
@ -342,11 +424,15 @@ INSN: ##set-alien-cell
use: src/int-rep value/int-rep ; use: src/int-rep value/int-rep ;
INSN: ##set-alien-float INSN: ##set-alien-float
use: src/int-rep value/double-float-rep ; use: src/int-rep value/single-float-rep ;
INSN: ##set-alien-double INSN: ##set-alien-double
use: src/int-rep value/double-float-rep ; use: src/int-rep value/double-float-rep ;
INSN: ##set-alien-vector
use: src/int-rep value
literal: rep ;
! Memory allocation ! Memory allocation
INSN: ##allot INSN: ##allot
def: dst/int-rep def: dst/int-rep
@ -510,6 +596,7 @@ literal: n ;
UNION: ##allocation UNION: ##allocation
##allot ##allot
##box-float ##box-float
##box-vector
##box-alien ##box-alien
##box-displaced-alien ##box-displaced-alien
##integer>bignum ; ##integer>bignum ;

View File

@ -7,11 +7,20 @@ IN: compiler.cfg.instructions.syntax
SYMBOLS: def use temp literal constant ; SYMBOLS: def use temp literal constant ;
SYMBOL: scalar-rep
TUPLE: insn-slot-spec type name rep ; TUPLE: insn-slot-spec type name rep ;
: parse-rep ( str/f -- rep )
{
{ [ dup not ] [ ] }
{ [ dup "scalar-rep" = ] [ drop scalar-rep ] }
[ "cpu.architecture" lookup ]
} cond ;
: parse-insn-slot-spec ( type string -- spec ) : parse-insn-slot-spec ( type string -- spec )
over [ "Missing type" throw ] unless over [ "Missing type" throw ] unless
"/" split1 dup [ "cpu.architecture" lookup ] when "/" split1 parse-rep
insn-slot-spec boa ; insn-slot-spec boa ;
: parse-insn-slot-specs ( seq -- specs ) : parse-insn-slot-specs ( seq -- specs )

View File

@ -20,22 +20,14 @@ IN: compiler.cfg.intrinsics.alien
^^box-displaced-alien ds-push ^^box-displaced-alien ds-push
] [ emit-primitive ] if ; ] [ emit-primitive ] if ;
: (prepare-alien-accessor-imm) ( class offset -- offset-vreg )
ds-drop [ ds-pop swap ^^unbox-c-ptr ] dip ^^add-imm ;
: (prepare-alien-accessor) ( class -- offset-vreg )
[ 2inputs ^^untag-fixnum swap ] dip ^^unbox-c-ptr ^^add ;
: prepare-alien-accessor ( infos -- offset-vreg ) : prepare-alien-accessor ( infos -- offset-vreg )
<reversed> [ second class>> ] [ first ] bi <reversed> second class>>
dup value-info-small-fixnum? [ [ 2inputs ^^untag-fixnum swap ] dip ^^unbox-c-ptr ^^add ;
literal>> (prepare-alien-accessor-imm)
] [ drop (prepare-alien-accessor) ] if ;
:: inline-alien ( node quot test -- ) :: inline-alien ( node quot test -- )
[let | infos [ node node-input-infos ] | [let | infos [ node node-input-infos ] |
infos test call infos test call
[ infos prepare-alien-accessor quot call ] [ infos quot call ]
[ node emit-primitive ] [ node emit-primitive ]
if if
] ; inline ] ; inline
@ -46,7 +38,7 @@ IN: compiler.cfg.intrinsics.alien
bi and ; bi and ;
: inline-alien-getter ( node quot -- ) : inline-alien-getter ( node quot -- )
'[ @ ds-push ] '[ prepare-alien-accessor @ ds-push ]
[ inline-alien-getter? ] inline-alien ; inline [ inline-alien-getter? ] inline-alien ; inline
: inline-alien-setter? ( infos class -- ? ) : inline-alien-setter? ( infos class -- ? )
@ -56,18 +48,17 @@ IN: compiler.cfg.intrinsics.alien
tri and and ; tri and and ;
: inline-alien-integer-setter ( node quot -- ) : inline-alien-integer-setter ( node quot -- )
'[ ds-pop ^^untag-fixnum @ ] '[ prepare-alien-accessor ds-pop ^^untag-fixnum @ ]
[ fixnum inline-alien-setter? ] [ fixnum inline-alien-setter? ]
inline-alien ; inline inline-alien ; inline
: inline-alien-cell-setter ( node quot -- ) : inline-alien-cell-setter ( node quot -- )
[ dup node-input-infos first class>> ] dip '[ [ prepare-alien-accessor ds-pop ] [ first class>> ] bi ^^unbox-c-ptr @ ]
'[ ds-pop _ ^^unbox-c-ptr @ ]
[ pinned-c-ptr inline-alien-setter? ] [ pinned-c-ptr inline-alien-setter? ]
inline-alien ; inline inline-alien ; inline
: inline-alien-float-setter ( node quot -- ) : inline-alien-float-setter ( node quot -- )
'[ ds-pop @ ] '[ prepare-alien-accessor ds-pop @ ]
[ float inline-alien-setter? ] [ float inline-alien-setter? ]
inline-alien ; inline inline-alien ; inline

View File

@ -7,6 +7,7 @@ compiler.cfg.intrinsics.alien
compiler.cfg.intrinsics.allot compiler.cfg.intrinsics.allot
compiler.cfg.intrinsics.fixnum compiler.cfg.intrinsics.fixnum
compiler.cfg.intrinsics.float compiler.cfg.intrinsics.float
compiler.cfg.intrinsics.simd
compiler.cfg.intrinsics.slots compiler.cfg.intrinsics.slots
compiler.cfg.intrinsics.misc compiler.cfg.intrinsics.misc
compiler.cfg.comparisons ; compiler.cfg.comparisons ;
@ -22,6 +23,9 @@ QUALIFIED: classes.tuple.private
QUALIFIED: math.private QUALIFIED: math.private
QUALIFIED: math.integers.private QUALIFIED: math.integers.private
QUALIFIED: math.floats.private QUALIFIED: math.floats.private
QUALIFIED: math.vectors.simd
QUALIFIED: math.vectors.simd.private
QUALIFIED: math.vectors.simd.alien
QUALIFIED: math.libm QUALIFIED: math.libm
IN: compiler.cfg.intrinsics IN: compiler.cfg.intrinsics
@ -142,5 +146,27 @@ IN: compiler.cfg.intrinsics
{ math.integers.private:fixnum-log2 [ drop emit-fixnum-log2 ] } { math.integers.private:fixnum-log2 [ drop emit-fixnum-log2 ] }
} enable-intrinsics ; } enable-intrinsics ;
: enable-sse2-simd ( -- )
{
{ math.vectors.simd.private:assert-positive [ drop ] }
{ math.vectors.simd.private:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.private:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.private:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.private:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.private:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.private:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.private:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.private:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.private:(simd-gather-2) [ emit-gather-vector-2 ] }
{ math.vectors.simd.private:(simd-gather-4) [ emit-gather-vector-4 ] }
{ math.vectors.simd.alien:alien-vector [ emit-alien-vector ] }
{ math.vectors.simd.alien:set-alien-vector [ emit-set-alien-vector ] }
} enable-intrinsics ;
: enable-sse3-simd ( -- )
{
{ math.vectors.simd.private:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
} enable-intrinsics ;
: emit-intrinsic ( node word -- ) : emit-intrinsic ( node word -- )
"intrinsic" word-prop call( node -- ) ; "intrinsic" word-prop call( node -- ) ;

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,57 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors byte-arrays fry cpu.architecture kernel
sequences compiler.tree.propagation.info
compiler.cfg.builder.blocks compiler.cfg.stacks
compiler.cfg.stacks.local compiler.cfg.hats
compiler.cfg.instructions compiler.cfg.registers
compiler.cfg.intrinsics.alien ;
IN: compiler.cfg.intrinsics.simd
: emit-vector-op ( node quot: ( rep -- ) -- )
[ dup node-input-infos last literal>> ] dip over representation?
[ [ drop ] 2dip call ] [ 2drop emit-primitive ] if ; inline
: emit-binary-vector-op ( node quot -- )
'[ [ ds-drop 2inputs ] dip @ ds-push ] emit-vector-op ; inline
: emit-unary-vector-op ( node quot -- )
'[ [ ds-drop ds-pop ] dip @ ds-push ] emit-vector-op ; inline
: emit-gather-vector-2 ( node -- )
[ ^^gather-vector-2 ] emit-binary-vector-op ;
: emit-gather-vector-4 ( node -- )
[
ds-drop
[
D 3 peek-loc
D 2 peek-loc
D 1 peek-loc
D 0 peek-loc
-4 inc-d
] dip
^^gather-vector-4
ds-push
] emit-vector-op ;
: inline-alien-vector-setter ( node quot -- )
'[ ds-drop prepare-alien-accessor ds-pop @ ]
[ byte-array inline-alien-setter? ]
inline-alien ; inline
: emit-alien-vector ( node -- )
dup [
'[
ds-drop prepare-alien-accessor
_ ^^alien-vector ds-push
]
[ inline-alien-getter? ] inline-alien
] with emit-vector-op ;
: emit-set-alien-vector ( node -- )
dup [
'[
_ ##set-alien-vector
] inline-alien-vector-setter
] with emit-vector-op ;

View File

@ -1,7 +1,7 @@
! Copyright (C) 2009 Slava Pestov. ! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel accessors sequences arrays fry namespaces generic USING: kernel accessors sequences arrays fry namespaces generic
words sets cpu.architecture compiler.units words sets combinators generalizations cpu.architecture compiler.units
compiler.cfg.utilities compiler.cfg compiler.cfg.rpo compiler.cfg.utilities compiler.cfg compiler.cfg.rpo
compiler.cfg.instructions compiler.cfg.instructions.syntax compiler.cfg.instructions compiler.cfg.instructions.syntax
compiler.cfg.def-use ; compiler.cfg.def-use ;
@ -13,35 +13,41 @@ GENERIC: uses-vreg-reps ( insn -- reps )
<PRIVATE <PRIVATE
: rep-getter-quot ( rep -- quot )
{
{ f [ [ rep>> ] ] }
{ scalar-rep [ [ rep>> scalar-rep-of ] ] }
[ '[ _ nip ] ]
} case ;
: define-defs-vreg-rep-method ( insn -- ) : define-defs-vreg-rep-method ( insn -- )
[ \ defs-vreg-rep create-method ] [ \ defs-vreg-rep create-method ]
[ insn-def-slot dup [ rep>> ] when '[ drop _ ] ] bi [ insn-def-slot [ rep>> rep-getter-quot ] [ [ drop f ] ] if* ]
define ; bi define ;
: reps-getter-quot ( reps -- quot )
[ rep>> rep-getter-quot ] map dup length '[ _ cleave _ narray ] ;
: define-uses-vreg-reps-method ( insn -- ) : define-uses-vreg-reps-method ( insn -- )
[ \ uses-vreg-reps create-method ] [ \ uses-vreg-reps create-method ]
[ insn-use-slots [ rep>> ] map '[ drop _ ] ] bi [ insn-use-slots reps-getter-quot ]
define ; bi define ;
: define-temp-vreg-reps-method ( insn -- ) : define-temp-vreg-reps-method ( insn -- )
[ \ temp-vreg-reps create-method ] [ \ temp-vreg-reps create-method ]
[ insn-temp-slots [ rep>> ] map '[ drop _ ] ] bi [ insn-temp-slots reps-getter-quot ]
define ; bi define ;
PRIVATE> PRIVATE>
[ [
insn-classes get insn-classes get
[ { ##copy } diff [ define-defs-vreg-rep-method ] each ] [ [ define-defs-vreg-rep-method ] each ]
[ { ##copy ##phi } diff [ define-uses-vreg-reps-method ] each ] [ { ##phi } diff [ define-uses-vreg-reps-method ] each ]
[ [ define-temp-vreg-reps-method ] each ] [ [ define-temp-vreg-reps-method ] each ]
tri tri
] with-compilation-unit ] with-compilation-unit
M: ##copy defs-vreg-rep rep>> ;
M: ##copy uses-vreg-reps rep>> 1array ;
: each-def-rep ( insn vreg-quot: ( vreg rep -- ) -- ) : each-def-rep ( insn vreg-quot: ( vreg rep -- ) -- )
[ [ defs-vreg ] [ defs-vreg-rep ] bi ] dip with when* ; inline [ [ defs-vreg ] [ defs-vreg-rep ] bi ] dip with when* ; inline

View File

@ -5,6 +5,7 @@ arrays combinators make locals deques dlists
cpu.architecture compiler.utilities cpu.architecture compiler.utilities
compiler.cfg compiler.cfg
compiler.cfg.rpo compiler.cfg.rpo
compiler.cfg.hats
compiler.cfg.registers compiler.cfg.registers
compiler.cfg.instructions compiler.cfg.instructions
compiler.cfg.def-use compiler.cfg.def-use
@ -16,13 +17,47 @@ IN: compiler.cfg.representations
! Virtual register representation selection. ! Virtual register representation selection.
ERROR: bad-conversion dst src dst-rep src-rep ;
GENERIC: emit-box ( dst src rep -- )
GENERIC: emit-unbox ( dst src rep -- )
M: single-float-rep emit-box
drop
[ double-float-rep next-vreg-rep dup ] dip ##single>double-float
int-rep next-vreg-rep ##box-float ;
M: single-float-rep emit-unbox
drop
[ double-float-rep next-vreg-rep dup ] dip ##unbox-float
##double>single-float ;
M: double-float-rep emit-box
drop
int-rep next-vreg-rep ##box-float ;
M: double-float-rep emit-unbox
drop ##unbox-float ;
M: vector-rep emit-box
int-rep next-vreg-rep ##box-vector ;
M: vector-rep emit-unbox
##unbox-vector ;
: emit-conversion ( dst src dst-rep src-rep -- ) : emit-conversion ( dst src dst-rep src-rep -- )
2array { {
{ { int-rep int-rep } [ int-rep ##copy ] } { [ 2dup eq? ] [ drop ##copy ] }
{ { double-float-rep double-float-rep } [ double-float-rep ##copy ] } { [ dup int-rep eq? ] [ drop emit-unbox ] }
{ { double-float-rep int-rep } [ ##unbox-float ] } { [ over int-rep eq? ] [ nip emit-box ] }
{ { int-rep double-float-rep } [ int-rep next-vreg-rep ##box-float ] } [
} case ; 2array {
{ { double-float-rep single-float-rep } [ ##single>double-float ] }
{ { single-float-rep double-float-rep } [ ##double>single-float ] }
[ first2 bad-conversion ]
} case
]
} cond ;
<PRIVATE <PRIVATE

View File

@ -45,7 +45,13 @@ UNION: two-operand-insn
##mul-float ##mul-float
##div-float ##div-float
##min-float ##min-float
##max-float ; ##max-float
##add-vector
##sub-vector
##mul-vector
##div-vector
##min-vector
##max-vector ;
GENERIC: convert-two-operand* ( insn -- ) GENERIC: convert-two-operand* ( insn -- )

View File

@ -144,8 +144,11 @@ CODEGEN: ##min %min
CODEGEN: ##max %max CODEGEN: ##max %max
CODEGEN: ##not %not CODEGEN: ##not %not
CODEGEN: ##log2 %log2 CODEGEN: ##log2 %log2
CODEGEN: ##copy %copy
CODEGEN: ##integer>bignum %integer>bignum CODEGEN: ##integer>bignum %integer>bignum
CODEGEN: ##bignum>integer %bignum>integer CODEGEN: ##bignum>integer %bignum>integer
CODEGEN: ##unbox-float %unbox-float
CODEGEN: ##box-float %box-float
CODEGEN: ##add-float %add-float CODEGEN: ##add-float %add-float
CODEGEN: ##sub-float %sub-float CODEGEN: ##sub-float %sub-float
CODEGEN: ##mul-float %mul-float CODEGEN: ##mul-float %mul-float
@ -155,12 +158,24 @@ CODEGEN: ##max-float %max-float
CODEGEN: ##sqrt %sqrt CODEGEN: ##sqrt %sqrt
CODEGEN: ##unary-float-function %unary-float-function CODEGEN: ##unary-float-function %unary-float-function
CODEGEN: ##binary-float-function %binary-float-function CODEGEN: ##binary-float-function %binary-float-function
CODEGEN: ##single>double-float %single>double-float
CODEGEN: ##double>single-float %double>single-float
CODEGEN: ##integer>float %integer>float CODEGEN: ##integer>float %integer>float
CODEGEN: ##float>integer %float>integer CODEGEN: ##float>integer %float>integer
CODEGEN: ##copy %copy CODEGEN: ##unbox-vector %unbox-vector
CODEGEN: ##unbox-float %unbox-float CODEGEN: ##broadcast-vector %broadcast-vector
CODEGEN: ##gather-vector-2 %gather-vector-2
CODEGEN: ##gather-vector-4 %gather-vector-4
CODEGEN: ##box-vector %box-vector
CODEGEN: ##add-vector %add-vector
CODEGEN: ##sub-vector %sub-vector
CODEGEN: ##mul-vector %mul-vector
CODEGEN: ##div-vector %div-vector
CODEGEN: ##min-vector %min-vector
CODEGEN: ##max-vector %max-vector
CODEGEN: ##sqrt-vector %sqrt-vector
CODEGEN: ##horizontal-add-vector %horizontal-add-vector
CODEGEN: ##unbox-any-c-ptr %unbox-any-c-ptr CODEGEN: ##unbox-any-c-ptr %unbox-any-c-ptr
CODEGEN: ##box-float %box-float
CODEGEN: ##box-alien %box-alien CODEGEN: ##box-alien %box-alien
CODEGEN: ##box-displaced-alien %box-displaced-alien CODEGEN: ##box-displaced-alien %box-displaced-alien
CODEGEN: ##alien-unsigned-1 %alien-unsigned-1 CODEGEN: ##alien-unsigned-1 %alien-unsigned-1
@ -172,12 +187,14 @@ CODEGEN: ##alien-signed-4 %alien-signed-4
CODEGEN: ##alien-cell %alien-cell CODEGEN: ##alien-cell %alien-cell
CODEGEN: ##alien-float %alien-float CODEGEN: ##alien-float %alien-float
CODEGEN: ##alien-double %alien-double CODEGEN: ##alien-double %alien-double
CODEGEN: ##alien-vector %alien-vector
CODEGEN: ##set-alien-integer-1 %set-alien-integer-1 CODEGEN: ##set-alien-integer-1 %set-alien-integer-1
CODEGEN: ##set-alien-integer-2 %set-alien-integer-2 CODEGEN: ##set-alien-integer-2 %set-alien-integer-2
CODEGEN: ##set-alien-integer-4 %set-alien-integer-4 CODEGEN: ##set-alien-integer-4 %set-alien-integer-4
CODEGEN: ##set-alien-cell %set-alien-cell CODEGEN: ##set-alien-cell %set-alien-cell
CODEGEN: ##set-alien-float %set-alien-float CODEGEN: ##set-alien-float %set-alien-float
CODEGEN: ##set-alien-double %set-alien-double CODEGEN: ##set-alien-double %set-alien-double
CODEGEN: ##set-alien-vector %set-alien-vector
CODEGEN: ##allot %allot CODEGEN: ##allot %allot
CODEGEN: ##write-barrier %write-barrier CODEGEN: ##write-barrier %write-barrier
CODEGEN: ##compare %compare CODEGEN: ##compare %compare

View File

@ -413,3 +413,5 @@ cell 4 = [
[ 1 "0.169967142900241" ] [ 1.4 1 [ swap fcos ] compile-call number>string ] unit-test [ 1 "0.169967142900241" ] [ 1.4 1 [ swap fcos ] compile-call number>string ] unit-test
[ "0.169967142900241" "0.9854497299884601" ] [ 1.4 [ [ fcos ] [ fsin ] bi ] compile-call [ number>string ] bi@ ] unit-test [ "0.169967142900241" "0.9854497299884601" ] [ 1.4 [ [ fcos ] [ fsin ] bi ] compile-call [ number>string ] bi@ ] unit-test
[ 1 "0.169967142900241" "0.9854497299884601" ] [ 1.4 1 [ swap >float [ fcos ] [ fsin ] bi ] compile-call [ number>string ] bi@ ] unit-test [ 1 "0.169967142900241" "0.9854497299884601" ] [ 1.4 1 [ swap >float [ fcos ] [ fsin ] bi ] compile-call [ number>string ] bi@ ] unit-test
[ 6.0 ] [ 1.0 [ >float 3.0 + [ B{ 0 0 0 0 } 0 set-alien-float ] [ 2.0 + ] bi ] compile-call ] unit-test

View File

@ -0,0 +1,43 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors byte-arrays combinators
compiler.tree.propagation.info cpu.architecture kernel words math
math.intervals math.vectors.simd math.vectors.simd.private
math.vectors.simd.alien ;
IN: compiler.tree.propagation.simd
\ (simd-v+) { byte-array } "default-output-classes" set-word-prop
\ (simd-v-) { byte-array } "default-output-classes" set-word-prop
\ (simd-v*) { byte-array } "default-output-classes" set-word-prop
\ (simd-v/) { byte-array } "default-output-classes" set-word-prop
\ (simd-vmin) { byte-array } "default-output-classes" set-word-prop
\ (simd-vmax) { byte-array } "default-output-classes" set-word-prop
\ (simd-vsqrt) { byte-array } "default-output-classes" set-word-prop
\ (simd-sum) [
nip dup literal?>> [
literal>> scalar-rep-of {
{ single-float-rep [ float ] }
{ double-float-rep [ float ] }
} case
] [ drop real ] if
<class-info>
] "outputs" set-word-prop
\ (simd-broadcast) { byte-array } "default-output-classes" set-word-prop
\ (simd-gather-2) { byte-array } "default-output-classes" set-word-prop
\ (simd-gather-4) { byte-array } "default-output-classes" set-word-prop
\ assert-positive [
real [0,inf] <class/interval-info> value-info-intersect
] "outputs" set-word-prop
\ alien-vector { byte-array } "default-output-classes" set-word-prop

View File

@ -20,7 +20,33 @@ SINGLETONS: tagged-rep int-rep ;
! one of these representations ! one of these representations
SINGLETONS: single-float-rep double-float-rep ; SINGLETONS: single-float-rep double-float-rep ;
UNION: representation any-rep tagged-rep int-rep single-float-rep double-float-rep ; SINGLETONS:
4float-array-rep
2double-array-rep
16char-array-rep
16uchar-array-rep
8short-array-rep
8ushort-array-rep
4int-array-rep
4uint-array-rep ;
UNION: vector-rep
4float-array-rep
2double-array-rep
16char-array-rep
16uchar-array-rep
8short-array-rep
8ushort-array-rep
4int-array-rep
4uint-array-rep ;
UNION: representation
any-rep
tagged-rep
int-rep
single-float-rep
double-float-rep
vector-rep ;
! Register classes ! Register classes
SINGLETONS: int-regs float-regs ; SINGLETONS: int-regs float-regs ;
@ -31,23 +57,28 @@ CONSTANT: reg-classes { int-regs float-regs }
! A pseudo-register class for parameters spilled on the stack ! A pseudo-register class for parameters spilled on the stack
SINGLETON: stack-params SINGLETON: stack-params
: reg-class-of ( rep -- reg-class ) GENERIC: reg-class-of ( rep -- reg-class )
{
{ tagged-rep [ int-regs ] }
{ int-rep [ int-regs ] }
{ single-float-rep [ float-regs ] }
{ double-float-rep [ float-regs ] }
{ stack-params [ stack-params ] }
} case ;
: rep-size ( rep -- n ) M: tagged-rep reg-class-of drop int-regs ;
{ M: int-rep reg-class-of drop int-regs ;
{ tagged-rep [ cell ] } M: single-float-rep reg-class-of drop float-regs ;
{ int-rep [ cell ] } M: double-float-rep reg-class-of drop float-regs ;
{ single-float-rep [ 4 ] } M: vector-rep reg-class-of drop float-regs ;
{ double-float-rep [ 8 ] } M: stack-params reg-class-of drop stack-params ;
{ stack-params [ cell ] }
} case ; GENERIC: rep-size ( rep -- n )
M: tagged-rep rep-size drop cell ;
M: int-rep rep-size drop cell ;
M: single-float-rep rep-size drop 4 ;
M: double-float-rep rep-size drop 8 ;
M: stack-params rep-size drop cell ;
M: vector-rep rep-size drop 16 ;
GENERIC: scalar-rep-of ( rep -- rep' )
M: 4float-array-rep scalar-rep-of drop single-float-rep ;
M: 2double-array-rep scalar-rep-of drop double-float-rep ;
! Mapping from register class to machine registers ! Mapping from register class to machine registers
HOOK: machine-registers cpu ( -- assoc ) HOOK: machine-registers cpu ( -- assoc )
@ -101,6 +132,8 @@ HOOK: %max cpu ( dst src1 src2 -- )
HOOK: %not cpu ( dst src -- ) HOOK: %not cpu ( dst src -- )
HOOK: %log2 cpu ( dst src -- ) HOOK: %log2 cpu ( dst src -- )
HOOK: %copy cpu ( dst src rep -- )
HOOK: %fixnum-add cpu ( label dst src1 src2 -- ) HOOK: %fixnum-add cpu ( label dst src1 src2 -- )
HOOK: %fixnum-sub cpu ( label dst src1 src2 -- ) HOOK: %fixnum-sub cpu ( label dst src1 src2 -- )
HOOK: %fixnum-mul cpu ( label dst src1 src2 -- ) HOOK: %fixnum-mul cpu ( label dst src1 src2 -- )
@ -108,6 +141,9 @@ HOOK: %fixnum-mul cpu ( label dst src1 src2 -- )
HOOK: %integer>bignum cpu ( dst src temp -- ) HOOK: %integer>bignum cpu ( dst src temp -- )
HOOK: %bignum>integer cpu ( dst src temp -- ) HOOK: %bignum>integer cpu ( dst src temp -- )
HOOK: %unbox-float cpu ( dst src -- )
HOOK: %box-float cpu ( dst src temp -- )
HOOK: %add-float cpu ( dst src1 src2 -- ) HOOK: %add-float cpu ( dst src1 src2 -- )
HOOK: %sub-float cpu ( dst src1 src2 -- ) HOOK: %sub-float cpu ( dst src1 src2 -- )
HOOK: %mul-float cpu ( dst src1 src2 -- ) HOOK: %mul-float cpu ( dst src1 src2 -- )
@ -118,13 +154,29 @@ HOOK: %sqrt cpu ( dst src -- )
HOOK: %unary-float-function cpu ( dst src func -- ) HOOK: %unary-float-function cpu ( dst src func -- )
HOOK: %binary-float-function cpu ( dst src1 src2 func -- ) HOOK: %binary-float-function cpu ( dst src1 src2 func -- )
HOOK: %single>double-float cpu ( dst src -- )
HOOK: %double>single-float cpu ( dst src -- )
HOOK: %integer>float cpu ( dst src -- ) HOOK: %integer>float cpu ( dst src -- )
HOOK: %float>integer cpu ( dst src -- ) HOOK: %float>integer cpu ( dst src -- )
HOOK: %copy cpu ( dst src rep -- ) HOOK: %box-vector cpu ( dst src temp rep -- )
HOOK: %unbox-float cpu ( dst src -- ) HOOK: %unbox-vector cpu ( dst src rep -- )
HOOK: %broadcast-vector cpu ( dst src rep -- )
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
HOOK: %div-vector cpu ( dst src1 src2 rep -- )
HOOK: %min-vector cpu ( dst src1 src2 rep -- )
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
HOOK: %sqrt-vector cpu ( dst src rep -- )
HOOK: %horizontal-add-vector cpu ( dst src rep -- )
HOOK: %unbox-any-c-ptr cpu ( dst src temp -- ) HOOK: %unbox-any-c-ptr cpu ( dst src temp -- )
HOOK: %box-float cpu ( dst src temp -- )
HOOK: %box-alien cpu ( dst src temp -- ) HOOK: %box-alien cpu ( dst src temp -- )
HOOK: %box-displaced-alien cpu ( dst displacement base temp1 temp2 base-class -- ) HOOK: %box-displaced-alien cpu ( dst displacement base temp1 temp2 base-class -- )
@ -137,6 +189,7 @@ HOOK: %alien-signed-4 cpu ( dst src -- )
HOOK: %alien-cell cpu ( dst src -- ) HOOK: %alien-cell cpu ( dst src -- )
HOOK: %alien-float cpu ( dst src -- ) HOOK: %alien-float cpu ( dst src -- )
HOOK: %alien-double cpu ( dst src -- ) HOOK: %alien-double cpu ( dst src -- )
HOOK: %alien-vector cpu ( dst src rep -- )
HOOK: %set-alien-integer-1 cpu ( ptr value -- ) HOOK: %set-alien-integer-1 cpu ( ptr value -- )
HOOK: %set-alien-integer-2 cpu ( ptr value -- ) HOOK: %set-alien-integer-2 cpu ( ptr value -- )
@ -144,6 +197,7 @@ HOOK: %set-alien-integer-4 cpu ( ptr value -- )
HOOK: %set-alien-cell cpu ( ptr value -- ) HOOK: %set-alien-cell cpu ( ptr value -- )
HOOK: %set-alien-float cpu ( ptr value -- ) HOOK: %set-alien-float cpu ( ptr value -- )
HOOK: %set-alien-double cpu ( ptr value -- ) HOOK: %set-alien-double cpu ( ptr value -- )
HOOK: %set-alien-vector cpu ( ptr value rep -- )
HOOK: %alien-global cpu ( dst symbol library -- ) HOOK: %alien-global cpu ( dst symbol library -- )

View File

@ -4,7 +4,7 @@ USING: accessors assocs alien alien.c-types arrays strings
cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands
cpu.architecture kernel kernel.private math memory namespaces make cpu.architecture kernel kernel.private math memory namespaces make
sequences words system layouts combinators math.order fry locals sequences words system layouts combinators math.order fry locals
compiler.constants compiler.constants byte-arrays
compiler.cfg.registers compiler.cfg.registers
compiler.cfg.instructions compiler.cfg.instructions
compiler.cfg.intrinsics compiler.cfg.intrinsics
@ -130,6 +130,21 @@ M: x86 %max nip [ CMP ] [ CMOVL ] 2bi ;
M: x86 %not drop NOT ; M: x86 %not drop NOT ;
M: x86 %log2 BSR ; M: x86 %log2 BSR ;
GENERIC: copy-register* ( dst src rep -- )
M: int-rep copy-register* drop MOV ;
M: tagged-rep copy-register* drop MOV ;
M: single-float-rep copy-register* drop MOVSS ;
M: double-float-rep copy-register* drop MOVSD ;
M: 4float-array-rep copy-register* drop MOVUPS ;
M: 2double-array-rep copy-register* drop MOVUPD ;
M: vector-rep copy-register* drop MOVDQU ;
: copy-register ( dst src rep -- )
2over eq? [ 3drop ] [ copy-register* ] if ;
M: x86 %copy ( dst src rep -- ) copy-register ;
:: overflow-template ( label dst src1 src2 insn -- ) :: overflow-template ( label dst src1 src2 insn -- )
src1 src2 insn call src1 src2 insn call
label JO ; inline label JO ; inline
@ -211,24 +226,120 @@ M: x86 %min-float nip MINSD ;
M: x86 %max-float nip MAXSD ; M: x86 %max-float nip MAXSD ;
M: x86 %sqrt SQRTSD ; M: x86 %sqrt SQRTSD ;
M: x86 %single>double-float CVTSS2SD ;
M: x86 %double>single-float CVTSD2SS ;
M: x86 %integer>float CVTSI2SD ; M: x86 %integer>float CVTSI2SD ;
M: x86 %float>integer CVTTSD2SI ; M: x86 %float>integer CVTTSD2SI ;
GENERIC: copy-register* ( dst src rep -- )
M: int-rep copy-register* drop MOV ;
M: tagged-rep copy-register* drop MOV ;
M: single-float-rep copy-register* drop MOVSS ;
M: double-float-rep copy-register* drop MOVSD ;
: copy-register ( dst src rep -- )
2over eq? [ 3drop ] [ copy-register* ] if ;
M: x86 %copy ( dst src rep -- ) copy-register ;
M: x86 %unbox-float ( dst src -- ) M: x86 %unbox-float ( dst src -- )
float-offset [+] MOVSD ; float-offset [+] MOVSD ;
M:: x86 %box-float ( dst src temp -- )
dst 16 float temp %allot
dst float-offset [+] src MOVSD ;
M:: x86 %box-vector ( dst src rep temp -- )
dst rep rep-size 2 cells + byte-array temp %allot
16 tag-fixnum dst 1 byte-array tag-number %set-slot-imm
dst byte-array-offset [+]
src rep copy-register ;
M:: x86 %unbox-vector ( dst src rep -- )
dst src byte-array-offset [+]
rep copy-register ;
M: x86 %broadcast-vector ( dst src rep -- )
{
{ 4float-array-rep [ [ MOVAPS ] [ drop dup 0 SHUFPS ] 2bi ] }
{ 2double-array-rep [ [ MOVAPD ] [ drop dup 0 SHUFPD ] 2bi ] }
} case ;
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
rep {
{
4float-array-rep
[
dst src1 MOVSS
dst src2 UNPCKLPS
src3 src4 UNPCKLPS
dst src3 HEX: 44 SHUFPS
]
}
} case ;
M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
rep {
{
2double-array-rep
[
dst src1 MOVAPD
dst src2 0 SHUFPD
]
}
} case ;
M: x86 %add-vector ( dst src1 src2 rep -- )
{
{ 4float-array-rep [ ADDPS ] }
{ 2double-array-rep [ ADDPD ] }
{ 16char-array-rep [ PADDB ] }
{ 16uchar-array-rep [ PADDB ] }
{ 8short-array-rep [ PADDW ] }
{ 8ushort-array-rep [ PADDW ] }
{ 4int-array-rep [ PADDD ] }
{ 4uint-array-rep [ PADDD ] }
} case drop ;
M: x86 %sub-vector ( dst src1 src2 rep -- )
{
{ 4float-array-rep [ SUBPS ] }
{ 2double-array-rep [ SUBPD ] }
{ 16char-array-rep [ PSUBB ] }
{ 16uchar-array-rep [ PSUBB ] }
{ 8short-array-rep [ PSUBW ] }
{ 8ushort-array-rep [ PSUBW ] }
{ 4int-array-rep [ PSUBD ] }
{ 4uint-array-rep [ PSUBD ] }
} case drop ;
M: x86 %mul-vector ( dst src1 src2 rep -- )
{
{ 4float-array-rep [ MULPS ] }
{ 2double-array-rep [ MULPD ] }
{ 4int-array-rep [ PMULLW ] }
} case drop ;
M: x86 %div-vector ( dst src1 src2 rep -- )
{
{ 4float-array-rep [ DIVPS ] }
{ 2double-array-rep [ DIVPD ] }
} case drop ;
M: x86 %min-vector ( dst src1 src2 rep -- )
{
{ 4float-array-rep [ MINPS ] }
{ 2double-array-rep [ MINPD ] }
} case drop ;
M: x86 %max-vector ( dst src1 src2 rep -- )
{
{ 4float-array-rep [ MAXPS ] }
{ 2double-array-rep [ MAXPD ] }
} case drop ;
M: x86 %sqrt-vector ( dst src rep -- )
{
{ 4float-array-rep [ SQRTPS ] }
{ 2double-array-rep [ SQRTPD ] }
} case ;
M: x86 %horizontal-add-vector ( dst src rep -- )
{
{ 4float-array-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
{ 2double-array-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
} case ;
M:: x86 %unbox-any-c-ptr ( dst src temp -- ) M:: x86 %unbox-any-c-ptr ( dst src temp -- )
[ [
{ "is-byte-array" "end" "start" } [ define-label ] each { "is-byte-array" "end" "start" } [ define-label ] each
@ -255,10 +366,6 @@ M:: x86 %unbox-any-c-ptr ( dst src temp -- )
"end" resolve-label "end" resolve-label
] with-scope ; ] with-scope ;
M:: x86 %box-float ( dst src temp -- )
dst 16 float temp %allot
dst float-offset [+] src MOVSD ;
: alien@ ( reg n -- op ) cells alien tag-number - [+] ; : alien@ ( reg n -- op ) cells alien tag-number - [+] ;
:: %allot-alien ( dst displacement base temp -- ) :: %allot-alien ( dst displacement base temp -- )
@ -405,8 +512,9 @@ M: x86 %alien-signed-2 16 %alien-signed-getter ;
M: x86 %alien-signed-4 32 %alien-signed-getter ; M: x86 %alien-signed-4 32 %alien-signed-getter ;
M: x86 %alien-cell [] MOV ; M: x86 %alien-cell [] MOV ;
M: x86 %alien-float dupd [] MOVSS dup CVTSS2SD ; M: x86 %alien-float [] MOVSS ;
M: x86 %alien-double [] MOVSD ; M: x86 %alien-double [] MOVSD ;
M: x86 %alien-vector [ [] ] dip copy-register ;
:: %alien-integer-setter ( ptr value size -- ) :: %alien-integer-setter ( ptr value size -- )
value { ptr } size [| new-value | value { ptr } size [| new-value |
@ -418,8 +526,9 @@ M: x86 %set-alien-integer-1 8 %alien-integer-setter ;
M: x86 %set-alien-integer-2 16 %alien-integer-setter ; M: x86 %set-alien-integer-2 16 %alien-integer-setter ;
M: x86 %set-alien-integer-4 32 %alien-integer-setter ; M: x86 %set-alien-integer-4 32 %alien-integer-setter ;
M: x86 %set-alien-cell [ [] ] dip MOV ; M: x86 %set-alien-cell [ [] ] dip MOV ;
M: x86 %set-alien-float dup dup CVTSD2SS [ [] ] dip MOVSS ; M: x86 %set-alien-float [ [] ] dip MOVSS ;
M: x86 %set-alien-double [ [] ] dip MOVSD ; M: x86 %set-alien-double [ [] ] dip MOVSD ;
M: x86 %set-alien-vector [ [] ] 2dip copy-register ;
: shift-count? ( reg -- ? ) { ECX RCX } memq? ; : shift-count? ( reg -- ? ) { ECX RCX } memq? ;

View File

@ -0,0 +1,56 @@
IN: math.vectors.simd.alien.tests
USING: cpu.architecture math.vectors.simd accessors
math.vectors.simd.alien kernel classes.struct tools.test
compiler sequences byte-arrays alien math kernel.private
specialized-arrays.float ;
! Vector alien intrinsics
[ 4float-array{ 1 2 3 4 } ] [
[
4float-array{ 1 2 3 4 }
underlying>> 0 4float-array-rep alien-vector
] compile-call 4float-array boa
] unit-test
[ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [
16 [ 1 ] B{ } replicate-as 16 <byte-array>
[
0 [
{ byte-array c-ptr fixnum } declare
4float-array-rep set-alien-vector
] compile-call
] keep
] unit-test
[ float-array{ 1 2 3 4 } ] [
[
float-array{ 1 2 3 4 } underlying>>
float-array{ 4 3 2 1 } clone
[ underlying>> 0 4float-array-rep set-alien-vector ] keep
] compile-call
] unit-test
STRUCT: simd-struct
{ x 4float-array }
{ y 2double-array }
{ z 4double-array } ;
[ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
[ 4float-array{ 1 2 3 4 } 2double-array{ 2 1 } 4double-array{ 4 3 2 1 } ] [
simd-struct <struct>
4float-array{ 1 2 3 4 } >>x
2double-array{ 2 1 } >>y
4double-array{ 4 3 2 1 } >>z
[ x>> ] [ y>> ] [ z>> ] tri
] unit-test
[ 4float-array{ 1 2 3 4 } 2double-array{ 2 1 } 4double-array{ 4 3 2 1 } ] [
[
simd-struct <struct>
4float-array{ 1 2 3 4 } >>x
2double-array{ 2 1 } >>y
4double-array{ 4 3 2 1 } >>z
[ x>> ] [ y>> ] [ z>> ] tri
] compile-call
] unit-test

View File

@ -0,0 +1,51 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: alien accessors alien.c-types byte-arrays compiler.units
cpu.architecture libc locals kernel math math.vectors.simd
math.vectors.simd.private ;
IN: math.vectors.simd.alien
: alien-vector ( c-ptr n rep -- value )
! Inefficient version for when intrinsics are missing
[ swap <displaced-alien> ] dip rep-size memory>byte-array ;
: set-alien-vector ( value c-ptr n rep -- )
! Inefficient version for when intrinsics are missing
[ swap <displaced-alien> swap ] dip rep-size memcpy ;
:: define-simd-type ( class rep -- )
<c-type>
byte-array >>class
class >>boxed-class
[ rep alien-vector ] >>getter
[ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
16 >>size
8 >>align
rep >>rep
[ class boa ] >>boxer-quot
[ underlying>> ] >>unboxer-quot
class name>> typedef ;
: define-4double-array-type ( -- )
<c-type>
4double-array >>class
4double-array >>boxed-class
[
[ 2double-array-rep alien-vector ]
[ 16 + >fixnum 2double-array-rep alien-vector ] 2bi
4double-array boa
] >>getter
[
[ [ underlying1>> ] 2dip 2double-array-rep set-alien-vector ]
[ [ underlying2>> ] 2dip 16 + >fixnum 2double-array-rep set-alien-vector ]
3bi
] >>setter
32 >>size
8 >>align
2double-array-rep >>rep
"4double-array" typedef ;
[
4float-array 4float-array-rep define-simd-type
2double-array 2double-array-rep define-simd-type
define-4double-array-type
] with-compilation-unit

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,61 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien.c-types byte-arrays classes functors
kernel math parser prettyprint.custom sequences
sequences.private ;
IN: math.vectors.simd.functor
ERROR: bad-length got expected ;
FUNCTOR: define-simd-type ( T N -- )
A DEFINES-CLASS ${N}${T}-array
<A> DEFINES <${A}>
(A) DEFINES (${A})
>A DEFINES >${A}
A{ DEFINES ${A}{
NTH [ T dup c-type-getter-boxer array-accessor ]
SET-NTH [ T dup c-setter array-accessor ]
BYTES [ T heap-size N * ]
INITIAL [ BYTES <byte-array> ]
WHERE
TUPLE: A
{ underlying byte-array read-only initial: INITIAL } ;
: <A> ( -- simd-array ) BYTES <byte-array> A boa ; inline
: (A) ( -- simd-array ) BYTES (byte-array) A boa ; inline
M: A clone underlying>> clone \ A boa ; inline
M: A length drop N ; inline
M: A nth-unsafe underlying>> NTH call ; inline
M: A set-nth-unsafe underlying>> SET-NTH call ; inline
: >A ( seq -- simd-array ) \ A new clone-like ;
M: A like drop dup \ A instance? [ >A ] unless ; inline
M: A new-sequence drop dup N = [ drop (A) ] [ N bad-length ] if ; inline
M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ;
M: A byte-length underlying>> length ; inline
M: A pprint-delims drop \ A{ \ } ;
M: A >pprint-sequence ;
M: A pprint* pprint-object ;
SYNTAX: A{ \ } [ >A ] parse-literal ;
INSTANCE: A sequence
;FUNCTOR

View File

@ -0,0 +1,301 @@
IN: math.vectors.simd.tests
USING: math math.vectors.simd math.vectors.simd.private
math.vectors math.functions kernel.private compiler sequences
tools.test compiler.tree.debugger accessors kernel ;
[ 4float-array{ 0 0 0 0 } ] [ 4float-array new ] unit-test
[ V{ float } ] [ [ { 4float-array } declare norm-sq ] final-classes ] unit-test
[ V{ float } ] [ [ { 4float-array } declare norm ] final-classes ] unit-test
[ 4float-array{ 12 12 12 12 } ] [
12 [ 4float-array-with ] compile-call
] unit-test
[ 4float-array{ 1 2 3 4 } ] [
1 2 3 4 [ 4float-array-boa ] compile-call
] unit-test
[ 4float-array{ 11 22 33 44 } ] [
4float-array{ 1 2 3 4 } 4float-array{ 10 20 30 40 }
[ { 4float-array 4float-array } declare v+ ] compile-call
] unit-test
[ 4float-array{ -9 -18 -27 -36 } ] [
4float-array{ 1 2 3 4 } 4float-array{ 10 20 30 40 }
[ { 4float-array 4float-array } declare v- ] compile-call
] unit-test
[ 4float-array{ 10 40 90 160 } ] [
4float-array{ 1 2 3 4 } 4float-array{ 10 20 30 40 }
[ { 4float-array 4float-array } declare v* ] compile-call
] unit-test
[ 4float-array{ 10 100 1000 10000 } ] [
4float-array{ 100 2000 30000 400000 } 4float-array{ 10 20 30 40 }
[ { 4float-array 4float-array } declare v/ ] compile-call
] unit-test
[ 4float-array{ -10 -20 -30 -40 } ] [
4float-array{ -10 20 -30 40 } 4float-array{ 10 -20 30 -40 }
[ { 4float-array 4float-array } declare vmin ] compile-call
] unit-test
[ 4float-array{ 10 20 30 40 } ] [
4float-array{ -10 20 -30 40 } 4float-array{ 10 -20 30 -40 }
[ { 4float-array 4float-array } declare vmax ] compile-call
] unit-test
[ 10.0 ] [
4float-array{ 1 2 3 4 }
[ { 4float-array } declare sum ] compile-call
] unit-test
[ 13.0 ] [
4float-array{ 1 2 3 4 }
[ { 4float-array } declare sum 3.0 + ] compile-call
] unit-test
[ 8.0 ] [
4float-array{ 1 2 3 4 } 4float-array{ 2 0 2 0 }
[ { 4float-array 4float-array } declare v. ] compile-call
] unit-test
[ 4float-array{ 5 10 15 20 } ] [
5.0 4float-array{ 1 2 3 4 }
[ { float 4float-array } declare n*v ] compile-call
] unit-test
[ 4float-array{ 5 10 15 20 } ] [
4float-array{ 1 2 3 4 } 5.0
[ { float 4float-array } declare v*n ] compile-call
] unit-test
[ 4float-array{ 10 5 2 5 } ] [
10.0 4float-array{ 1 2 5 2 }
[ { float 4float-array } declare n/v ] compile-call
] unit-test
[ 4float-array{ 0.5 1 1.5 2 } ] [
4float-array{ 1 2 3 4 } 2
[ { float 4float-array } declare v/n ] compile-call
] unit-test
[ 4float-array{ 1 0 0 0 } ] [
4float-array{ 10 0 0 0 }
[ { 4float-array } declare normalize ] compile-call
] unit-test
[ 30.0 ] [
4float-array{ 1 2 3 4 }
[ { 4float-array } declare norm-sq ] compile-call
] unit-test
[ t ] [
4float-array{ 1 0 0 0 }
4float-array{ 0 1 0 0 }
[ { 4float-array 4float-array } declare distance ] compile-call
2 sqrt 1.0e-6 ~
] unit-test
[ 2double-array{ 12 12 } ] [
12 [ 2double-array-with ] compile-call
] unit-test
[ 2double-array{ 1 2 } ] [
1 2 [ 2double-array-boa ] compile-call
] unit-test
[ 2double-array{ 11 22 } ] [
2double-array{ 1 2 } 2double-array{ 10 20 }
[ { 2double-array 2double-array } declare v+ ] compile-call
] unit-test
[ 2double-array{ -9 -18 } ] [
2double-array{ 1 2 } 2double-array{ 10 20 }
[ { 2double-array 2double-array } declare v- ] compile-call
] unit-test
[ 2double-array{ 10 40 } ] [
2double-array{ 1 2 } 2double-array{ 10 20 }
[ { 2double-array 2double-array } declare v* ] compile-call
] unit-test
[ 2double-array{ 10 100 } ] [
2double-array{ 100 2000 } 2double-array{ 10 20 }
[ { 2double-array 2double-array } declare v/ ] compile-call
] unit-test
[ 2double-array{ -10 -20 } ] [
2double-array{ -10 20 } 2double-array{ 10 -20 }
[ { 2double-array 2double-array } declare vmin ] compile-call
] unit-test
[ 2double-array{ 10 20 } ] [
2double-array{ -10 20 } 2double-array{ 10 -20 }
[ { 2double-array 2double-array } declare vmax ] compile-call
] unit-test
[ 3.0 ] [
2double-array{ 1 2 }
[ { 2double-array } declare sum ] compile-call
] unit-test
[ 7.0 ] [
2double-array{ 1 2 }
[ { 2double-array } declare sum 4.0 + ] compile-call
] unit-test
[ 16.0 ] [
2double-array{ 1 2 } 2double-array{ 2 7 }
[ { 2double-array 2double-array } declare v. ] compile-call
] unit-test
[ 2double-array{ 5 10 } ] [
5.0 2double-array{ 1 2 }
[ { float 2double-array } declare n*v ] compile-call
] unit-test
[ 2double-array{ 5 10 } ] [
2double-array{ 1 2 } 5.0
[ { float 2double-array } declare v*n ] compile-call
] unit-test
[ 2double-array{ 10 5 } ] [
10.0 2double-array{ 1 2 }
[ { float 2double-array } declare n/v ] compile-call
] unit-test
[ 2double-array{ 0.5 1 } ] [
2double-array{ 1 2 } 2
[ { float 2double-array } declare v/n ] compile-call
] unit-test
[ 2double-array{ 0 0 } ] [ 2double-array new ] unit-test
[ 2double-array{ 1 0 } ] [
2double-array{ 10 0 }
[ { 2double-array } declare normalize ] compile-call
] unit-test
[ 5.0 ] [
2double-array{ 1 2 }
[ { 2double-array } declare norm-sq ] compile-call
] unit-test
[ t ] [
2double-array{ 1 0 }
2double-array{ 0 1 }
[ { 2double-array 2double-array } declare distance ] compile-call
2 sqrt 1.0e-6 ~
] unit-test
[ 4double-array{ 0 0 0 0 } ] [ 4double-array new ] unit-test
[ 4double-array{ 1 2 3 4 } ] [
1 2 3 4 4double-array-boa
] unit-test
[ 4double-array{ 1 1 1 1 } ] [
1 4double-array-with
] unit-test
[ 4double-array{ 0 1 2 3 } ] [
1 4double-array-with [ * ] map-index
] unit-test
[ V{ float } ] [ [ { 4double-array } declare norm-sq ] final-classes ] unit-test
[ V{ float } ] [ [ { 4double-array } declare norm ] final-classes ] unit-test
[ 4double-array{ 12 12 12 12 } ] [
12 [ 4double-array-with ] compile-call
] unit-test
[ 4double-array{ 1 2 3 4 } ] [
1 2 3 4 [ 4double-array-boa ] compile-call
] unit-test
[ 4double-array{ 11 22 33 44 } ] [
4double-array{ 1 2 3 4 } 4double-array{ 10 20 30 40 }
[ { 4double-array 4double-array } declare v+ ] compile-call
] unit-test
[ 4double-array{ -9 -18 -27 -36 } ] [
4double-array{ 1 2 3 4 } 4double-array{ 10 20 30 40 }
[ { 4double-array 4double-array } declare v- ] compile-call
] unit-test
[ 4double-array{ 10 40 90 160 } ] [
4double-array{ 1 2 3 4 } 4double-array{ 10 20 30 40 }
[ { 4double-array 4double-array } declare v* ] compile-call
] unit-test
[ 4double-array{ 10 100 1000 10000 } ] [
4double-array{ 100 2000 30000 400000 } 4double-array{ 10 20 30 40 }
[ { 4double-array 4double-array } declare v/ ] compile-call
] unit-test
[ 4double-array{ -10 -20 -30 -40 } ] [
4double-array{ -10 20 -30 40 } 4double-array{ 10 -20 30 -40 }
[ { 4double-array 4double-array } declare vmin ] compile-call
] unit-test
[ 4double-array{ 10 20 30 40 } ] [
4double-array{ -10 20 -30 40 } 4double-array{ 10 -20 30 -40 }
[ { 4double-array 4double-array } declare vmax ] compile-call
] unit-test
[ 10.0 ] [
4double-array{ 1 2 3 4 }
[ { 4double-array } declare sum ] compile-call
] unit-test
[ 13.0 ] [
4double-array{ 1 2 3 4 }
[ { 4double-array } declare sum 3.0 + ] compile-call
] unit-test
[ 8.0 ] [
4double-array{ 1 2 3 4 } 4double-array{ 2 0 2 0 }
[ { 4double-array 4double-array } declare v. ] compile-call
] unit-test
[ 4double-array{ 5 10 15 20 } ] [
5.0 4double-array{ 1 2 3 4 }
[ { float 4double-array } declare n*v ] compile-call
] unit-test
[ 4double-array{ 5 10 15 20 } ] [
4double-array{ 1 2 3 4 } 5.0
[ { float 4double-array } declare v*n ] compile-call
] unit-test
[ 4double-array{ 10 5 2 5 } ] [
10.0 4double-array{ 1 2 5 2 }
[ { float 4double-array } declare n/v ] compile-call
] unit-test
[ 4double-array{ 0.5 1 1.5 2 } ] [
4double-array{ 1 2 3 4 } 2
[ { float 4double-array } declare v/n ] compile-call
] unit-test
[ 4double-array{ 1 0 0 0 } ] [
4double-array{ 10 0 0 0 }
[ { 4double-array } declare normalize ] compile-call
] unit-test
[ 30.0 ] [
4double-array{ 1 2 3 4 }
[ { 4double-array } declare norm-sq ] compile-call
] unit-test
[ t ] [
4double-array{ 1 0 0 0 }
4double-array{ 0 1 0 0 }
[ { 4double-array 4double-array } declare distance ] compile-call
2 sqrt 1.0e-6 ~
] unit-test

View File

@ -0,0 +1,214 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien.c-types byte-arrays cpu.architecture
generalizations kernel math math.functions math.vectors
math.vectors.simd.functor math.vectors.specialization parser
prettyprint.custom sequences sequences.private
specialized-arrays.double locals assocs literals ;
IN: math.vectors.simd
<PRIVATE
ERROR: bad-simd-call ;
: (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-vmin) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-vmax) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-vsqrt) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-sum) ( v1 rep -- v2 ) bad-simd-call ;
: (simd-broadcast) ( x rep -- v ) bad-simd-call ;
: (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
: (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ;
: assert-positive ( x -- y ) ;
PRIVATE>
<<
DEFER: 4float-array
DEFER: 2double-array
"double" 2 define-simd-type
"float" 4 define-simd-type
>>
! Constructors
: 4float-array-with ( x -- simd-array )
>float 4float-array-rep (simd-broadcast) 4float-array boa ; inline
: 4float-array-boa ( a b c d -- simd-array )
[ >float ] 4 napply 4float-array-rep (simd-gather-4) 4float-array boa ; inline
: 2double-array-with ( x -- simd-array )
>float 2double-array-rep (simd-broadcast) 2double-array boa ; inline
: 2double-array-boa ( a b -- simd-array )
[ >float ] bi@ 2double-array-rep (simd-gather-2) 2double-array boa ; inline
<PRIVATE
: 4float-array-vv->v-op ( v1 v2 quot -- v3 )
[ [ underlying>> ] bi@ 4float-array-rep ] dip call 4float-array boa ; inline
: 4float-array-v->n-op ( v1 quot -- v2 )
[ underlying>> 4float-array-rep ] dip call ; inline
: 2double-array-vv->v-op ( v1 v2 quot -- v3 )
[ [ underlying>> ] bi@ 2double-array-rep ] dip call 2double-array boa ; inline
: 2double-array-v->n-op ( v1 quot -- v2 )
[ underlying>> 2double-array-rep ] dip call ; inline
PRIVATE>
<<
<PRIVATE
:: simd-vector-words ( class ctor elt-type assoc -- )
class elt-type assoc {
{ vneg [ [ dup v- ] keep v- ] }
{ v. [ v* sum ] }
{ n+v [ [ ctor execute ] dip v+ ] }
{ v+n [ ctor execute v+ ] }
{ n-v [ [ ctor execute ] dip v- ] }
{ v-n [ ctor execute v- ] }
{ n*v [ [ ctor execute ] dip v* ] }
{ v*n [ ctor execute v* ] }
{ n/v [ [ ctor execute ] dip v/ ] }
{ v/n [ ctor execute v/ ] }
{ norm-sq [ dup v. assert-positive ] }
{ norm [ norm-sq sqrt ] }
{ normalize [ dup norm v/n ] }
{ distance [ v- norm ] }
} assoc-union
specialize-vector-words ;
PRIVATE>
\ 4float-array \ 4float-array-with float H{
{ v+ [ [ (simd-v+) ] 4float-array-vv->v-op ] }
{ v- [ [ (simd-v-) ] 4float-array-vv->v-op ] }
{ v* [ [ (simd-v*) ] 4float-array-vv->v-op ] }
{ v/ [ [ (simd-v/) ] 4float-array-vv->v-op ] }
{ vmin [ [ (simd-vmin) ] 4float-array-vv->v-op ] }
{ vmax [ [ (simd-vmax) ] 4float-array-vv->v-op ] }
{ sum [ [ (simd-sum) ] 4float-array-v->n-op ] }
} simd-vector-words
\ 2double-array \ 2double-array-with float H{
{ v+ [ [ (simd-v+) ] 2double-array-vv->v-op ] }
{ v- [ [ (simd-v-) ] 2double-array-vv->v-op ] }
{ v* [ [ (simd-v*) ] 2double-array-vv->v-op ] }
{ v/ [ [ (simd-v/) ] 2double-array-vv->v-op ] }
{ vmin [ [ (simd-vmin) ] 2double-array-vv->v-op ] }
{ vmax [ [ (simd-vmax) ] 2double-array-vv->v-op ] }
{ sum [ [ (simd-sum) ] 2double-array-v->n-op ] }
} simd-vector-words
>>
! Synthesize 256-bit vectors from a pair of 128-bit vectors
! Functorize this later so that we can do it for integers, etc
TUPLE: 4double-array
{ underlying1 byte-array initial: $[ 16 <byte-array> ] read-only }
{ underlying2 byte-array initial: $[ 16 <byte-array> ] read-only } ;
: <4double-array> ( -- simd-array )
16 <byte-array> 16 <byte-array> 4double-array boa ; inline
: (4double-array) ( -- simd-array )
16 (byte-array) 16 (byte-array) 4double-array boa ; inline
M: 4double-array clone
[ underlying1>> clone ] [ underlying2>> clone ] bi
4double-array boa ; inline
M: 4double-array length drop 4 ; inline
<PRIVATE
: 4double-array-deref ( n seq -- n' seq' )
over 2 < [ underlying1>> ] [ [ 2 - ] dip underlying2>> ] if
2 swap double-array boa ; inline
PRIVATE>
M: 4double-array nth-unsafe
4double-array-deref nth-unsafe ; inline
M: 4double-array set-nth-unsafe
4double-array-deref set-nth-unsafe ; inline
: >4double-array ( seq -- simd-array )
4double-array new clone-like ;
M: 4double-array like
drop dup 4double-array? [ >4double-array ] unless ; inline
M: 4double-array new-sequence
drop dup 4 = [ drop (4double-array) ] [ 4 bad-length ] if ; inline
M: 4double-array equal?
over 4double-array? [ sequence= ] [ 2drop f ] if ;
M: 4double-array byte-length drop 32 ; inline
SYNTAX: 4double-array{
\ } [ >4double-array ] parse-literal ;
M: 4double-array pprint-delims
drop \ 4double-array{ \ } ;
M: 4double-array >pprint-sequence ;
M: 4double-array pprint* pprint-object ;
INSTANCE: 4double-array sequence
: 4double-array-with ( x -- simd-array )
dup [ >float 2double-array-rep (simd-broadcast) ] bi@
4double-array boa ; inline
: 4double-array-boa ( a b c d -- simd-array )
[ >float ] 4 napply [ 2double-array-rep (simd-gather-2) ] 2bi@
4double-array boa ; inline
! SIMD operations on 4double-arrays
<PRIVATE
: 4double-array-vv->v-op ( v1 v2 quot -- v3 )
[ [ [ underlying1>> ] bi@ 2double-array-rep ] dip call ]
[ [ [ underlying2>> ] bi@ 2double-array-rep ] dip call ] 3bi
4double-array boa ; inline
: 4double-array-v->n-op ( v1 quot scalar-quot -- v2 )
[
[ [ underlying1>> 2double-array-rep ] dip call ]
[ [ underlying2>> 2double-array-rep ] dip call ] 2bi
] dip call ; inline
PRIVATE>
<<
\ 4double-array \ 4double-array-with float H{
{ v+ [ [ (simd-v+) ] 4double-array-vv->v-op ] }
{ v- [ [ (simd-v-) ] 4double-array-vv->v-op ] }
{ v* [ [ (simd-v*) ] 4double-array-vv->v-op ] }
{ v/ [ [ (simd-v/) ] 4double-array-vv->v-op ] }
{ vmin [ [ (simd-vmin) ] 4double-array-vv->v-op ] }
{ vmax [ [ (simd-vmax) ] 4double-array-vv->v-op ] }
{ sum [ [ (simd-sum) ] [ + ] 4double-array-v->n-op ] }
} simd-vector-words
>>
USE: vocabs.loader
"math.vectors.simd.alien" require

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: words kernel make sequences effects kernel.private accessors USING: words kernel make sequences effects kernel.private accessors
combinators math math.intervals math.vectors namespaces assocs fry combinators math math.intervals math.vectors namespaces assocs fry
splitting classes.algebra generalizations splitting classes.algebra generalizations locals
compiler.tree.propagation.info ; compiler.tree.propagation.info ;
IN: math.vectors.specialization IN: math.vectors.specialization
@ -67,6 +67,7 @@ H{
{ vmin { +vector+ +vector+ -> +vector+ } } { vmin { +vector+ +vector+ -> +vector+ } }
{ vneg { +vector+ -> +vector+ } } { vneg { +vector+ -> +vector+ } }
{ vtruncate { +vector+ -> +vector+ } } { vtruncate { +vector+ -> +vector+ } }
{ sum { +vector+ -> +scalar+ } }
} }
SYMBOL: specializations SYMBOL: specializations
@ -82,19 +83,23 @@ specializations [ vector-words keys [ V{ } clone ] H{ } map>assoc ] initialize
: outputs ( schema -- seq ) { -> } split second ; : outputs ( schema -- seq ) { -> } split second ;
: specialize-vector-word ( word array-type elt-type -- word' ) : loop-vector-op ( word array-type elt-type -- word' )
pick word-schema pick word-schema
[ inputs (specialize-vector-word) ] [ inputs (specialize-vector-word) ]
[ outputs record-output-signature ] 3bi ; [ outputs record-output-signature ] 3bi ;
: input-signature ( word -- signature ) def>> first ; :: specialize-vector-word ( word array-type elt-type simd -- word/quot' )
word simd key? [ word simd at ] [ word array-type elt-type loop-vector-op ] if ;
: specialize-vector-words ( array-type elt-type -- ) :: input-signature ( word array-type elt-type -- signature )
[ vector-words keys ] 2dip array-type elt-type word word-schema inputs signature-for-schema ;
'[
[ _ _ specialize-vector-word ] keep :: specialize-vector-words ( array-type elt-type simd -- )
[ dup input-signature ] dip vector-words keys [
add-specialization [ array-type elt-type simd specialize-vector-word ]
[ array-type elt-type input-signature ]
[ ]
tri add-specialization
] each ; ] each ;
: find-specialization ( classes word -- word/f ) : find-specialization ( classes word -- word/f )