Merge branch 'master' of git://factorcode.org/git/factor
commit
0a48b946b6
|
@ -538,6 +538,9 @@ M: ulonglong-2-rep rep-component-type drop ulonglong ;
|
|||
M: float-4-rep rep-component-type drop float ;
|
||||
M: double-2-rep rep-component-type drop double ;
|
||||
|
||||
: rep-length ( rep -- n )
|
||||
16 swap rep-component-type heap-size /i ; foldable
|
||||
|
||||
: (unsigned-interval) ( bytes -- from to ) [ 0 ] dip 8 * 2^ 1 - ; foldable
|
||||
: unsigned-interval ( c-type -- from to ) heap-size (unsigned-interval) ; foldable
|
||||
: (signed-interval) ( bytes -- from to ) 8 * 1 - 2^ [ neg ] [ 1 - ] bi ; foldable
|
||||
|
@ -550,4 +553,6 @@ M: double-2-rep rep-component-type drop double ;
|
|||
{ [ dup { uchar ushort uint ulong ulonglong } member-eq? ] [ unsigned-interval ] }
|
||||
} cond ; foldable
|
||||
|
||||
: c-type-clamp ( value c-type -- value' ) c-type-interval clamp ; inline
|
||||
: c-type-clamp ( value c-type -- value' )
|
||||
dup { float double } member-eq?
|
||||
[ drop ] [ c-type-interval clamp ] if ; inline
|
||||
|
|
|
@ -189,9 +189,6 @@ M: struct-c-type c-struct? drop t ;
|
|||
\ cleave [ ] 2sequence
|
||||
\ output>array [ ] 2sequence ;
|
||||
|
||||
: define-inline-method ( class generic quot -- )
|
||||
[ create-method-in ] dip [ define ] [ drop make-inline ] 2bi ;
|
||||
|
||||
: (define-struct-slot-values-method) ( class -- )
|
||||
[ \ struct-slot-values ] [ struct-slot-values-quot ] bi
|
||||
define-inline-method ;
|
||||
|
|
|
@ -45,6 +45,12 @@ SYMBOL: loops
|
|||
end-stack-analysis
|
||||
] with-scope ; inline
|
||||
|
||||
: with-dummy-cfg-builder ( node quot -- )
|
||||
[
|
||||
[ V{ } clone procedures ] 2dip
|
||||
'[ _ t t [ _ call( node -- ) ] with-cfg-builder ] with-variable
|
||||
] { } make drop ;
|
||||
|
||||
GENERIC: emit-node ( node -- )
|
||||
|
||||
: emit-nodes ( nodes -- )
|
||||
|
|
|
@ -408,13 +408,13 @@ use: src1 src2
|
|||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##horizontal-add-vector
|
||||
def: dst/scalar-rep
|
||||
use: src
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##horizontal-sub-vector
|
||||
def: dst/scalar-rep
|
||||
use: src
|
||||
def: dst
|
||||
use: src1 src2
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##horizontal-shl-vector-imm
|
||||
|
|
|
@ -7,7 +7,6 @@ compiler.cfg.intrinsics.alien
|
|||
compiler.cfg.intrinsics.allot
|
||||
compiler.cfg.intrinsics.fixnum
|
||||
compiler.cfg.intrinsics.float
|
||||
compiler.cfg.intrinsics.simd
|
||||
compiler.cfg.intrinsics.slots
|
||||
compiler.cfg.intrinsics.misc
|
||||
compiler.cfg.comparisons ;
|
||||
|
@ -23,7 +22,6 @@ QUALIFIED: classes.tuple.private
|
|||
QUALIFIED: math.private
|
||||
QUALIFIED: math.integers.private
|
||||
QUALIFIED: math.floats.private
|
||||
QUALIFIED: math.vectors.simd.intrinsics
|
||||
QUALIFIED: math.libm
|
||||
IN: compiler.cfg.intrinsics
|
||||
|
||||
|
@ -152,64 +150,5 @@ IN: compiler.cfg.intrinsics
|
|||
{ math.integers.private:fixnum-log2 [ drop emit-fixnum-log2 ] }
|
||||
} enable-intrinsics ;
|
||||
|
||||
: enable-simd ( -- )
|
||||
{
|
||||
{ math.vectors.simd.intrinsics:assert-positive [ drop ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vs+) [ [ ^^saturated-add-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vs-) [ [ ^^saturated-sub-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vneg) [ [ generate-neg-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vs*) [ [ ^^saturated-mul-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vmin) [ [ generate-min-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vmax) [ [ generate-max-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v.) [ [ ^^dot-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vabs) [ [ generate-abs-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vbitandn) [ [ ^^andn-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vbitnot) [ [ generate-not-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vand) [ [ ^^and-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vandn) [ [ ^^andn-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vor) [ [ ^^or-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vnot) [ [ generate-not-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v<=) [ [ cc<= generate-compare-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v<) [ [ cc< generate-compare-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v=) [ [ cc= generate-compare-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v>) [ [ cc> generate-compare-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-v>=) [ [ cc>= generate-compare-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vunordered?) [ [ cc/<>= generate-compare-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vany?) [ [ vcc-any ^^test-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vall?) [ [ vcc-all ^^test-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vnone?) [ [ vcc-none ^^test-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vlshift) [ [ ^^shl-vector-imm ] [ ^^shl-vector ] emit-shift-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vrshift) [ [ ^^shr-vector-imm ] [ ^^shr-vector ] emit-shift-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-hlshift) [ [ ^^horizontal-shl-vector-imm ] emit-shift-vector-imm-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-hrshift) [ [ ^^horizontal-shr-vector-imm ] emit-shift-vector-imm-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-with) [ [ ^^with-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vshuffle-elements) [ emit-shuffle-vector ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vshuffle-bytes) [ emit-shuffle-vector-var ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-(vmerge-head)) [ [ ^^merge-vector-head ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-(vmerge-tail)) [ [ ^^merge-vector-tail ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-(v>float)) [ [ ^^integer>float-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-(v>integer)) [ [ ^^float>integer-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-(vpack-signed)) [ [ ^^signed-pack-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-(vpack-unsigned)) [ [ ^^unsigned-pack-vector ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-(vunpack-head)) [ [ generate-unpack-vector-head ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-(vunpack-tail)) [ [ generate-unpack-vector-tail ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
|
||||
{ math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] }
|
||||
} enable-intrinsics ;
|
||||
|
||||
: emit-intrinsic ( node word -- )
|
||||
"intrinsic" word-prop call( node -- ) ;
|
||||
|
|
|
@ -0,0 +1,206 @@
|
|||
! (c)2009 Joe Groff bsd license
|
||||
USING: accessors arrays assocs classes combinators
|
||||
combinators.short-circuit compiler.cfg.builder.blocks
|
||||
compiler.cfg.registers compiler.cfg.stacks
|
||||
compiler.cfg.stacks.local compiler.tree.propagation.info
|
||||
cpu.architecture effects fry generalizations
|
||||
kernel locals macros math namespaces quotations sequences
|
||||
splitting stack-checker words ;
|
||||
IN: compiler.cfg.intrinsics.simd.backend
|
||||
|
||||
! Selection of implementation based on available CPU instructions
|
||||
|
||||
: can-has? ( quot -- ? )
|
||||
[ t \ can-has? ] dip '[ @ drop \ can-has? get ] with-variable ; inline
|
||||
|
||||
: can-has-rep? ( rep reps -- )
|
||||
member? \ can-has? [ and ] change ; inline
|
||||
|
||||
GENERIC: create-can-has ( word -- word' )
|
||||
|
||||
PREDICATE: hat-word < word
|
||||
{
|
||||
[ name>> { [ "^" head? ] [ "##" head? ] } 1|| ]
|
||||
[ vocabulary>> { "compiler.cfg.intrinsics.simd" "compiler.cfg.hats" } member? ]
|
||||
} 1&& ;
|
||||
|
||||
PREDICATE: vector-op-word < hat-word
|
||||
name>> "-vector" swap subseq? ;
|
||||
|
||||
: reps-word ( word -- word' )
|
||||
name>> "^^" ?head drop "##" ?head drop
|
||||
"%" "-reps" surround "cpu.architecture" lookup ;
|
||||
|
||||
SYMBOL: blub
|
||||
|
||||
:: can-has-^^-quot ( word def effect -- quot )
|
||||
effect in>> { "rep" } split1 [ length ] bi@ 1 +
|
||||
word reps-word 1quotation
|
||||
effect out>> length blub <array> >quotation
|
||||
'[ [ _ ndrop ] _ ndip @ can-has-rep? @ ] ;
|
||||
|
||||
:: can-has-^-quot ( word def effect -- quot )
|
||||
def create-can-has first ;
|
||||
|
||||
: map-concat-like ( seq quot -- seq' )
|
||||
'[ _ map ] [ concat-as ] bi ; inline
|
||||
|
||||
M: object create-can-has 1quotation ;
|
||||
|
||||
M: array create-can-has
|
||||
[ create-can-has ] map-concat-like 1quotation ;
|
||||
M: callable create-can-has
|
||||
[ create-can-has ] map-concat-like 1quotation ;
|
||||
|
||||
: (can-has-word) ( word -- word' )
|
||||
name>> "can-has-" prepend "compiler.cfg.intrinsics.simd.backend" lookup ;
|
||||
|
||||
: (can-has-quot) ( word -- quot )
|
||||
[ ] [ def>> ] [ stack-effect ] tri {
|
||||
{ [ pick name>> "^^" head? ] [ can-has-^^-quot ] }
|
||||
{ [ pick name>> "##" head? ] [ can-has-^^-quot ] }
|
||||
{ [ pick name>> "^" head? ] [ can-has-^-quot ] }
|
||||
} cond ;
|
||||
|
||||
: (can-has-nop-quot) ( word -- quot )
|
||||
stack-effect in>> length '[ _ ndrop blub ] ;
|
||||
|
||||
DEFER: can-has-words
|
||||
|
||||
M: word create-can-has
|
||||
can-has-words ?at drop 1quotation ;
|
||||
|
||||
M: hat-word create-can-has
|
||||
(can-has-nop-quot) ;
|
||||
|
||||
M: vector-op-word create-can-has
|
||||
dup (can-has-word) [ 1quotation ] [ (can-has-quot) ] ?if ;
|
||||
|
||||
GENERIC# >can-has-cond 2 ( quot #pick #dup -- quotpair )
|
||||
M:: callable >can-has-cond ( quot #pick #dup -- quotpair )
|
||||
#dup quot create-can-has '[ _ ndup @ can-has? ] quot 2array ;
|
||||
|
||||
M:: pair >can-has-cond ( pair #pick #dup -- quotpair )
|
||||
pair first2 :> ( class quot )
|
||||
#pick class #dup quot create-can-has
|
||||
'[ _ npick _ instance? [ _ ndup @ can-has? ] dip and ]
|
||||
quot 2array ;
|
||||
|
||||
MACRO: v-vector-op ( trials -- )
|
||||
[ 1 2 >can-has-cond ] map '[ _ cond ] ;
|
||||
MACRO: vl-vector-op ( trials -- )
|
||||
[ 1 3 >can-has-cond ] map '[ _ cond ] ;
|
||||
MACRO: vv-vector-op ( trials -- )
|
||||
[ 1 3 >can-has-cond ] map '[ _ cond ] ;
|
||||
MACRO: vv-cc-vector-op ( trials -- )
|
||||
[ 2 4 >can-has-cond ] map '[ _ cond ] ;
|
||||
MACRO: vvvv-vector-op ( trials -- )
|
||||
[ 1 5 >can-has-cond ] map '[ _ cond ] ;
|
||||
|
||||
! Special-case conditional instructions
|
||||
|
||||
: can-has-^(compare-vector) ( src1 src2 rep cc -- dst )
|
||||
[ 2drop ] 2dip %compare-vector-reps member?
|
||||
\ can-has? [ and ] change
|
||||
blub ;
|
||||
|
||||
: can-has-^^test-vector ( src rep vcc -- dst )
|
||||
[ drop ] 2dip drop %test-vector-reps member?
|
||||
\ can-has? [ and ] change
|
||||
blub ;
|
||||
|
||||
MACRO: can-has-case ( cases -- )
|
||||
dup first second infer in>> length 1 +
|
||||
'[ _ ndrop f ] suffix '[ _ case ] ;
|
||||
|
||||
GENERIC# >can-has-trial 1 ( obj #pick -- quot )
|
||||
|
||||
M: callable >can-has-trial
|
||||
drop '[ _ can-has? ] ;
|
||||
M: pair >can-has-trial
|
||||
swap first2 dup infer in>> length
|
||||
'[ _ npick _ instance? [ _ can-has? ] [ _ ndrop blub ] if ] ;
|
||||
|
||||
MACRO: can-has-vector-op ( trials #pick #dup -- )
|
||||
[ '[ _ >can-has-trial ] map ] dip '[ _ _ n|| \ can-has? [ and ] change blub ] ;
|
||||
|
||||
: can-has-v-vector-op ( trials -- ? )
|
||||
1 2 can-has-vector-op ; inline
|
||||
: can-has-vv-vector-op ( trials -- ? )
|
||||
1 3 can-has-vector-op ; inline
|
||||
: can-has-vv-cc-vector-op ( trials -- ? )
|
||||
2 4 can-has-vector-op ; inline
|
||||
: can-has-vvvv-vector-op ( trials -- ? )
|
||||
1 5 can-has-vector-op ; inline
|
||||
|
||||
CONSTANT: can-has-words
|
||||
H{
|
||||
{ case can-has-case }
|
||||
{ v-vector-op can-has-v-vector-op }
|
||||
{ vl-vector-op can-has-vv-vector-op }
|
||||
{ vv-vector-op can-has-vv-vector-op }
|
||||
{ vv-cc-vector-op can-has-vv-cc-vector-op }
|
||||
{ vvvv-vector-op can-has-vvvv-vector-op }
|
||||
}
|
||||
|
||||
! Intrinsic code emission
|
||||
|
||||
MACRO: check-elements ( quots -- )
|
||||
[ length '[ _ firstn ] ]
|
||||
[ '[ _ spread ] ]
|
||||
[ length 1 - \ and <repetition> [ ] like ]
|
||||
tri 3append ;
|
||||
|
||||
ERROR: bad-simd-intrinsic node ;
|
||||
|
||||
MACRO: if-literals-match ( quots -- )
|
||||
[ length ] [ ] [ length ] tri
|
||||
! n quots n
|
||||
'[
|
||||
! node quot
|
||||
[
|
||||
dup node-input-infos
|
||||
_ tail-slice* [ literal>> ] map
|
||||
dup _ check-elements
|
||||
] dip
|
||||
swap [
|
||||
! node literals quot
|
||||
[ _ firstn ] dip call
|
||||
drop
|
||||
] [ 2drop bad-simd-intrinsic ] if
|
||||
] ;
|
||||
|
||||
CONSTANT: [unary] [ ds-drop ds-pop ]
|
||||
CONSTANT: [unary/param] [ [ -2 inc-d ds-pop ] dip ]
|
||||
CONSTANT: [binary] [ ds-drop 2inputs ]
|
||||
CONSTANT: [quaternary]
|
||||
[
|
||||
ds-drop
|
||||
D 3 peek-loc
|
||||
D 2 peek-loc
|
||||
D 1 peek-loc
|
||||
D 0 peek-loc
|
||||
-4 inc-d
|
||||
]
|
||||
|
||||
:: [emit-vector-op] ( trials params-quot op-quot literal-preds -- quot )
|
||||
params-quot trials op-quot literal-preds
|
||||
'[ [ _ dip _ @ ds-push ] _ if-literals-match ] ;
|
||||
|
||||
MACRO: emit-v-vector-op ( trials -- )
|
||||
[unary] [ v-vector-op ] { [ representation? ] } [emit-vector-op] ;
|
||||
MACRO: emit-vl-vector-op ( trials literal-pred -- )
|
||||
[ [unary/param] [ vl-vector-op ] { [ representation? ] } ] dip prefix [emit-vector-op] ;
|
||||
MACRO: emit-vv-vector-op ( trials -- )
|
||||
[binary] [ vv-vector-op ] { [ representation? ] } [emit-vector-op] ;
|
||||
MACRO: emit-vvvv-vector-op ( trials -- )
|
||||
[quaternary] [ vvvv-vector-op ] { [ representation? ] } [emit-vector-op] ;
|
||||
|
||||
MACRO:: emit-vv-or-vl-vector-op ( var-trials imm-trials literal-pred -- )
|
||||
literal-pred imm-trials literal-pred var-trials
|
||||
'[
|
||||
dup node-input-infos 2 tail-slice* first literal>> @
|
||||
[ _ _ emit-vl-vector-op ]
|
||||
[ _ emit-vv-vector-op ] if
|
||||
] ;
|
||||
|
|
@ -0,0 +1,544 @@
|
|||
! (c)2009 Joe Groff bsd license
|
||||
USING: arrays assocs biassocs byte-arrays byte-arrays.hex
|
||||
classes compiler.cfg compiler.cfg.comparisons compiler.cfg.instructions
|
||||
compiler.cfg.intrinsics.simd compiler.cfg.intrinsics.simd.backend
|
||||
compiler.cfg.registers compiler.cfg.stacks.height
|
||||
compiler.cfg.stacks.local compiler.tree compiler.tree.propagation.info
|
||||
cpu.architecture fry hashtables kernel locals make namespaces sequences
|
||||
system tools.test words ;
|
||||
IN: compiler.cfg.intrinsics.simd.tests
|
||||
|
||||
:: test-node ( rep -- node )
|
||||
T{ #call
|
||||
{ in-d { 1 2 3 4 } }
|
||||
{ out-d { 5 } }
|
||||
{ info H{
|
||||
{ 1 T{ value-info { class byte-array } } }
|
||||
{ 2 T{ value-info { class byte-array } } }
|
||||
{ 3 T{ value-info { class byte-array } } }
|
||||
{ 4 T{ value-info { class word } { literal? t } { literal rep } } }
|
||||
{ 5 T{ value-info { class byte-array } } }
|
||||
} }
|
||||
} ;
|
||||
|
||||
:: test-node-literal ( lit rep -- node )
|
||||
lit class :> lit-class
|
||||
T{ #call
|
||||
{ in-d { 1 2 3 4 } }
|
||||
{ out-d { 5 } }
|
||||
{ info H{
|
||||
{ 1 T{ value-info { class byte-array } } }
|
||||
{ 2 T{ value-info { class byte-array } } }
|
||||
{ 3 T{ value-info { class lit-class } { literal? t } { literal lit } } }
|
||||
{ 4 T{ value-info { class word } { literal? t } { literal rep } } }
|
||||
{ 5 T{ value-info { class byte-array } } }
|
||||
} }
|
||||
} ;
|
||||
|
||||
: test-node-nonliteral-rep ( -- node )
|
||||
T{ #call
|
||||
{ in-d { 1 2 3 4 } }
|
||||
{ out-d { 5 } }
|
||||
{ info H{
|
||||
{ 1 T{ value-info { class byte-array } } }
|
||||
{ 2 T{ value-info { class byte-array } } }
|
||||
{ 3 T{ value-info { class byte-array } } }
|
||||
{ 4 T{ value-info { class object } } }
|
||||
{ 5 T{ value-info { class byte-array } } }
|
||||
} }
|
||||
} ;
|
||||
|
||||
: test-compiler-env ( -- x )
|
||||
H{ } clone
|
||||
T{ basic-block { id 0 } }
|
||||
[ \ basic-block pick set-at ]
|
||||
[ 0 swap associate \ ds-heights pick set-at ]
|
||||
[ 0 swap associate \ rs-heights pick set-at ] tri
|
||||
T{ current-height { d 0 } { r 0 } { emit-d 0 } { emit-r 0 } } \ current-height pick set-at
|
||||
H{ } clone \ local-peek-set pick set-at
|
||||
H{ } clone \ replace-mapping pick set-at
|
||||
H{ } <biassoc> \ locs>vregs pick set-at
|
||||
H{ } clone \ peek-sets pick set-at
|
||||
H{ } clone \ replace-sets pick set-at
|
||||
H{ } clone \ kill-sets pick set-at ;
|
||||
|
||||
: make-classes ( quot -- seq )
|
||||
{ } make [ class ] map ; inline
|
||||
|
||||
: test-emit ( cpu rep quot -- node )
|
||||
[
|
||||
[ new \ cpu ] 2dip '[
|
||||
test-compiler-env [ _ test-node @ ] bind
|
||||
] with-variable
|
||||
] make-classes ; inline
|
||||
|
||||
: test-emit-literal ( cpu lit rep quot -- node )
|
||||
[
|
||||
[ new \ cpu ] 3dip '[
|
||||
test-compiler-env [ _ _ test-node-literal @ ] bind
|
||||
] with-variable
|
||||
] make-classes ; inline
|
||||
|
||||
: test-emit-nonliteral-rep ( cpu quot -- node )
|
||||
[
|
||||
[ new \ cpu ] dip '[
|
||||
test-compiler-env [ test-node-nonliteral-rep @ ] bind
|
||||
] with-variable
|
||||
] make-classes ; inline
|
||||
|
||||
CONSTANT: signed-reps
|
||||
{ char-16-rep short-8-rep int-4-rep longlong-2-rep float-4-rep double-2-rep }
|
||||
CONSTANT: all-reps
|
||||
{
|
||||
char-16-rep short-8-rep int-4-rep longlong-2-rep float-4-rep double-2-rep
|
||||
uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep
|
||||
}
|
||||
|
||||
TUPLE: scalar-cpu ;
|
||||
|
||||
TUPLE: simple-ops-cpu ;
|
||||
M: simple-ops-cpu %zero-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %fill-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %add-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %sub-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %mul-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %div-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %andn-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %and-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %or-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %xor-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %merge-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %sqrt-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %test-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %signed-pack-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %unsigned-pack-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %gather-vector-2-reps { longlong-2-rep ulonglong-2-rep double-2-rep } ;
|
||||
M: simple-ops-cpu %gather-vector-4-reps { int-4-rep uint-4-rep float-4-rep } ;
|
||||
M: simple-ops-cpu %alien-vector-reps all-reps ;
|
||||
|
||||
! v+
|
||||
[ { ##add-vector } ]
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-v+ ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! v-
|
||||
[ { ##sub-vector } ]
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-v- ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vneg
|
||||
[ { ##load-constant ##sub-vector } ]
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vneg ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##zero-vector ##sub-vector } ]
|
||||
[ simple-ops-cpu int-4-rep [ emit-simd-vneg ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! v*
|
||||
[ { ##mul-vector } ]
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-v* ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! v/
|
||||
[ { ##div-vector } ]
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-v/ ] test-emit ]
|
||||
unit-test
|
||||
|
||||
TUPLE: addsub-cpu < simple-ops-cpu ;
|
||||
M: addsub-cpu %add-sub-vector-reps { int-4-rep float-4-rep } ;
|
||||
|
||||
! v+-
|
||||
[ { ##add-sub-vector } ]
|
||||
[ addsub-cpu float-4-rep [ emit-simd-v+- ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##load-constant ##xor-vector ##add-vector } ]
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-v+- ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##load-constant ##xor-vector ##sub-vector ##add-vector } ]
|
||||
[ simple-ops-cpu int-4-rep [ emit-simd-v+- ] test-emit ]
|
||||
unit-test
|
||||
|
||||
TUPLE: saturating-cpu < simple-ops-cpu ;
|
||||
M: saturating-cpu %saturated-add-vector-reps { int-4-rep } ;
|
||||
M: saturating-cpu %saturated-sub-vector-reps { int-4-rep } ;
|
||||
M: saturating-cpu %saturated-mul-vector-reps { int-4-rep } ;
|
||||
|
||||
! vs+
|
||||
[ { ##add-vector } ]
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vs+ ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##add-vector } ]
|
||||
[ saturating-cpu float-4-rep [ emit-simd-vs+ ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##saturated-add-vector } ]
|
||||
[ saturating-cpu int-4-rep [ emit-simd-vs+ ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vs-
|
||||
[ { ##sub-vector } ]
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vs- ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##sub-vector } ]
|
||||
[ saturating-cpu float-4-rep [ emit-simd-vs- ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##saturated-sub-vector } ]
|
||||
[ saturating-cpu int-4-rep [ emit-simd-vs- ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vs*
|
||||
[ { ##mul-vector } ]
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vs* ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##mul-vector } ]
|
||||
[ saturating-cpu float-4-rep [ emit-simd-vs* ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##saturated-mul-vector } ]
|
||||
[ saturating-cpu int-4-rep [ emit-simd-vs* ] test-emit ]
|
||||
unit-test
|
||||
|
||||
TUPLE: minmax-cpu < simple-ops-cpu ;
|
||||
M: minmax-cpu %min-vector-reps signed-reps ;
|
||||
M: minmax-cpu %max-vector-reps signed-reps ;
|
||||
M: minmax-cpu %compare-vector-reps { cc= cc/= } member? [ signed-reps ] [ { } ] if ;
|
||||
M: minmax-cpu %compare-vector-ccs nip f 2array 1array f ;
|
||||
|
||||
TUPLE: compare-cpu < simple-ops-cpu ;
|
||||
M: compare-cpu %compare-vector-reps drop signed-reps ;
|
||||
M: compare-cpu %compare-vector-ccs nip f 2array 1array f ;
|
||||
|
||||
! vmin
|
||||
[ { ##min-vector } ]
|
||||
[ minmax-cpu float-4-rep [ emit-simd-vmin ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##compare-vector ##and-vector ##andn-vector ##or-vector } ]
|
||||
[ compare-cpu float-4-rep [ emit-simd-vmin ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vmax
|
||||
[ { ##max-vector } ]
|
||||
[ minmax-cpu float-4-rep [ emit-simd-vmax ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##compare-vector ##and-vector ##andn-vector ##or-vector } ]
|
||||
[ compare-cpu float-4-rep [ emit-simd-vmax ] test-emit ]
|
||||
unit-test
|
||||
|
||||
TUPLE: dot-cpu < simple-ops-cpu ;
|
||||
M: dot-cpu %dot-vector-reps { float-4-rep } ;
|
||||
|
||||
TUPLE: horizontal-cpu < simple-ops-cpu ;
|
||||
M: horizontal-cpu %horizontal-add-vector-reps signed-reps ;
|
||||
M: horizontal-cpu %unpack-vector-head-reps signed-reps ;
|
||||
M: horizontal-cpu %unpack-vector-tail-reps signed-reps ;
|
||||
|
||||
! v.
|
||||
[ { ##dot-vector } ]
|
||||
[ dot-cpu float-4-rep [ emit-simd-v. ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##mul-vector ##horizontal-add-vector ##horizontal-add-vector ##vector>scalar } ]
|
||||
[ horizontal-cpu float-4-rep [ emit-simd-v. ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ {
|
||||
##mul-vector
|
||||
##merge-vector-head ##merge-vector-tail ##add-vector
|
||||
##merge-vector-head ##merge-vector-tail ##add-vector
|
||||
##vector>scalar
|
||||
} ]
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-v. ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vsqrt
|
||||
[ { ##sqrt-vector } ]
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vsqrt ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! sum
|
||||
[ { ##horizontal-add-vector ##vector>scalar } ]
|
||||
[ horizontal-cpu double-2-rep [ emit-simd-sum ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##horizontal-add-vector ##horizontal-add-vector ##vector>scalar } ]
|
||||
[ horizontal-cpu float-4-rep [ emit-simd-sum ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ {
|
||||
##unpack-vector-head ##unpack-vector-tail ##add-vector
|
||||
##horizontal-add-vector
|
||||
##vector>scalar
|
||||
} ]
|
||||
[ horizontal-cpu int-4-rep [ emit-simd-sum ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ {
|
||||
##unpack-vector-head ##unpack-vector-tail ##add-vector
|
||||
##horizontal-add-vector ##horizontal-add-vector
|
||||
##vector>scalar
|
||||
} ]
|
||||
[ horizontal-cpu short-8-rep [ emit-simd-sum ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ {
|
||||
##unpack-vector-head ##unpack-vector-tail ##add-vector
|
||||
##horizontal-add-vector ##horizontal-add-vector ##horizontal-add-vector
|
||||
##vector>scalar
|
||||
} ]
|
||||
[ horizontal-cpu char-16-rep [ emit-simd-sum ] test-emit ]
|
||||
unit-test
|
||||
|
||||
TUPLE: abs-cpu < simple-ops-cpu ;
|
||||
M: abs-cpu %abs-vector-reps signed-reps ;
|
||||
|
||||
! vabs
|
||||
[ { } ]
|
||||
[ simple-ops-cpu uint-4-rep [ emit-simd-vabs ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##abs-vector } ]
|
||||
[ abs-cpu float-4-rep [ emit-simd-vabs ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##load-constant ##andn-vector } ]
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vabs ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##zero-vector ##sub-vector ##compare-vector ##and-vector ##andn-vector ##or-vector } ]
|
||||
[ compare-cpu int-4-rep [ emit-simd-vabs ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vand
|
||||
[ { ##and-vector } ]
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vand ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vandn
|
||||
[ { ##andn-vector } ]
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vandn ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vor
|
||||
[ { ##or-vector } ]
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vor ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vxor
|
||||
[ { ##xor-vector } ]
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vxor ] test-emit ]
|
||||
unit-test
|
||||
|
||||
TUPLE: not-cpu < simple-ops-cpu ;
|
||||
M: not-cpu %not-vector-reps signed-reps ;
|
||||
|
||||
! vnot
|
||||
[ { ##not-vector } ]
|
||||
[ not-cpu float-4-rep [ emit-simd-vnot ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##fill-vector ##xor-vector } ]
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vnot ] test-emit ]
|
||||
unit-test
|
||||
|
||||
TUPLE: shift-cpu < simple-ops-cpu ;
|
||||
M: shift-cpu %shl-vector-reps signed-reps ;
|
||||
M: shift-cpu %shr-vector-reps signed-reps ;
|
||||
|
||||
TUPLE: shift-imm-cpu < simple-ops-cpu ;
|
||||
M: shift-imm-cpu %shl-vector-imm-reps signed-reps ;
|
||||
M: shift-imm-cpu %shr-vector-imm-reps signed-reps ;
|
||||
|
||||
TUPLE: horizontal-shift-cpu < simple-ops-cpu ;
|
||||
M: horizontal-shift-cpu %horizontal-shl-vector-imm-reps signed-reps ;
|
||||
M: horizontal-shift-cpu %horizontal-shr-vector-imm-reps signed-reps ;
|
||||
|
||||
! vlshift
|
||||
[ { ##shl-vector-imm } ]
|
||||
[ shift-imm-cpu 2 int-4-rep [ emit-simd-vlshift ] test-emit-literal ]
|
||||
unit-test
|
||||
|
||||
[ { ##shl-vector } ]
|
||||
[ shift-cpu int-4-rep [ emit-simd-vlshift ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vrshift
|
||||
[ { ##shr-vector-imm } ]
|
||||
[ shift-imm-cpu 2 int-4-rep [ emit-simd-vrshift ] test-emit-literal ]
|
||||
unit-test
|
||||
|
||||
[ { ##shr-vector } ]
|
||||
[ shift-cpu int-4-rep [ emit-simd-vrshift ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! hlshift
|
||||
[ { ##horizontal-shl-vector-imm } ]
|
||||
[ horizontal-shift-cpu 2 int-4-rep [ emit-simd-hlshift ] test-emit-literal ]
|
||||
unit-test
|
||||
|
||||
! hrshift
|
||||
[ { ##horizontal-shr-vector-imm } ]
|
||||
[ horizontal-shift-cpu 2 int-4-rep [ emit-simd-hrshift ] test-emit-literal ]
|
||||
unit-test
|
||||
|
||||
TUPLE: shuffle-imm-cpu < simple-ops-cpu ;
|
||||
M: shuffle-imm-cpu %shuffle-vector-imm-reps signed-reps ;
|
||||
|
||||
TUPLE: shuffle-cpu < simple-ops-cpu ;
|
||||
M: shuffle-cpu %shuffle-vector-reps signed-reps ;
|
||||
|
||||
! vshuffle-elements
|
||||
[ { ##load-constant ##shuffle-vector } ]
|
||||
[ shuffle-cpu { 0 1 2 3 } int-4-rep [ emit-simd-vshuffle-elements ] test-emit-literal ]
|
||||
unit-test
|
||||
|
||||
[ { ##shuffle-vector-imm } ]
|
||||
[ shuffle-imm-cpu { 0 1 2 3 } int-4-rep [ emit-simd-vshuffle-elements ] test-emit-literal ]
|
||||
unit-test
|
||||
|
||||
! vshuffle-bytes
|
||||
[ { ##shuffle-vector } ]
|
||||
[ shuffle-cpu int-4-rep [ emit-simd-vshuffle-bytes ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vmerge-head
|
||||
[ { ##merge-vector-head } ]
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vmerge-head ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vmerge-tail
|
||||
[ { ##merge-vector-tail } ]
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vmerge-tail ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! v<= etc.
|
||||
[ { ##compare-vector } ]
|
||||
[ compare-cpu int-4-rep [ emit-simd-v<= ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##min-vector ##compare-vector } ]
|
||||
[ minmax-cpu int-4-rep [ emit-simd-v<= ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##load-constant ##xor-vector ##xor-vector ##compare-vector } ]
|
||||
[ compare-cpu uint-4-rep [ emit-simd-v<= ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vany? etc.
|
||||
[ { ##test-vector } ]
|
||||
[ simple-ops-cpu int-4-rep [ emit-simd-vany? ] test-emit ]
|
||||
unit-test
|
||||
|
||||
TUPLE: convert-cpu < simple-ops-cpu ;
|
||||
M: convert-cpu %integer>float-vector-reps { int-4-rep } ;
|
||||
M: convert-cpu %float>integer-vector-reps { float-4-rep } ;
|
||||
|
||||
! v>float
|
||||
[ { } ]
|
||||
[ convert-cpu float-4-rep [ emit-simd-v>float ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##integer>float-vector } ]
|
||||
[ convert-cpu int-4-rep [ emit-simd-v>float ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! v>integer
|
||||
[ { } ]
|
||||
[ convert-cpu int-4-rep [ emit-simd-v>integer ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##float>integer-vector } ]
|
||||
[ convert-cpu float-4-rep [ emit-simd-v>integer ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vpack-signed
|
||||
[ { ##signed-pack-vector } ]
|
||||
[ simple-ops-cpu int-4-rep [ emit-simd-vpack-signed ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vpack-unsigned
|
||||
[ { ##unsigned-pack-vector } ]
|
||||
[ simple-ops-cpu int-4-rep [ emit-simd-vpack-unsigned ] test-emit ]
|
||||
unit-test
|
||||
|
||||
TUPLE: unpack-head-cpu < simple-ops-cpu ;
|
||||
M: unpack-head-cpu %unpack-vector-head-reps all-reps ;
|
||||
TUPLE: unpack-cpu < unpack-head-cpu ;
|
||||
M: unpack-cpu %unpack-vector-tail-reps all-reps ;
|
||||
|
||||
! vunpack-head
|
||||
[ { ##unpack-vector-head } ]
|
||||
[ unpack-head-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##zero-vector ##merge-vector-head } ]
|
||||
[ simple-ops-cpu uint-4-rep [ emit-simd-vunpack-head ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##merge-vector-head ##shr-vector-imm } ]
|
||||
[ shift-imm-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##zero-vector ##compare-vector ##merge-vector-head } ]
|
||||
[ compare-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vunpack-tail
|
||||
[ { ##unpack-vector-tail } ]
|
||||
[ unpack-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##tail>head-vector ##unpack-vector-head } ]
|
||||
[ unpack-head-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##zero-vector ##merge-vector-tail } ]
|
||||
[ simple-ops-cpu uint-4-rep [ emit-simd-vunpack-tail ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##merge-vector-tail ##shr-vector-imm } ]
|
||||
[ shift-imm-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
|
||||
unit-test
|
||||
|
||||
[ { ##zero-vector ##compare-vector ##merge-vector-tail } ]
|
||||
[ compare-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! with
|
||||
[ { ##scalar>vector ##shuffle-vector-imm } ]
|
||||
[ shuffle-imm-cpu int-4-rep [ emit-simd-with ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! gather-2
|
||||
[ { ##gather-vector-2 } ]
|
||||
[ simple-ops-cpu longlong-2-rep [ emit-simd-gather-2 ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! gather-4
|
||||
[ { ##gather-vector-4 } ]
|
||||
[ simple-ops-cpu int-4-rep [ emit-simd-gather-4 ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! select
|
||||
[ { ##shuffle-vector-imm ##vector>scalar } ]
|
||||
[ shuffle-imm-cpu 1 int-4-rep [ emit-simd-select ] test-emit-literal ]
|
||||
unit-test
|
||||
|
||||
! test with nonliteral/invalid reps
|
||||
[ simple-ops-cpu [ emit-simd-v+ ] test-emit-nonliteral-rep ]
|
||||
[ bad-simd-intrinsic? ] must-fail-with
|
||||
|
||||
[ simple-ops-cpu f [ emit-simd-v+ ] test-emit ]
|
||||
[ bad-simd-intrinsic? ] must-fail-with
|
||||
|
||||
[ simple-ops-cpu 3 [ emit-simd-v+ ] test-emit ]
|
||||
[ bad-simd-intrinsic? ] must-fail-with
|
||||
|
|
@ -1,189 +1,26 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! Copyright (C) 2009 Slava Pestov, Joe Groff.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors alien byte-arrays fry classes.algebra
|
||||
cpu.architecture kernel math sequences math.vectors
|
||||
math.vectors.simd.intrinsics macros generalizations combinators
|
||||
combinators.short-circuit arrays locals
|
||||
compiler.tree.propagation.info compiler.cfg.builder.blocks
|
||||
USING: accessors alien alien.c-types byte-arrays fry
|
||||
classes.algebra cpu.architecture kernel math sequences
|
||||
math.vectors math.vectors.simd.intrinsics
|
||||
macros generalizations combinators combinators.short-circuit
|
||||
arrays locals compiler.tree.propagation.info
|
||||
compiler.cfg.builder.blocks
|
||||
compiler.cfg.comparisons
|
||||
compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
|
||||
compiler.cfg.instructions compiler.cfg.registers
|
||||
compiler.cfg.intrinsics
|
||||
compiler.cfg.intrinsics.alien
|
||||
compiler.cfg.intrinsics.simd.backend
|
||||
specialized-arrays ;
|
||||
FROM: alien.c-types => heap-size uchar ushort uint ulonglong float double ;
|
||||
SPECIALIZED-ARRAYS: uchar ushort uint ulonglong float double ;
|
||||
FROM: alien.c-types => heap-size char short int longlong float double ;
|
||||
SPECIALIZED-ARRAYS: char uchar short ushort int uint longlong ulonglong float double ;
|
||||
IN: compiler.cfg.intrinsics.simd
|
||||
|
||||
MACRO: check-elements ( quots -- )
|
||||
[ length '[ _ firstn ] ]
|
||||
[ '[ _ spread ] ]
|
||||
[ length 1 - \ and <repetition> [ ] like ]
|
||||
tri 3append ;
|
||||
|
||||
MACRO: if-literals-match ( quots -- )
|
||||
[ length ] [ ] [ length ] tri
|
||||
! n quots n
|
||||
'[
|
||||
! node quot
|
||||
[
|
||||
dup node-input-infos
|
||||
_ tail-slice* [ literal>> ] map
|
||||
dup _ check-elements
|
||||
] dip
|
||||
swap [
|
||||
! node literals quot
|
||||
[ _ firstn ] dip call
|
||||
drop
|
||||
] [ 2drop emit-primitive ] if
|
||||
] ;
|
||||
|
||||
: emit-vector-op ( node quot: ( rep -- ) -- )
|
||||
{ [ representation? ] } if-literals-match ; inline
|
||||
|
||||
: [binary] ( quot -- quot' )
|
||||
'[ [ ds-drop 2inputs ] dip @ ds-push ] ; inline
|
||||
|
||||
: emit-binary-vector-op ( node quot -- )
|
||||
[binary] emit-vector-op ; inline
|
||||
|
||||
: [unary] ( quot -- quot' )
|
||||
'[ [ ds-drop ds-pop ] dip @ ds-push ] ; inline
|
||||
|
||||
: emit-unary-vector-op ( node quot -- )
|
||||
[unary] emit-vector-op ; inline
|
||||
|
||||
: [unary/param] ( quot -- quot' )
|
||||
'[ [ -2 inc-d ds-pop ] 2dip @ ds-push ] ; inline
|
||||
|
||||
: emit-shift-vector-imm-op ( node quot -- )
|
||||
[unary/param]
|
||||
{ [ integer? ] [ representation? ] } if-literals-match ; inline
|
||||
|
||||
:: emit-shift-vector-op ( node imm-quot var-quot -- )
|
||||
node node-input-infos 2 tail-slice* first literal>> integer?
|
||||
[ node imm-quot emit-shift-vector-imm-op ]
|
||||
[ node var-quot emit-binary-vector-op ] if ; inline
|
||||
|
||||
: emit-gather-vector-2 ( node -- )
|
||||
[ ^^gather-vector-2 ] emit-binary-vector-op ;
|
||||
|
||||
: emit-gather-vector-4 ( node -- )
|
||||
[
|
||||
ds-drop
|
||||
[
|
||||
D 3 peek-loc
|
||||
D 2 peek-loc
|
||||
D 1 peek-loc
|
||||
D 0 peek-loc
|
||||
-4 inc-d
|
||||
] dip
|
||||
^^gather-vector-4
|
||||
ds-push
|
||||
] emit-vector-op ;
|
||||
|
||||
: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
|
||||
|
||||
: >variable-shuffle ( shuffle rep -- shuffle' )
|
||||
rep-component-type heap-size
|
||||
[ dup <repetition> >byte-array ]
|
||||
[ iota >byte-array ] bi
|
||||
'[ _ n*v _ v+ ] map concat ;
|
||||
|
||||
: generate-shuffle-vector-imm ( src shuffle rep -- dst )
|
||||
dup %shuffle-vector-imm-reps member?
|
||||
[ ^^shuffle-vector-imm ]
|
||||
[
|
||||
[ >variable-shuffle ^^load-constant ] keep
|
||||
^^shuffle-vector
|
||||
] if ;
|
||||
|
||||
: emit-shuffle-vector-imm ( node -- )
|
||||
! Pad the permutation with zeroes if it's too short, since we
|
||||
! can't throw an error at this point.
|
||||
[ [ rep-components 0 pad-tail ] keep generate-shuffle-vector-imm ] [unary/param]
|
||||
{ [ shuffle? ] [ representation? ] } if-literals-match ;
|
||||
|
||||
: emit-shuffle-vector-var ( node -- )
|
||||
[ ^^shuffle-vector ] [binary]
|
||||
{ [ %shuffle-vector-reps member? ] } if-literals-match ;
|
||||
|
||||
: emit-shuffle-vector ( node -- )
|
||||
dup node-input-infos {
|
||||
[ length 3 = ]
|
||||
[ first class>> byte-array class<= ]
|
||||
[ second class>> byte-array class<= ]
|
||||
[ third literal>> representation? ]
|
||||
} 1&& [ emit-shuffle-vector-var ] [ emit-shuffle-vector-imm ] if ;
|
||||
|
||||
: ^^broadcast-vector ( src n rep -- dst )
|
||||
[ rep-components swap <array> ] keep
|
||||
generate-shuffle-vector-imm ;
|
||||
|
||||
: emit-broadcast-vector ( node -- )
|
||||
[ ^^broadcast-vector ] [unary/param]
|
||||
{ [ integer? ] [ representation? ] } if-literals-match ;
|
||||
|
||||
: ^^with-vector ( src rep -- dst )
|
||||
[ ^^scalar>vector ] keep [ 0 ] dip ^^broadcast-vector ;
|
||||
|
||||
: ^^select-vector ( src n rep -- dst )
|
||||
[ ^^broadcast-vector ] keep ^^vector>scalar ;
|
||||
|
||||
: emit-select-vector ( node -- )
|
||||
[ ^^select-vector ] [unary/param]
|
||||
{ [ integer? ] [ representation? ] } if-literals-match ; inline
|
||||
|
||||
: emit-alien-vector-op ( node quot: ( rep -- ) -- )
|
||||
{ [ %alien-vector-reps member? ] } if-literals-match ; inline
|
||||
|
||||
: emit-alien-vector ( node -- )
|
||||
dup [
|
||||
'[
|
||||
ds-drop prepare-alien-getter
|
||||
_ ^^alien-vector ds-push
|
||||
]
|
||||
[ inline-alien-getter? ] inline-alien
|
||||
] with emit-alien-vector-op ;
|
||||
|
||||
: emit-set-alien-vector ( node -- )
|
||||
dup [
|
||||
'[
|
||||
ds-drop prepare-alien-setter ds-pop
|
||||
_ ##set-alien-vector
|
||||
]
|
||||
[ byte-array inline-alien-setter? ]
|
||||
inline-alien
|
||||
] with emit-alien-vector-op ;
|
||||
|
||||
: generate-not-vector ( src rep -- dst )
|
||||
dup %not-vector-reps member?
|
||||
[ ^^not-vector ]
|
||||
[ [ ^^fill-vector ] [ ^^xor-vector ] bi ] if ;
|
||||
|
||||
:: ((generate-compare-vector)) ( src1 src2 rep {cc,swap} -- dst )
|
||||
{cc,swap} first2 :> ( cc swap? )
|
||||
swap?
|
||||
[ src2 src1 rep cc ^^compare-vector ]
|
||||
[ src1 src2 rep cc ^^compare-vector ] if ;
|
||||
|
||||
:: (generate-compare-vector) ( src1 src2 rep orig-cc -- dst )
|
||||
rep orig-cc %compare-vector-ccs :> ( ccs not? )
|
||||
|
||||
ccs empty?
|
||||
[ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ]
|
||||
[
|
||||
ccs unclip :> ( rest-ccs first-cc )
|
||||
src1 src2 rep first-cc ((generate-compare-vector)) :> first-dst
|
||||
|
||||
rest-ccs first-dst
|
||||
[ [ src1 src2 rep ] dip ((generate-compare-vector)) rep ^^or-vector ]
|
||||
reduce
|
||||
|
||||
not? [ rep generate-not-vector ] when
|
||||
] if ;
|
||||
! compound vector ops
|
||||
|
||||
: sign-bit-mask ( rep -- byte-array )
|
||||
unsign-rep {
|
||||
signed-rep {
|
||||
{ char-16-rep [ uchar-array{
|
||||
HEX: 80 HEX: 80 HEX: 80 HEX: 80
|
||||
HEX: 80 HEX: 80 HEX: 80 HEX: 80
|
||||
|
@ -204,150 +41,584 @@ MACRO: if-literals-match ( quots -- )
|
|||
} underlying>> ] }
|
||||
} case ;
|
||||
|
||||
:: (generate-minmax-compare-vector) ( src1 src2 rep orig-cc -- dst )
|
||||
orig-cc order-cc {
|
||||
{ cc< [ src1 src2 rep ^^max-vector src1 rep cc/= (generate-compare-vector) ] }
|
||||
{ cc<= [ src1 src2 rep ^^min-vector src1 rep cc= (generate-compare-vector) ] }
|
||||
{ cc> [ src1 src2 rep ^^min-vector src1 rep cc/= (generate-compare-vector) ] }
|
||||
{ cc>= [ src1 src2 rep ^^max-vector src1 rep cc= (generate-compare-vector) ] }
|
||||
} case ;
|
||||
|
||||
:: generate-compare-vector ( src1 src2 rep orig-cc -- dst )
|
||||
: ^load-neg-zero-vector ( rep -- dst )
|
||||
{
|
||||
{
|
||||
[ rep orig-cc %compare-vector-reps member? ]
|
||||
[ src1 src2 rep orig-cc (generate-compare-vector) ]
|
||||
}
|
||||
{
|
||||
[ rep %min-vector-reps member? ]
|
||||
[ src1 src2 rep orig-cc (generate-minmax-compare-vector) ]
|
||||
}
|
||||
{
|
||||
[ rep unsign-rep orig-cc %compare-vector-reps member? ]
|
||||
[
|
||||
rep sign-bit-mask ^^load-constant :> sign-bits
|
||||
src1 sign-bits rep ^^xor-vector
|
||||
src2 sign-bits rep ^^xor-vector
|
||||
rep unsign-rep orig-cc (generate-compare-vector)
|
||||
]
|
||||
}
|
||||
} cond ;
|
||||
|
||||
:: generate-unpack-vector-head ( src rep -- dst )
|
||||
{
|
||||
{
|
||||
[ rep %unpack-vector-head-reps member? ]
|
||||
[ src rep ^^unpack-vector-head ]
|
||||
}
|
||||
{
|
||||
[ rep unsigned-int-vector-rep? ]
|
||||
[
|
||||
rep ^^zero-vector :> zero
|
||||
src zero rep ^^merge-vector-head
|
||||
]
|
||||
}
|
||||
{
|
||||
[ rep widen-vector-rep %shr-vector-imm-reps member? ]
|
||||
[
|
||||
src src rep ^^merge-vector-head
|
||||
rep rep-component-type
|
||||
heap-size 8 * rep widen-vector-rep ^^shr-vector-imm
|
||||
]
|
||||
}
|
||||
[
|
||||
rep ^^zero-vector :> zero
|
||||
zero src rep cc> ^^compare-vector :> sign
|
||||
src sign rep ^^merge-vector-head
|
||||
]
|
||||
} cond ;
|
||||
|
||||
:: generate-unpack-vector-tail ( src rep -- dst )
|
||||
{
|
||||
{
|
||||
[ rep %unpack-vector-tail-reps member? ]
|
||||
[ src rep ^^unpack-vector-tail ]
|
||||
}
|
||||
{
|
||||
[ rep %unpack-vector-head-reps member? ]
|
||||
[
|
||||
src rep ^^tail>head-vector :> tail
|
||||
tail rep ^^unpack-vector-head
|
||||
]
|
||||
}
|
||||
{
|
||||
[ rep unsigned-int-vector-rep? ]
|
||||
[
|
||||
rep ^^zero-vector :> zero
|
||||
src zero rep ^^merge-vector-tail
|
||||
]
|
||||
}
|
||||
{
|
||||
[ rep widen-vector-rep %shr-vector-imm-reps member? ]
|
||||
[
|
||||
src src rep ^^merge-vector-tail
|
||||
rep rep-component-type
|
||||
heap-size 8 * rep widen-vector-rep ^^shr-vector-imm
|
||||
]
|
||||
}
|
||||
[
|
||||
rep ^^zero-vector :> zero
|
||||
zero src rep cc> ^^compare-vector :> sign
|
||||
src sign rep ^^merge-vector-tail
|
||||
]
|
||||
} cond ;
|
||||
|
||||
:: generate-load-neg-zero-vector ( rep -- dst )
|
||||
rep {
|
||||
{ float-4-rep [ float-array{ -0.0 -0.0 -0.0 -0.0 } underlying>> ^^load-constant ] }
|
||||
{ double-2-rep [ double-array{ -0.0 -0.0 } underlying>> ^^load-constant ] }
|
||||
[ drop rep ^^zero-vector ]
|
||||
} case ;
|
||||
|
||||
:: generate-neg-vector ( src rep -- dst )
|
||||
rep generate-load-neg-zero-vector
|
||||
src rep ^^sub-vector ;
|
||||
: ^load-add-sub-vector ( rep -- dst )
|
||||
signed-rep {
|
||||
{ float-4-rep [ float-array{ -0.0 0.0 -0.0 0.0 } underlying>> ^^load-constant ] }
|
||||
{ double-2-rep [ double-array{ -0.0 0.0 } underlying>> ^^load-constant ] }
|
||||
{ char-16-rep [ char-array{ -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 } underlying>> ^^load-constant ] }
|
||||
{ short-8-rep [ short-array{ -1 0 -1 0 -1 0 -1 0 } underlying>> ^^load-constant ] }
|
||||
{ int-4-rep [ int-array{ -1 0 -1 0 } underlying>> ^^load-constant ] }
|
||||
{ longlong-2-rep [ longlong-array{ -1 0 } underlying>> ^^load-constant ] }
|
||||
} case ;
|
||||
|
||||
:: generate-blend-vector ( mask true false rep -- dst )
|
||||
mask true rep ^^and-vector
|
||||
: >variable-shuffle ( shuffle rep -- shuffle' )
|
||||
rep-component-type heap-size
|
||||
[ dup <repetition> >byte-array ]
|
||||
[ iota >byte-array ] bi
|
||||
'[ _ n*v _ v+ ] map concat ;
|
||||
|
||||
: ^load-immediate-shuffle ( shuffle rep -- dst )
|
||||
>variable-shuffle ^^load-constant ;
|
||||
|
||||
:: ^blend-vector ( mask true false rep -- dst )
|
||||
true mask rep ^^and-vector
|
||||
mask false rep ^^andn-vector
|
||||
rep ^^or-vector ;
|
||||
|
||||
:: generate-abs-vector ( src rep -- dst )
|
||||
: ^not-vector ( src rep -- dst )
|
||||
{
|
||||
{
|
||||
[ rep unsigned-int-vector-rep? ]
|
||||
[ src ]
|
||||
}
|
||||
{
|
||||
[ rep %abs-vector-reps member? ]
|
||||
[ src rep ^^abs-vector ]
|
||||
}
|
||||
{
|
||||
[ rep float-vector-rep? ]
|
||||
[
|
||||
rep generate-load-neg-zero-vector
|
||||
src rep ^^andn-vector
|
||||
]
|
||||
}
|
||||
[
|
||||
[ ^^not-vector ]
|
||||
[ [ ^^fill-vector ] [ ^^xor-vector ] bi ]
|
||||
} v-vector-op ;
|
||||
|
||||
:: ^((compare-vector)) ( src1 src2 rep {cc,swap} -- dst )
|
||||
{cc,swap} first2 :> ( cc swap? )
|
||||
swap?
|
||||
[ src2 src1 rep cc ^^compare-vector ]
|
||||
[ src1 src2 rep cc ^^compare-vector ] if ;
|
||||
|
||||
:: ^(compare-vector) ( src1 src2 rep orig-cc -- dst )
|
||||
rep orig-cc %compare-vector-ccs :> ( ccs not? )
|
||||
|
||||
ccs empty?
|
||||
[ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ]
|
||||
[
|
||||
ccs unclip :> ( rest-ccs first-cc )
|
||||
src1 src2 rep first-cc ^((compare-vector)) :> first-dst
|
||||
|
||||
rest-ccs first-dst
|
||||
[ [ src1 src2 rep ] dip ^((compare-vector)) rep ^^or-vector ]
|
||||
reduce
|
||||
|
||||
not? [ rep ^not-vector ] when
|
||||
] if ;
|
||||
|
||||
:: ^minmax-compare-vector ( src1 src2 rep cc -- dst )
|
||||
cc order-cc {
|
||||
{ cc< [ src1 src2 rep ^^max-vector src1 rep cc/= ^(compare-vector) ] }
|
||||
{ cc<= [ src1 src2 rep ^^min-vector src1 rep cc= ^(compare-vector) ] }
|
||||
{ cc> [ src1 src2 rep ^^min-vector src1 rep cc/= ^(compare-vector) ] }
|
||||
{ cc>= [ src1 src2 rep ^^max-vector src1 rep cc= ^(compare-vector) ] }
|
||||
} case ;
|
||||
|
||||
: ^compare-vector ( src1 src2 rep cc -- dst )
|
||||
{
|
||||
[ ^(compare-vector) ]
|
||||
[ ^minmax-compare-vector ]
|
||||
{ unsigned-int-vector-rep [| src1 src2 rep cc |
|
||||
rep sign-bit-mask ^^load-constant :> sign-bits
|
||||
src1 sign-bits rep ^^xor-vector
|
||||
src2 sign-bits rep ^^xor-vector
|
||||
rep signed-rep cc ^(compare-vector)
|
||||
] }
|
||||
} vv-cc-vector-op ;
|
||||
|
||||
: ^unpack-vector-head ( src rep -- dst )
|
||||
{
|
||||
[ ^^unpack-vector-head ]
|
||||
{ unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-head ] bi ] }
|
||||
{ signed-int-vector-rep [| src rep |
|
||||
src src rep ^^merge-vector-head :> merged
|
||||
rep rep-component-type heap-size 8 * :> bits
|
||||
merged bits rep widen-vector-rep ^^shr-vector-imm
|
||||
] }
|
||||
{ signed-int-vector-rep [| src rep |
|
||||
rep ^^zero-vector :> zero
|
||||
zero src rep cc> ^compare-vector :> sign
|
||||
src sign rep ^^merge-vector-head
|
||||
] }
|
||||
} v-vector-op ;
|
||||
|
||||
: ^unpack-vector-tail ( src rep -- dst )
|
||||
{
|
||||
[ ^^unpack-vector-tail ]
|
||||
[ [ ^^tail>head-vector ] [ ^^unpack-vector-head ] bi ]
|
||||
{ unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-tail ] bi ] }
|
||||
{ signed-int-vector-rep [| src rep |
|
||||
src src rep ^^merge-vector-tail :> merged
|
||||
rep rep-component-type heap-size 8 * :> bits
|
||||
merged bits rep widen-vector-rep ^^shr-vector-imm
|
||||
] }
|
||||
{ signed-int-vector-rep [| src rep |
|
||||
rep ^^zero-vector :> zero
|
||||
zero src rep cc> ^compare-vector :> sign
|
||||
src sign rep ^^merge-vector-tail
|
||||
] }
|
||||
} v-vector-op ;
|
||||
|
||||
: ^(sum-vector-2) ( src rep -- dst )
|
||||
{
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
[| src rep |
|
||||
src src rep ^^merge-vector-head :> head
|
||||
src src rep ^^merge-vector-tail :> tail
|
||||
head tail rep ^^add-vector
|
||||
]
|
||||
} v-vector-op ;
|
||||
|
||||
: ^(sum-vector-4) ( src rep -- dst )
|
||||
{
|
||||
[
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
[ dupd ^^horizontal-add-vector ] bi
|
||||
]
|
||||
[| src rep |
|
||||
src src rep ^^merge-vector-head :> head
|
||||
src src rep ^^merge-vector-tail :> tail
|
||||
head tail rep ^^add-vector :> src'
|
||||
|
||||
rep widen-vector-rep :> rep'
|
||||
src' src' rep' ^^merge-vector-head :> head'
|
||||
src' src' rep' ^^merge-vector-tail :> tail'
|
||||
head' tail' rep ^^add-vector
|
||||
]
|
||||
} v-vector-op ;
|
||||
|
||||
: ^(sum-vector-8) ( src rep -- dst )
|
||||
{
|
||||
[
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
[ dupd ^^horizontal-add-vector ] tri
|
||||
]
|
||||
[| src rep |
|
||||
src src rep ^^merge-vector-head :> head
|
||||
src src rep ^^merge-vector-tail :> tail
|
||||
head tail rep ^^add-vector :> src'
|
||||
|
||||
rep widen-vector-rep :> rep'
|
||||
src' src' rep' ^^merge-vector-head :> head'
|
||||
src' src' rep' ^^merge-vector-tail :> tail'
|
||||
head' tail' rep ^^add-vector :> src''
|
||||
|
||||
rep' widen-vector-rep :> rep''
|
||||
src'' src'' rep'' ^^merge-vector-head :> head''
|
||||
src'' src'' rep'' ^^merge-vector-tail :> tail''
|
||||
head'' tail'' rep ^^add-vector
|
||||
]
|
||||
} v-vector-op ;
|
||||
|
||||
: ^(sum-vector-16) ( src rep -- dst )
|
||||
{
|
||||
[
|
||||
{
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
} cleave
|
||||
]
|
||||
[| src rep |
|
||||
src src rep ^^merge-vector-head :> head
|
||||
src src rep ^^merge-vector-tail :> tail
|
||||
head tail rep ^^add-vector :> src'
|
||||
|
||||
rep widen-vector-rep :> rep'
|
||||
src' src' rep' ^^merge-vector-head :> head'
|
||||
src' src' rep' ^^merge-vector-tail :> tail'
|
||||
head' tail' rep ^^add-vector :> src''
|
||||
|
||||
rep' widen-vector-rep :> rep''
|
||||
src'' src'' rep'' ^^merge-vector-head :> head''
|
||||
src'' src'' rep'' ^^merge-vector-tail :> tail''
|
||||
head'' tail'' rep ^^add-vector :> src'''
|
||||
|
||||
rep'' widen-vector-rep :> rep'''
|
||||
src''' src''' rep''' ^^merge-vector-head :> head'''
|
||||
src''' src''' rep''' ^^merge-vector-tail :> tail'''
|
||||
head''' tail''' rep ^^add-vector
|
||||
]
|
||||
} v-vector-op ;
|
||||
|
||||
: ^(sum-vector) ( src rep -- dst )
|
||||
[
|
||||
dup rep-length {
|
||||
{ 2 [ ^(sum-vector-2) ] }
|
||||
{ 4 [ ^(sum-vector-4) ] }
|
||||
{ 8 [ ^(sum-vector-8) ] }
|
||||
{ 16 [ ^(sum-vector-16) ] }
|
||||
} case
|
||||
] [ ^^vector>scalar ] bi ;
|
||||
|
||||
: ^sum-vector ( src rep -- dst )
|
||||
{
|
||||
{ float-vector-rep [ ^(sum-vector) ] }
|
||||
{ int-vector-rep [| src rep |
|
||||
src rep ^unpack-vector-head :> head
|
||||
src rep ^unpack-vector-tail :> tail
|
||||
rep widen-vector-rep :> wide-rep
|
||||
head tail wide-rep ^^add-vector wide-rep
|
||||
^(sum-vector)
|
||||
] }
|
||||
} v-vector-op ;
|
||||
|
||||
: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
|
||||
|
||||
: ^shuffle-vector-imm ( src1 shuffle rep -- dst )
|
||||
[ rep-length 0 pad-tail ] keep {
|
||||
[ ^^shuffle-vector-imm ]
|
||||
[ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] bi ]
|
||||
} vl-vector-op ;
|
||||
|
||||
: ^broadcast-vector ( src n rep -- dst )
|
||||
[ rep-length swap <array> ] keep
|
||||
^shuffle-vector-imm ;
|
||||
|
||||
: ^with-vector ( src rep -- dst )
|
||||
[ ^^scalar>vector ] keep [ 0 ] dip ^broadcast-vector ;
|
||||
|
||||
: ^select-vector ( src n rep -- dst )
|
||||
[ ^broadcast-vector ] keep ^^vector>scalar ;
|
||||
|
||||
! intrinsic emitters
|
||||
|
||||
: emit-simd-v+ ( node -- )
|
||||
{
|
||||
[ ^^add-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-v- ( node -- )
|
||||
{
|
||||
[ ^^sub-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vneg ( node -- )
|
||||
{
|
||||
{ float-vector-rep [ [ ^load-neg-zero-vector swap ] [ ^^sub-vector ] bi ] }
|
||||
{ int-vector-rep [ [ ^^zero-vector swap ] [ ^^sub-vector ] bi ] }
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-v+- ( node -- )
|
||||
{
|
||||
[ ^^add-sub-vector ]
|
||||
{ float-vector-rep [| src1 src2 rep |
|
||||
rep ^load-add-sub-vector :> signs
|
||||
src2 signs rep ^^xor-vector :> src2'
|
||||
src1 src2' rep ^^add-vector
|
||||
] }
|
||||
{ int-vector-rep [| src1 src2 rep |
|
||||
rep ^load-add-sub-vector :> signs
|
||||
src2 signs rep ^^xor-vector :> src2'
|
||||
src2' signs rep ^^sub-vector :> src2''
|
||||
src1 src2'' rep ^^add-vector
|
||||
] }
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vs+ ( node -- )
|
||||
{
|
||||
{ float-vector-rep [ ^^add-vector ] }
|
||||
{ int-vector-rep [ ^^saturated-add-vector ] }
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vs- ( node -- )
|
||||
{
|
||||
{ float-vector-rep [ ^^sub-vector ] }
|
||||
{ int-vector-rep [ ^^saturated-sub-vector ] }
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vs* ( node -- )
|
||||
{
|
||||
{ float-vector-rep [ ^^mul-vector ] }
|
||||
{ int-vector-rep [ ^^saturated-mul-vector ] }
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-v* ( node -- )
|
||||
{
|
||||
[ ^^mul-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-v/ ( node -- )
|
||||
{
|
||||
[ ^^div-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vmin ( node -- )
|
||||
{
|
||||
[ ^^min-vector ]
|
||||
[
|
||||
[ cc< ^compare-vector ]
|
||||
[ ^blend-vector ] 3bi
|
||||
]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vmax ( node -- )
|
||||
{
|
||||
[ ^^max-vector ]
|
||||
[
|
||||
[ cc> ^compare-vector ]
|
||||
[ ^blend-vector ] 3bi
|
||||
]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-v. ( node -- )
|
||||
{
|
||||
[ ^^dot-vector ]
|
||||
{ float-vector-rep [ [ ^^mul-vector ] [ ^sum-vector ] bi ] }
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vsqrt ( node -- )
|
||||
{
|
||||
[ ^^sqrt-vector ]
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-sum ( node -- )
|
||||
{
|
||||
[ ^sum-vector ]
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-vabs ( node -- )
|
||||
{
|
||||
{ unsigned-int-vector-rep [ drop ] }
|
||||
[ ^^abs-vector ]
|
||||
{ float-vector-rep [ [ ^load-neg-zero-vector ] [ swapd ^^andn-vector ] bi ] }
|
||||
{ int-vector-rep [| src rep |
|
||||
rep ^^zero-vector :> zero
|
||||
zero src rep ^^sub-vector :> -src
|
||||
zero src rep cc> ^^compare-vector :> sign
|
||||
sign -src src rep generate-blend-vector
|
||||
zero src rep cc> ^compare-vector :> sign
|
||||
sign -src src rep ^blend-vector
|
||||
] }
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-vand ( node -- )
|
||||
{
|
||||
[ ^^and-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vandn ( node -- )
|
||||
{
|
||||
[ ^^andn-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vor ( node -- )
|
||||
{
|
||||
[ ^^or-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vxor ( node -- )
|
||||
{
|
||||
[ ^^xor-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vnot ( node -- )
|
||||
{
|
||||
[ ^not-vector ]
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-vlshift ( node -- )
|
||||
{
|
||||
[ ^^shl-vector ]
|
||||
} {
|
||||
[ ^^shl-vector-imm ]
|
||||
} [ integer? ] emit-vv-or-vl-vector-op ;
|
||||
|
||||
: emit-simd-vrshift ( node -- )
|
||||
{
|
||||
[ ^^shr-vector ]
|
||||
} {
|
||||
[ ^^shr-vector-imm ]
|
||||
} [ integer? ] emit-vv-or-vl-vector-op ;
|
||||
|
||||
: emit-simd-hlshift ( node -- )
|
||||
{
|
||||
[ ^^horizontal-shl-vector-imm ]
|
||||
} [ integer? ] emit-vl-vector-op ;
|
||||
|
||||
: emit-simd-hrshift ( node -- )
|
||||
{
|
||||
[ ^^horizontal-shr-vector-imm ]
|
||||
} [ integer? ] emit-vl-vector-op ;
|
||||
|
||||
: emit-simd-vshuffle-elements ( node -- )
|
||||
{
|
||||
[ ^shuffle-vector-imm ]
|
||||
} [ shuffle? ] emit-vl-vector-op ;
|
||||
|
||||
: emit-simd-vshuffle-bytes ( node -- )
|
||||
{
|
||||
[ ^^shuffle-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vmerge-head ( node -- )
|
||||
{
|
||||
[ ^^merge-vector-head ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vmerge-tail ( node -- )
|
||||
{
|
||||
[ ^^merge-vector-tail ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-v<= ( node -- )
|
||||
{
|
||||
[ cc<= ^compare-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
: emit-simd-v< ( node -- )
|
||||
{
|
||||
[ cc< ^compare-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
: emit-simd-v= ( node -- )
|
||||
{
|
||||
[ cc= ^compare-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
: emit-simd-v> ( node -- )
|
||||
{
|
||||
[ cc> ^compare-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
: emit-simd-v>= ( node -- )
|
||||
{
|
||||
[ cc>= ^compare-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
: emit-simd-vunordered? ( node -- )
|
||||
{
|
||||
[ cc/<>= ^compare-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vany? ( node -- )
|
||||
{
|
||||
[ vcc-any ^^test-vector ]
|
||||
} emit-v-vector-op ;
|
||||
: emit-simd-vall? ( node -- )
|
||||
{
|
||||
[ vcc-all ^^test-vector ]
|
||||
} emit-v-vector-op ;
|
||||
: emit-simd-vnone? ( node -- )
|
||||
{
|
||||
[ vcc-none ^^test-vector ]
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-v>float ( node -- )
|
||||
{
|
||||
{ float-vector-rep [ drop ] }
|
||||
{ int-vector-rep [ ^^integer>float-vector ] }
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-v>integer ( node -- )
|
||||
{
|
||||
{ float-vector-rep [ ^^float>integer-vector ] }
|
||||
{ int-vector-rep [ drop ] }
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-vpack-signed ( node -- )
|
||||
{
|
||||
[ ^^signed-pack-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vpack-unsigned ( node -- )
|
||||
{
|
||||
[ ^^unsigned-pack-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vunpack-head ( node -- )
|
||||
{
|
||||
[ ^unpack-vector-head ]
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-vunpack-tail ( node -- )
|
||||
{
|
||||
[ ^unpack-vector-tail ]
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-with ( node -- )
|
||||
{
|
||||
[ ^with-vector ]
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-gather-2 ( node -- )
|
||||
{
|
||||
[ ^^gather-vector-2 ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-gather-4 ( node -- )
|
||||
{
|
||||
[ ^^gather-vector-4 ]
|
||||
} emit-vvvv-vector-op ;
|
||||
|
||||
: emit-simd-select ( node -- )
|
||||
{
|
||||
[ ^select-vector ]
|
||||
} [ integer? ] emit-vl-vector-op ;
|
||||
|
||||
: emit-alien-vector ( node -- )
|
||||
dup [
|
||||
'[
|
||||
ds-drop prepare-alien-getter
|
||||
_ ^^alien-vector ds-push
|
||||
]
|
||||
} cond ;
|
||||
[ inline-alien-getter? ] inline-alien
|
||||
] with { [ %alien-vector-reps member? ] } if-literals-match ;
|
||||
|
||||
: generate-min-vector ( src1 src2 rep -- dst )
|
||||
dup %min-vector-reps member?
|
||||
[ ^^min-vector ] [
|
||||
[ cc< generate-compare-vector ]
|
||||
[ generate-blend-vector ] 3bi
|
||||
] if ;
|
||||
: emit-set-alien-vector ( node -- )
|
||||
dup [
|
||||
'[
|
||||
ds-drop prepare-alien-setter ds-pop
|
||||
_ ##set-alien-vector
|
||||
]
|
||||
[ byte-array inline-alien-setter? ]
|
||||
inline-alien
|
||||
] with { [ %alien-vector-reps member? ] } if-literals-match ;
|
||||
|
||||
: generate-max-vector ( src1 src2 rep -- dst )
|
||||
dup %max-vector-reps member?
|
||||
[ ^^max-vector ] [
|
||||
[ cc> generate-compare-vector ]
|
||||
[ generate-blend-vector ] 3bi
|
||||
] if ;
|
||||
: enable-simd ( -- )
|
||||
{
|
||||
{ (simd-v+) [ emit-simd-v+ ] }
|
||||
{ (simd-v-) [ emit-simd-v- ] }
|
||||
{ (simd-vneg) [ emit-simd-vneg ] }
|
||||
{ (simd-v+-) [ emit-simd-v+- ] }
|
||||
{ (simd-vs+) [ emit-simd-vs+ ] }
|
||||
{ (simd-vs-) [ emit-simd-vs- ] }
|
||||
{ (simd-vs*) [ emit-simd-vs* ] }
|
||||
{ (simd-v*) [ emit-simd-v* ] }
|
||||
{ (simd-v/) [ emit-simd-v/ ] }
|
||||
{ (simd-vmin) [ emit-simd-vmin ] }
|
||||
{ (simd-vmax) [ emit-simd-vmax ] }
|
||||
{ (simd-v.) [ emit-simd-v. ] }
|
||||
{ (simd-vsqrt) [ emit-simd-vsqrt ] }
|
||||
{ (simd-sum) [ emit-simd-sum ] }
|
||||
{ (simd-vabs) [ emit-simd-vabs ] }
|
||||
{ (simd-vbitand) [ emit-simd-vand ] }
|
||||
{ (simd-vbitandn) [ emit-simd-vandn ] }
|
||||
{ (simd-vbitor) [ emit-simd-vor ] }
|
||||
{ (simd-vbitxor) [ emit-simd-vxor ] }
|
||||
{ (simd-vbitnot) [ emit-simd-vnot ] }
|
||||
{ (simd-vand) [ emit-simd-vand ] }
|
||||
{ (simd-vandn) [ emit-simd-vandn ] }
|
||||
{ (simd-vor) [ emit-simd-vor ] }
|
||||
{ (simd-vxor) [ emit-simd-vxor ] }
|
||||
{ (simd-vnot) [ emit-simd-vnot ] }
|
||||
{ (simd-vlshift) [ emit-simd-vlshift ] }
|
||||
{ (simd-vrshift) [ emit-simd-vrshift ] }
|
||||
{ (simd-hlshift) [ emit-simd-hlshift ] }
|
||||
{ (simd-hrshift) [ emit-simd-hrshift ] }
|
||||
{ (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] }
|
||||
{ (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] }
|
||||
{ (simd-vmerge-head) [ emit-simd-vmerge-head ] }
|
||||
{ (simd-vmerge-tail) [ emit-simd-vmerge-tail ] }
|
||||
{ (simd-v<=) [ emit-simd-v<= ] }
|
||||
{ (simd-v<) [ emit-simd-v< ] }
|
||||
{ (simd-v=) [ emit-simd-v= ] }
|
||||
{ (simd-v>) [ emit-simd-v> ] }
|
||||
{ (simd-v>=) [ emit-simd-v>= ] }
|
||||
{ (simd-vunordered?) [ emit-simd-vunordered? ] }
|
||||
{ (simd-vany?) [ emit-simd-vany? ] }
|
||||
{ (simd-vall?) [ emit-simd-vall? ] }
|
||||
{ (simd-vnone?) [ emit-simd-vnone? ] }
|
||||
{ (simd-v>float) [ emit-simd-v>float ] }
|
||||
{ (simd-v>integer) [ emit-simd-v>integer ] }
|
||||
{ (simd-vpack-signed) [ emit-simd-vpack-signed ] }
|
||||
{ (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] }
|
||||
{ (simd-vunpack-head) [ emit-simd-vunpack-head ] }
|
||||
{ (simd-vunpack-tail) [ emit-simd-vunpack-tail ] }
|
||||
{ (simd-with) [ emit-simd-with ] }
|
||||
{ (simd-gather-2) [ emit-simd-gather-2 ] }
|
||||
{ (simd-gather-4) [ emit-simd-gather-4 ] }
|
||||
{ (simd-select) [ emit-simd-select ] }
|
||||
{ alien-vector [ emit-alien-vector ] }
|
||||
{ set-alien-vector [ emit-set-alien-vector ] }
|
||||
} enable-intrinsics ;
|
||||
|
||||
enable-simd
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors combinators combinators.short-circuit arrays
|
||||
fry kernel layouts math namespaces sequences cpu.architecture
|
||||
math.bitwise math.order math.vectors.simd.intrinsics classes
|
||||
math.bitwise math.order classes
|
||||
vectors locals make alien.c-types io.binary grouping
|
||||
compiler.cfg
|
||||
compiler.cfg.registers
|
||||
|
@ -42,6 +42,14 @@ M: insn rewrite drop f ;
|
|||
] [ drop f ] if ; inline
|
||||
|
||||
: general-compare-expr? ( insn -- ? )
|
||||
{
|
||||
[ compare-expr? ]
|
||||
[ compare-imm-expr? ]
|
||||
[ compare-float-unordered-expr? ]
|
||||
[ compare-float-ordered-expr? ]
|
||||
} 1|| ;
|
||||
|
||||
: general-or-vector-compare-expr? ( insn -- ? )
|
||||
{
|
||||
[ compare-expr? ]
|
||||
[ compare-imm-expr? ]
|
||||
|
@ -52,7 +60,7 @@ M: insn rewrite drop f ;
|
|||
|
||||
: rewrite-boolean-comparison? ( insn -- ? )
|
||||
dup ##branch-t? [
|
||||
src1>> vreg>expr general-compare-expr?
|
||||
src1>> vreg>expr general-or-vector-compare-expr?
|
||||
] [ drop f ] if ; inline
|
||||
|
||||
: >compare-expr< ( expr -- in1 in2 cc )
|
||||
|
@ -463,100 +471,9 @@ M: ##alien-signed-2 rewrite rewrite-alien-addressing ;
|
|||
M: ##alien-signed-4 rewrite rewrite-alien-addressing ;
|
||||
M: ##alien-float rewrite rewrite-alien-addressing ;
|
||||
M: ##alien-double rewrite rewrite-alien-addressing ;
|
||||
M: ##alien-vector rewrite rewrite-alien-addressing ;
|
||||
M: ##set-alien-integer-1 rewrite rewrite-alien-addressing ;
|
||||
M: ##set-alien-integer-2 rewrite rewrite-alien-addressing ;
|
||||
M: ##set-alien-integer-4 rewrite rewrite-alien-addressing ;
|
||||
M: ##set-alien-float rewrite rewrite-alien-addressing ;
|
||||
M: ##set-alien-double rewrite rewrite-alien-addressing ;
|
||||
M: ##set-alien-vector rewrite rewrite-alien-addressing ;
|
||||
|
||||
! Some lame constant folding for SIMD intrinsics. Eventually this
|
||||
! should be redone completely.
|
||||
|
||||
: rewrite-shuffle-vector-imm ( insn expr -- insn' )
|
||||
2dup [ rep>> ] bi@ eq? [
|
||||
[ [ dst>> ] [ src>> vn>vreg ] bi* ]
|
||||
[ [ shuffle>> ] bi@ nths ]
|
||||
[ drop rep>> ]
|
||||
2tri \ ##shuffle-vector-imm new-insn
|
||||
] [ 2drop f ] if ;
|
||||
|
||||
: (fold-shuffle-vector-imm) ( shuffle bytes -- bytes' )
|
||||
2dup length swap length /i group nths concat ;
|
||||
|
||||
: fold-shuffle-vector-imm ( insn expr -- insn' )
|
||||
[ [ dst>> ] [ shuffle>> ] bi ] dip value>>
|
||||
(fold-shuffle-vector-imm) \ ##load-constant new-insn ;
|
||||
|
||||
M: ##shuffle-vector-imm rewrite
|
||||
dup src>> vreg>expr {
|
||||
{ [ dup shuffle-vector-imm-expr? ] [ rewrite-shuffle-vector-imm ] }
|
||||
{ [ dup reference-expr? ] [ fold-shuffle-vector-imm ] }
|
||||
{ [ dup constant-expr? ] [ fold-shuffle-vector-imm ] }
|
||||
[ 2drop f ]
|
||||
} cond ;
|
||||
|
||||
: (fold-scalar>vector) ( insn bytes -- insn' )
|
||||
[ [ dst>> ] [ rep>> rep-components ] bi ] dip <repetition> concat
|
||||
\ ##load-constant new-insn ;
|
||||
|
||||
: fold-scalar>vector ( insn expr -- insn' )
|
||||
value>> over rep>> {
|
||||
{ float-4-rep [ float>bits 4 >le (fold-scalar>vector) ] }
|
||||
{ double-2-rep [ double>bits 8 >le (fold-scalar>vector) ] }
|
||||
[ [ untag-fixnum ] dip rep-component-type heap-size >le (fold-scalar>vector) ]
|
||||
} case ;
|
||||
|
||||
M: ##scalar>vector rewrite
|
||||
dup src>> vreg>expr dup constant-expr?
|
||||
[ fold-scalar>vector ] [ 2drop f ] if ;
|
||||
|
||||
M: ##xor-vector rewrite
|
||||
dup [ src1>> vreg>vn ] [ src2>> vreg>vn ] bi eq?
|
||||
[ [ dst>> ] [ rep>> ] bi \ ##zero-vector new-insn ] [ drop f ] if ;
|
||||
|
||||
: vector-not? ( expr -- ? )
|
||||
{
|
||||
[ not-vector-expr? ]
|
||||
[ {
|
||||
[ xor-vector-expr? ]
|
||||
[ [ src1>> ] [ src2>> ] bi [ vn>expr fill-vector-expr? ] either? ]
|
||||
} 1&& ]
|
||||
} 1|| ;
|
||||
|
||||
GENERIC: vector-not-src ( expr -- vreg )
|
||||
M: not-vector-expr vector-not-src src>> vn>vreg ;
|
||||
M: xor-vector-expr vector-not-src
|
||||
dup src1>> vn>expr fill-vector-expr? [ src2>> ] [ src1>> ] if vn>vreg ;
|
||||
|
||||
M: ##and-vector rewrite
|
||||
{
|
||||
{ [ dup src1>> vreg>expr vector-not? ] [
|
||||
{
|
||||
[ dst>> ]
|
||||
[ src1>> vreg>expr vector-not-src ]
|
||||
[ src2>> ]
|
||||
[ rep>> ]
|
||||
} cleave \ ##andn-vector new-insn
|
||||
] }
|
||||
{ [ dup src2>> vreg>expr vector-not? ] [
|
||||
{
|
||||
[ dst>> ]
|
||||
[ src2>> vreg>expr vector-not-src ]
|
||||
[ src1>> ]
|
||||
[ rep>> ]
|
||||
} cleave \ ##andn-vector new-insn
|
||||
] }
|
||||
[ drop f ]
|
||||
} cond ;
|
||||
|
||||
M: ##andn-vector rewrite
|
||||
dup src1>> vreg>expr vector-not? [
|
||||
{
|
||||
[ dst>> ]
|
||||
[ src1>> vreg>expr vector-not-src ]
|
||||
[ src2>> ]
|
||||
[ rep>> ]
|
||||
} cleave \ ##and-vector new-insn
|
||||
] [ drop f ] if ;
|
||||
|
|
|
@ -0,0 +1,120 @@
|
|||
! Copyright (C) 2008, 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors combinators combinators.short-circuit arrays
|
||||
fry kernel layouts math namespaces sequences cpu.architecture
|
||||
math.bitwise math.order classes
|
||||
vectors locals make alien.c-types io.binary grouping
|
||||
math.vectors.simd.intrinsics
|
||||
compiler.cfg
|
||||
compiler.cfg.registers
|
||||
compiler.cfg.comparisons
|
||||
compiler.cfg.instructions
|
||||
compiler.cfg.value-numbering.expressions
|
||||
compiler.cfg.value-numbering.graph
|
||||
compiler.cfg.value-numbering.rewrite
|
||||
compiler.cfg.value-numbering.simplify ;
|
||||
IN: compiler.cfg.value-numbering.simd
|
||||
|
||||
M: ##alien-vector rewrite rewrite-alien-addressing ;
|
||||
M: ##set-alien-vector rewrite rewrite-alien-addressing ;
|
||||
|
||||
! Some lame constant folding for SIMD intrinsics. Eventually this
|
||||
! should be redone completely.
|
||||
|
||||
: rewrite-shuffle-vector-imm ( insn expr -- insn' )
|
||||
2dup [ rep>> ] bi@ eq? [
|
||||
[ [ dst>> ] [ src>> vn>vreg ] bi* ]
|
||||
[ [ shuffle>> ] bi@ nths ]
|
||||
[ drop rep>> ]
|
||||
2tri \ ##shuffle-vector-imm new-insn
|
||||
] [ 2drop f ] if ;
|
||||
|
||||
: (fold-shuffle-vector-imm) ( shuffle bytes -- bytes' )
|
||||
2dup length swap length /i group nths concat ;
|
||||
|
||||
: fold-shuffle-vector-imm ( insn expr -- insn' )
|
||||
[ [ dst>> ] [ shuffle>> ] bi ] dip value>>
|
||||
(fold-shuffle-vector-imm) \ ##load-constant new-insn ;
|
||||
|
||||
M: ##shuffle-vector-imm rewrite
|
||||
dup src>> vreg>expr {
|
||||
{ [ dup shuffle-vector-imm-expr? ] [ rewrite-shuffle-vector-imm ] }
|
||||
{ [ dup reference-expr? ] [ fold-shuffle-vector-imm ] }
|
||||
{ [ dup constant-expr? ] [ fold-shuffle-vector-imm ] }
|
||||
[ 2drop f ]
|
||||
} cond ;
|
||||
|
||||
: (fold-scalar>vector) ( insn bytes -- insn' )
|
||||
[ [ dst>> ] [ rep>> rep-length ] bi ] dip <repetition> concat
|
||||
\ ##load-constant new-insn ;
|
||||
|
||||
: fold-scalar>vector ( insn expr -- insn' )
|
||||
value>> over rep>> {
|
||||
{ float-4-rep [ float>bits 4 >le (fold-scalar>vector) ] }
|
||||
{ double-2-rep [ double>bits 8 >le (fold-scalar>vector) ] }
|
||||
[ [ untag-fixnum ] dip rep-component-type heap-size >le (fold-scalar>vector) ]
|
||||
} case ;
|
||||
|
||||
M: ##scalar>vector rewrite
|
||||
dup src>> vreg>expr dup constant-expr?
|
||||
[ fold-scalar>vector ] [ 2drop f ] if ;
|
||||
|
||||
M: ##xor-vector rewrite
|
||||
dup [ src1>> vreg>vn ] [ src2>> vreg>vn ] bi eq?
|
||||
[ [ dst>> ] [ rep>> ] bi \ ##zero-vector new-insn ] [ drop f ] if ;
|
||||
|
||||
: vector-not? ( expr -- ? )
|
||||
{
|
||||
[ not-vector-expr? ]
|
||||
[ {
|
||||
[ xor-vector-expr? ]
|
||||
[ [ src1>> ] [ src2>> ] bi [ vn>expr fill-vector-expr? ] either? ]
|
||||
} 1&& ]
|
||||
} 1|| ;
|
||||
|
||||
GENERIC: vector-not-src ( expr -- vreg )
|
||||
M: not-vector-expr vector-not-src src>> vn>vreg ;
|
||||
M: xor-vector-expr vector-not-src
|
||||
dup src1>> vn>expr fill-vector-expr? [ src2>> ] [ src1>> ] if vn>vreg ;
|
||||
|
||||
M: ##and-vector rewrite
|
||||
{
|
||||
{ [ dup src1>> vreg>expr vector-not? ] [
|
||||
{
|
||||
[ dst>> ]
|
||||
[ src1>> vreg>expr vector-not-src ]
|
||||
[ src2>> ]
|
||||
[ rep>> ]
|
||||
} cleave \ ##andn-vector new-insn
|
||||
] }
|
||||
{ [ dup src2>> vreg>expr vector-not? ] [
|
||||
{
|
||||
[ dst>> ]
|
||||
[ src2>> vreg>expr vector-not-src ]
|
||||
[ src1>> ]
|
||||
[ rep>> ]
|
||||
} cleave \ ##andn-vector new-insn
|
||||
] }
|
||||
[ drop f ]
|
||||
} cond ;
|
||||
|
||||
M: ##andn-vector rewrite
|
||||
dup src1>> vreg>expr vector-not? [
|
||||
{
|
||||
[ dst>> ]
|
||||
[ src1>> vreg>expr vector-not-src ]
|
||||
[ src2>> ]
|
||||
[ rep>> ]
|
||||
} cleave \ ##and-vector new-insn
|
||||
] [ drop f ] if ;
|
||||
|
||||
M: scalar>vector-expr simplify*
|
||||
src>> vn>expr {
|
||||
{ [ dup vector>scalar-expr? ] [ src>> ] }
|
||||
[ drop f ]
|
||||
} cond ;
|
||||
|
||||
M: shuffle-vector-imm-expr simplify*
|
||||
[ src>> ] [ shuffle>> ] [ rep>> rep-length iota ] tri
|
||||
sequence= [ drop f ] unless ;
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
! Copyright (C) 2008, 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel accessors combinators classes math layouts
|
||||
sequences math.vectors.simd.intrinsics
|
||||
sequences
|
||||
compiler.cfg.instructions
|
||||
compiler.cfg.value-numbering.graph
|
||||
compiler.cfg.value-numbering.expressions ;
|
||||
|
@ -130,16 +130,6 @@ M: box-displaced-alien-expr simplify*
|
|||
[ 2drop f ]
|
||||
} cond ;
|
||||
|
||||
M: scalar>vector-expr simplify*
|
||||
src>> vn>expr {
|
||||
{ [ dup vector>scalar-expr? ] [ src>> ] }
|
||||
[ drop f ]
|
||||
} cond ;
|
||||
|
||||
M: shuffle-vector-imm-expr simplify*
|
||||
[ src>> ] [ shuffle>> ] [ rep>> rep-components iota ] tri
|
||||
sequence= [ drop f ] unless ;
|
||||
|
||||
M: expr simplify* drop f ;
|
||||
|
||||
: simplify ( expr -- vn )
|
||||
|
|
|
@ -4,7 +4,7 @@ cpu.architecture tools.test kernel math combinators.short-circuit
|
|||
accessors sequences compiler.cfg.predecessors locals compiler.cfg.dce
|
||||
compiler.cfg.ssa.destruction compiler.cfg.loop-detection
|
||||
compiler.cfg.representations compiler.cfg assocs vectors arrays
|
||||
layouts literals namespaces alien ;
|
||||
layouts literals namespaces alien compiler.cfg.value-numbering.simd ;
|
||||
IN: compiler.cfg.value-numbering.tests
|
||||
|
||||
: trim-temps ( insns -- insns )
|
||||
|
|
|
@ -16,8 +16,7 @@ compiler.tree.propagation.slots
|
|||
compiler.tree.propagation.simple
|
||||
compiler.tree.propagation.constraints
|
||||
compiler.tree.propagation.call-effect
|
||||
compiler.tree.propagation.transforms
|
||||
compiler.tree.propagation.simd ;
|
||||
compiler.tree.propagation.transforms ;
|
||||
FROM: alien.c-types => (signed-interval) (unsigned-interval) ;
|
||||
IN: compiler.tree.propagation.known-words
|
||||
|
||||
|
|
|
@ -1,57 +1,77 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors byte-arrays combinators fry sequences
|
||||
compiler.tree.propagation.info cpu.architecture kernel words math
|
||||
math.intervals math.vectors.simd.intrinsics ;
|
||||
USING: accessors assocs byte-arrays combinators compiler.cfg.builder
|
||||
continuations fry sequences compiler.tree.propagation.info
|
||||
cpu.architecture kernel words make math math.intervals
|
||||
math.vectors.simd.intrinsics namespaces ;
|
||||
IN: compiler.tree.propagation.simd
|
||||
|
||||
{
|
||||
(simd-v+)
|
||||
(simd-v-)
|
||||
(simd-vneg)
|
||||
(simd-vabs)
|
||||
(simd-v+-)
|
||||
(simd-v*)
|
||||
(simd-v/)
|
||||
(simd-vmin)
|
||||
(simd-vmax)
|
||||
(simd-sum)
|
||||
(simd-vsqrt)
|
||||
(simd-vbitand)
|
||||
(simd-vbitandn)
|
||||
(simd-vbitor)
|
||||
(simd-vbitxor)
|
||||
(simd-vbitnot)
|
||||
(simd-vand)
|
||||
(simd-vandn)
|
||||
(simd-vor)
|
||||
(simd-vxor)
|
||||
(simd-vnot)
|
||||
(simd-vlshift)
|
||||
(simd-vrshift)
|
||||
(simd-hlshift)
|
||||
(simd-hrshift)
|
||||
(simd-vshuffle-bytes)
|
||||
(simd-vshuffle-elements)
|
||||
(simd-(vmerge-head))
|
||||
(simd-(vmerge-tail))
|
||||
(simd-(v>float))
|
||||
(simd-(v>integer))
|
||||
(simd-(vpack-signed))
|
||||
(simd-(vpack-unsigned))
|
||||
(simd-(vunpack-head))
|
||||
(simd-(vunpack-tail))
|
||||
(simd-v<=)
|
||||
(simd-v<)
|
||||
(simd-v=)
|
||||
(simd-v>)
|
||||
(simd-v>=)
|
||||
(simd-vunordered?)
|
||||
(simd-with)
|
||||
(simd-gather-2)
|
||||
(simd-gather-4)
|
||||
alien-vector
|
||||
} [ { byte-array } "default-output-classes" set-word-prop ] each
|
||||
CONSTANT: vector>vector-intrinsics
|
||||
{
|
||||
(simd-v+)
|
||||
(simd-v-)
|
||||
(simd-vneg)
|
||||
(simd-v+-)
|
||||
(simd-vs+)
|
||||
(simd-vs-)
|
||||
(simd-vs*)
|
||||
(simd-v*)
|
||||
(simd-v/)
|
||||
(simd-vmin)
|
||||
(simd-vmax)
|
||||
(simd-vsqrt)
|
||||
(simd-vabs)
|
||||
(simd-vbitand)
|
||||
(simd-vbitandn)
|
||||
(simd-vbitor)
|
||||
(simd-vbitxor)
|
||||
(simd-vbitnot)
|
||||
(simd-vand)
|
||||
(simd-vandn)
|
||||
(simd-vor)
|
||||
(simd-vxor)
|
||||
(simd-vnot)
|
||||
(simd-vlshift)
|
||||
(simd-vrshift)
|
||||
(simd-hlshift)
|
||||
(simd-hrshift)
|
||||
(simd-vshuffle-elements)
|
||||
(simd-vshuffle-bytes)
|
||||
(simd-vmerge-head)
|
||||
(simd-vmerge-tail)
|
||||
(simd-v<=)
|
||||
(simd-v<)
|
||||
(simd-v=)
|
||||
(simd-v>)
|
||||
(simd-v>=)
|
||||
(simd-vunordered?)
|
||||
(simd-v>float)
|
||||
(simd-v>integer)
|
||||
(simd-vpack-signed)
|
||||
(simd-vpack-unsigned)
|
||||
(simd-vunpack-head)
|
||||
(simd-vunpack-tail)
|
||||
(simd-with)
|
||||
(simd-gather-2)
|
||||
(simd-gather-4)
|
||||
alien-vector
|
||||
}
|
||||
|
||||
CONSTANT: vector-other-intrinsics
|
||||
{
|
||||
(simd-v.)
|
||||
(simd-sum)
|
||||
(simd-vany?)
|
||||
(simd-vall?)
|
||||
(simd-vnone?)
|
||||
(simd-select)
|
||||
set-alien-vector
|
||||
}
|
||||
|
||||
: vector-intrinsics ( -- x )
|
||||
vector>vector-intrinsics vector-other-intrinsics append ;
|
||||
|
||||
vector>vector-intrinsics [ { byte-array } "default-output-classes" set-word-prop ] each
|
||||
|
||||
: scalar-output-class ( rep -- class )
|
||||
dup literal?>> [
|
||||
|
@ -79,12 +99,24 @@ IN: compiler.tree.propagation.simd
|
|||
real [0,inf] <class/interval-info> value-info-intersect
|
||||
] "outputs" set-word-prop
|
||||
|
||||
! If SIMD is not available, inline alien-vector and set-alien-vector
|
||||
! to get a speedup
|
||||
: clone-with-value-infos ( node -- node' )
|
||||
clone dup in-d>> [ dup value-info ] H{ } map>assoc >>info ;
|
||||
|
||||
: try-intrinsic ( node intrinsic-quot -- ? )
|
||||
'[
|
||||
_ clone-with-value-infos
|
||||
_ with-dummy-cfg-builder
|
||||
t
|
||||
] [ drop f ] recover ;
|
||||
|
||||
: inline-unless-intrinsic ( word -- )
|
||||
dup '[ drop _ dup "intrinsic" word-prop [ drop f ] [ def>> ] if ]
|
||||
dup '[
|
||||
_ swap over "intrinsic" word-prop
|
||||
"always-inline-simd-intrinsics" get not swap and
|
||||
! word node intrinsic
|
||||
[ try-intrinsic [ drop f ] [ def>> ] if ]
|
||||
[ drop def>> ] if*
|
||||
]
|
||||
"custom-inlining" set-word-prop ;
|
||||
|
||||
\ alien-vector inline-unless-intrinsic
|
||||
|
||||
\ set-alien-vector inline-unless-intrinsic
|
||||
vector-intrinsics [ inline-unless-intrinsic ] each
|
||||
|
|
|
@ -1,5 +0,0 @@
|
|||
USING: cords strings tools.test kernel sequences ;
|
||||
IN: cords.tests
|
||||
|
||||
[ "hello world" ] [ "hello" " world" cord-append dup like ] unit-test
|
||||
[ "hello world" ] [ { "he" "llo" " world" } cord-concat dup like ] unit-test
|
|
@ -1,72 +0,0 @@
|
|||
! Copyright (C) 2008 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors assocs sequences sorting binary-search math
|
||||
math.order arrays combinators kernel ;
|
||||
IN: cords
|
||||
|
||||
<PRIVATE
|
||||
|
||||
TUPLE: simple-cord
|
||||
{ first read-only } { second read-only } ;
|
||||
|
||||
M: simple-cord length
|
||||
[ first>> length ] [ second>> length ] bi + ; inline
|
||||
|
||||
M: simple-cord virtual-exemplar first>> ; inline
|
||||
|
||||
M: simple-cord virtual@
|
||||
2dup first>> length <
|
||||
[ first>> ] [ [ first>> length - ] [ second>> ] bi ] if ; inline
|
||||
|
||||
TUPLE: multi-cord
|
||||
{ count read-only } { seqs read-only } ;
|
||||
|
||||
M: multi-cord length count>> ; inline
|
||||
|
||||
M: multi-cord virtual@
|
||||
dupd
|
||||
seqs>> [ first <=> ] with search nip
|
||||
[ first - ] [ second ] bi ; inline
|
||||
|
||||
M: multi-cord virtual-exemplar
|
||||
seqs>> [ f ] [ first second ] if-empty ; inline
|
||||
|
||||
: <cord> ( seqs -- cord )
|
||||
dup length 2 = [
|
||||
first2 simple-cord boa
|
||||
] [
|
||||
[ 0 [ length + ] accumulate ] keep zip multi-cord boa
|
||||
] if ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
UNION: cord simple-cord multi-cord ;
|
||||
|
||||
INSTANCE: cord virtual-sequence
|
||||
|
||||
INSTANCE: multi-cord virtual-sequence
|
||||
|
||||
: cord-append ( seq1 seq2 -- cord )
|
||||
{
|
||||
{ [ over empty? ] [ nip ] }
|
||||
{ [ dup empty? ] [ drop ] }
|
||||
{ [ 2dup [ cord? ] both? ] [ [ seqs>> values ] bi@ append <cord> ] }
|
||||
{ [ over cord? ] [ [ seqs>> values ] dip suffix <cord> ] }
|
||||
{ [ dup cord? ] [ seqs>> values swap prefix <cord> ] }
|
||||
[ 2array <cord> ]
|
||||
} cond ; inline
|
||||
|
||||
: cord-concat ( seqs -- cord )
|
||||
{
|
||||
{ [ dup empty? ] [ drop f ] }
|
||||
{ [ dup length 1 = ] [ first ] }
|
||||
[
|
||||
[
|
||||
{
|
||||
{ [ dup cord? ] [ seqs>> values ] }
|
||||
{ [ dup empty? ] [ drop { } ] }
|
||||
[ 1array ]
|
||||
} cond
|
||||
] map concat <cord>
|
||||
]
|
||||
} cond ; inline
|
|
@ -95,7 +95,7 @@ double-rep
|
|||
vector-rep
|
||||
scalar-rep ;
|
||||
|
||||
: unsign-rep ( rep -- rep' )
|
||||
: signed-rep ( rep -- rep' )
|
||||
{
|
||||
{ uint-4-rep int-4-rep }
|
||||
{ ulonglong-2-rep longlong-2-rep }
|
||||
|
@ -105,7 +105,7 @@ scalar-rep ;
|
|||
{ ushort-scalar-rep short-scalar-rep }
|
||||
{ uint-scalar-rep int-scalar-rep }
|
||||
{ ulonglong-scalar-rep longlong-scalar-rep }
|
||||
} ?at drop ;
|
||||
} ?at drop ; foldable
|
||||
|
||||
: widen-vector-rep ( rep -- rep' )
|
||||
{
|
||||
|
@ -115,7 +115,19 @@ scalar-rep ;
|
|||
{ uchar-16-rep ushort-8-rep }
|
||||
{ ushort-8-rep uint-4-rep }
|
||||
{ uint-4-rep ulonglong-2-rep }
|
||||
} at ;
|
||||
{ float-4-rep double-2-rep }
|
||||
} at ; foldable
|
||||
|
||||
: narrow-vector-rep ( rep -- rep' )
|
||||
{
|
||||
{ short-8-rep char-16-rep }
|
||||
{ int-4-rep short-8-rep }
|
||||
{ longlong-2-rep int-4-rep }
|
||||
{ ushort-8-rep uchar-16-rep }
|
||||
{ uint-4-rep ushort-8-rep }
|
||||
{ ulonglong-2-rep uint-4-rep }
|
||||
{ double-2-rep float-4-rep }
|
||||
} at ; foldable
|
||||
|
||||
! Register classes
|
||||
SINGLETONS: int-regs float-regs ;
|
||||
|
@ -277,8 +289,8 @@ HOOK: %min-vector cpu ( dst src1 src2 rep -- )
|
|||
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %dot-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %sqrt-vector cpu ( dst src rep -- )
|
||||
HOOK: %horizontal-add-vector cpu ( dst src rep -- )
|
||||
HOOK: %horizontal-sub-vector cpu ( dst src rep -- )
|
||||
HOOK: %horizontal-add-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %horizontal-sub-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %abs-vector cpu ( dst src rep -- )
|
||||
HOOK: %and-vector cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %andn-vector cpu ( dst src1 src2 rep -- )
|
||||
|
@ -385,6 +397,10 @@ M: object %shr-vector-imm-reps { } ;
|
|||
M: object %horizontal-shl-vector-imm-reps { } ;
|
||||
M: object %horizontal-shr-vector-imm-reps { } ;
|
||||
|
||||
ALIAS: %merge-vector-head-reps %merge-vector-reps
|
||||
ALIAS: %merge-vector-tail-reps %merge-vector-reps
|
||||
ALIAS: %tail>head-vector-reps %unpack-vector-head-reps
|
||||
|
||||
HOOK: %unbox-alien cpu ( dst src -- )
|
||||
HOOK: %unbox-any-c-ptr cpu ( dst src -- )
|
||||
HOOK: %box-alien cpu ( dst src temp -- )
|
||||
|
|
|
@ -650,7 +650,7 @@ M: x86 %fill-vector-reps
|
|||
} available-reps ;
|
||||
|
||||
! M:: x86 %broadcast-vector ( dst src rep -- )
|
||||
! rep unsign-rep {
|
||||
! rep signed-rep {
|
||||
! { float-4-rep [
|
||||
! dst src float-4-rep %copy
|
||||
! dst dst { 0 0 0 0 } SHUFPS
|
||||
|
@ -687,7 +687,7 @@ M: x86 %fill-vector-reps
|
|||
! } available-reps ;
|
||||
|
||||
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
|
||||
rep unsign-rep {
|
||||
rep signed-rep {
|
||||
{ float-4-rep [
|
||||
dst src1 float-4-rep %copy
|
||||
dst src2 UNPCKLPS
|
||||
|
@ -710,7 +710,7 @@ M: x86 %gather-vector-4-reps
|
|||
} available-reps ;
|
||||
|
||||
M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
|
||||
rep unsign-rep {
|
||||
rep signed-rep {
|
||||
{ double-2-rep [
|
||||
dst src1 double-2-rep %copy
|
||||
dst src2 MOVLHPS
|
||||
|
@ -763,7 +763,7 @@ M: x86 %gather-vector-2-reps
|
|||
|
||||
M:: x86 %shuffle-vector-imm ( dst src shuffle rep -- )
|
||||
dst src rep %copy
|
||||
dst shuffle rep unsign-rep {
|
||||
dst shuffle rep signed-rep {
|
||||
{ double-2-rep [ >float-4-shuffle float-4-shuffle ] }
|
||||
{ float-4-rep [ float-4-shuffle ] }
|
||||
{ int-4-rep [ int-4-shuffle ] }
|
||||
|
@ -786,7 +786,7 @@ M: x86 %shuffle-vector-reps
|
|||
|
||||
M: x86 %merge-vector-head
|
||||
[ two-operand ] keep
|
||||
unsign-rep {
|
||||
signed-rep {
|
||||
{ double-2-rep [ MOVLHPS ] }
|
||||
{ float-4-rep [ UNPCKLPS ] }
|
||||
{ longlong-2-rep [ PUNPCKLQDQ ] }
|
||||
|
@ -797,7 +797,7 @@ M: x86 %merge-vector-head
|
|||
|
||||
M: x86 %merge-vector-tail
|
||||
[ two-operand ] keep
|
||||
unsign-rep {
|
||||
signed-rep {
|
||||
{ double-2-rep [ UNPCKHPD ] }
|
||||
{ float-4-rep [ UNPCKHPS ] }
|
||||
{ longlong-2-rep [ PUNPCKHQDQ ] }
|
||||
|
@ -826,7 +826,7 @@ M: x86 %signed-pack-vector-reps
|
|||
|
||||
M: x86 %unsigned-pack-vector
|
||||
[ two-operand ] keep
|
||||
unsign-rep {
|
||||
signed-rep {
|
||||
{ int-4-rep [ PACKUSDW ] }
|
||||
{ short-8-rep [ PACKUSWB ] }
|
||||
} case ;
|
||||
|
@ -896,7 +896,7 @@ M: x86 %float>integer-vector-reps
|
|||
} case ;
|
||||
|
||||
:: (%compare-int-vector) ( dst src rep int64 int32 int16 int8 -- )
|
||||
rep unsign-rep :> rep'
|
||||
rep signed-rep :> rep'
|
||||
dst src rep' {
|
||||
{ longlong-2-rep [ int64 call ] }
|
||||
{ int-4-rep [ int32 call ] }
|
||||
|
@ -1162,34 +1162,28 @@ M: x86 %max-vector-reps
|
|||
M: x86 %dot-vector
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ float-4-rep [
|
||||
sse4.1?
|
||||
[ HEX: ff DPPS ]
|
||||
[ [ MULPS ] [ drop dup float-4-rep %horizontal-add-vector ] 2bi ]
|
||||
if
|
||||
] }
|
||||
{ double-2-rep [
|
||||
sse4.1?
|
||||
[ HEX: ff DPPD ]
|
||||
[ [ MULPD ] [ drop dup double-2-rep %horizontal-add-vector ] 2bi ]
|
||||
if
|
||||
] }
|
||||
{ float-4-rep [ HEX: ff DPPS ] }
|
||||
{ double-2-rep [ HEX: ff DPPD ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %dot-vector-reps
|
||||
{
|
||||
{ sse3? { float-4-rep double-2-rep } }
|
||||
{ sse4.1? { float-4-rep double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %horizontal-add-vector ( dst src rep -- )
|
||||
{
|
||||
{ float-4-rep [ [ float-4-rep %copy ] [ HADDPS ] [ HADDPS ] 2tri ] }
|
||||
{ double-2-rep [ [ double-2-rep %copy ] [ HADDPD ] 2bi ] }
|
||||
M: x86 %horizontal-add-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
signed-rep {
|
||||
{ float-4-rep [ HADDPS ] }
|
||||
{ double-2-rep [ HADDPD ] }
|
||||
{ int-4-rep [ PHADDD ] }
|
||||
{ short-8-rep [ PHADDW ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %horizontal-add-vector-reps
|
||||
{
|
||||
{ sse3? { float-4-rep double-2-rep } }
|
||||
{ ssse3? { int-4-rep uint-4-rep short-8-rep ushort-8-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %horizontal-shl-vector-imm ( dst src1 src2 rep -- )
|
||||
|
@ -1197,7 +1191,7 @@ M: x86 %horizontal-shl-vector-imm ( dst src1 src2 rep -- )
|
|||
|
||||
M: x86 %horizontal-shl-vector-imm-reps
|
||||
{
|
||||
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep float-4-rep double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %horizontal-shr-vector-imm ( dst src1 src2 rep -- )
|
||||
|
@ -1205,7 +1199,7 @@ M: x86 %horizontal-shr-vector-imm ( dst src1 src2 rep -- )
|
|||
|
||||
M: x86 %horizontal-shr-vector-imm-reps
|
||||
{
|
||||
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep float-4-rep double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %abs-vector ( dst src rep -- )
|
||||
|
@ -1329,8 +1323,15 @@ M: x86 %shr-vector-imm-reps %shr-vector-reps ;
|
|||
|
||||
M: x86 %integer>scalar drop MOVD ;
|
||||
|
||||
! XXX the longlong versions won't work on x86.32
|
||||
M:: x86 %scalar>integer ( dst src rep -- )
|
||||
rep {
|
||||
{ longlong-scalar-rep [
|
||||
dst src MOVD
|
||||
] }
|
||||
{ ulonglong-scalar-rep [
|
||||
dst src MOVD
|
||||
] }
|
||||
{ int-scalar-rep [
|
||||
dst 32-bit-version-of src MOVD
|
||||
dst dst 32-bit-version-of
|
||||
|
@ -1401,7 +1402,6 @@ M: x86 immediate-bitwise? ( n -- ? )
|
|||
#! set up by the caller.
|
||||
stack-frame get total-size>> + stack@ ;
|
||||
|
||||
enable-simd
|
||||
enable-min/max
|
||||
enable-fixnum-log2
|
||||
|
||||
|
|
|
@ -5,6 +5,8 @@ IN: fry.tests
|
|||
|
||||
SYMBOLS: a b c d e f g h ;
|
||||
|
||||
[ [ ] ] [ '[ ] ] unit-test
|
||||
[ [ + ] ] [ '[ + ] ] unit-test
|
||||
[ [ 1 ] ] [ 1 '[ _ ] ] unit-test
|
||||
[ [ 1 ] ] [ [ 1 ] '[ @ ] ] unit-test
|
||||
[ [ 1 2 ] ] [ [ 1 ] [ 2 ] '[ @ @ ] ] unit-test
|
||||
|
|
|
@ -136,10 +136,12 @@ TUPLE: dredge-fry-state
|
|||
PRIVATE>
|
||||
|
||||
M: callable fry ( quot -- quot' )
|
||||
0 swap <dredge-fry>
|
||||
[ dredge-fry ] [
|
||||
[ prequot>> >quotation ]
|
||||
[ quot>> >quotation shallow-fry ] bi append
|
||||
] bi ;
|
||||
[ [ [ ] ] ] [
|
||||
0 swap <dredge-fry>
|
||||
[ dredge-fry ] [
|
||||
[ prequot>> >quotation ]
|
||||
[ quot>> >quotation shallow-fry ] bi append
|
||||
] bi
|
||||
] if-empty ;
|
||||
|
||||
SYNTAX: '[ parse-quotation fry append! ;
|
||||
|
|
|
@ -87,7 +87,6 @@ ARTICLE: "io.mmap.examples" "Memory-mapped file examples"
|
|||
"Normalize a file containing packed quadrupes of floats:"
|
||||
{ $code
|
||||
"USING: kernel io.mmap math.vectors math.vectors.simd" "sequences specialized-arrays ;"
|
||||
"SIMD: float"
|
||||
"SPECIALIZED-ARRAY: float-4"
|
||||
""
|
||||
"\"mydata.dat\" float-4 ["
|
||||
|
|
|
@ -41,7 +41,6 @@ CONSTANT: b 2
|
|||
[ 0 ] [ BIN: 0 bit-count ] unit-test
|
||||
[ 1 ] [ BIN: 1 bit-count ] unit-test
|
||||
|
||||
SIMD: uint
|
||||
SPECIALIZED-ARRAY: uint
|
||||
SPECIALIZED-ARRAY: uint-4
|
||||
|
||||
|
|
|
@ -1,21 +0,0 @@
|
|||
! (c)Joe Groff bsd license
|
||||
USING: accessors alien.c-types arrays assocs classes combinators
|
||||
cords fry kernel math math.vectors sequences ;
|
||||
IN: math.vectors.conversion.backend
|
||||
|
||||
: saturate-map-as ( v quot result -- w )
|
||||
[ element-type '[ @ _ c-type-clamp ] ] keep map-as ; inline
|
||||
|
||||
: (v>float) ( i to-type -- f )
|
||||
[ >float ] swap new map-as ;
|
||||
: (v>integer) ( f to-type -- i )
|
||||
[ >integer ] swap new map-as ;
|
||||
: (vpack-signed) ( a b to-type -- ab )
|
||||
[ cord-append [ ] ] dip new saturate-map-as ;
|
||||
: (vpack-unsigned) ( a b to-type -- ab )
|
||||
[ cord-append [ ] ] dip new saturate-map-as ;
|
||||
: (vunpack-head) ( ab to-type -- a )
|
||||
[ dup length 2 /i head-slice ] dip new like ;
|
||||
: (vunpack-tail) ( ab to-type -- b )
|
||||
[ dup length 2 /i tail-slice ] dip new like ;
|
||||
|
|
@ -22,7 +22,7 @@ HELP: vconvert
|
|||
}
|
||||
{ $description "Converts SIMD vectors of " { $snippet "from-type" } " to " { $snippet "to-type" } ". The number of inputs and outputs depends on the relationship of the two types:"
|
||||
{ $list
|
||||
{ "If " { $snippet "to-type" } " is a floating-point vector type with the same byte length and element count as the integer vector type " { $snippet "from-type" } " (for example, from " { $snippet "int-8" } " to " { $snippet "float-8" } " or from " { $snippet "longlong-2" } " to " { $snippet "double-2" } "), " { $snippet "vconvert" } " takes one vector of " { $snippet "from-type" } " and converts its elements to floating-point, outputting one vector of " { $snippet "to-type" } "." }
|
||||
{ "If " { $snippet "to-type" } " is a floating-point vector type with the same byte length and element count as the integer vector type " { $snippet "from-type" } " (for example, from " { $snippet "int-4" } " to " { $snippet "float-4" } " or from " { $snippet "longlong-2" } " to " { $snippet "double-2" } "), " { $snippet "vconvert" } " takes one vector of " { $snippet "from-type" } " and converts its elements to floating-point, outputting one vector of " { $snippet "to-type" } "." }
|
||||
{ "Likewise, if " { $snippet "to-type" } " is an integer vector type with the same byte length and element count as the floating-point vector type " { $snippet "from-type" } ", " { $snippet "vconvert" } " takes one vector of " { $snippet "from-type" } " and truncates its elements to integers, outputting one vector of " { $snippet "to-type" } "." }
|
||||
{ "If " { $snippet "to-type" } " is a vector type with the same byte length as and twice the element count of the vector type " { $snippet "from-type" } " (for example, from " { $snippet "int-4" } " to " { $snippet "ushort-8" } ", from " { $snippet "double-2" } " to " { $snippet "float-4" } ", or from " { $snippet "short-8" } " to " { $snippet "char-16" } "), " { $snippet "vconvert" } " takes two vectors of " { $snippet "from-type" } " and packs them into one vector of " { $snippet "to-type" } ", saturating values too large or small to be representable as elements of " { $snippet "to-type" } "." }
|
||||
{ "If " { $snippet "to-type" } " is a vector type with the same byte length as and half the element count of the vector type " { $snippet "from-type" } " (for example, from " { $snippet "ushort-8" } " to " { $snippet "int-4" } ", from " { $snippet "float-4" } " to " { $snippet "double-2" } ", or from " { $snippet "char-16" } " to " { $snippet "short-8" } "), " { $snippet "vconvert" } " takes one vector of " { $snippet "from-type" } " and unpacks it into two vectors of " { $snippet "to-type" } "." }
|
||||
|
@ -39,26 +39,23 @@ HELP: vconvert
|
|||
"Conversion between integer and float vectors:"
|
||||
{ $example """USING: alien.c-types math.vectors.conversion math.vectors.simd
|
||||
prettyprint ;
|
||||
SIMDS: int float longlong double ;
|
||||
|
||||
int-8{ 0 1 2 3 4 5 6 7 } int-8 float-8 vconvert .
|
||||
int-4{ 0 1 2 3 } int-4 float-4 vconvert .
|
||||
double-2{ 1.25 3.75 } double-2 longlong-2 vconvert ."""
|
||||
"""float-8{ 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 }
|
||||
"""float-4{ 0.0 1.0 2.0 3.0 }
|
||||
longlong-2{ 1 3 }""" }
|
||||
"Packing conversions:"
|
||||
{ $example """USING: alien.c-types math.vectors.conversion math.vectors.simd
|
||||
prettyprint ;
|
||||
SIMDS: ushort int float double ;
|
||||
|
||||
int-4{ -8 70000 6000 50 } int-4{ 4 3 2 -1 } int-4 ushort-8 vconvert .
|
||||
double-4{ 0.0 1.5 1.0e100 2.0 }
|
||||
double-4{ -1.0e100 0.0 1.0 2.0 } double-4 float-8 vconvert ."""
|
||||
double-2{ 0.0 1.0e100 }
|
||||
double-2{ -1.0e100 0.0 } double-2 float-4 vconvert ."""
|
||||
"""ushort-8{ 0 65535 6000 50 4 3 2 0 }
|
||||
float-8{ 0.0 1.5 1/0. 2.0 -1/0. 0.0 1.0 2.0 }""" }
|
||||
float-4{ 0.0 1/0. -1/0. 0.0 }""" }
|
||||
"Unpacking conversions:"
|
||||
{ $example """USING: alien.c-types kernel math.vectors.conversion
|
||||
math.vectors.simd prettyprint ;
|
||||
SIMDS: uchar short ;
|
||||
|
||||
uchar-16{ 8 70 60 50 4 30 200 1 9 10 110 102 133 143 115 0 }
|
||||
uchar-16 short-8 vconvert [ . ] bi@"""
|
||||
|
|
|
@ -3,16 +3,6 @@ USING: accessors arrays compiler continuations generalizations
|
|||
kernel kernel.private locals math.vectors.conversion math.vectors.simd
|
||||
sequences stack-checker tools.test ;
|
||||
FROM: alien.c-types => char uchar short ushort int uint longlong ulonglong float double ;
|
||||
SIMD: uchar
|
||||
SIMD: char
|
||||
SIMD: ushort
|
||||
SIMD: short
|
||||
SIMD: uint
|
||||
SIMD: int
|
||||
SIMD: ulonglong
|
||||
SIMD: longlong
|
||||
SIMD: float
|
||||
SIMD: double
|
||||
IN: math.vectors.conversion.tests
|
||||
|
||||
ERROR: optimized-vconvert-inconsistent
|
||||
|
@ -59,12 +49,12 @@ MACRO:: test-vconvert ( from-type to-type -- )
|
|||
[ double-2{ -5.0 1.0 } ]
|
||||
[ longlong-2{ -5 1 } longlong-2 double-2 test-vconvert ] unit-test
|
||||
|
||||
[ longlong-4{ -5 1 2 6 } ]
|
||||
[ double-4{ -5.0 1.0 2.3 6.7 } double-4 longlong-4 test-vconvert ] unit-test
|
||||
[ longlong-2{ -5 1 } ]
|
||||
[ double-2{ -5.0 1.0 } double-2 longlong-2 test-vconvert ] unit-test
|
||||
|
||||
! TODO we should be able to do double->int pack
|
||||
! [ int-8{ -5 1 2 6 12 34 -56 78 } ]
|
||||
[ double-4{ -5.0 1.0 2.0 6.0 } double-4{ 12.0 34.0 -56.0 78.0 } double-4 int-8 test-vconvert ]
|
||||
! [ int-4{ -5 1 12 34 } ]
|
||||
[ double-2{ -5.0 1.0 } double-2{ 12.0 34.0 } double-2 int-4 test-vconvert ]
|
||||
[ error>> bad-vconvert? ] must-fail-with
|
||||
|
||||
[ float-4{ -1.25 2.0 3.0 -4.0 } ]
|
||||
|
@ -76,10 +66,10 @@ MACRO:: test-vconvert ( from-type to-type -- )
|
|||
[ short-8{ -1 2 3 -32768 5 32767 -7 32767 } ]
|
||||
[ int-4{ -1 2 3 -40000 } int-4{ 5 60000 -7 80000 } int-4 short-8 test-vconvert ] unit-test
|
||||
|
||||
[ short-16{ -1 2 3 -32768 3 2 1 0 5 32767 -7 32767 7 6 5 4 } ]
|
||||
[ short-8{ -1 2 3 -32768 5 32767 -7 32767 } ]
|
||||
[
|
||||
int-8{ -1 2 3 -40000 3 2 1 0 }
|
||||
int-8{ 5 60000 -7 80000 7 6 5 4 } int-8 short-16 test-vconvert
|
||||
int-4{ -1 2 3 -40000 }
|
||||
int-4{ 5 60000 -7 80000 } int-4 short-8 test-vconvert
|
||||
] unit-test
|
||||
|
||||
[ ushort-8{ 0 2 3 0 5 60000 0 65535 } ]
|
||||
|
@ -97,15 +87,6 @@ MACRO:: test-vconvert ( from-type to-type -- )
|
|||
uchar-16 ushort-8 test-vconvert
|
||||
] unit-test
|
||||
|
||||
! TODO we should be able to do 256->128 pack
|
||||
! [ float-4{ -1.25 2.0 3.0 -4.0 } ]
|
||||
[ double-4{ -1.25 2.0 3.0 -4.0 } double-4 float-4 test-vconvert ]
|
||||
[ error>> bad-vconvert? ] must-fail-with
|
||||
|
||||
! [ int-4{ -1 2 3 -4 } ]
|
||||
[ longlong-4{ -1 2 3 -4 } longlong-4 int-4 test-vconvert ]
|
||||
[ error>> bad-vconvert? ] must-fail-with
|
||||
|
||||
[ double-2{ -1.25 2.0 } double-2{ 3.0 -4.0 } ]
|
||||
[ float-4{ -1.25 2.0 3.0 -4.0 } float-4 double-2 test-vconvert ] unit-test
|
||||
|
||||
|
@ -121,8 +102,8 @@ MACRO:: test-vconvert ( from-type to-type -- )
|
|||
[ ulonglong-2{ 1 2 } ulonglong-2{ 3 4 } ]
|
||||
[ uint-4{ 1 2 3 4 } uint-4 ulonglong-2 test-vconvert ] unit-test
|
||||
|
||||
[ longlong-4{ 1 2 3 4 } longlong-4{ 3 4 5 6 } ]
|
||||
[ uint-8{ 1 2 3 4 3 4 5 6 } uint-8 longlong-4 test-vconvert ] unit-test
|
||||
[ longlong-2{ 1 2 } longlong-2{ 3 4 } ]
|
||||
[ uint-4{ 1 2 3 4 } uint-4 longlong-2 test-vconvert ] unit-test
|
||||
|
||||
[ int-4{ 1 2 -3 -4 } int-4{ 5 -6 7 -8 } ]
|
||||
[ short-8{ 1 2 -3 -4 5 -6 7 -8 } short-8 int-4 test-vconvert ] unit-test
|
||||
|
@ -130,13 +111,8 @@ MACRO:: test-vconvert ( from-type to-type -- )
|
|||
[ uint-4{ 1 2 3 4 } uint-4{ 5 6 7 8 } ]
|
||||
[ ushort-8{ 1 2 3 4 5 6 7 8 } ushort-8 uint-4 test-vconvert ] unit-test
|
||||
|
||||
[ longlong-4{ 1 2 3 4 } longlong-4{ 3 4 5 6 } ]
|
||||
[ uint-8{ 1 2 3 4 3 4 5 6 } uint-8 longlong-4 test-vconvert ] unit-test
|
||||
|
||||
! TODO we should be able to do 128->256 unpack
|
||||
! [ longlong-4{ 1 2 3 4 } ]
|
||||
[ uint-4{ 1 2 3 4 } uint-4 longlong-4 test-vconvert ]
|
||||
[ error>> bad-vconvert? ] must-fail-with
|
||||
[ longlong-2{ 1 2 } longlong-2{ 3 4 } ]
|
||||
[ uint-4{ 1 2 3 4 } uint-4 longlong-2 test-vconvert ] unit-test
|
||||
|
||||
! TODO we should be able to do multi-tier pack/unpack
|
||||
! [ longlong-2{ 1 2 } longlong-2{ 3 4 } longlong-2{ 5 6 } longlong-2{ 7 8 } ]
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
! (c)Joe Groff bsd license
|
||||
USING: accessors alien.c-types arrays assocs classes combinators
|
||||
combinators.short-circuit cords fry kernel locals math
|
||||
math.vectors math.vectors.conversion.backend sequences ;
|
||||
FROM: alien.c-types => char uchar short ushort int uint longlong ulonglong float double ;
|
||||
combinators.short-circuit fry kernel locals math
|
||||
math.vectors math.vectors.simd math.vectors.simd.intrinsics sequences ;
|
||||
FROM: alien.c-types =>
|
||||
char uchar short ushort int uint longlong ulonglong
|
||||
float double ;
|
||||
IN: math.vectors.conversion
|
||||
|
||||
ERROR: bad-vconvert from-type to-type ;
|
||||
|
@ -30,11 +32,11 @@ ERROR: bad-vconvert-input value expected-type ;
|
|||
}
|
||||
{
|
||||
[ from-element float-type? ]
|
||||
[ [ to-type (v>integer) ] ]
|
||||
[ from-type new simd-rep '[ underlying>> _ (simd-v>integer) to-type boa ] ]
|
||||
}
|
||||
{
|
||||
[ to-element float-type? ]
|
||||
[ [ to-type (v>float) ] ]
|
||||
[ from-type new simd-rep '[ underlying>> _ (simd-v>float) to-type boa ] ]
|
||||
}
|
||||
} cond
|
||||
[ from-type check-vconvert-type ] prepose ;
|
||||
|
@ -47,10 +49,18 @@ ERROR: bad-vconvert-input value expected-type ;
|
|||
} 0|| [ from-type to-type bad-vconvert ] when ;
|
||||
|
||||
:: [[vpack-unsigned]] ( from-type to-type -- quot )
|
||||
[ [ from-type check-vconvert-type ] bi@ to-type (vpack-unsigned) ] ;
|
||||
from-type new simd-rep
|
||||
'[
|
||||
[ from-type check-vconvert-type underlying>> ] bi@
|
||||
_ (simd-vpack-unsigned) to-type boa
|
||||
] ;
|
||||
|
||||
:: [[vpack-signed]] ( from-type to-type -- quot )
|
||||
[ [ from-type check-vconvert-type ] bi@ to-type (vpack-signed) ] ;
|
||||
from-type new simd-rep
|
||||
'[
|
||||
[ from-type check-vconvert-type underlying>> ] bi@
|
||||
_ (simd-vpack-signed) to-type boa
|
||||
] ;
|
||||
|
||||
:: [vpack] ( from-element to-element from-size to-size from-type to-type -- quot )
|
||||
from-size to-size /i log2 :> steps
|
||||
|
@ -68,9 +78,11 @@ ERROR: bad-vconvert-input value expected-type ;
|
|||
} 0|| [ from-type to-type bad-vconvert ] when ;
|
||||
|
||||
:: [[vunpack]] ( from-type to-type -- quot )
|
||||
[
|
||||
from-type check-vconvert-type
|
||||
[ to-type (vunpack-head) ] [ to-type (vunpack-tail) ] bi
|
||||
from-type new simd-rep
|
||||
'[
|
||||
from-type check-vconvert-type underlying>> _
|
||||
[ (simd-vunpack-head) to-type boa ]
|
||||
[ (simd-vunpack-tail) to-type boa ] 2bi
|
||||
] ;
|
||||
|
||||
:: [vunpack] ( from-element to-element from-size to-size from-type to-type -- quot )
|
||||
|
@ -81,8 +93,8 @@ ERROR: bad-vconvert-input value expected-type ;
|
|||
PRIVATE>
|
||||
|
||||
MACRO:: vconvert ( from-type to-type -- )
|
||||
from-type new [ element-type ] [ byte-length ] bi :> ( from-element from-length )
|
||||
to-type new [ element-type ] [ byte-length ] bi :> ( to-element to-length )
|
||||
from-type new [ simd-element-type ] [ byte-length ] bi :> ( from-element from-length )
|
||||
to-type new [ simd-element-type ] [ byte-length ] bi :> ( to-element to-length )
|
||||
from-element heap-size :> from-size
|
||||
to-element heap-size :> to-size
|
||||
|
||||
|
|
|
@ -0,0 +1,87 @@
|
|||
USING: accessors alien.c-types arrays byte-arrays
|
||||
cpu.architecture effects functors generalizations kernel lexer
|
||||
math math.vectors.simd math.vectors.simd.intrinsics parser
|
||||
prettyprint.custom quotations sequences sequences.cords words ;
|
||||
IN: math.vectors.simd.cords
|
||||
|
||||
<<
|
||||
<PRIVATE
|
||||
|
||||
FUNCTOR: (define-simd-128-cord) ( A/2 A -- )
|
||||
|
||||
A-rep IS ${A/2}-rep
|
||||
>A/2 IS >${A/2}
|
||||
A/2-boa IS ${A/2}-boa
|
||||
A/2-with IS ${A/2}-with
|
||||
A/2-cast IS ${A/2}-cast
|
||||
|
||||
>A DEFINES >${A}
|
||||
A-boa DEFINES ${A}-boa
|
||||
A-with DEFINES ${A}-with
|
||||
A-cast DEFINES ${A}-cast
|
||||
A{ DEFINES ${A}{
|
||||
|
||||
N [ A-rep rep-length ]
|
||||
BOA-EFFECT [ N 2 * "n" <repetition> >array { "v" } <effect> ]
|
||||
|
||||
WHERE
|
||||
|
||||
: >A ( seq -- A )
|
||||
[ N head >A/2 ]
|
||||
[ N tail >A/2 ] bi cord-append ;
|
||||
|
||||
\ A-boa
|
||||
{ N ndip A/2-boa cord-append } { A/2-boa } >quotation prefix >quotation
|
||||
BOA-EFFECT define-inline
|
||||
|
||||
: A-with ( n -- v )
|
||||
[ A/2-with ] [ A/2-with ] bi cord-append ;
|
||||
|
||||
: A-cast ( v -- v' )
|
||||
[ A/2-cast ] cord-map ;
|
||||
|
||||
M: A >pprint-sequence ;
|
||||
M: A pprint* pprint-object ;
|
||||
|
||||
M: A pprint-delims drop \ A{ \ } ;
|
||||
SYNTAX: A{ \ } [ >A ] parse-literal ;
|
||||
|
||||
<c-type>
|
||||
byte-array >>class
|
||||
A >>boxed-class
|
||||
[
|
||||
[ A-rep alien-vector A/2 boa ]
|
||||
[ 16 + A-rep alien-vector A/2 boa ] 2bi cord-append
|
||||
] >>getter
|
||||
[
|
||||
[ [ head>> underlying>> ] 2dip A-rep set-alien-vector ]
|
||||
[ [ tail>> underlying>> ] 2dip 16 + A-rep set-alien-vector ] 3bi
|
||||
] >>setter
|
||||
32 >>size
|
||||
16 >>align
|
||||
A-rep >>rep
|
||||
\ A typedef
|
||||
|
||||
;FUNCTOR
|
||||
|
||||
: define-simd-128-cord ( A/2 T -- )
|
||||
[ define-specialized-cord ]
|
||||
[ create-in (define-simd-128-cord) ] 2bi ;
|
||||
|
||||
SYNTAX: SIMD-128-CORD:
|
||||
scan-word scan define-simd-128-cord ;
|
||||
|
||||
PRIVATE>
|
||||
>>
|
||||
|
||||
SIMD-128-CORD: char-16 char-32
|
||||
SIMD-128-CORD: uchar-16 uchar-32
|
||||
SIMD-128-CORD: short-8 short-16
|
||||
SIMD-128-CORD: ushort-8 ushort-16
|
||||
SIMD-128-CORD: int-4 int-8
|
||||
SIMD-128-CORD: uint-4 uint-8
|
||||
SIMD-128-CORD: longlong-2 longlong-4
|
||||
SIMD-128-CORD: ulonglong-2 ulonglong-4
|
||||
SIMD-128-CORD: float-4 float-8
|
||||
SIMD-128-CORD: double-2 double-4
|
||||
|
|
@ -1 +0,0 @@
|
|||
Slava Pestov
|
|
@ -1,524 +0,0 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors assocs byte-arrays classes classes.algebra effects fry
|
||||
functors generalizations kernel literals locals math math.functions
|
||||
math.vectors math.vectors.private math.vectors.simd.intrinsics
|
||||
math.vectors.conversion.backend
|
||||
math.vectors.specialization parser prettyprint.custom sequences
|
||||
sequences.private strings words definitions macros cpu.architecture
|
||||
namespaces arrays quotations combinators combinators.short-circuit sets
|
||||
layouts ;
|
||||
QUALIFIED-WITH: alien.c-types c
|
||||
QUALIFIED: math.private
|
||||
IN: math.vectors.simd.functor
|
||||
|
||||
ERROR: bad-length got expected ;
|
||||
|
||||
: vector-true-value ( class -- value )
|
||||
{
|
||||
{ [ dup integer class<= ] [ drop -1 ] }
|
||||
{ [ dup float class<= ] [ drop -1 bits>double ] }
|
||||
} cond ; foldable
|
||||
|
||||
: vector-false-value ( class -- value )
|
||||
{
|
||||
{ [ dup integer class<= ] [ drop 0 ] }
|
||||
{ [ dup float class<= ] [ drop 0.0 ] }
|
||||
} cond ; foldable
|
||||
|
||||
: boolean>element ( bool/elt class -- elt )
|
||||
swap {
|
||||
{ t [ vector-true-value ] }
|
||||
{ f [ vector-false-value ] }
|
||||
[ nip ]
|
||||
} case ; inline
|
||||
|
||||
MACRO: simd-boa ( rep class -- simd-array )
|
||||
[ rep-components ] [ new ] bi* '[ _ _ nsequence ] ;
|
||||
|
||||
: can-be-unboxed? ( type -- ? )
|
||||
{
|
||||
{ c:float [ \ math.private:float+ "intrinsic" word-prop ] }
|
||||
{ c:double [ \ math.private:float+ "intrinsic" word-prop ] }
|
||||
[ c:heap-size cell < ]
|
||||
} case ;
|
||||
|
||||
: simd-boa-fast? ( rep -- ? )
|
||||
[ dup rep-gather-word supported-simd-op? ]
|
||||
[ rep-component-type can-be-unboxed? ]
|
||||
bi and ;
|
||||
|
||||
:: define-boa-custom-inlining ( word rep class -- )
|
||||
word [
|
||||
drop
|
||||
rep simd-boa-fast? [
|
||||
[ rep (simd-boa) class boa ]
|
||||
] [ word def>> ] if
|
||||
] "custom-inlining" set-word-prop ;
|
||||
|
||||
: simd-with ( rep class x -- simd-array )
|
||||
[ rep-components ] [ new ] [ '[ _ ] ] tri* swap replicate-as ; inline
|
||||
|
||||
: simd-with/nth-fast? ( rep -- ? )
|
||||
[ \ (simd-vshuffle-elements) supported-simd-op? ]
|
||||
[ rep-component-type can-be-unboxed? ]
|
||||
bi and ;
|
||||
|
||||
:: define-with-custom-inlining ( word rep class -- )
|
||||
word [
|
||||
drop
|
||||
rep simd-with/nth-fast? [
|
||||
[ rep rep-coerce rep (simd-with) class boa ]
|
||||
] [ word def>> ] if
|
||||
] "custom-inlining" set-word-prop ;
|
||||
|
||||
: simd-nth-fast ( rep -- quot )
|
||||
[ rep-components ] keep
|
||||
'[ swap _ '[ _ _ (simd-select) ] 2array ] map-index
|
||||
'[ swap >fixnum _ case ] ;
|
||||
|
||||
: simd-nth-slow ( rep -- quot )
|
||||
rep-component-type dup c:c-type-getter-boxer c:array-accessor ;
|
||||
|
||||
MACRO: simd-nth ( rep -- x )
|
||||
dup simd-with/nth-fast? [ simd-nth-fast ] [ simd-nth-slow ] if ;
|
||||
|
||||
: boa-effect ( rep n -- effect )
|
||||
[ rep-components ] dip *
|
||||
[ CHAR: a + 1string ] map
|
||||
{ "simd-vector" } <effect> ;
|
||||
|
||||
: supported-simd-ops ( assoc rep -- assoc' )
|
||||
[ simd-ops get ] dip
|
||||
'[ nip _ swap supported-simd-op? ] assoc-filter
|
||||
'[ drop _ key? ] assoc-filter ;
|
||||
|
||||
ERROR: bad-schema op schema ;
|
||||
|
||||
:: op-wrapper ( op specials schemas -- wrapper )
|
||||
op {
|
||||
[ specials at ]
|
||||
[ word-schema schemas at ]
|
||||
[ dup word-schema bad-schema ]
|
||||
} 1|| ;
|
||||
|
||||
: low-level-ops ( simd-ops specials schemas -- alist )
|
||||
'[ 1quotation over _ _ op-wrapper [ ] 2sequence ] assoc-map ;
|
||||
|
||||
:: high-level-ops ( ctor elt-class -- assoc )
|
||||
! Some SIMD operations are defined in terms of others.
|
||||
{
|
||||
{ vbroadcast [ swap nth ctor execute ] }
|
||||
{ n+v [ [ ctor execute ] dip v+ ] }
|
||||
{ v+n [ ctor execute v+ ] }
|
||||
{ n-v [ [ ctor execute ] dip v- ] }
|
||||
{ v-n [ ctor execute v- ] }
|
||||
{ n*v [ [ ctor execute ] dip v* ] }
|
||||
{ v*n [ ctor execute v* ] }
|
||||
{ n/v [ [ ctor execute ] dip v/ ] }
|
||||
{ v/n [ ctor execute v/ ] }
|
||||
{ norm-sq [ dup v. assert-positive ] }
|
||||
{ norm [ norm-sq sqrt ] }
|
||||
{ normalize [ dup norm v/n ] }
|
||||
}
|
||||
! To compute dot product and distance with integer vectors, we
|
||||
! have to do things less efficiently, with integer overflow checks,
|
||||
! in the general case.
|
||||
elt-class float = [ { distance [ v- norm ] } suffix ] when ;
|
||||
|
||||
TUPLE: simd class elt-class ops special-wrappers schema-wrappers ctor rep ;
|
||||
|
||||
: define-simd ( simd -- )
|
||||
dup rep>> rep-component-type c:c-type-boxed-class >>elt-class
|
||||
{
|
||||
[ class>> ]
|
||||
[ elt-class>> ]
|
||||
[ [ ops>> ] [ special-wrappers>> ] [ schema-wrappers>> ] tri low-level-ops ]
|
||||
[ rep>> supported-simd-ops ]
|
||||
[ [ ctor>> ] [ elt-class>> ] bi high-level-ops assoc-union ]
|
||||
} cleave
|
||||
specialize-vector-words ;
|
||||
|
||||
:: define-simd-128-type ( class rep -- )
|
||||
c:<c-type>
|
||||
byte-array >>class
|
||||
class >>boxed-class
|
||||
[ rep alien-vector class boa ] >>getter
|
||||
[ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
|
||||
16 >>size
|
||||
16 >>align
|
||||
16 >>align-first
|
||||
rep >>rep
|
||||
class c:typedef ;
|
||||
|
||||
: (define-simd-128) ( simd -- )
|
||||
simd-ops get >>ops
|
||||
[ define-simd ]
|
||||
[ [ class>> ] [ rep>> ] bi define-simd-128-type ] bi ;
|
||||
|
||||
FUNCTOR: define-simd-128 ( T -- )
|
||||
|
||||
N [ 16 T c:heap-size /i ]
|
||||
|
||||
A DEFINES-CLASS ${T}-${N}
|
||||
A-boa DEFINES ${A}-boa
|
||||
A-with DEFINES ${A}-with
|
||||
A-cast DEFINES ${A}-cast
|
||||
>A DEFINES >${A}
|
||||
A{ DEFINES ${A}{
|
||||
|
||||
SET-NTH [ T dup c:c-setter c:array-accessor ]
|
||||
|
||||
A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
|
||||
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
||||
A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op
|
||||
A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op
|
||||
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
|
||||
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
|
||||
A-v-conversion-op DEFINES-PRIVATE ${A}-v-conversion-op
|
||||
A-vv-conversion-op DEFINES-PRIVATE ${A}-vv-conversion-op
|
||||
|
||||
A-element-class [ A-rep rep-component-type c:c-type-boxed-class ]
|
||||
|
||||
WHERE
|
||||
|
||||
TUPLE: A
|
||||
{ underlying byte-array read-only initial: $[ 16 <byte-array> ] } ;
|
||||
|
||||
INSTANCE: A simd-128
|
||||
|
||||
M: A clone underlying>> clone \ A boa ; inline
|
||||
|
||||
M: A length drop N ; inline
|
||||
|
||||
M: A equal?
|
||||
over \ A instance? [ v= vall? ] [ 2drop f ] if ;
|
||||
|
||||
M: A nth-unsafe underlying>> A-rep simd-nth ; inline
|
||||
|
||||
M: A set-nth-unsafe
|
||||
[ A-element-class boolean>element ] 2dip
|
||||
underlying>> SET-NTH call ; inline
|
||||
|
||||
: >A ( seq -- simd-array ) \ A new clone-like ;
|
||||
|
||||
M: A like drop dup \ A instance? [ >A ] unless ; inline
|
||||
|
||||
M: A new-underlying drop \ A boa ; inline
|
||||
|
||||
M: A new-sequence
|
||||
drop dup N =
|
||||
[ drop 16 <byte-array> \ A boa ]
|
||||
[ N bad-length ]
|
||||
if ; inline
|
||||
|
||||
M: A c:byte-length underlying>> length ; inline
|
||||
|
||||
M: A element-type drop A-rep rep-component-type ;
|
||||
|
||||
M: A pprint-delims drop \ A{ \ } ;
|
||||
|
||||
M: A >pprint-sequence ;
|
||||
|
||||
M: A pprint* pprint-object ;
|
||||
|
||||
SYNTAX: A{ \ } [ >A ] parse-literal ;
|
||||
|
||||
: A-with ( x -- simd-array ) [ A-rep A ] dip simd-with ;
|
||||
|
||||
\ A-with \ A-rep \ A define-with-custom-inlining
|
||||
|
||||
\ A-boa [ \ A-rep \ A simd-boa ] \ A-rep 1 boa-effect define-declared
|
||||
|
||||
\ A-rep rep-gather-word [
|
||||
\ A-boa \ A-rep \ A define-boa-custom-inlining
|
||||
] when
|
||||
|
||||
: A-cast ( simd-array -- simd-array' )
|
||||
underlying>> \ A boa ; inline
|
||||
|
||||
INSTANCE: A sequence
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: A-vv->v-op ( v1 v2 quot -- v3 )
|
||||
[ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline
|
||||
|
||||
: A-vn->v-op ( v1 v2 quot -- v3 )
|
||||
[ [ underlying>> ] dip A-rep ] dip call \ A boa ; inline
|
||||
|
||||
: A-vv->n-op ( v1 v2 quot -- n )
|
||||
[ [ underlying>> ] bi@ A-rep ] dip call ; inline
|
||||
|
||||
: A-v->v-op ( v1 quot -- v2 )
|
||||
[ underlying>> A-rep ] dip call \ A boa ; inline
|
||||
|
||||
: A-v->n-op ( v quot -- n )
|
||||
[ underlying>> A-rep ] dip call ; inline
|
||||
|
||||
: A-v-conversion-op ( v1 to-type quot -- v2 )
|
||||
swap [ underlying>> A-rep ] [ call ] [ '[ _ boa ] call( u -- v ) ] tri* ; inline
|
||||
|
||||
: A-vv-conversion-op ( v1 v2 to-type quot -- v2 )
|
||||
swap {
|
||||
[ underlying>> ]
|
||||
[ underlying>> A-rep ]
|
||||
[ call ]
|
||||
[ '[ _ boa ] call( u -- v ) ]
|
||||
} spread ; inline
|
||||
|
||||
simd new
|
||||
\ A >>class
|
||||
\ A-with >>ctor
|
||||
\ A-rep >>rep
|
||||
{
|
||||
{ (v>float) A-v-conversion-op }
|
||||
{ (v>integer) A-v-conversion-op }
|
||||
{ (vpack-signed) A-vv-conversion-op }
|
||||
{ (vpack-unsigned) A-vv-conversion-op }
|
||||
{ (vunpack-head) A-v-conversion-op }
|
||||
{ (vunpack-tail) A-v-conversion-op }
|
||||
} >>special-wrappers
|
||||
{
|
||||
{ { +vector+ +vector+ -> +vector+ } A-vv->v-op }
|
||||
{ { +vector+ +any-vector+ -> +vector+ } A-vv->v-op }
|
||||
{ { +vector+ +scalar+ -> +vector+ } A-vn->v-op }
|
||||
{ { +vector+ +literal+ -> +vector+ } A-vn->v-op }
|
||||
{ { +vector+ +vector+ -> +scalar+ } A-vv->n-op }
|
||||
{ { +vector+ +vector+ -> +boolean+ } A-vv->n-op }
|
||||
{ { +vector+ -> +vector+ } A-v->v-op }
|
||||
{ { +vector+ -> +scalar+ } A-v->n-op }
|
||||
{ { +vector+ -> +boolean+ } A-v->n-op }
|
||||
{ { +vector+ -> +nonnegative+ } A-v->n-op }
|
||||
} >>schema-wrappers
|
||||
(define-simd-128)
|
||||
|
||||
PRIVATE>
|
||||
|
||||
;FUNCTOR
|
||||
|
||||
! Synthesize 256-bit vectors from a pair of 128-bit vectors
|
||||
SLOT: underlying1
|
||||
SLOT: underlying2
|
||||
|
||||
:: define-simd-256-type ( class rep -- )
|
||||
c:<c-type>
|
||||
class >>class
|
||||
class >>boxed-class
|
||||
[
|
||||
[ rep alien-vector ]
|
||||
[ 16 + >fixnum rep alien-vector ] 2bi
|
||||
class boa
|
||||
] >>getter
|
||||
[
|
||||
[ [ underlying1>> ] 2dip rep set-alien-vector ]
|
||||
[ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ]
|
||||
3bi
|
||||
] >>setter
|
||||
32 >>size
|
||||
16 >>align
|
||||
16 >>align-first
|
||||
rep >>rep
|
||||
class c:typedef ;
|
||||
|
||||
: (define-simd-256) ( simd -- )
|
||||
simd-ops get { vshuffle-elements vshuffle-bytes hlshift hrshift } unique assoc-diff >>ops
|
||||
[ define-simd ]
|
||||
[ [ class>> ] [ rep>> ] bi define-simd-256-type ] bi ;
|
||||
|
||||
FUNCTOR: define-simd-256 ( T -- )
|
||||
|
||||
N [ 32 T c:heap-size /i ]
|
||||
|
||||
N/2 [ N 2 /i ]
|
||||
A/2 IS ${T}-${N/2}
|
||||
A/2-boa IS ${A/2}-boa
|
||||
A/2-with IS ${A/2}-with
|
||||
|
||||
A DEFINES-CLASS ${T}-${N}
|
||||
A-boa DEFINES ${A}-boa
|
||||
A-with DEFINES ${A}-with
|
||||
A-cast DEFINES ${A}-cast
|
||||
>A DEFINES >${A}
|
||||
A{ DEFINES ${A}{
|
||||
|
||||
A-deref DEFINES-PRIVATE ${A}-deref
|
||||
|
||||
A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
|
||||
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
||||
A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op
|
||||
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
|
||||
A-v.-op DEFINES-PRIVATE ${A}-v.-op
|
||||
(A-v->n-op) DEFINES-PRIVATE (${A}-v->v-op)
|
||||
A-sum-op DEFINES-PRIVATE ${A}-sum-op
|
||||
A-vany-op DEFINES-PRIVATE ${A}-vany-op
|
||||
A-vall-op DEFINES-PRIVATE ${A}-vall-op
|
||||
A-vmerge-head-op DEFINES-PRIVATE ${A}-vmerge-head-op
|
||||
A-vmerge-tail-op DEFINES-PRIVATE ${A}-vmerge-tail-op
|
||||
A-v-conversion-op DEFINES-PRIVATE ${A}-v-conversion-op
|
||||
A-vpack-op DEFINES-PRIVATE ${A}-vpack-op
|
||||
A-vunpack-head-op DEFINES-PRIVATE ${A}-vunpack-head-op
|
||||
A-vunpack-tail-op DEFINES-PRIVATE ${A}-vunpack-tail-op
|
||||
|
||||
WHERE
|
||||
|
||||
SLOT: underlying1
|
||||
SLOT: underlying2
|
||||
|
||||
TUPLE: A
|
||||
{ underlying1 byte-array initial: $[ 16 <byte-array> ] read-only }
|
||||
{ underlying2 byte-array initial: $[ 16 <byte-array> ] read-only } ;
|
||||
|
||||
INSTANCE: A simd-256
|
||||
|
||||
M: A clone
|
||||
[ underlying1>> clone ] [ underlying2>> clone ] bi
|
||||
\ A boa ; inline
|
||||
|
||||
M: A length drop N ; inline
|
||||
|
||||
M: A equal?
|
||||
over \ A instance? [ v= vall? ] [ 2drop f ] if ;
|
||||
|
||||
: A-deref ( n seq -- n' seq' )
|
||||
over N/2 < [ underlying1>> ] [ [ N/2 - ] dip underlying2>> ] if \ A/2 boa ; inline
|
||||
|
||||
M: A nth-unsafe A-deref nth-unsafe ; inline
|
||||
|
||||
M: A set-nth-unsafe A-deref set-nth-unsafe ; inline
|
||||
|
||||
: >A ( seq -- simd-array ) \ A new clone-like ;
|
||||
|
||||
M: A like drop dup \ A instance? [ >A ] unless ; inline
|
||||
|
||||
M: A new-sequence
|
||||
drop dup N =
|
||||
[ drop 16 <byte-array> 16 <byte-array> \ A boa ]
|
||||
[ N bad-length ]
|
||||
if ; inline
|
||||
|
||||
M: A c:byte-length drop 32 ; inline
|
||||
|
||||
M: A element-type drop A-rep rep-component-type ;
|
||||
|
||||
SYNTAX: A{ \ } [ >A ] parse-literal ;
|
||||
|
||||
M: A pprint-delims drop \ A{ \ } ;
|
||||
|
||||
M: A >pprint-sequence ;
|
||||
|
||||
M: A pprint* pprint-object ;
|
||||
|
||||
: A-with ( x -- simd-array )
|
||||
[ A/2-with ] [ A/2-with ] bi [ underlying>> ] bi@
|
||||
\ A boa ; inline
|
||||
|
||||
: A-boa ( ... -- simd-array )
|
||||
[ A/2-boa ] N/2 ndip A/2-boa [ underlying>> ] bi@
|
||||
\ A boa ; inline
|
||||
|
||||
\ A-rep 2 boa-effect \ A-boa set-stack-effect
|
||||
|
||||
: A-cast ( simd-array -- simd-array' )
|
||||
[ underlying1>> ] [ underlying2>> ] bi \ A boa ; inline
|
||||
|
||||
INSTANCE: A sequence
|
||||
|
||||
: A-vv->v-op ( v1 v2 quot -- v3 )
|
||||
[ [ [ underlying1>> ] bi@ A-rep ] dip call ]
|
||||
[ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
|
||||
\ A boa ; inline
|
||||
|
||||
: A-vn->v-op ( v1 v2 quot -- v3 )
|
||||
[ [ [ underlying1>> ] dip A-rep ] dip call ]
|
||||
[ [ [ underlying2>> ] dip A-rep ] dip call ] 3bi
|
||||
\ A boa ; inline
|
||||
|
||||
: A-v->v-op ( v1 combine-quot -- v2 )
|
||||
[ [ underlying1>> A-rep ] dip call ]
|
||||
[ [ underlying2>> A-rep ] dip call ] 2bi
|
||||
\ A boa ; inline
|
||||
|
||||
: A-v.-op ( v1 v2 quot -- n )
|
||||
[ [ [ underlying1>> ] bi@ A-rep ] dip call ]
|
||||
[ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
|
||||
+ ; inline
|
||||
|
||||
: (A-v->n-op) ( v1 quot reduce-quot -- n )
|
||||
'[ [ underlying1>> ] [ underlying2>> ] bi A-rep @ A-rep ] dip call ; inline
|
||||
|
||||
: A-sum-op ( v1 quot -- n )
|
||||
[ (simd-v+) ] (A-v->n-op) ; inline
|
||||
|
||||
: A-vany-op ( v1 quot -- n )
|
||||
[ (simd-vbitor) ] (A-v->n-op) ; inline
|
||||
: A-vall-op ( v1 quot -- n )
|
||||
[ (simd-vbitand) ] (A-v->n-op) ; inline
|
||||
|
||||
: A-vmerge-head-op ( v1 v2 quot -- v )
|
||||
drop
|
||||
[ underlying1>> ] bi@
|
||||
[ A-rep (simd-(vmerge-head)) ]
|
||||
[ A-rep (simd-(vmerge-tail)) ] 2bi
|
||||
\ A boa ; inline
|
||||
|
||||
: A-vmerge-tail-op ( v1 v2 quot -- v )
|
||||
drop
|
||||
[ underlying2>> ] bi@
|
||||
[ A-rep (simd-(vmerge-head)) ]
|
||||
[ A-rep (simd-(vmerge-tail)) ] 2bi
|
||||
\ A boa ; inline
|
||||
|
||||
: A-v-conversion-op ( v1 to-type quot -- v )
|
||||
swap [
|
||||
[ [ underlying1>> A-rep ] dip call ]
|
||||
[ [ underlying2>> A-rep ] dip call ] 2bi
|
||||
] dip '[ _ boa ] call( u1 u2 -- v ) ; inline
|
||||
|
||||
: A-vpack-op ( v1 v2 to-type quot -- v )
|
||||
swap [
|
||||
'[ [ underlying1>> ] [ underlying2>> ] bi A-rep @ ] bi*
|
||||
] dip '[ _ boa ] call( u1 u2 -- v ) ; inline
|
||||
|
||||
: A-vunpack-head-op ( v1 to-type quot -- v )
|
||||
'[
|
||||
underlying1>>
|
||||
[ A-rep @ ]
|
||||
[ A-rep (simd-(vunpack-tail)) ] bi
|
||||
] dip '[ _ boa ] call( u1 u2 -- v ) ; inline
|
||||
|
||||
: A-vunpack-tail-op ( v1 to-type quot -- v )
|
||||
'[
|
||||
underlying2>>
|
||||
[ A-rep (simd-(vunpack-head)) ]
|
||||
[ A-rep @ ] bi
|
||||
] dip '[ _ boa ] call( u1 u2 -- v ) ; inline
|
||||
|
||||
simd new
|
||||
\ A >>class
|
||||
\ A-with >>ctor
|
||||
\ A-rep >>rep
|
||||
{
|
||||
{ v. A-v.-op }
|
||||
{ sum A-sum-op }
|
||||
{ vnone? A-vany-op }
|
||||
{ vany? A-vany-op }
|
||||
{ vall? A-vall-op }
|
||||
{ (vmerge-head) A-vmerge-head-op }
|
||||
{ (vmerge-tail) A-vmerge-tail-op }
|
||||
{ (v>integer) A-v-conversion-op }
|
||||
{ (v>float) A-v-conversion-op }
|
||||
{ (vpack-signed) A-vpack-op }
|
||||
{ (vpack-unsigned) A-vpack-op }
|
||||
{ (vunpack-head) A-vunpack-head-op }
|
||||
{ (vunpack-tail) A-vunpack-tail-op }
|
||||
} >>special-wrappers
|
||||
{
|
||||
{ { +vector+ +vector+ -> +vector+ } A-vv->v-op }
|
||||
{ { +vector+ +scalar+ -> +vector+ } A-vn->v-op }
|
||||
{ { +vector+ +literal+ -> +vector+ } A-vn->v-op }
|
||||
{ { +vector+ -> +vector+ } A-v->v-op }
|
||||
} >>schema-wrappers
|
||||
(define-simd-256)
|
||||
|
||||
;FUNCTOR
|
|
@ -1 +0,0 @@
|
|||
Slava Pestov
|
|
@ -1,18 +0,0 @@
|
|||
IN: math.vectors.simd.intrinsics.tests
|
||||
USING: math.vectors.simd.intrinsics cpu.architecture tools.test ;
|
||||
|
||||
[ 16 ] [ uchar-16-rep rep-components ] unit-test
|
||||
[ 16 ] [ char-16-rep rep-components ] unit-test
|
||||
[ 8 ] [ ushort-8-rep rep-components ] unit-test
|
||||
[ 8 ] [ short-8-rep rep-components ] unit-test
|
||||
[ 4 ] [ uint-4-rep rep-components ] unit-test
|
||||
[ 4 ] [ int-4-rep rep-components ] unit-test
|
||||
[ 4 ] [ float-4-rep rep-components ] unit-test
|
||||
[ 2 ] [ double-2-rep rep-components ] unit-test
|
||||
|
||||
{ 4 1 } [ uint-4-rep (simd-boa) ] must-infer-as
|
||||
{ 4 1 } [ int-4-rep (simd-boa) ] must-infer-as
|
||||
{ 4 1 } [ float-4-rep (simd-boa) ] must-infer-as
|
||||
{ 2 1 } [ double-2-rep (simd-boa) ] must-infer-as
|
||||
|
||||
|
|
@ -1,207 +1,236 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: alien alien.c-types alien.data assocs combinators
|
||||
cpu.architecture compiler.cfg.comparisons fry generalizations
|
||||
kernel libc macros math
|
||||
math.vectors.conversion.backend
|
||||
sequences sets effects accessors namespaces
|
||||
lexer parser vocabs.parser words arrays math.vectors ;
|
||||
! (c)2009 Slava Pestov, Joe Groff bsd license
|
||||
USING: accessors alien alien.c-types alien.data combinators
|
||||
sequences.cords cpu.architecture fry generalizations kernel
|
||||
libc locals math math.libm math.order math.ranges math.vectors
|
||||
sequences sequences.private specialized-arrays vocabs.loader ;
|
||||
QUALIFIED-WITH: alien.c-types c
|
||||
SPECIALIZED-ARRAYS:
|
||||
c:char c:short c:int c:longlong
|
||||
c:uchar c:ushort c:uint c:ulonglong
|
||||
c:float c:double ;
|
||||
IN: math.vectors.simd.intrinsics
|
||||
|
||||
ERROR: bad-simd-call word ;
|
||||
|
||||
<<
|
||||
|
||||
: simd-effect ( word -- effect )
|
||||
stack-effect [ in>> "rep" suffix ] [ out>> ] bi <effect> ;
|
||||
: simd-conversion-effect ( word -- effect )
|
||||
stack-effect [ in>> but-last "rep" suffix ] [ out>> ] bi <effect> ;
|
||||
|
||||
SYMBOL: simd-ops
|
||||
|
||||
V{ } clone simd-ops set-global
|
||||
|
||||
: (SIMD-OP:) ( accum quot -- accum )
|
||||
[
|
||||
scan-word dup name>> "(simd-" ")" surround create-in
|
||||
[ nip dup '[ _ bad-simd-call ] define ]
|
||||
] dip
|
||||
'[ _ dip set-stack-effect ]
|
||||
[ 2array simd-ops get push ]
|
||||
2tri ; inline
|
||||
|
||||
SYNTAX: SIMD-OP:
|
||||
[ simd-effect ] (SIMD-OP:) ;
|
||||
|
||||
SYNTAX: SIMD-CONVERSION-OP:
|
||||
[ simd-conversion-effect ] (SIMD-OP:) ;
|
||||
|
||||
>>
|
||||
|
||||
SIMD-OP: v+
|
||||
SIMD-OP: v-
|
||||
SIMD-OP: vneg
|
||||
SIMD-OP: v+-
|
||||
SIMD-OP: vs+
|
||||
SIMD-OP: vs-
|
||||
SIMD-OP: vs*
|
||||
SIMD-OP: v*
|
||||
SIMD-OP: v/
|
||||
SIMD-OP: vmin
|
||||
SIMD-OP: vmax
|
||||
SIMD-OP: v.
|
||||
SIMD-OP: vsqrt
|
||||
SIMD-OP: sum
|
||||
SIMD-OP: vabs
|
||||
SIMD-OP: vbitand
|
||||
SIMD-OP: vbitandn
|
||||
SIMD-OP: vbitor
|
||||
SIMD-OP: vbitxor
|
||||
SIMD-OP: vbitnot
|
||||
SIMD-OP: vand
|
||||
SIMD-OP: vandn
|
||||
SIMD-OP: vor
|
||||
SIMD-OP: vxor
|
||||
SIMD-OP: vnot
|
||||
SIMD-OP: vlshift
|
||||
SIMD-OP: vrshift
|
||||
SIMD-OP: hlshift
|
||||
SIMD-OP: hrshift
|
||||
SIMD-OP: vshuffle-elements
|
||||
SIMD-OP: vshuffle-bytes
|
||||
SIMD-OP: (vmerge-head)
|
||||
SIMD-OP: (vmerge-tail)
|
||||
SIMD-OP: v<=
|
||||
SIMD-OP: v<
|
||||
SIMD-OP: v=
|
||||
SIMD-OP: v>
|
||||
SIMD-OP: v>=
|
||||
SIMD-OP: vunordered?
|
||||
SIMD-OP: vany?
|
||||
SIMD-OP: vall?
|
||||
SIMD-OP: vnone?
|
||||
|
||||
SIMD-CONVERSION-OP: (v>float)
|
||||
SIMD-CONVERSION-OP: (v>integer)
|
||||
SIMD-CONVERSION-OP: (vpack-signed)
|
||||
SIMD-CONVERSION-OP: (vpack-unsigned)
|
||||
SIMD-CONVERSION-OP: (vunpack-head)
|
||||
SIMD-CONVERSION-OP: (vunpack-tail)
|
||||
|
||||
: (simd-with) ( x rep -- v ) bad-simd-call ;
|
||||
: (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
|
||||
: (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ;
|
||||
: (simd-select) ( v n rep -- x ) bad-simd-call ;
|
||||
|
||||
: assert-positive ( x -- y ) ;
|
||||
|
||||
: alien-vector ( c-ptr n rep -- value )
|
||||
! Inefficient version for when intrinsics are missing
|
||||
[ swap <displaced-alien> ] dip rep-size memory>byte-array ;
|
||||
<PRIVATE
|
||||
|
||||
: set-alien-vector ( value c-ptr n rep -- )
|
||||
! Inefficient version for when intrinsics are missing
|
||||
: >bitwise-vector-rep ( rep -- rep' )
|
||||
{
|
||||
{ float-4-rep [ uint-4-rep ] }
|
||||
{ double-2-rep [ ulonglong-2-rep ] }
|
||||
[ ]
|
||||
} case ; foldable
|
||||
|
||||
: >uint-vector-rep ( rep -- rep' )
|
||||
{
|
||||
{ longlong-2-rep [ ulonglong-2-rep ] }
|
||||
{ int-4-rep [ uint-4-rep ] }
|
||||
{ short-8-rep [ ushort-8-rep ] }
|
||||
{ char-16-rep [ uchar-16-rep ] }
|
||||
[ ]
|
||||
} case ; foldable
|
||||
|
||||
: >int-vector-rep ( rep -- rep' )
|
||||
{
|
||||
{ float-4-rep [ int-4-rep ] }
|
||||
{ double-2-rep [ longlong-2-rep ] }
|
||||
} case ; foldable
|
||||
|
||||
: >float-vector-rep ( rep -- rep' )
|
||||
{
|
||||
{ int-4-rep [ float-4-rep ] }
|
||||
{ longlong-2-rep [ double-2-rep ] }
|
||||
} case ; foldable
|
||||
|
||||
: [byte>rep-array] ( rep -- class )
|
||||
{
|
||||
{ char-16-rep [ [ byte-array>char-array ] ] }
|
||||
{ uchar-16-rep [ [ byte-array>uchar-array ] ] }
|
||||
{ short-8-rep [ [ byte-array>short-array ] ] }
|
||||
{ ushort-8-rep [ [ byte-array>ushort-array ] ] }
|
||||
{ int-4-rep [ [ byte-array>int-array ] ] }
|
||||
{ uint-4-rep [ [ byte-array>uint-array ] ] }
|
||||
{ longlong-2-rep [ [ byte-array>longlong-array ] ] }
|
||||
{ ulonglong-2-rep [ [ byte-array>ulonglong-array ] ] }
|
||||
{ float-4-rep [ [ byte-array>float-array ] ] }
|
||||
{ double-2-rep [ [ byte-array>double-array ] ] }
|
||||
} case ; foldable
|
||||
|
||||
: [>rep-array] ( rep -- class )
|
||||
{
|
||||
{ char-16-rep [ [ >char-array ] ] }
|
||||
{ uchar-16-rep [ [ >uchar-array ] ] }
|
||||
{ short-8-rep [ [ >short-array ] ] }
|
||||
{ ushort-8-rep [ [ >ushort-array ] ] }
|
||||
{ int-4-rep [ [ >int-array ] ] }
|
||||
{ uint-4-rep [ [ >uint-array ] ] }
|
||||
{ longlong-2-rep [ [ >longlong-array ] ] }
|
||||
{ ulonglong-2-rep [ [ >ulonglong-array ] ] }
|
||||
{ float-4-rep [ [ >float-array ] ] }
|
||||
{ double-2-rep [ [ >double-array ] ] }
|
||||
} case ; foldable
|
||||
|
||||
: [<rep-array>] ( rep -- class )
|
||||
{
|
||||
{ char-16-rep [ [ 16 (char-array) ] ] }
|
||||
{ uchar-16-rep [ [ 16 (uchar-array) ] ] }
|
||||
{ short-8-rep [ [ 8 (short-array) ] ] }
|
||||
{ ushort-8-rep [ [ 8 (ushort-array) ] ] }
|
||||
{ int-4-rep [ [ 4 (int-array) ] ] }
|
||||
{ uint-4-rep [ [ 4 (uint-array) ] ] }
|
||||
{ longlong-2-rep [ [ 2 (longlong-array) ] ] }
|
||||
{ ulonglong-2-rep [ [ 2 (ulonglong-array) ] ] }
|
||||
{ float-4-rep [ [ 4 (float-array) ] ] }
|
||||
{ double-2-rep [ [ 2 (double-array) ] ] }
|
||||
} case ; foldable
|
||||
|
||||
: rep-tf-values ( rep -- t f )
|
||||
float-vector-rep? [ -1 bits>double 0.0 ] [ -1 0 ] if ;
|
||||
|
||||
: >rep-array ( a rep -- a' )
|
||||
[byte>rep-array] call( a -- a' ) ; inline
|
||||
: 2>rep-array ( a b rep -- a' b' )
|
||||
[byte>rep-array] '[ _ call( a -- a' ) ] bi@ ; inline
|
||||
: <rep-array> ( rep -- a' )
|
||||
[<rep-array>] call( -- a' ) ; inline
|
||||
|
||||
: components-map ( a rep quot -- c )
|
||||
[ >rep-array ] dip map underlying>> ; inline
|
||||
: components-2map ( a b rep quot -- c )
|
||||
[ 2>rep-array ] dip 2map underlying>> ; inline
|
||||
: components-reduce ( a rep quot -- x )
|
||||
[ >rep-array [ ] ] dip map-reduce ; inline
|
||||
|
||||
: bitwise-components-map ( a rep quot -- c )
|
||||
[ >bitwise-vector-rep >rep-array ] dip map underlying>> ; inline
|
||||
: bitwise-components-2map ( a b rep quot -- c )
|
||||
[ >bitwise-vector-rep 2>rep-array ] dip 2map underlying>> ; inline
|
||||
: bitwise-components-reduce ( a rep quot -- x )
|
||||
[ >bitwise-vector-rep >rep-array [ ] ] dip map-reduce ; inline
|
||||
|
||||
:: (vshuffle) ( a elts rep -- c )
|
||||
a rep >rep-array :> a'
|
||||
rep <rep-array> :> c'
|
||||
elts [| from to |
|
||||
from rep rep-length 1 - bitand
|
||||
a' nth-unsafe
|
||||
to c' set-nth-unsafe
|
||||
] each-index
|
||||
c' underlying>> ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: (simd-v+) ( a b rep -- c ) [ + ] components-2map ;
|
||||
: (simd-v-) ( a b rep -- c ) [ - ] components-2map ;
|
||||
: (simd-vneg) ( a rep -- c ) [ neg ] components-map ;
|
||||
:: (simd-v+-) ( a b rep -- c )
|
||||
a b rep 2>rep-array :> ( a' b' )
|
||||
rep <rep-array> :> c'
|
||||
0 rep rep-length 1 - 2 <range> [| n |
|
||||
n a' nth-unsafe n b' nth-unsafe -
|
||||
n c' set-nth-unsafe
|
||||
|
||||
n 1 + a' nth-unsafe n 1 + b' nth-unsafe +
|
||||
n 1 + c' set-nth-unsafe
|
||||
] each
|
||||
c' underlying>> ;
|
||||
: (simd-vs+) ( a b rep -- c )
|
||||
dup rep-component-type '[ + _ c-type-clamp ] components-2map ;
|
||||
: (simd-vs-) ( a b rep -- c )
|
||||
dup rep-component-type '[ - _ c-type-clamp ] components-2map ;
|
||||
: (simd-vs*) ( a b rep -- c )
|
||||
dup rep-component-type '[ * _ c-type-clamp ] components-2map ;
|
||||
: (simd-v*) ( a b rep -- c ) [ * ] components-2map ;
|
||||
: (simd-v/) ( a b rep -- c ) [ / ] components-2map ;
|
||||
: (simd-vmin) ( a b rep -- c ) [ min ] components-2map ;
|
||||
: (simd-vmax) ( a b rep -- c ) [ max ] components-2map ;
|
||||
: (simd-v.) ( a b rep -- n )
|
||||
[ 2>rep-array [ [ first ] bi@ * ] 2keep ] keep
|
||||
1 swap rep-length [a,b) [ '[ _ swap nth-unsafe ] bi@ * + ] with with each ;
|
||||
: (simd-vsqrt) ( a rep -- c ) [ fsqrt ] components-map ;
|
||||
: (simd-sum) ( a rep -- n ) [ + ] components-reduce ;
|
||||
: (simd-vabs) ( a rep -- c ) [ abs ] components-map ;
|
||||
: (simd-vbitand) ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
|
||||
: (simd-vbitandn) ( a b rep -- c ) [ [ bitnot ] dip bitand ] bitwise-components-2map ;
|
||||
: (simd-vbitor) ( a b rep -- c ) [ bitor ] bitwise-components-2map ;
|
||||
: (simd-vbitxor) ( a b rep -- c ) [ bitxor ] bitwise-components-2map ;
|
||||
: (simd-vbitnot) ( a rep -- c ) [ bitnot ] bitwise-components-map ;
|
||||
: (simd-vand) ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
|
||||
: (simd-vandn) ( a b rep -- c ) [ [ bitnot ] dip bitand ] bitwise-components-2map ;
|
||||
: (simd-vor) ( a b rep -- c ) [ bitor ] bitwise-components-2map ;
|
||||
: (simd-vxor) ( a b rep -- c ) [ bitxor ] bitwise-components-2map ;
|
||||
: (simd-vnot) ( a rep -- c ) [ bitnot ] bitwise-components-map ;
|
||||
: (simd-vlshift) ( a n rep -- c ) swap '[ _ shift ] bitwise-components-map ;
|
||||
: (simd-vrshift) ( a n rep -- c ) swap '[ _ neg shift ] bitwise-components-map ;
|
||||
: (simd-hlshift) ( a n rep -- c )
|
||||
drop head-slice* 16 0 pad-head ;
|
||||
: (simd-hrshift) ( a n rep -- c )
|
||||
drop tail-slice 16 0 pad-tail ;
|
||||
: (simd-vshuffle-elements) ( a n rep -- c ) [ rep-length 0 pad-tail ] keep (vshuffle) ;
|
||||
: (simd-vshuffle-bytes) ( a b rep -- c ) drop uchar-16-rep (vshuffle) ;
|
||||
:: (simd-vmerge-head) ( a b rep -- c )
|
||||
a b rep 2>rep-array :> ( a' b' )
|
||||
rep <rep-array> :> c'
|
||||
rep rep-length 2 /i iota [| n |
|
||||
n a' nth-unsafe n 2 * c' set-nth-unsafe
|
||||
n b' nth-unsafe n 2 * 1 + c' set-nth-unsafe
|
||||
] each
|
||||
c' underlying>> ;
|
||||
:: (simd-vmerge-tail) ( a b rep -- c )
|
||||
a b rep 2>rep-array :> ( a' b' )
|
||||
rep <rep-array> :> c'
|
||||
rep rep-length 2 /i :> len
|
||||
len iota [| n |
|
||||
n len + a' nth-unsafe n 2 * c' set-nth-unsafe
|
||||
n len + b' nth-unsafe n 2 * 1 + c' set-nth-unsafe
|
||||
] each
|
||||
c' underlying>> ;
|
||||
: (simd-v<=) ( a b rep -- c )
|
||||
dup rep-tf-values '[ <= _ _ ? ] components-2map ;
|
||||
: (simd-v<) ( a b rep -- c )
|
||||
dup rep-tf-values '[ < _ _ ? ] components-2map ;
|
||||
: (simd-v=) ( a b rep -- c )
|
||||
dup rep-tf-values '[ = _ _ ? ] components-2map ;
|
||||
: (simd-v>) ( a b rep -- c )
|
||||
dup rep-tf-values '[ > _ _ ? ] components-2map ;
|
||||
: (simd-v>=) ( a b rep -- c )
|
||||
dup rep-tf-values '[ >= _ _ ? ] components-2map ;
|
||||
: (simd-vunordered?) ( a b rep -- c )
|
||||
dup rep-tf-values '[ unordered? _ _ ? ] components-2map ;
|
||||
: (simd-vany?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? not ;
|
||||
: (simd-vall?) ( a rep -- ? ) [ bitand ] bitwise-components-reduce zero? not ;
|
||||
: (simd-vnone?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? ;
|
||||
: (simd-v>float) ( a rep -- c )
|
||||
[ >rep-array [ >float ] ] [ >float-vector-rep <rep-array> ] bi map-as underlying>> ;
|
||||
: (simd-v>integer) ( a rep -- c )
|
||||
[ >rep-array [ >integer ] ] [ >int-vector-rep <rep-array> ] bi map-as underlying>> ;
|
||||
: (simd-vpack-signed) ( a b rep -- c )
|
||||
[ 2>rep-array cord-append ]
|
||||
[ narrow-vector-rep [ <rep-array> ] [ rep-component-type ] bi ] bi
|
||||
'[ _ c-type-clamp ] swap map-as underlying>> ;
|
||||
: (simd-vpack-unsigned) ( a b rep -- c )
|
||||
[ 2>rep-array cord-append ]
|
||||
[ narrow-vector-rep >uint-vector-rep [ <rep-array> ] [ rep-component-type ] bi ] bi
|
||||
'[ _ c-type-clamp ] swap map-as underlying>> ;
|
||||
: (simd-vunpack-head) ( a rep -- c )
|
||||
[ >rep-array ] [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ] bi
|
||||
[ head-slice ] dip call( a' -- c' ) underlying>> ;
|
||||
: (simd-vunpack-tail) ( a rep -- c )
|
||||
[ >rep-array ] [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ] bi
|
||||
[ tail-slice ] dip call( a' -- c' ) underlying>> ;
|
||||
: (simd-with) ( n rep -- v )
|
||||
[ rep-length iota swap '[ _ ] ] [ <rep-array> ] bi replicate-as
|
||||
underlying>> ;
|
||||
: (simd-gather-2) ( m n rep -- v ) <rep-array> [ 2 set-firstn ] keep underlying>> ;
|
||||
: (simd-gather-4) ( m n o p rep -- v ) <rep-array> [ 4 set-firstn ] keep underlying>> ;
|
||||
: (simd-select) ( a n rep -- x ) [ swap ] dip >rep-array nth-unsafe ;
|
||||
|
||||
: alien-vector ( c-ptr n rep -- value )
|
||||
[ swap <displaced-alien> ] dip rep-size memory>byte-array ;
|
||||
: set-alien-vector ( value c-ptr n rep -- )
|
||||
[ swap <displaced-alien> swap ] dip rep-size memcpy ;
|
||||
|
||||
<<
|
||||
"compiler.cfg.intrinsics.simd" require
|
||||
"compiler.tree.propagation.simd" require
|
||||
"compiler.cfg.value-numbering.simd" require
|
||||
|
||||
: rep-components ( rep -- n )
|
||||
16 swap rep-component-type heap-size /i ; foldable
|
||||
|
||||
: rep-coercer ( rep -- quot )
|
||||
{
|
||||
{ [ dup int-vector-rep? ] [ [ >fixnum ] ] }
|
||||
{ [ dup float-vector-rep? ] [ [ >float ] ] }
|
||||
} cond nip ; foldable
|
||||
|
||||
: rep-coerce ( value rep -- value' )
|
||||
rep-coercer call( value -- value' ) ; inline
|
||||
|
||||
CONSTANT: rep-gather-words
|
||||
{
|
||||
{ 2 (simd-gather-2) }
|
||||
{ 4 (simd-gather-4) }
|
||||
}
|
||||
|
||||
: rep-gather-word ( rep -- word )
|
||||
rep-components rep-gather-words at ;
|
||||
|
||||
>>
|
||||
|
||||
MACRO: (simd-boa) ( rep -- quot )
|
||||
{
|
||||
[ rep-coercer ]
|
||||
[ rep-components ]
|
||||
[ ]
|
||||
[ rep-gather-word ]
|
||||
} cleave
|
||||
'[ _ _ napply _ _ execute ] ;
|
||||
|
||||
GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
|
||||
|
||||
: (%unpack-reps) ( -- reps )
|
||||
%merge-vector-reps [ int-vector-rep? ] filter
|
||||
%unpack-vector-head-reps union ;
|
||||
|
||||
: (%abs-reps) ( -- reps )
|
||||
cc> %compare-vector-reps [ int-vector-rep? ] filter
|
||||
%xor-vector-reps [ float-vector-rep? ] filter
|
||||
union
|
||||
[ { } ] [ { uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep } union ] if-empty ;
|
||||
|
||||
: (%shuffle-imm-reps) ( -- reps )
|
||||
%shuffle-vector-reps %shuffle-vector-imm-reps union ;
|
||||
|
||||
M: vector-rep supported-simd-op?
|
||||
{
|
||||
{ \ (simd-v+) [ %add-vector-reps ] }
|
||||
{ \ (simd-vs+) [ %saturated-add-vector-reps ] }
|
||||
{ \ (simd-v+-) [ %add-sub-vector-reps ] }
|
||||
{ \ (simd-v-) [ %sub-vector-reps ] }
|
||||
{ \ (simd-vs-) [ %saturated-sub-vector-reps ] }
|
||||
{ \ (simd-vneg) [ %sub-vector-reps ] }
|
||||
{ \ (simd-v*) [ %mul-vector-reps ] }
|
||||
{ \ (simd-vs*) [ %saturated-mul-vector-reps ] }
|
||||
{ \ (simd-v/) [ %div-vector-reps ] }
|
||||
{ \ (simd-vmin) [ %min-vector-reps cc< %compare-vector-reps union ] }
|
||||
{ \ (simd-vmax) [ %max-vector-reps cc> %compare-vector-reps union ] }
|
||||
{ \ (simd-v.) [ %dot-vector-reps ] }
|
||||
{ \ (simd-vsqrt) [ %sqrt-vector-reps ] }
|
||||
{ \ (simd-sum) [ %horizontal-add-vector-reps ] }
|
||||
{ \ (simd-vabs) [ (%abs-reps) ] }
|
||||
{ \ (simd-vbitand) [ %and-vector-reps ] }
|
||||
{ \ (simd-vbitandn) [ %andn-vector-reps ] }
|
||||
{ \ (simd-vbitor) [ %or-vector-reps ] }
|
||||
{ \ (simd-vbitxor) [ %xor-vector-reps ] }
|
||||
{ \ (simd-vbitnot) [ %xor-vector-reps ] }
|
||||
{ \ (simd-vand) [ %and-vector-reps ] }
|
||||
{ \ (simd-vandn) [ %andn-vector-reps ] }
|
||||
{ \ (simd-vor) [ %or-vector-reps ] }
|
||||
{ \ (simd-vxor) [ %xor-vector-reps ] }
|
||||
{ \ (simd-vnot) [ %xor-vector-reps ] }
|
||||
{ \ (simd-vlshift) [ %shl-vector-reps ] }
|
||||
{ \ (simd-vrshift) [ %shr-vector-reps ] }
|
||||
{ \ (simd-hlshift) [ %horizontal-shl-vector-imm-reps ] }
|
||||
{ \ (simd-hrshift) [ %horizontal-shr-vector-imm-reps ] }
|
||||
{ \ (simd-vshuffle-elements) [ (%shuffle-imm-reps) ] }
|
||||
{ \ (simd-vshuffle-bytes) [ %shuffle-vector-reps ] }
|
||||
{ \ (simd-(vmerge-head)) [ %merge-vector-reps ] }
|
||||
{ \ (simd-(vmerge-tail)) [ %merge-vector-reps ] }
|
||||
{ \ (simd-(v>float)) [ %integer>float-vector-reps ] }
|
||||
{ \ (simd-(v>integer)) [ %float>integer-vector-reps ] }
|
||||
{ \ (simd-(vpack-signed)) [ %signed-pack-vector-reps ] }
|
||||
{ \ (simd-(vpack-unsigned)) [ %unsigned-pack-vector-reps ] }
|
||||
{ \ (simd-(vunpack-head)) [ (%unpack-reps) ] }
|
||||
{ \ (simd-(vunpack-tail)) [ (%unpack-reps) ] }
|
||||
{ \ (simd-v<=) [ unsign-rep cc<= %compare-vector-reps ] }
|
||||
{ \ (simd-v<) [ unsign-rep cc< %compare-vector-reps ] }
|
||||
{ \ (simd-v=) [ unsign-rep cc= %compare-vector-reps ] }
|
||||
{ \ (simd-v>) [ unsign-rep cc> %compare-vector-reps ] }
|
||||
{ \ (simd-v>=) [ unsign-rep cc>= %compare-vector-reps ] }
|
||||
{ \ (simd-vunordered?) [ unsign-rep cc/<>= %compare-vector-reps ] }
|
||||
{ \ (simd-gather-2) [ %gather-vector-2-reps ] }
|
||||
{ \ (simd-gather-4) [ %gather-vector-4-reps ] }
|
||||
{ \ (simd-vany?) [ %test-vector-reps ] }
|
||||
{ \ (simd-vall?) [ %test-vector-reps ] }
|
||||
{ \ (simd-vnone?) [ %test-vector-reps ] }
|
||||
} case member? ;
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
USING: math.vectors.simd mirrors ;
|
||||
IN: math.vectors.simd.mirrors
|
||||
INSTANCE: simd-128 enumerated-sequence
|
|
@ -1,6 +1,6 @@
|
|||
USING: classes.tuple.private cpu.architecture help.markup
|
||||
help.syntax kernel.private math math.vectors
|
||||
math.vectors.simd.intrinsics sequences ;
|
||||
help.syntax kernel.private math math.vectors math.vectors.simd.intrinsics
|
||||
sequences ;
|
||||
IN: math.vectors.simd
|
||||
|
||||
ARTICLE: "math.vectors.simd.intro" "Introduction to SIMD support"
|
||||
|
@ -19,11 +19,11 @@ $nl
|
|||
ARTICLE: "math.vectors.simd.support" "Supported SIMD instruction sets and operations"
|
||||
"At present, the SIMD support makes use of a subset of SSE up to SSE4.1. The subset used depends on the current CPU type."
|
||||
$nl
|
||||
"SSE1 only supports single-precision SIMD (" { $snippet "float-4" } " and " { $snippet "float-8" } ")."
|
||||
"SSE1 only supports single-precision SIMD (" { $snippet "float-4" } ")."
|
||||
$nl
|
||||
"SSE2 introduces double-precision SIMD (" { $snippet "double-2" } " and " { $snippet "double-4" } ") and integer SIMD (all types). Integer SIMD is missing a few features, in particular the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "."
|
||||
"SSE2 introduces double-precision SIMD (" { $snippet "double-2" } ") and integer SIMD (all types). Integer SIMD is missing a few features; in particular, the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "."
|
||||
$nl
|
||||
"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "."
|
||||
"SSE3 introduces horizontal adds (summing all components of a single vector register), which are useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "."
|
||||
$nl
|
||||
"SSSE3 introduces " { $link vabs } " for " { $snippet "char-16" } ", " { $snippet "short-8" } " and " { $snippet "int-4" } "."
|
||||
$nl
|
||||
|
@ -36,47 +36,18 @@ $nl
|
|||
ARTICLE: "math.vectors.simd.types" "SIMD vector types"
|
||||
"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type and " { $snippet "count" } " is a vector dimension."
|
||||
$nl
|
||||
"To use a SIMD vector type, a parsing word is used to generate the relevant code and bring it into the vocabulary search path; this is the same idea as with " { $link "specialized-arrays" } ":"
|
||||
{ $subsections
|
||||
POSTPONE: SIMD:
|
||||
POSTPONE: SIMDS:
|
||||
}
|
||||
"The following scalar types are supported:"
|
||||
{ $code
|
||||
"char"
|
||||
"uchar"
|
||||
"short"
|
||||
"ushort"
|
||||
"int"
|
||||
"uint"
|
||||
"longlong"
|
||||
"ulonglong"
|
||||
"float"
|
||||
"double"
|
||||
}
|
||||
|
||||
"The following vector types are generated from the above scalar types:"
|
||||
"The following vector types are available:"
|
||||
{ $code
|
||||
"char-16"
|
||||
"uchar-16"
|
||||
"char-32"
|
||||
"uchar-32"
|
||||
"short-8"
|
||||
"ushort-8"
|
||||
"short-16"
|
||||
"ushort-16"
|
||||
"int-4"
|
||||
"uint-4"
|
||||
"int-8"
|
||||
"uint-8"
|
||||
"longlong-2"
|
||||
"ulonglong-2"
|
||||
"longlong-4"
|
||||
"ulonglong-4"
|
||||
"float-4"
|
||||
"float-8"
|
||||
"double-2"
|
||||
"double-4"
|
||||
} ;
|
||||
|
||||
ARTICLE: "math.vectors.simd.words" "SIMD vector words"
|
||||
|
@ -103,19 +74,17 @@ $nl
|
|||
{ $code
|
||||
"""USING: compiler.tree.debugger math.vectors
|
||||
math.vectors.simd ;
|
||||
SIMD: double
|
||||
SYMBOLS: x y ;
|
||||
|
||||
[
|
||||
double-4{ 1.5 2.0 3.7 0.4 } x set
|
||||
double-4{ 1.5 2.0 3.7 0.4 } y set
|
||||
float-4{ 1.5 2.0 3.7 0.4 } x set
|
||||
float-4{ 1.5 2.0 3.7 0.4 } y set
|
||||
x get y get v+
|
||||
] optimizer-report.""" }
|
||||
"The following word benefits from SIMD optimization, because it begins with an unsafe declaration:"
|
||||
{ $code
|
||||
"""USING: compiler.tree.debugger kernel.private
|
||||
math.vectors math.vectors.simd ;
|
||||
SIMD: float
|
||||
IN: simd-demo
|
||||
|
||||
: interpolate ( v a b -- w )
|
||||
|
@ -129,7 +98,6 @@ $nl
|
|||
{ $code
|
||||
"""USING: compiler.tree.debugger hints
|
||||
math.vectors math.vectors.simd ;
|
||||
SIMD: float
|
||||
IN: simd-demo
|
||||
|
||||
: interpolate ( v a b -- w )
|
||||
|
@ -145,7 +113,6 @@ $nl
|
|||
"In the " { $snippet "interpolate" } " word, there is still a call to the " { $link <tuple-boa> } " primitive, because the return value at the end is being boxed on the heap. In the next example, no memory allocation occurs at all because the SIMD vectors are stored inside a struct class (see " { $link "classes.struct" } "); also note the use of inlining:"
|
||||
{ $code
|
||||
"""USING: compiler.tree.debugger math.vectors math.vectors.simd ;
|
||||
SIMD: float
|
||||
IN: simd-demo
|
||||
|
||||
STRUCT: actor
|
||||
|
@ -182,7 +149,6 @@ ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives"
|
|||
{ $list
|
||||
"They operate on raw byte arrays, with a separate “representation” parameter passed in to determine the type of the operands and result."
|
||||
"They are unsafe; passing values which are not byte arrays, or byte arrays with the wrong size, will dereference invalid memory and possibly crash Factor."
|
||||
{ "They do not have software fallbacks; if the current CPU does not have SIMD support, a " { $link bad-simd-call } " error will be thrown." }
|
||||
}
|
||||
"The compiler converts " { $link "math-vectors" } " into SIMD primitives automatically in cases where it is safe; this means that the input types are known to be SIMD vectors, and the CPU supports SIMD."
|
||||
$nl
|
||||
|
@ -203,7 +169,7 @@ $nl
|
|||
ARTICLE: "math.vectors.simd.accuracy" "Numerical accuracy of SIMD primitives"
|
||||
"No guarantees are made that " { $vocab-link "math.vectors.simd" } " words will give identical results on different SSE versions, or between the hardware intrinsics and the software fallbacks."
|
||||
$nl
|
||||
"In particular, horizontal operations on " { $snippet "float-4" } " and " { $snippet "float-8" } " are affected by this. They are computed with lower precision in intrinsics than the software fallback. Horizontal operations include anything involving adding together the components of a vector, such as " { $link sum } " or " { $link normalize } "." ;
|
||||
"In particular, horizontal operations on " { $snippet "float-4" } " vectors are affected by this. They are computed with lower precision in intrinsics than the software fallback. Horizontal operations include anything involving adding together the components of a vector, such as " { $link sum } " or " { $link normalize } "." ;
|
||||
|
||||
ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
|
||||
"The " { $vocab-link "math.vectors.simd" } " vocabulary extends the " { $vocab-link "math.vectors" } " vocabulary to support efficient vector arithmetic on small, fixed-size vectors."
|
||||
|
@ -218,16 +184,4 @@ ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
|
|||
"math.vectors.simd.intrinsics"
|
||||
} ;
|
||||
|
||||
HELP: SIMD:
|
||||
{ $syntax "SIMD: type" }
|
||||
{ $values { "type" "a scalar C type" } }
|
||||
{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of " { $snippet "type" } " into the vocabulary search path. The allowed scalar types, and the auto-generated type/length vector combinations that result, are listed in " { $link "math.vectors.simd.types" } ". Generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
|
||||
|
||||
HELP: SIMDS:
|
||||
{ $syntax "SIMDS: type type type ... ;" }
|
||||
{ $values { "type" "a scalar C type" } }
|
||||
{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of each " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
|
||||
|
||||
{ POSTPONE: SIMD: POSTPONE: SIMDS: } related-words
|
||||
|
||||
ABOUT: "math.vectors.simd"
|
||||
|
|
|
@ -3,22 +3,14 @@ effects fry io kernel kernel.private math math.functions
|
|||
math.private math.vectors math.vectors.simd
|
||||
math.vectors.simd.private prettyprint random sequences system
|
||||
tools.test vocabs assocs compiler.cfg.debugger words
|
||||
locals math.vectors.specialization combinators cpu.architecture
|
||||
math.vectors.conversion.backend
|
||||
math.vectors.simd.intrinsics namespaces byte-arrays alien
|
||||
locals combinators cpu.architecture namespaces byte-arrays alien
|
||||
specialized-arrays classes.struct eval classes.algebra sets
|
||||
quotations math.constants compiler.units ;
|
||||
quotations math.constants compiler.units splitting ;
|
||||
FROM: math.vectors.simd.intrinsics => alien-vector set-alien-vector ;
|
||||
QUALIFIED-WITH: alien.c-types c
|
||||
SPECIALIZED-ARRAY: c:float
|
||||
SIMD: c:char
|
||||
SIMDS: c:uchar c:short c:ushort c:int c:uint c:longlong c:ulonglong c:float c:double ;
|
||||
IN: math.vectors.simd.tests
|
||||
|
||||
! Make sure the functor doesn't generate bogus vocabularies
|
||||
2 [ [ "USE: math.vectors.simd SIMD: rubinius" eval( -- ) ] must-fail ] times
|
||||
|
||||
[ f ] [ "math.vectors.simd.instances.rubinius" vocab ] unit-test
|
||||
|
||||
! Test type propagation
|
||||
[ V{ float } ] [ [ { float-4 } declare norm-sq ] final-classes ] unit-test
|
||||
|
||||
|
@ -38,10 +30,6 @@ IN: math.vectors.simd.tests
|
|||
|
||||
[ V{ integer } ] [ [ { longlong-2 } declare second ] final-classes ] unit-test
|
||||
|
||||
[ V{ int-8 } ] [ [ { int-8 int-8 } declare v+ ] final-classes ] unit-test
|
||||
|
||||
[ t ] [ [ { int-8 } declare second ] final-classes first integer class<= ] unit-test
|
||||
|
||||
! Test puns; only on x86
|
||||
cpu x86? [
|
||||
[ double-2{ 4 1024 } ] [
|
||||
|
@ -55,26 +43,76 @@ CONSTANT: simd-classes
|
|||
{
|
||||
char-16
|
||||
uchar-16
|
||||
char-32
|
||||
uchar-32
|
||||
short-8
|
||||
ushort-8
|
||||
short-16
|
||||
ushort-16
|
||||
int-4
|
||||
uint-4
|
||||
int-8
|
||||
uint-8
|
||||
longlong-2
|
||||
ulonglong-2
|
||||
longlong-4
|
||||
ulonglong-4
|
||||
float-4
|
||||
float-8
|
||||
double-2
|
||||
double-4
|
||||
}
|
||||
|
||||
SYMBOLS: -> +vector+ +any-vector+ +scalar+ +boolean+ +nonnegative+ +literal+ ;
|
||||
|
||||
CONSTANT: vector-words
|
||||
H{
|
||||
{ [v-] { +vector+ +vector+ -> +vector+ } }
|
||||
{ distance { +vector+ +vector+ -> +nonnegative+ } }
|
||||
{ n*v { +scalar+ +vector+ -> +vector+ } }
|
||||
{ n+v { +scalar+ +vector+ -> +vector+ } }
|
||||
{ n-v { +scalar+ +vector+ -> +vector+ } }
|
||||
{ n/v { +scalar+ +vector+ -> +vector+ } }
|
||||
{ norm { +vector+ -> +nonnegative+ } }
|
||||
{ norm-sq { +vector+ -> +nonnegative+ } }
|
||||
{ normalize { +vector+ -> +vector+ } }
|
||||
{ v* { +vector+ +vector+ -> +vector+ } }
|
||||
{ vs* { +vector+ +vector+ -> +vector+ } }
|
||||
{ v*n { +vector+ +scalar+ -> +vector+ } }
|
||||
{ v+ { +vector+ +vector+ -> +vector+ } }
|
||||
{ vs+ { +vector+ +vector+ -> +vector+ } }
|
||||
{ v+- { +vector+ +vector+ -> +vector+ } }
|
||||
{ v+n { +vector+ +scalar+ -> +vector+ } }
|
||||
{ v- { +vector+ +vector+ -> +vector+ } }
|
||||
{ vneg { +vector+ -> +vector+ } }
|
||||
{ vs- { +vector+ +vector+ -> +vector+ } }
|
||||
{ v-n { +vector+ +scalar+ -> +vector+ } }
|
||||
{ v. { +vector+ +vector+ -> +scalar+ } }
|
||||
{ v/ { +vector+ +vector+ -> +vector+ } }
|
||||
{ v/n { +vector+ +scalar+ -> +vector+ } }
|
||||
{ vceiling { +vector+ -> +vector+ } }
|
||||
{ vfloor { +vector+ -> +vector+ } }
|
||||
{ vmax { +vector+ +vector+ -> +vector+ } }
|
||||
{ vmin { +vector+ +vector+ -> +vector+ } }
|
||||
{ vneg { +vector+ -> +vector+ } }
|
||||
{ vtruncate { +vector+ -> +vector+ } }
|
||||
{ sum { +vector+ -> +scalar+ } }
|
||||
{ vabs { +vector+ -> +vector+ } }
|
||||
{ vsqrt { +vector+ -> +vector+ } }
|
||||
{ vbitand { +vector+ +vector+ -> +vector+ } }
|
||||
{ vbitandn { +vector+ +vector+ -> +vector+ } }
|
||||
{ vbitor { +vector+ +vector+ -> +vector+ } }
|
||||
{ vbitxor { +vector+ +vector+ -> +vector+ } }
|
||||
{ vbitnot { +vector+ -> +vector+ } }
|
||||
{ vand { +vector+ +vector+ -> +vector+ } }
|
||||
{ vandn { +vector+ +vector+ -> +vector+ } }
|
||||
{ vor { +vector+ +vector+ -> +vector+ } }
|
||||
{ vxor { +vector+ +vector+ -> +vector+ } }
|
||||
{ vnot { +vector+ -> +vector+ } }
|
||||
{ vlshift { +vector+ +scalar+ -> +vector+ } }
|
||||
{ vrshift { +vector+ +scalar+ -> +vector+ } }
|
||||
{ (vmerge-head) { +vector+ +vector+ -> +vector+ } }
|
||||
{ (vmerge-tail) { +vector+ +vector+ -> +vector+ } }
|
||||
{ v<= { +vector+ +vector+ -> +vector+ } }
|
||||
{ v< { +vector+ +vector+ -> +vector+ } }
|
||||
{ v= { +vector+ +vector+ -> +vector+ } }
|
||||
{ v> { +vector+ +vector+ -> +vector+ } }
|
||||
{ v>= { +vector+ +vector+ -> +vector+ } }
|
||||
{ vunordered? { +vector+ +vector+ -> +vector+ } }
|
||||
}
|
||||
|
||||
: vector-word-inputs ( schema -- seq ) { -> } split first ;
|
||||
|
||||
: with-ctors ( -- seq )
|
||||
simd-classes [ [ name>> "-with" append ] [ vocabulary>> ] bi lookup ] map ;
|
||||
|
||||
|
@ -82,7 +120,7 @@ CONSTANT: simd-classes
|
|||
simd-classes [ [ name>> "-boa" append ] [ vocabulary>> ] bi lookup ] map ;
|
||||
|
||||
: check-optimizer ( seq quot eq-quot -- failures )
|
||||
'[
|
||||
dup '[
|
||||
@
|
||||
[ dup [ class ] { } map-as ] dip '[ _ declare @ ]
|
||||
{
|
||||
|
@ -90,8 +128,9 @@ CONSTANT: simd-classes
|
|||
[ "print-checks" get [ [ . ] bi@ ] [ 2drop ] if ]
|
||||
[ [ [ call ] dip call ] call( quot quot -- result ) ]
|
||||
[ [ [ call ] dip compile-call ] call( quot quot -- result ) ]
|
||||
[ [ t "always-inline-simd-intrinsics" [ [ call ] dip compile-call ] with-variable ] call( quot quot -- result ) ]
|
||||
} 2cleave
|
||||
@ not
|
||||
[ drop @ ] [ nip @ ] 3bi and not
|
||||
] filter ; inline
|
||||
|
||||
"== Checking -new constructors" print
|
||||
|
@ -166,26 +205,15 @@ CONSTANT: simd-classes
|
|||
: remove-boolean-words ( alist -- alist' )
|
||||
boolean-ops unique assoc-diff ;
|
||||
|
||||
: remove-special-words ( alist -- alist' )
|
||||
! These have their own tests later
|
||||
{
|
||||
hlshift hrshift vshuffle-bytes vshuffle-elements vbroadcast
|
||||
vany? vall? vnone?
|
||||
(v>float) (v>integer)
|
||||
(vpack-signed) (vpack-unsigned)
|
||||
(vunpack-head) (vunpack-tail)
|
||||
} unique assoc-diff ;
|
||||
|
||||
: ops-to-check ( elt-class -- alist )
|
||||
[ vector-words >alist ] dip
|
||||
float = [ remove-integer-words ] [ remove-float-words ] if
|
||||
remove-boolean-words
|
||||
remove-special-words ;
|
||||
remove-boolean-words ;
|
||||
|
||||
: check-vector-ops ( class elt-class compare-quot -- )
|
||||
[
|
||||
[ nip ops-to-check ] 2keep
|
||||
'[ first2 inputs _ _ check-vector-op ]
|
||||
'[ first2 vector-word-inputs _ _ check-vector-op ]
|
||||
] dip check-optimizer ; inline
|
||||
|
||||
: (approx=) ( x y -- ? )
|
||||
|
@ -235,8 +263,8 @@ simd-classes&reps [
|
|||
|
||||
: check-boolean-ops ( class elt-class compare-quot -- seq )
|
||||
[
|
||||
[ boolean-ops [ dup word-schema ] { } map>assoc ] 2dip
|
||||
'[ first2 inputs _ _ check-boolean-op ]
|
||||
[ boolean-ops [ dup vector-words at ] { } map>assoc ] 2dip
|
||||
'[ first2 vector-word-inputs _ _ check-boolean-op ]
|
||||
] dip check-optimizer ; inline
|
||||
|
||||
simd-classes&reps [
|
||||
|
@ -427,27 +455,6 @@ TUPLE: inconsistent-vector-test bool branch ;
|
|||
[ t f f ]
|
||||
[ int-4{ f f f f } { int-4 } test-vector-tests ] unit-test
|
||||
|
||||
[ f t t ]
|
||||
[ float-8{ t t t t t t t t } { float-8 } test-vector-tests ] unit-test
|
||||
[ f t f ]
|
||||
[ float-8{ f t t t t f t t } { float-8 } test-vector-tests ] unit-test
|
||||
[ t f f ]
|
||||
[ float-8{ f f f f f f f f } { float-8 } test-vector-tests ] unit-test
|
||||
|
||||
[ f t t ]
|
||||
[ double-4{ t t t t } { double-4 } test-vector-tests ] unit-test
|
||||
[ f t f ]
|
||||
[ double-4{ f t t f } { double-4 } test-vector-tests ] unit-test
|
||||
[ t f f ]
|
||||
[ double-4{ f f f f } { double-4 } test-vector-tests ] unit-test
|
||||
|
||||
[ f t t ]
|
||||
[ int-8{ t t t t t t t t } { int-8 } test-vector-tests ] unit-test
|
||||
[ f t f ]
|
||||
[ int-8{ f t t t t f f f } { int-8 } test-vector-tests ] unit-test
|
||||
[ t f f ]
|
||||
[ int-8{ f f f f f f f f } { int-8 } test-vector-tests ] unit-test
|
||||
|
||||
"== Checking element access" print
|
||||
|
||||
! Test element access -- it should box bignums for int-4 on x86
|
||||
|
@ -467,14 +474,6 @@ TUPLE: inconsistent-vector-test bool branch ;
|
|||
[ { } ] [ longlong-2{ 1 2 } test-accesses ] unit-test
|
||||
[ { } ] [ ulonglong-2{ 1 2 } test-accesses ] unit-test
|
||||
|
||||
[ { } ] [ float-8{ 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 } test-accesses ] unit-test
|
||||
[ { } ] [ int-8{ 1 2 3 4 5 6 7 8 } test-accesses ] unit-test
|
||||
[ { } ] [ uint-8{ 1 2 3 4 5 6 7 8 } test-accesses ] unit-test
|
||||
|
||||
[ { } ] [ double-4{ 1.0 2.0 3.0 4.0 } test-accesses ] unit-test
|
||||
[ { } ] [ longlong-4{ 1 2 3 4 } test-accesses ] unit-test
|
||||
[ { } ] [ ulonglong-4{ 1 2 3 4 } test-accesses ] unit-test
|
||||
|
||||
"== Checking broadcast" print
|
||||
: test-broadcast ( seq -- failures )
|
||||
[ length >array ] keep
|
||||
|
@ -488,14 +487,6 @@ TUPLE: inconsistent-vector-test bool branch ;
|
|||
[ { } ] [ longlong-2{ 1 2 } test-broadcast ] unit-test
|
||||
[ { } ] [ ulonglong-2{ 1 2 } test-broadcast ] unit-test
|
||||
|
||||
[ { } ] [ float-8{ 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 } test-broadcast ] unit-test
|
||||
[ { } ] [ int-8{ 1 2 3 4 5 6 7 8 } test-broadcast ] unit-test
|
||||
[ { } ] [ uint-8{ 1 2 3 4 5 6 7 8 } test-broadcast ] unit-test
|
||||
|
||||
[ { } ] [ double-4{ 1.0 2.0 3.0 4.0 } test-broadcast ] unit-test
|
||||
[ { } ] [ longlong-4{ 1 2 3 4 } test-broadcast ] unit-test
|
||||
[ { } ] [ ulonglong-4{ 1 2 3 4 } test-broadcast ] unit-test
|
||||
|
||||
! Make sure we use the fallback in the correct situations
|
||||
[ int-4{ 3 3 3 3 } ] [ int-4{ 12 34 3 17 } 2 [ { int-4 fixnum } declare vbroadcast ] compile-call ] unit-test
|
||||
|
||||
|
@ -529,37 +520,37 @@ TUPLE: inconsistent-vector-test bool branch ;
|
|||
STRUCT: simd-struct
|
||||
{ x float-4 }
|
||||
{ y longlong-2 }
|
||||
{ z double-4 }
|
||||
{ w int-8 } ;
|
||||
{ z double-2 }
|
||||
{ w int-4 } ;
|
||||
|
||||
[ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
|
||||
|
||||
[
|
||||
float-4{ 1 2 3 4 }
|
||||
longlong-2{ 2 1 }
|
||||
double-4{ 4 3 2 1 }
|
||||
int-8{ 1 2 3 4 5 6 7 8 }
|
||||
double-2{ 4 3 }
|
||||
int-4{ 1 2 3 4 }
|
||||
] [
|
||||
simd-struct <struct>
|
||||
float-4{ 1 2 3 4 } >>x
|
||||
longlong-2{ 2 1 } >>y
|
||||
double-4{ 4 3 2 1 } >>z
|
||||
int-8{ 1 2 3 4 5 6 7 8 } >>w
|
||||
double-2{ 4 3 } >>z
|
||||
int-4{ 1 2 3 4 } >>w
|
||||
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
|
||||
] unit-test
|
||||
|
||||
[
|
||||
float-4{ 1 2 3 4 }
|
||||
longlong-2{ 2 1 }
|
||||
double-4{ 4 3 2 1 }
|
||||
int-8{ 1 2 3 4 5 6 7 8 }
|
||||
double-2{ 4 3 }
|
||||
int-4{ 1 2 3 4 }
|
||||
] [
|
||||
[
|
||||
simd-struct <struct>
|
||||
float-4{ 1 2 3 4 } >>x
|
||||
longlong-2{ 2 1 } >>y
|
||||
double-4{ 4 3 2 1 } >>z
|
||||
int-8{ 1 2 3 4 5 6 7 8 } >>w
|
||||
double-2{ 4 3 } >>z
|
||||
int-4{ 1 2 3 4 } >>w
|
||||
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
|
||||
] compile-call
|
||||
] unit-test
|
||||
|
@ -569,9 +560,9 @@ STRUCT: simd-struct
|
|||
[ ] [ char-16 new 1array stack. ] unit-test
|
||||
|
||||
! CSSA bug
|
||||
[ 8000000 ] [
|
||||
int-8{ 1000 1000 1000 1000 1000 1000 1000 1000 }
|
||||
[ { int-8 } declare dup [ * ] [ + ] 2map-reduce ] compile-call
|
||||
[ 4000000 ] [
|
||||
int-4{ 1000 1000 1000 1000 }
|
||||
[ { int-4 } declare dup [ * ] [ + ] 2map-reduce ] compile-call
|
||||
] unit-test
|
||||
|
||||
! Coalescing was too aggressive
|
||||
|
|
|
@ -1,42 +1,274 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: alien.c-types combinators fry kernel parser math math.parser
|
||||
math.vectors.simd.functor sequences splitting vocabs.generated
|
||||
vocabs.loader vocabs.parser words accessors vocabs compiler.units
|
||||
definitions ;
|
||||
USING: accessors alien.c-types arrays byte-arrays classes combinators
|
||||
cpu.architecture effects fry functors generalizations generic
|
||||
generic.parser kernel lexer literals macros math math.functions
|
||||
math.vectors math.vectors.private math.vectors.simd.intrinsics namespaces parser
|
||||
prettyprint.custom quotations sequences sequences.private vocabs
|
||||
vocabs.loader words ;
|
||||
QUALIFIED-WITH: alien.c-types c
|
||||
IN: math.vectors.simd
|
||||
|
||||
ERROR: bad-base-type type ;
|
||||
ERROR: bad-simd-length got expected ;
|
||||
|
||||
<<
|
||||
<PRIVATE
|
||||
! Primitive SIMD constructors
|
||||
|
||||
GENERIC: new-underlying ( underlying seq -- seq' )
|
||||
|
||||
: make-underlying ( seq quot -- seq' )
|
||||
dip new-underlying ; inline
|
||||
: change-underlying ( seq quot -- seq' )
|
||||
'[ underlying>> @ ] keep new-underlying ; inline
|
||||
PRIVATE>
|
||||
>>
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: simd-vocab ( base-type -- vocab )
|
||||
name>> "math.vectors.simd.instances." prepend ;
|
||||
! Helper for boolean vector literals
|
||||
|
||||
: parse-base-type ( c-type -- c-type )
|
||||
dup { c:char c:uchar c:short c:ushort c:int c:uint c:longlong c:ulonglong c:float c:double } member-eq?
|
||||
[ bad-base-type ] unless ;
|
||||
: vector-true-value ( class -- value )
|
||||
{ c:float c:double } member? [ -1 bits>double ] [ -1 ] if ; foldable
|
||||
|
||||
: forget-instances ( -- )
|
||||
[
|
||||
"math.vectors.simd.instances" child-vocabs
|
||||
[ forget-vocab ] each
|
||||
] with-compilation-unit ;
|
||||
: vector-false-value ( type -- value )
|
||||
{ c:float c:double } member? [ 0.0 ] [ 0 ] if ; foldable
|
||||
|
||||
: boolean>element ( bool/elt type -- elt )
|
||||
swap {
|
||||
{ t [ vector-true-value ] }
|
||||
{ f [ vector-false-value ] }
|
||||
[ nip ]
|
||||
} case ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: define-simd-vocab ( type -- vocab )
|
||||
parse-base-type
|
||||
[ simd-vocab ] keep '[
|
||||
_
|
||||
[ define-simd-128 ]
|
||||
[ define-simd-256 ] bi
|
||||
] generate-vocab ;
|
||||
! SIMD base type
|
||||
|
||||
SYNTAX: SIMD:
|
||||
scan-word define-simd-vocab use-vocab ;
|
||||
TUPLE: simd-128
|
||||
{ underlying byte-array read-only initial: $[ 16 <byte-array> ] } ;
|
||||
|
||||
SYNTAX: SIMDS:
|
||||
\ ; parse-until [ define-simd-vocab use-vocab ] each ;
|
||||
GENERIC: simd-element-type ( obj -- c-type )
|
||||
GENERIC: simd-rep ( simd -- rep )
|
||||
|
||||
M: object simd-element-type drop f ;
|
||||
M: object simd-rep drop f ;
|
||||
|
||||
<<
|
||||
<PRIVATE
|
||||
|
||||
DEFER: simd-construct-op
|
||||
|
||||
! Unboxers for SIMD operations
|
||||
: if-both-vectors ( a b rep t f -- )
|
||||
[ 2over [ simd-128? ] both? ] 2dip if ; inline
|
||||
|
||||
: if-both-vectors-match ( a b rep t f -- )
|
||||
[ 3dup [ drop [ simd-128? ] both? ] [ '[ simd-rep _ eq? ] both? ] 3bi and ]
|
||||
2dip if ; inline
|
||||
|
||||
: simd-unbox ( a -- a (a) )
|
||||
[ ] [ underlying>> ] bi ; inline
|
||||
|
||||
: v->v-op ( a rep quot: ( (a) rep -- (c) ) fallback-quot -- c )
|
||||
drop [ simd-unbox ] 2dip 2curry make-underlying ; inline
|
||||
|
||||
: vn->v-op ( a n rep quot: ( (a) n rep -- (c) ) fallback-quot -- c )
|
||||
drop [ simd-unbox ] 3dip 3curry make-underlying ; inline
|
||||
|
||||
: vn->n-op ( a n rep quot: ( (a) n rep -- n ) fallback-quot -- n )
|
||||
drop [ underlying>> ] 3dip call ; inline
|
||||
|
||||
: v->n-op ( a rep quot: ( (a) rep -- n ) fallback-quot -- n )
|
||||
drop [ underlying>> ] 2dip call ; inline
|
||||
|
||||
: (vv->v-op) ( a b rep quot: ( (a) (b) rep -- (c) ) -- c )
|
||||
[ [ simd-unbox ] [ underlying>> ] bi* ] 2dip 3curry make-underlying ; inline
|
||||
|
||||
: (vv->n-op) ( a b rep quot: ( (a) (b) rep -- n ) -- n )
|
||||
[ [ underlying>> ] bi@ ] 2dip 3curry call ; inline
|
||||
|
||||
: vv->v-op ( a b rep quot: ( (a) (b) rep -- (c) ) fallback-quot -- c )
|
||||
[ '[ _ (vv->v-op) ] ] [ '[ drop @ ] ] bi* if-both-vectors-match ; inline
|
||||
|
||||
: vv'->v-op ( a b rep quot: ( (a) (b) rep -- (c) ) fallback-quot -- c )
|
||||
[ '[ _ (vv->v-op) ] ] [ '[ drop @ ] ] bi* if-both-vectors ; inline
|
||||
|
||||
: vv->n-op ( a b rep quot: ( (a) (b) rep -- n ) fallback-quot -- n )
|
||||
[ '[ _ (vv->n-op) ] ] [ '[ drop @ ] ] bi* if-both-vectors-match ; inline
|
||||
|
||||
PRIVATE>
|
||||
>>
|
||||
|
||||
<<
|
||||
<PRIVATE
|
||||
|
||||
! SIMD concrete type functor
|
||||
|
||||
FUNCTOR: define-simd-128 ( T -- )
|
||||
|
||||
A DEFINES-CLASS ${T}
|
||||
A-rep IS ${T}-rep
|
||||
>A DEFINES >${T}
|
||||
A-boa DEFINES ${T}-boa
|
||||
A-with DEFINES ${T}-with
|
||||
A-cast DEFINES ${T}-cast
|
||||
A{ DEFINES ${T}{
|
||||
|
||||
ELT [ A-rep rep-component-type ]
|
||||
N [ A-rep rep-length ]
|
||||
COERCER [ ELT c-type-class "coercer" word-prop [ ] or ]
|
||||
|
||||
SET-NTH [ ELT dup c:c-setter c:array-accessor ]
|
||||
|
||||
BOA-EFFECT [ N "n" <repetition> >array { "v" } <effect> ]
|
||||
|
||||
WHERE
|
||||
|
||||
TUPLE: A < simd-128 ;
|
||||
|
||||
M: A new-underlying drop \ A boa ; inline
|
||||
M: A simd-rep drop A-rep ; inline
|
||||
M: A simd-element-type drop ELT ; inline
|
||||
|
||||
M: A set-nth-unsafe
|
||||
[ ELT boolean>element ] 2dip
|
||||
underlying>> SET-NTH call ; inline
|
||||
|
||||
: >A ( seq -- simd ) \ A new clone-like ; inline
|
||||
|
||||
M: A like drop dup \ A instance? [ >A ] unless ; inline
|
||||
|
||||
: A-with ( n -- v ) COERCER call \ A-rep (simd-with) \ A boa ; inline
|
||||
: A-cast ( v -- v' ) underlying>> \ A boa ; inline
|
||||
|
||||
! SIMD vectors as sequences
|
||||
|
||||
M: A hashcode* underlying>> hashcode* ; inline
|
||||
M: A clone [ clone ] change-underlying ; inline
|
||||
M: A length drop N ; inline
|
||||
M: A nth-unsafe
|
||||
swap \ A-rep [ (simd-select) ] [ call-next-method ] vn->n-op ; inline
|
||||
M: A c:byte-length drop 16 ; inline
|
||||
|
||||
M: A new-sequence
|
||||
2dup length =
|
||||
[ nip [ 16 (byte-array) ] make-underlying ]
|
||||
[ length bad-simd-length ] if ; inline
|
||||
|
||||
M: A equal?
|
||||
\ A-rep [ drop v= vall? ] [ 3drop f ] if-both-vectors-match ; inline
|
||||
|
||||
! SIMD primitive operations
|
||||
|
||||
M: A v+ \ A-rep [ (simd-v+) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A v- \ A-rep [ (simd-v-) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A vneg \ A-rep [ (simd-vneg) ] [ call-next-method ] v->v-op ; inline
|
||||
M: A v+- \ A-rep [ (simd-v+-) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A vs+ \ A-rep [ (simd-vs+) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A vs- \ A-rep [ (simd-vs-) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A vs* \ A-rep [ (simd-vs*) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A v* \ A-rep [ (simd-v*) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A v/ \ A-rep [ (simd-v/) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A vmin \ A-rep [ (simd-vmin) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A vmax \ A-rep [ (simd-vmax) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A v. \ A-rep [ (simd-v.) ] [ call-next-method ] vv->n-op ; inline
|
||||
M: A vsqrt \ A-rep [ (simd-vsqrt) ] [ call-next-method ] v->v-op ; inline
|
||||
M: A sum \ A-rep [ (simd-sum) ] [ call-next-method ] v->n-op ; inline
|
||||
M: A vabs \ A-rep [ (simd-vabs) ] [ call-next-method ] v->v-op ; inline
|
||||
M: A vbitand \ A-rep [ (simd-vbitand) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A vbitandn \ A-rep [ (simd-vbitandn) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A vbitor \ A-rep [ (simd-vbitor) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A vbitxor \ A-rep [ (simd-vbitxor) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A vbitnot \ A-rep [ (simd-vbitnot) ] [ call-next-method ] v->v-op ; inline
|
||||
M: A vand \ A-rep [ (simd-vand) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A vandn \ A-rep [ (simd-vandn) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A vor \ A-rep [ (simd-vor) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A vxor \ A-rep [ (simd-vxor) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A vnot \ A-rep [ (simd-vnot) ] [ call-next-method ] v->v-op ; inline
|
||||
M: A vlshift \ A-rep [ (simd-vlshift) ] [ call-next-method ] vn->v-op ; inline
|
||||
M: A vrshift \ A-rep [ (simd-vrshift) ] [ call-next-method ] vn->v-op ; inline
|
||||
M: A hlshift \ A-rep [ (simd-hlshift) ] [ call-next-method ] vn->v-op ; inline
|
||||
M: A hrshift \ A-rep [ (simd-hrshift) ] [ call-next-method ] vn->v-op ; inline
|
||||
M: A vshuffle-elements \ A-rep [ (simd-vshuffle-elements) ] [ call-next-method ] vn->v-op ; inline
|
||||
M: A vshuffle-bytes \ A-rep [ (simd-vshuffle-bytes) ] [ call-next-method ] vv'->v-op ; inline
|
||||
M: A (vmerge-head) \ A-rep [ (simd-vmerge-head) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A (vmerge-tail) \ A-rep [ (simd-vmerge-tail) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A v<= \ A-rep [ (simd-v<=) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A v< \ A-rep [ (simd-v<) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A v= \ A-rep [ (simd-v=) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A v> \ A-rep [ (simd-v>) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A v>= \ A-rep [ (simd-v>=) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A vunordered? \ A-rep [ (simd-vunordered?) ] [ call-next-method ] vv->v-op ; inline
|
||||
M: A vany? \ A-rep [ (simd-vany?) ] [ call-next-method ] v->n-op ; inline
|
||||
M: A vall? \ A-rep [ (simd-vall?) ] [ call-next-method ] v->n-op ; inline
|
||||
M: A vnone? \ A-rep [ (simd-vnone?) ] [ call-next-method ] v->n-op ; inline
|
||||
|
||||
! SIMD high-level specializations
|
||||
|
||||
M: A vbroadcast swap nth A-with ; inline
|
||||
M: A n+v [ A-with ] dip v+ ; inline
|
||||
M: A n-v [ A-with ] dip v- ; inline
|
||||
M: A n*v [ A-with ] dip v* ; inline
|
||||
M: A n/v [ A-with ] dip v/ ; inline
|
||||
M: A v+n A-with v+ ; inline
|
||||
M: A v-n A-with v- ; inline
|
||||
M: A v*n A-with v* ; inline
|
||||
M: A v/n A-with v/ ; inline
|
||||
M: A norm-sq dup v. assert-positive ; inline
|
||||
M: A distance v- norm ; inline
|
||||
|
||||
M: A >pprint-sequence ;
|
||||
M: A pprint* pprint-object ;
|
||||
|
||||
\ A-boa
|
||||
[ COERCER N napply ] N {
|
||||
{ 2 [ [ A-rep (simd-gather-2) A boa ] ] }
|
||||
{ 4 [ [ A-rep (simd-gather-4) A boa ] ] }
|
||||
[ \ A new '[ _ _ nsequence ] ]
|
||||
} case compose
|
||||
BOA-EFFECT define-inline
|
||||
|
||||
M: A pprint-delims drop \ A{ \ } ;
|
||||
SYNTAX: A{ \ } [ >A ] parse-literal ;
|
||||
|
||||
c:<c-type>
|
||||
byte-array >>class
|
||||
A >>boxed-class
|
||||
{ A-rep alien-vector A boa } >quotation >>getter
|
||||
{ [ underlying>> ] 2dip A-rep set-alien-vector } >quotation >>setter
|
||||
16 >>size
|
||||
16 >>align
|
||||
A-rep >>rep
|
||||
\ A c:typedef
|
||||
|
||||
;FUNCTOR
|
||||
|
||||
SYNTAX: SIMD-128:
|
||||
scan define-simd-128 ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
>>
|
||||
|
||||
INSTANCE: simd-128 sequence
|
||||
|
||||
! SIMD instances
|
||||
|
||||
SIMD-128: char-16
|
||||
SIMD-128: uchar-16
|
||||
SIMD-128: short-8
|
||||
SIMD-128: ushort-8
|
||||
SIMD-128: int-4
|
||||
SIMD-128: uint-4
|
||||
SIMD-128: longlong-2
|
||||
SIMD-128: ulonglong-2
|
||||
SIMD-128: float-4
|
||||
SIMD-128: double-2
|
||||
|
||||
! misc
|
||||
|
||||
M: simd-128 vshuffle ( u perm -- v )
|
||||
vshuffle-bytes ; inline
|
||||
|
||||
"mirrors" vocab [
|
||||
"math.vectors.simd.mirrors" require
|
||||
] when
|
||||
|
|
|
@ -1,28 +0,0 @@
|
|||
IN: math.vectors.specialization.tests
|
||||
USING: compiler.tree.debugger math.vectors tools.test kernel
|
||||
kernel.private math specialized-arrays ;
|
||||
QUALIFIED-WITH: alien.c-types c
|
||||
QUALIFIED-WITH: alien.complex c
|
||||
SPECIALIZED-ARRAY: c:double
|
||||
SPECIALIZED-ARRAY: c:complex-float
|
||||
SPECIALIZED-ARRAY: c:float
|
||||
|
||||
[ V{ t } ] [
|
||||
[ { double-array double-array } declare distance 0.0 < not ] final-literals
|
||||
] unit-test
|
||||
|
||||
[ V{ float } ] [
|
||||
[ { float-array float } declare v*n norm ] final-classes
|
||||
] unit-test
|
||||
|
||||
[ V{ complex } ] [
|
||||
[ { complex-float-array complex-float-array } declare v. ] final-classes
|
||||
] unit-test
|
||||
|
||||
[ V{ float } ] [
|
||||
[ { float-array float } declare v*n norm ] final-classes
|
||||
] unit-test
|
||||
|
||||
[ V{ float } ] [
|
||||
[ { complex-float-array complex } declare v*n norm ] final-classes
|
||||
] unit-test
|
|
@ -1,207 +0,0 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: words kernel make sequences effects sets kernel.private
|
||||
accessors combinators math math.intervals math.vectors
|
||||
math.vectors.conversion.backend namespaces assocs fry splitting
|
||||
classes.algebra generalizations locals
|
||||
compiler.tree.propagation.info ;
|
||||
IN: math.vectors.specialization
|
||||
|
||||
SYMBOLS: -> +vector+ +any-vector+ +scalar+ +boolean+ +nonnegative+ +literal+ ;
|
||||
|
||||
: parent-vector-class ( type -- type' )
|
||||
{
|
||||
{ [ dup simd-128 class<= ] [ drop simd-128 ] }
|
||||
{ [ dup simd-256 class<= ] [ drop simd-256 ] }
|
||||
[ "Not a vector class" throw ]
|
||||
} cond ;
|
||||
|
||||
: signature-for-schema ( array-type elt-type schema -- signature )
|
||||
[
|
||||
{
|
||||
{ +vector+ [ drop ] }
|
||||
{ +any-vector+ [ drop parent-vector-class ] }
|
||||
{ +scalar+ [ nip ] }
|
||||
{ +boolean+ [ 2drop boolean ] }
|
||||
{ +nonnegative+ [ nip ] }
|
||||
{ +literal+ [ 2drop f ] }
|
||||
} case
|
||||
] with with map ;
|
||||
|
||||
: (specialize-vector-word) ( word array-type elt-type schema -- word' )
|
||||
signature-for-schema
|
||||
[ [ name>> ] [ [ name>> ] map "," join ] bi* "=>" glue f <word> ]
|
||||
[ [ , \ declare , def>> % ] [ ] make ]
|
||||
[ drop stack-effect ]
|
||||
2tri
|
||||
[ define-declared ] [ 2drop ] 3bi ;
|
||||
|
||||
: output-infos ( array-type elt-type schema -- value-infos )
|
||||
[
|
||||
{
|
||||
{ +vector+ [ drop <class-info> ] }
|
||||
{ +any-vector+ [ drop parent-vector-class <class-info> ] }
|
||||
{ +scalar+ [ nip <class-info> ] }
|
||||
{ +boolean+ [ 2drop boolean <class-info> ] }
|
||||
{
|
||||
+nonnegative+
|
||||
[
|
||||
nip
|
||||
dup complex class<= [ drop float ] when
|
||||
[0,inf] <class/interval-info>
|
||||
]
|
||||
}
|
||||
} case
|
||||
] with with map ;
|
||||
|
||||
: record-output-signature ( word array-type elt-type schema -- word )
|
||||
output-infos
|
||||
[ drop ]
|
||||
[ drop ]
|
||||
[ [ stack-effect in>> length '[ _ ndrop ] ] dip append ] 2tri
|
||||
"outputs" set-word-prop ;
|
||||
|
||||
CONSTANT: vector-words
|
||||
H{
|
||||
{ [v-] { +vector+ +vector+ -> +vector+ } }
|
||||
{ distance { +vector+ +vector+ -> +nonnegative+ } }
|
||||
{ n*v { +scalar+ +vector+ -> +vector+ } }
|
||||
{ n+v { +scalar+ +vector+ -> +vector+ } }
|
||||
{ n-v { +scalar+ +vector+ -> +vector+ } }
|
||||
{ n/v { +scalar+ +vector+ -> +vector+ } }
|
||||
{ norm { +vector+ -> +nonnegative+ } }
|
||||
{ norm-sq { +vector+ -> +nonnegative+ } }
|
||||
{ normalize { +vector+ -> +vector+ } }
|
||||
{ v* { +vector+ +vector+ -> +vector+ } }
|
||||
{ vs* { +vector+ +vector+ -> +vector+ } }
|
||||
{ v*n { +vector+ +scalar+ -> +vector+ } }
|
||||
{ v+ { +vector+ +vector+ -> +vector+ } }
|
||||
{ vs+ { +vector+ +vector+ -> +vector+ } }
|
||||
{ v+- { +vector+ +vector+ -> +vector+ } }
|
||||
{ v+n { +vector+ +scalar+ -> +vector+ } }
|
||||
{ v- { +vector+ +vector+ -> +vector+ } }
|
||||
{ vneg { +vector+ -> +vector+ } }
|
||||
{ vs- { +vector+ +vector+ -> +vector+ } }
|
||||
{ v-n { +vector+ +scalar+ -> +vector+ } }
|
||||
{ v. { +vector+ +vector+ -> +scalar+ } }
|
||||
{ v/ { +vector+ +vector+ -> +vector+ } }
|
||||
{ v/n { +vector+ +scalar+ -> +vector+ } }
|
||||
{ vceiling { +vector+ -> +vector+ } }
|
||||
{ vfloor { +vector+ -> +vector+ } }
|
||||
{ vmax { +vector+ +vector+ -> +vector+ } }
|
||||
{ vmin { +vector+ +vector+ -> +vector+ } }
|
||||
{ vneg { +vector+ -> +vector+ } }
|
||||
{ vtruncate { +vector+ -> +vector+ } }
|
||||
{ sum { +vector+ -> +scalar+ } }
|
||||
{ vabs { +vector+ -> +vector+ } }
|
||||
{ vsqrt { +vector+ -> +vector+ } }
|
||||
{ vbitand { +vector+ +vector+ -> +vector+ } }
|
||||
{ vbitandn { +vector+ +vector+ -> +vector+ } }
|
||||
{ vbitor { +vector+ +vector+ -> +vector+ } }
|
||||
{ vbitxor { +vector+ +vector+ -> +vector+ } }
|
||||
{ vbitnot { +vector+ -> +vector+ } }
|
||||
{ vand { +vector+ +vector+ -> +vector+ } }
|
||||
{ vandn { +vector+ +vector+ -> +vector+ } }
|
||||
{ vor { +vector+ +vector+ -> +vector+ } }
|
||||
{ vxor { +vector+ +vector+ -> +vector+ } }
|
||||
{ vnot { +vector+ -> +vector+ } }
|
||||
{ vlshift { +vector+ +scalar+ -> +vector+ } }
|
||||
{ vrshift { +vector+ +scalar+ -> +vector+ } }
|
||||
{ hlshift { +vector+ +literal+ -> +vector+ } }
|
||||
{ hrshift { +vector+ +literal+ -> +vector+ } }
|
||||
{ vshuffle-elements { +vector+ +literal+ -> +vector+ } }
|
||||
{ vshuffle-bytes { +vector+ +any-vector+ -> +vector+ } }
|
||||
{ vbroadcast { +vector+ +literal+ -> +vector+ } }
|
||||
{ (vmerge-head) { +vector+ +vector+ -> +vector+ } }
|
||||
{ (vmerge-tail) { +vector+ +vector+ -> +vector+ } }
|
||||
{ (v>float) { +vector+ +literal+ -> +vector+ } }
|
||||
{ (v>integer) { +vector+ +literal+ -> +vector+ } }
|
||||
{ (vpack-signed) { +vector+ +vector+ +literal+ -> +vector+ } }
|
||||
{ (vpack-unsigned) { +vector+ +vector+ +literal+ -> +vector+ } }
|
||||
{ (vunpack-head) { +vector+ +literal+ -> +vector+ } }
|
||||
{ (vunpack-tail) { +vector+ +literal+ -> +vector+ } }
|
||||
{ v<= { +vector+ +vector+ -> +vector+ } }
|
||||
{ v< { +vector+ +vector+ -> +vector+ } }
|
||||
{ v= { +vector+ +vector+ -> +vector+ } }
|
||||
{ v> { +vector+ +vector+ -> +vector+ } }
|
||||
{ v>= { +vector+ +vector+ -> +vector+ } }
|
||||
{ vunordered? { +vector+ +vector+ -> +vector+ } }
|
||||
{ vany? { +vector+ -> +boolean+ } }
|
||||
{ vall? { +vector+ -> +boolean+ } }
|
||||
{ vnone? { +vector+ -> +boolean+ } }
|
||||
}
|
||||
|
||||
PREDICATE: vector-word < word vector-words key? ;
|
||||
|
||||
: specializations ( word -- assoc )
|
||||
dup "specializations" word-prop
|
||||
[ ] [ V{ } clone [ "specializations" set-word-prop ] keep ] ?if ;
|
||||
|
||||
M: vector-word subwords specializations values [ word? ] filter ;
|
||||
|
||||
: add-specialization ( new-word signature word -- )
|
||||
specializations set-at ;
|
||||
|
||||
ERROR: bad-vector-word word ;
|
||||
|
||||
: word-schema ( word -- schema )
|
||||
vector-words ?at [ bad-vector-word ] unless ;
|
||||
|
||||
: inputs ( schema -- seq ) { -> } split first ;
|
||||
|
||||
: outputs ( schema -- seq ) { -> } split second ;
|
||||
|
||||
: loop-vector-op ( word array-type elt-type -- word' )
|
||||
pick word-schema
|
||||
[ inputs (specialize-vector-word) ]
|
||||
[ outputs record-output-signature ] 3bi ;
|
||||
|
||||
:: specialize-vector-word ( word array-type elt-type simd -- word/quot' )
|
||||
word simd key? [ word simd at ] [ word array-type elt-type loop-vector-op ] if ;
|
||||
|
||||
:: input-signature ( word array-type elt-type -- signature )
|
||||
array-type elt-type word word-schema inputs signature-for-schema ;
|
||||
|
||||
: vector-words-for-type ( elt-type -- words )
|
||||
{
|
||||
! Can't do shifts on floats
|
||||
{ [ dup float class<= ] [ vector-words keys { vlshift vrshift } diff ] }
|
||||
! Can't divide integers
|
||||
{ [ dup integer class<= ] [ vector-words keys { vsqrt n/v v/n v/ normalize } diff ] }
|
||||
! Can't compute square root of complex numbers (vsqrt uses fsqrt not sqrt)
|
||||
{ [ dup complex class<= ] [ vector-words keys { vsqrt } diff ] }
|
||||
[ { } ]
|
||||
} cond
|
||||
! Don't specialize horizontal shifts, shuffles, and conversions at all, they're only for SIMD
|
||||
{
|
||||
hlshift hrshift vshuffle-elements vshuffle-bytes vbroadcast
|
||||
(v>integer) (v>float)
|
||||
(vpack-signed) (vpack-unsigned)
|
||||
(vunpack-head) (vunpack-tail)
|
||||
} diff
|
||||
nip ;
|
||||
|
||||
:: specialize-vector-words ( array-type elt-type simd -- )
|
||||
elt-type vector-words-for-type simd keys union [
|
||||
[ array-type elt-type simd specialize-vector-word ]
|
||||
[ array-type elt-type input-signature ]
|
||||
[ ]
|
||||
tri add-specialization
|
||||
] each ;
|
||||
|
||||
: specialization-matches? ( value-infos signature -- ? )
|
||||
[ [ [ class>> ] dip class<= ] [ literal?>> ] if* ] 2all? ;
|
||||
|
||||
: find-specialization ( classes word -- word/f )
|
||||
specializations
|
||||
[ first specialization-matches? ] with find
|
||||
swap [ second ] when ;
|
||||
|
||||
: vector-word-custom-inlining ( #call -- word/f )
|
||||
[ in-d>> [ value-info ] map ] [ word>> ] bi
|
||||
find-specialization ;
|
||||
|
||||
vector-words keys [
|
||||
[ vector-word-custom-inlining ]
|
||||
"custom-inlining" set-word-prop
|
||||
] each
|
|
@ -436,7 +436,6 @@ HELP: vshuffle
|
|||
{ $example
|
||||
"USING: alien.c-types combinators math.vectors math.vectors.simd"
|
||||
"namespaces prettyprint prettyprint.config ;"
|
||||
"SIMDS: int uchar ;"
|
||||
"IN: scratchpad"
|
||||
""
|
||||
": endian-swap ( size -- vector )"
|
||||
|
|
|
@ -6,29 +6,47 @@ byte-arrays accessors locals ;
|
|||
QUALIFIED-WITH: alien.c-types c
|
||||
IN: math.vectors
|
||||
|
||||
MIXIN: simd-128
|
||||
MIXIN: simd-256
|
||||
GENERIC: vneg ( u -- v )
|
||||
M: object vneg [ neg ] map ;
|
||||
|
||||
GENERIC: element-type ( obj -- c-type )
|
||||
M: object element-type drop f ; inline
|
||||
GENERIC# v+n 1 ( u n -- v )
|
||||
M: object v+n [ + ] curry map ;
|
||||
|
||||
: vneg ( u -- v ) [ neg ] map ;
|
||||
GENERIC: n+v ( n v -- w )
|
||||
M: object n+v [ + ] with map ;
|
||||
|
||||
: v+n ( u n -- v ) [ + ] curry map ;
|
||||
: n+v ( n u -- v ) [ + ] with map ;
|
||||
: v-n ( u n -- v ) [ - ] curry map ;
|
||||
: n-v ( n u -- v ) [ - ] with map ;
|
||||
GENERIC# v-n 1 ( u n -- w )
|
||||
M: object v-n [ - ] curry map ;
|
||||
|
||||
: v*n ( u n -- v ) [ * ] curry map ;
|
||||
: n*v ( n u -- v ) [ * ] with map ;
|
||||
: v/n ( u n -- v ) [ / ] curry map ;
|
||||
: n/v ( n u -- v ) [ / ] with map ;
|
||||
GENERIC: n-v ( n v -- w )
|
||||
M: object n-v [ - ] with map ;
|
||||
|
||||
: v+ ( u v -- w ) [ + ] 2map ;
|
||||
: v- ( u v -- w ) [ - ] 2map ;
|
||||
: [v-] ( u v -- w ) [ [-] ] 2map ;
|
||||
: v* ( u v -- w ) [ * ] 2map ;
|
||||
: v/ ( u v -- w ) [ / ] 2map ;
|
||||
GENERIC# v*n 1 ( u n -- v )
|
||||
M: object v*n [ * ] curry map ;
|
||||
|
||||
GENERIC: n*v ( n v -- w )
|
||||
M: object n*v [ * ] with map ;
|
||||
|
||||
GENERIC# v/n 1 ( u n -- v )
|
||||
M: object v/n [ / ] curry map ;
|
||||
|
||||
GENERIC: n/v ( n v -- w )
|
||||
M: object n/v [ / ] with map ;
|
||||
|
||||
GENERIC: v+ ( u v -- w )
|
||||
M: object v+ [ + ] 2map ;
|
||||
|
||||
GENERIC: v- ( u v -- w )
|
||||
M: object v- [ - ] 2map ;
|
||||
|
||||
GENERIC: [v-] ( u v -- w )
|
||||
M: object [v-] [ [-] ] 2map ;
|
||||
|
||||
GENERIC: v* ( u v -- w )
|
||||
M: object v* [ * ] 2map ;
|
||||
|
||||
GENERIC: v/ ( u v -- w )
|
||||
M: object v/ [ / ] 2map ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
|
@ -37,113 +55,128 @@ M: object element-type drop f ; inline
|
|||
|
||||
PRIVATE>
|
||||
|
||||
: vmax ( u v -- w ) [ [ float-max ] [ max ] if-both-floats ] 2map ;
|
||||
: vmin ( u v -- w ) [ [ float-min ] [ min ] if-both-floats ] 2map ;
|
||||
GENERIC: vmax ( u v -- w )
|
||||
M: object vmax [ [ float-max ] [ max ] if-both-floats ] 2map ;
|
||||
|
||||
: v+- ( u v -- w )
|
||||
GENERIC: vmin ( u v -- w )
|
||||
M: object vmin [ [ float-min ] [ min ] if-both-floats ] 2map ;
|
||||
|
||||
GENERIC: v+- ( u v -- w )
|
||||
M: object v+-
|
||||
[ t ] 2dip
|
||||
[ [ not ] 2dip pick [ + ] [ - ] if ] 2map
|
||||
nip ;
|
||||
|
||||
<PRIVATE
|
||||
GENERIC: vs+ ( u v -- w )
|
||||
M: object vs+ [ + ] 2map ;
|
||||
|
||||
: 2saturate-map ( u v quot -- w )
|
||||
pick element-type '[ @ _ c-type-clamp ] 2map ; inline
|
||||
GENERIC: vs- ( u v -- w )
|
||||
M: object vs- [ - ] 2map ;
|
||||
|
||||
PRIVATE>
|
||||
GENERIC: vs* ( u v -- w )
|
||||
M: object vs* [ * ] 2map ;
|
||||
|
||||
: vs+ ( u v -- w ) [ + ] 2saturate-map ;
|
||||
: vs- ( u v -- w ) [ - ] 2saturate-map ;
|
||||
: vs* ( u v -- w ) [ * ] 2saturate-map ;
|
||||
GENERIC: vabs ( u -- v )
|
||||
M: object vabs [ abs ] map ;
|
||||
|
||||
: vabs ( u -- v ) [ abs ] map ;
|
||||
: vsqrt ( u -- v ) [ >float fsqrt ] map ;
|
||||
GENERIC: vsqrt ( u -- v )
|
||||
M: object vsqrt [ >float fsqrt ] map ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: fp-bitwise-op ( x y seq quot -- z )
|
||||
swap element-type {
|
||||
{ c:double [ [ [ double>bits ] bi@ ] dip call bits>double ] }
|
||||
{ c:float [ [ [ float>bits ] bi@ ] dip call bits>float ] }
|
||||
[ drop call ]
|
||||
} case ; inline
|
||||
|
||||
: fp-bitwise-unary ( x seq quot -- z )
|
||||
swap element-type {
|
||||
{ c:double [ [ double>bits ] dip call bits>double ] }
|
||||
{ c:float [ [ float>bits ] dip call bits>float ] }
|
||||
[ drop call ]
|
||||
} case ; inline
|
||||
|
||||
: element>bool ( x seq -- ? )
|
||||
element-type [ [ f ] when-zero ] when ; inline
|
||||
|
||||
: bitandn ( x y -- z ) [ bitnot ] dip bitand ; inline
|
||||
|
||||
GENERIC: new-underlying ( underlying seq -- seq' )
|
||||
|
||||
: change-underlying ( seq quot -- seq' )
|
||||
'[ underlying>> @ ] keep new-underlying ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: vbitand ( u v -- w ) over '[ _ [ bitand ] fp-bitwise-op ] 2map ;
|
||||
: vbitandn ( u v -- w ) over '[ _ [ bitandn ] fp-bitwise-op ] 2map ;
|
||||
: vbitor ( u v -- w ) over '[ _ [ bitor ] fp-bitwise-op ] 2map ;
|
||||
: vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ;
|
||||
: vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ;
|
||||
GENERIC: vbitand ( u v -- w )
|
||||
M: object vbitand [ bitand ] 2map ;
|
||||
GENERIC: vbitandn ( u v -- w )
|
||||
M: object vbitandn [ bitandn ] 2map ;
|
||||
GENERIC: vbitor ( u v -- w )
|
||||
M: object vbitor [ bitor ] 2map ;
|
||||
GENERIC: vbitxor ( u v -- w )
|
||||
M: object vbitxor [ bitxor ] 2map ;
|
||||
GENERIC: vbitnot ( u -- w )
|
||||
M: object vbitnot [ bitnot ] map ;
|
||||
|
||||
:: vbroadcast ( u n -- v ) u length n u nth <repetition> u like ;
|
||||
GENERIC# vbroadcast 1 ( u n -- v )
|
||||
M:: object vbroadcast ( u n -- v ) u length n u nth <repetition> u like ;
|
||||
|
||||
: vshuffle-elements ( u perm -- v )
|
||||
GENERIC# vshuffle-elements 1 ( u perm -- v )
|
||||
M: object vshuffle-elements
|
||||
over length 0 pad-tail
|
||||
swap [ '[ _ nth ] ] keep map-as ;
|
||||
|
||||
: vshuffle-bytes ( u perm -- v )
|
||||
underlying>> [
|
||||
swap [ '[ 15 bitand _ nth ] ] keep map-as
|
||||
] curry change-underlying ;
|
||||
GENERIC# vshuffle-bytes 1 ( u perm -- v )
|
||||
|
||||
GENERIC: vshuffle ( u perm -- v )
|
||||
M: array vshuffle ( u perm -- v )
|
||||
vshuffle-elements ; inline
|
||||
M: simd-128 vshuffle ( u perm -- v )
|
||||
vshuffle-bytes ; inline
|
||||
|
||||
: vlshift ( u n -- w ) '[ _ shift ] map ;
|
||||
: vrshift ( u n -- w ) neg '[ _ shift ] map ;
|
||||
GENERIC# vlshift 1 ( u n -- w )
|
||||
M: object vlshift '[ _ shift ] map ;
|
||||
GENERIC# vrshift 1 ( u n -- w )
|
||||
M: object vrshift neg '[ _ shift ] map ;
|
||||
|
||||
: hlshift ( u n -- w ) '[ _ <byte-array> prepend 16 head ] change-underlying ;
|
||||
: hrshift ( u n -- w ) '[ _ <byte-array> append 16 tail* ] change-underlying ;
|
||||
GENERIC# hlshift 1 ( u n -- w )
|
||||
GENERIC# hrshift 1 ( u n -- w )
|
||||
|
||||
: (vmerge-head) ( u v -- h )
|
||||
over length 2 /i '[ _ head-slice ] bi@ [ zip ] keep concat-as ;
|
||||
: (vmerge-tail) ( u v -- t )
|
||||
over length 2 /i '[ _ tail-slice ] bi@ [ zip ] keep concat-as ;
|
||||
GENERIC: (vmerge-head) ( u v -- h )
|
||||
M: object (vmerge-head) over length 2 /i '[ _ head-slice ] bi@ [ zip ] keep concat-as ;
|
||||
GENERIC: (vmerge-tail) ( u v -- t )
|
||||
M: object (vmerge-tail) over length 2 /i '[ _ tail-slice ] bi@ [ zip ] keep concat-as ;
|
||||
|
||||
: (vmerge) ( u v -- h t )
|
||||
GENERIC: (vmerge) ( u v -- h t )
|
||||
M: object (vmerge)
|
||||
[ (vmerge-head) ] [ (vmerge-tail) ] 2bi ; inline
|
||||
|
||||
: vmerge ( u v -- w ) [ zip ] keep concat-as ;
|
||||
GENERIC: vmerge ( u v -- w )
|
||||
M: object vmerge [ zip ] keep concat-as ;
|
||||
|
||||
: vand ( u v -- w ) over '[ [ _ element>bool ] bi@ and ] 2map ;
|
||||
: vandn ( u v -- w ) over '[ [ _ element>bool ] bi@ [ not ] dip and ] 2map ;
|
||||
: vor ( u v -- w ) over '[ [ _ element>bool ] bi@ or ] 2map ;
|
||||
: vxor ( u v -- w ) over '[ [ _ element>bool ] bi@ xor ] 2map ;
|
||||
: vnot ( u -- w ) dup '[ _ element>bool not ] map ;
|
||||
GENERIC: vand ( u v -- w )
|
||||
M: object vand [ and ] 2map ;
|
||||
|
||||
: vall? ( v -- ? ) dup '[ _ element>bool ] all? ;
|
||||
: vany? ( v -- ? ) dup '[ _ element>bool ] any? ;
|
||||
: vnone? ( v -- ? ) dup '[ _ element>bool not ] all? ;
|
||||
GENERIC: vandn ( u v -- w )
|
||||
M: object vandn [ [ not ] dip and ] 2map ;
|
||||
|
||||
: v< ( u v -- w ) [ < ] 2map ;
|
||||
: v<= ( u v -- w ) [ <= ] 2map ;
|
||||
: v>= ( u v -- w ) [ >= ] 2map ;
|
||||
: v> ( u v -- w ) [ > ] 2map ;
|
||||
: vunordered? ( u v -- w ) [ unordered? ] 2map ;
|
||||
: v= ( u v -- w ) [ = ] 2map ;
|
||||
GENERIC: vor ( u v -- w )
|
||||
M: object vor [ or ] 2map ;
|
||||
|
||||
: v? ( mask true false -- result )
|
||||
GENERIC: vxor ( u v -- w )
|
||||
M: object vxor [ xor ] 2map ;
|
||||
|
||||
GENERIC: vnot ( u -- w )
|
||||
M: object vnot [ not ] map ;
|
||||
|
||||
GENERIC: vall? ( v -- ? )
|
||||
M: object vall? [ ] all? ;
|
||||
|
||||
GENERIC: vany? ( v -- ? )
|
||||
M: object vany? [ ] any? ;
|
||||
|
||||
GENERIC: vnone? ( v -- ? )
|
||||
M: object vnone? [ not ] all? ;
|
||||
|
||||
GENERIC: v< ( u v -- w )
|
||||
M: object v< [ < ] 2map ;
|
||||
|
||||
GENERIC: v<= ( u v -- w )
|
||||
M: object v<= [ <= ] 2map ;
|
||||
|
||||
GENERIC: v>= ( u v -- w )
|
||||
M: object v>= [ >= ] 2map ;
|
||||
|
||||
GENERIC: v> ( u v -- w )
|
||||
M: object v> [ > ] 2map ;
|
||||
|
||||
GENERIC: vunordered? ( u v -- w )
|
||||
M: object vunordered? [ unordered? ] 2map ;
|
||||
|
||||
GENERIC: v= ( u v -- w )
|
||||
M: object v= [ = ] 2map ;
|
||||
|
||||
GENERIC: v? ( mask true false -- result )
|
||||
M: object v?
|
||||
[ vand ] [ vandn ] bi-curry* bi vor ; inline
|
||||
|
||||
:: vif ( mask true-quot false-quot -- result )
|
||||
|
@ -157,15 +190,21 @@ M: simd-128 vshuffle ( u perm -- v )
|
|||
: vceiling ( u -- v ) [ ceiling ] map ;
|
||||
: vtruncate ( u -- v ) [ truncate ] map ;
|
||||
|
||||
: vsupremum ( seq -- vmax ) [ ] [ vmax ] map-reduce ;
|
||||
: vinfimum ( seq -- vmin ) [ ] [ vmin ] map-reduce ;
|
||||
: vsupremum ( seq -- vmax ) [ ] [ vmax ] map-reduce ; inline
|
||||
: vinfimum ( seq -- vmin ) [ ] [ vmin ] map-reduce ; inline
|
||||
|
||||
: v. ( u v -- x ) [ conjugate * ] [ + ] 2map-reduce ;
|
||||
: norm-sq ( v -- x ) [ absq ] [ + ] map-reduce ;
|
||||
: norm ( v -- x ) norm-sq sqrt ;
|
||||
: normalize ( u -- v ) dup norm v/n ;
|
||||
GENERIC: v. ( u v -- x )
|
||||
M: object v. [ conjugate * ] [ + ] 2map-reduce ;
|
||||
|
||||
: distance ( u v -- x ) [ - absq ] [ + ] 2map-reduce sqrt ;
|
||||
GENERIC: norm-sq ( v -- x )
|
||||
M: object norm-sq [ absq ] [ + ] map-reduce ;
|
||||
|
||||
: norm ( v -- x ) norm-sq sqrt ; inline
|
||||
|
||||
: normalize ( u -- v ) dup norm v/n ; inline
|
||||
|
||||
GENERIC: distance ( u v -- x )
|
||||
M: object distance [ - absq ] [ + ] 2map-reduce sqrt ;
|
||||
|
||||
: set-axis ( u v axis -- w )
|
||||
[ [ zero? 2over ? ] dip swap nth ] map-index 2nip ;
|
||||
|
@ -198,27 +237,27 @@ PRIVATE>
|
|||
: v~ ( a b epsilon -- ? )
|
||||
[ ~ ] curry 2all? ; inline
|
||||
|
||||
HINTS: vneg { array } ;
|
||||
HINTS: norm-sq { array } ;
|
||||
HINTS: M\ object vneg { array } ;
|
||||
HINTS: M\ object norm-sq { array } ;
|
||||
HINTS: norm { array } ;
|
||||
HINTS: normalize { array } ;
|
||||
HINTS: distance { array array } ;
|
||||
HINTS: M\ object distance { array array } ;
|
||||
|
||||
HINTS: n*v { object array } ;
|
||||
HINTS: v*n { array object } ;
|
||||
HINTS: n/v { array } ;
|
||||
HINTS: v/n { array object } ;
|
||||
HINTS: M\ object n*v { object array } ;
|
||||
HINTS: M\ object v*n { array object } ;
|
||||
HINTS: M\ object n/v { object array } ;
|
||||
HINTS: M\ object v/n { array object } ;
|
||||
|
||||
HINTS: v+ { array array } ;
|
||||
HINTS: v- { array array } ;
|
||||
HINTS: v* { array array } ;
|
||||
HINTS: v/ { array array } ;
|
||||
HINTS: vmax { array array } ;
|
||||
HINTS: vmin { array array } ;
|
||||
HINTS: v. { array array } ;
|
||||
HINTS: M\ object v+ { array array } ;
|
||||
HINTS: M\ object v- { array array } ;
|
||||
HINTS: M\ object v* { array array } ;
|
||||
HINTS: M\ object v/ { array array } ;
|
||||
HINTS: M\ object vmax { array array } ;
|
||||
HINTS: M\ object vmin { array array } ;
|
||||
HINTS: M\ object v. { array array } ;
|
||||
|
||||
HINTS: vlerp { array array array } ;
|
||||
HINTS: vnlerp { array array object } ;
|
||||
|
||||
HINTS: bilerp { object object object object array } ;
|
||||
HINTS: trilerp { object object object object object object object object array } ;
|
||||
|
||||
|
|
|
@ -4,7 +4,6 @@ USING: accessors alien.c-types kernel locals math math.ranges
|
|||
math.bitwise math.vectors math.vectors.simd random
|
||||
sequences specialized-arrays sequences.private classes.struct
|
||||
combinators.short-circuit fry ;
|
||||
SIMDS: uchar uint ;
|
||||
SPECIALIZED-ARRAY: uint
|
||||
SPECIALIZED-ARRAY: uint-4
|
||||
IN: random.sfmt
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
USING: sequences.cords strings tools.test kernel sequences ;
|
||||
IN: sequences.cords.tests
|
||||
|
||||
[ "hello world" ] [ "hello" " world" cord-append dup like ] unit-test
|
|
@ -0,0 +1,112 @@
|
|||
! Copyright (C) 2008 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors assocs sequences sorting binary-search fry math
|
||||
math.order arrays classes combinators kernel functors math.functions
|
||||
math.vectors ;
|
||||
IN: sequences.cords
|
||||
|
||||
MIXIN: cord
|
||||
|
||||
TUPLE: generic-cord
|
||||
{ head read-only } { tail read-only } ;
|
||||
INSTANCE: generic-cord cord
|
||||
|
||||
M: cord length
|
||||
[ head>> length ] [ tail>> length ] bi + ; inline
|
||||
|
||||
M: cord virtual-exemplar head>> ; inline
|
||||
|
||||
M: cord virtual@
|
||||
2dup head>> length <
|
||||
[ head>> ] [ [ head>> length - ] [ tail>> ] bi ] if ; inline
|
||||
|
||||
INSTANCE: cord virtual-sequence
|
||||
|
||||
GENERIC: cord-append ( seq1 seq2 -- cord )
|
||||
|
||||
M: object cord-append
|
||||
generic-cord boa ; inline
|
||||
|
||||
FUNCTOR: define-specialized-cord ( T C -- )
|
||||
|
||||
T-cord DEFINES-CLASS ${C}
|
||||
|
||||
WHERE
|
||||
|
||||
TUPLE: T-cord
|
||||
{ head T read-only } { tail T read-only } ;
|
||||
INSTANCE: T-cord cord
|
||||
|
||||
M: T cord-append
|
||||
2dup [ T instance? ] both?
|
||||
[ T-cord boa ] [ generic-cord boa ] if ; inline
|
||||
|
||||
;FUNCTOR
|
||||
|
||||
: cord-map ( cord quot -- cord' )
|
||||
[ [ head>> ] dip call ]
|
||||
[ [ tail>> ] dip call ] 2bi cord-append ; inline
|
||||
|
||||
: cord-2map ( cord cord quot -- cord' )
|
||||
[ [ [ head>> ] bi@ ] dip call ]
|
||||
[ [ [ tail>> ] bi@ ] dip call ] 3bi cord-append ; inline
|
||||
|
||||
: cord-both ( cord quot -- h t )
|
||||
[ [ head>> ] [ tail>> ] bi ] dip bi@ ; inline
|
||||
|
||||
: cord-2both ( cord cord quot -- h t )
|
||||
[ [ [ head>> ] bi@ ] dip call ]
|
||||
[ [ [ tail>> ] bi@ ] dip call ] 3bi ; inline
|
||||
|
||||
M: cord v+ [ v+ ] cord-2map ; inline
|
||||
M: cord v- [ v- ] cord-2map ; inline
|
||||
M: cord vneg [ vneg ] cord-map ; inline
|
||||
M: cord v+- [ v+- ] cord-2map ; inline
|
||||
M: cord vs+ [ vs+ ] cord-2map ; inline
|
||||
M: cord vs- [ vs- ] cord-2map ; inline
|
||||
M: cord vs* [ vs* ] cord-2map ; inline
|
||||
M: cord v* [ v* ] cord-2map ; inline
|
||||
M: cord v/ [ v/ ] cord-2map ; inline
|
||||
M: cord vmin [ vmin ] cord-2map ; inline
|
||||
M: cord vmax [ vmax ] cord-2map ; inline
|
||||
M: cord v. [ v. ] cord-2both + ; inline
|
||||
M: cord vsqrt [ vsqrt ] cord-map ; inline
|
||||
M: cord sum [ sum ] cord-both + ; inline
|
||||
M: cord vabs [ vabs ] cord-map ; inline
|
||||
M: cord vbitand [ vbitand ] cord-2map ; inline
|
||||
M: cord vbitandn [ vbitandn ] cord-2map ; inline
|
||||
M: cord vbitor [ vbitor ] cord-2map ; inline
|
||||
M: cord vbitxor [ vbitxor ] cord-2map ; inline
|
||||
M: cord vbitnot [ vbitnot ] cord-map ; inline
|
||||
M: cord vand [ vand ] cord-2map ; inline
|
||||
M: cord vandn [ vandn ] cord-2map ; inline
|
||||
M: cord vor [ vor ] cord-2map ; inline
|
||||
M: cord vxor [ vxor ] cord-2map ; inline
|
||||
M: cord vnot [ vnot ] cord-map ; inline
|
||||
M: cord vlshift '[ _ vlshift ] cord-map ; inline
|
||||
M: cord vrshift '[ _ vrshift ] cord-map ; inline
|
||||
M: cord (vmerge-head) [ head>> ] bi@ (vmerge) cord-append ; inline
|
||||
M: cord (vmerge-tail) [ tail>> ] bi@ (vmerge) cord-append ; inline
|
||||
M: cord v<= [ v<= ] cord-2map ; inline
|
||||
M: cord v< [ v< ] cord-2map ; inline
|
||||
M: cord v= [ v= ] cord-2map ; inline
|
||||
M: cord v> [ v> ] cord-2map ; inline
|
||||
M: cord v>= [ v>= ] cord-2map ; inline
|
||||
M: cord vunordered? [ vunordered? ] cord-2map ; inline
|
||||
M: cord vany? [ vany? ] cord-both or ; inline
|
||||
M: cord vall? [ vall? ] cord-both and ; inline
|
||||
M: cord vnone? [ vnone? ] cord-both and ; inline
|
||||
|
||||
M: cord n+v [ n+v ] with cord-map ; inline
|
||||
M: cord n-v [ n-v ] with cord-map ; inline
|
||||
M: cord n*v [ n*v ] with cord-map ; inline
|
||||
M: cord n/v [ n/v ] with cord-map ; inline
|
||||
M: cord v+n '[ _ v+n ] cord-map ; inline
|
||||
M: cord v-n '[ _ v-n ] cord-map ; inline
|
||||
M: cord v*n '[ _ v*n ] cord-map ; inline
|
||||
M: cord v/n '[ _ v/n ] cord-map ; inline
|
||||
|
||||
M: cord norm-sq [ norm-sq ] cord-both + ; inline
|
||||
M: cord distance v- norm ; inline
|
||||
|
||||
|
|
@ -4,5 +4,3 @@ USING: mirrors specialized-arrays math.vectors ;
|
|||
IN: specialized-arrays.mirrors
|
||||
|
||||
INSTANCE: specialized-array enumerated-sequence
|
||||
INSTANCE: simd-128 enumerated-sequence
|
||||
INSTANCE: simd-256 enumerated-sequence
|
||||
|
|
|
@ -2,8 +2,7 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors alien alien.c-types alien.data alien.parser
|
||||
assocs byte-arrays classes compiler.units functors kernel lexer
|
||||
libc math math.vectors math.vectors.private
|
||||
math.vectors.specialization namespaces
|
||||
libc math math.vectors math.vectors.private namespaces
|
||||
parser prettyprint.custom sequences sequences.private strings
|
||||
summary vocabs vocabs.loader vocabs.parser vocabs.generated
|
||||
words fry combinators make ;
|
||||
|
@ -69,8 +68,6 @@ TUPLE: A
|
|||
[ drop \ T bad-byte-array-length ] unless
|
||||
<direct-A> ; inline
|
||||
|
||||
M: A new-underlying drop byte-array>A ;
|
||||
|
||||
M: A clone [ underlying>> clone ] [ length>> ] bi <direct-A> ; inline
|
||||
|
||||
M: A length length>> ; inline
|
||||
|
@ -96,8 +93,6 @@ M: A resize
|
|||
|
||||
M: A byte-length length \ T heap-size * ; inline
|
||||
|
||||
M: A element-type drop \ T ; inline
|
||||
|
||||
M: A direct-array-syntax drop \ A@ ;
|
||||
|
||||
M: A pprint-delims drop \ A{ \ } ;
|
||||
|
@ -109,8 +104,6 @@ SYNTAX: A@ scan-object scan-object <direct-A> suffix! ;
|
|||
|
||||
INSTANCE: A specialized-array
|
||||
|
||||
A T c-type-boxed-class f specialize-vector-words
|
||||
|
||||
;FUNCTOR
|
||||
|
||||
GENERIC: (underlying-type) ( c-type -- c-type' )
|
||||
|
|
|
@ -3,7 +3,8 @@ USING: accessors urls io.encodings.ascii io.files math.parser
|
|||
http.client kernel ;
|
||||
|
||||
: deploy-test-5 ( -- )
|
||||
URL" http://apple.com"
|
||||
URL" http://localhost/foo.html" clone
|
||||
"resource:port-number" ascii file-contents string>number >>port
|
||||
http-get 2drop ;
|
||||
|
||||
MAIN: deploy-test-5
|
||||
|
|
|
@ -13,6 +13,9 @@ ERROR: not-in-a-method-error ;
|
|||
: create-method-in ( class generic -- method )
|
||||
create-method dup set-word dup save-location ;
|
||||
|
||||
: define-inline-method ( class generic quot -- )
|
||||
[ create-method-in ] dip [ define ] [ drop make-inline ] 2bi ;
|
||||
|
||||
: CREATE-METHOD ( -- method )
|
||||
scan-word bootstrap-word scan-word create-method-in ;
|
||||
|
||||
|
|
|
@ -929,7 +929,8 @@ PRIVATE>
|
|||
: trim ( seq quot -- newseq )
|
||||
[ trim-slice ] [ drop ] 2bi like ; inline
|
||||
|
||||
: sum ( seq -- n ) 0 [ + ] binary-reduce ;
|
||||
GENERIC: sum ( seq -- n )
|
||||
M: object sum 0 [ + ] binary-reduce ; inline
|
||||
|
||||
: product ( seq -- n ) 1 [ * ] binary-reduce ;
|
||||
|
||||
|
|
|
@ -3,7 +3,6 @@ USING: alien.data.map fry generalizations kernel locals math.vectors
|
|||
math.vectors.conversion math math.vectors.simd sequences
|
||||
specialized-arrays tools.test ;
|
||||
FROM: alien.c-types => uchar short int float ;
|
||||
SIMDS: float int short uchar ;
|
||||
SPECIALIZED-ARRAYS: int float float-4 uchar-16 ;
|
||||
IN: alien.data.map.tests
|
||||
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
USING: kernel locals math math.matrices.simd math.order math.vectors
|
||||
math.vectors.simd prettyprint sequences typed ;
|
||||
QUALIFIED-WITH: alien.c-types c
|
||||
SIMD: c:float
|
||||
IN: benchmark.3d-matrix-vector
|
||||
|
||||
: v2min ( xy -- xx )
|
||||
|
|
|
@ -2,9 +2,8 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors alien.c-types fry kernel locals math
|
||||
math.constants math.functions math.vectors math.vectors.simd
|
||||
prettyprint combinators.smart sequences hints classes.struct
|
||||
specialized-arrays ;
|
||||
SIMD: double
|
||||
math.vectors.simd.cords prettyprint combinators.smart sequences
|
||||
hints classes.struct specialized-arrays ;
|
||||
IN: benchmark.nbody-simd
|
||||
|
||||
: solar-mass ( -- x ) 4 pi sq * ; inline
|
||||
|
|
|
@ -3,10 +3,9 @@
|
|||
|
||||
USING: arrays accessors io io.files io.files.temp
|
||||
io.encodings.binary kernel math math.constants math.functions
|
||||
math.vectors math.vectors.simd math.parser make sequences
|
||||
sequences.private words hints classes.struct ;
|
||||
math.vectors math.vectors.simd math.vectors.simd.cords math.parser
|
||||
make sequences sequences.private words hints classes.struct ;
|
||||
QUALIFIED-WITH: alien.c-types c
|
||||
SIMD: c:double
|
||||
IN: benchmark.raytracer-simd
|
||||
|
||||
! parameters
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
USING: kernel io math math.functions math.parser math.vectors
|
||||
math.vectors.simd sequences specialized-arrays ;
|
||||
QUALIFIED-WITH: alien.c-types c
|
||||
SIMD: c:float
|
||||
SPECIALIZED-ARRAY: float-4
|
||||
IN: benchmark.simd-1
|
||||
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
! (c)Joe Groff bsd license
|
||||
USING: io kernel math.vectors.simd terrain.generation threads ;
|
||||
FROM: alien.c-types => float ;
|
||||
SIMD: float
|
||||
IN: benchmark.terrain-generation
|
||||
|
||||
: terrain-generation-benchmark ( -- )
|
||||
|
|
|
@ -11,7 +11,6 @@ specialized-vectors ;
|
|||
FROM: alien.c-types => float ;
|
||||
SPECIALIZED-ARRAY: float
|
||||
SPECIALIZED-VECTOR: uint
|
||||
SIMD: float
|
||||
IN: gpu.demos.bunny
|
||||
|
||||
GLSL-SHADER-FILE: bunny-vertex-shader vertex-shader "bunny.v.glsl"
|
||||
|
|
|
@ -3,7 +3,6 @@ USING: accessors alien.data.map arrays destructors fry grouping
|
|||
kernel math math.ranges math.vectors.simd opengl opengl.gl sequences
|
||||
sequences.product specialized-arrays ;
|
||||
FROM: alien.c-types => float ;
|
||||
SIMD: float
|
||||
SPECIALIZED-ARRAY: float-4
|
||||
IN: grid-meshes
|
||||
|
||||
|
|
|
@ -3,7 +3,6 @@ USING: classes.struct math.matrices.simd math.vectors.simd math
|
|||
literals math.constants math.functions specialized-arrays tools.test ;
|
||||
QUALIFIED-WITH: alien.c-types c
|
||||
FROM: math.matrices => m~ ;
|
||||
SIMD: c:float
|
||||
SPECIALIZED-ARRAY: float-4
|
||||
IN: math.matrices.simd.tests
|
||||
|
||||
|
|
|
@ -4,7 +4,6 @@ math math.combinatorics math.functions math.matrices.simd math.vectors
|
|||
math.vectors.simd sequences sequences.private specialized-arrays
|
||||
typed ;
|
||||
QUALIFIED-WITH: alien.c-types c
|
||||
SIMD: c:float
|
||||
SPECIALIZED-ARRAY: float-4
|
||||
IN: math.matrices.simd
|
||||
|
||||
|
|
|
@ -4,7 +4,6 @@ math.libm math.matrices.simd math.vectors math.vectors.conversion math.vectors.s
|
|||
memoize random random.mersenne-twister sequences sequences.private specialized-arrays
|
||||
typed ;
|
||||
QUALIFIED-WITH: alien.c-types c
|
||||
SIMDS: c:float c:int c:short c:ushort c:uchar ;
|
||||
SPECIALIZED-ARRAYS: c:float c:uchar float-4 uchar-16 ;
|
||||
IN: noise
|
||||
|
||||
|
|
|
@ -3,7 +3,6 @@ combinators.smart fry grouping images kernel math
|
|||
math.matrices.simd math.order math.vectors noise random
|
||||
sequences math.vectors.simd typed ;
|
||||
FROM: alien.c-types => float uchar ;
|
||||
SIMDS: float uchar ;
|
||||
IN: terrain.generation
|
||||
|
||||
CONSTANT: terrain-segment-size { 512 512 }
|
||||
|
|
|
@ -11,7 +11,6 @@ math.matrices.simd noise ui.gestures combinators.short-circuit
|
|||
destructors grid-meshes math.vectors.simd ;
|
||||
QUALIFIED-WITH: alien.c-types c
|
||||
SPECIALIZED-ARRAY: c:float
|
||||
SIMD: c:float
|
||||
IN: terrain
|
||||
|
||||
CONSTANT: FOV $[ 2.0 sqrt 1 + ]
|
||||
|
|
|
@ -86,6 +86,15 @@ cell factor_vm::frame_executing(stack_frame *frame)
|
|||
return frame_code(frame)->owner;
|
||||
}
|
||||
|
||||
cell factor_vm::frame_executing_quot(stack_frame *frame)
|
||||
{
|
||||
tagged<object> executing(frame_executing(frame));
|
||||
code_block *compiled = frame_code(frame);
|
||||
if(!compiled->optimized_p() && executing->type() == WORD_TYPE)
|
||||
executing = executing.as<word>()->def;
|
||||
return executing.value();
|
||||
}
|
||||
|
||||
stack_frame *factor_vm::frame_successor(stack_frame *frame)
|
||||
{
|
||||
check_frame(frame);
|
||||
|
@ -133,7 +142,7 @@ struct stack_frame_accumulator {
|
|||
|
||||
void operator()(stack_frame *frame)
|
||||
{
|
||||
data_root<object> executing(parent->frame_executing(frame),parent);
|
||||
data_root<object> executing(parent->frame_executing_quot(frame),parent);
|
||||
data_root<object> scan(parent->frame_scan(frame),parent);
|
||||
|
||||
frames.add(executing.value());
|
||||
|
@ -166,23 +175,18 @@ stack_frame *factor_vm::innermost_stack_frame(callstack *stack)
|
|||
return frame;
|
||||
}
|
||||
|
||||
stack_frame *factor_vm::innermost_stack_frame_quot(callstack *callstack)
|
||||
{
|
||||
stack_frame *inner = innermost_stack_frame(callstack);
|
||||
tagged<quotation>(frame_executing(inner)).untag_check(this);
|
||||
return inner;
|
||||
}
|
||||
|
||||
/* Some primitives implementing a limited form of callstack mutation.
|
||||
Used by the single stepper. */
|
||||
void factor_vm::primitive_innermost_stack_frame_executing()
|
||||
{
|
||||
dpush(frame_executing(innermost_stack_frame(untag_check<callstack>(dpop()))));
|
||||
stack_frame *frame = innermost_stack_frame(untag_check<callstack>(dpop()));
|
||||
dpush(frame_executing_quot(frame));
|
||||
}
|
||||
|
||||
void factor_vm::primitive_innermost_stack_frame_scan()
|
||||
{
|
||||
dpush(frame_scan(innermost_stack_frame_quot(untag_check<callstack>(dpop()))));
|
||||
stack_frame *frame = innermost_stack_frame(untag_check<callstack>(dpop()));
|
||||
dpush(frame_scan(frame));
|
||||
}
|
||||
|
||||
void factor_vm::primitive_set_innermost_stack_frame_quot()
|
||||
|
@ -195,7 +199,7 @@ void factor_vm::primitive_set_innermost_stack_frame_quot()
|
|||
|
||||
jit_compile_quot(quot.value(),true);
|
||||
|
||||
stack_frame *inner = innermost_stack_frame_quot(callstack.untagged());
|
||||
stack_frame *inner = innermost_stack_frame(callstack.untagged());
|
||||
cell offset = (char *)FRAME_RETURN_ADDRESS(inner,this) - (char *)inner->xt;
|
||||
inner->xt = quot->xt;
|
||||
FRAME_RETURN_ADDRESS(inner,this) = (char *)quot->xt + offset;
|
||||
|
|
|
@ -20,7 +20,7 @@ fixnum instruction_operand::load_value_masked(cell mask, cell bits, cell shift)
|
|||
{
|
||||
fixnum *ptr = (fixnum *)pointer;
|
||||
|
||||
return (((*ptr & mask) << bits) >> bits) << shift;
|
||||
return (((*ptr & (fixnum)mask) << bits) >> bits) << shift;
|
||||
}
|
||||
|
||||
fixnum instruction_operand::load_value(cell relative_to)
|
||||
|
@ -36,7 +36,7 @@ fixnum instruction_operand::load_value(cell relative_to)
|
|||
case RC_ABSOLUTE_PPC_2_2:
|
||||
return load_value_2_2();
|
||||
case RC_ABSOLUTE_PPC_2:
|
||||
return load_value_masked(rel_absolute_ppc_2_mask,0,0);
|
||||
return load_value_masked(rel_absolute_ppc_2_mask,16,0);
|
||||
case RC_RELATIVE_PPC_2:
|
||||
return load_value_masked(rel_relative_ppc_2_mask,16,0) + relative_to;
|
||||
case RC_RELATIVE_PPC_3:
|
||||
|
@ -80,12 +80,6 @@ void instruction_operand::store_value_2_2(fixnum value)
|
|||
void instruction_operand::store_value_masked(fixnum value, cell mask, cell shift)
|
||||
{
|
||||
cell *ptr = (cell *)pointer;
|
||||
|
||||
/* This is unaccurate but good enough */
|
||||
fixnum test = (fixnum)mask >> 1;
|
||||
if(value <= -test || value >= test)
|
||||
critical_error("Value does not fit inside relocation",0);
|
||||
|
||||
*ptr = ((*ptr & ~mask) | ((value >> shift) & mask));
|
||||
}
|
||||
|
||||
|
|
|
@ -568,11 +568,11 @@ struct factor_vm
|
|||
code_block *frame_code(stack_frame *frame);
|
||||
code_block_type frame_type(stack_frame *frame);
|
||||
cell frame_executing(stack_frame *frame);
|
||||
cell frame_executing_quot(stack_frame *frame);
|
||||
stack_frame *frame_successor(stack_frame *frame);
|
||||
cell frame_scan(stack_frame *frame);
|
||||
void primitive_callstack_to_array();
|
||||
stack_frame *innermost_stack_frame(callstack *stack);
|
||||
stack_frame *innermost_stack_frame_quot(callstack *callstack);
|
||||
void primitive_innermost_stack_frame_executing();
|
||||
void primitive_innermost_stack_frame_scan();
|
||||
void primitive_set_innermost_stack_frame_quot();
|
||||
|
|
Loading…
Reference in New Issue