alien.complex: Fix functor
parent
1ca1a9b6b3
commit
1e9b407037
|
@ -1,12 +1,12 @@
|
||||||
! Copyright (C) 2009 Slava Pestov.
|
! Copyright (C) 2009 Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors alien.c-types alien.complex.functor kernel
|
USING: accessors alien alien.c-types alien.complex.functor
|
||||||
sequences ;
|
classes.struct kernel math quotations ;
|
||||||
|
FROM: alien.c-types => float double ;
|
||||||
IN: alien.complex
|
IN: alien.complex
|
||||||
|
|
||||||
<<
|
COMPLEX-TYPE: float complex-float
|
||||||
{ "float" "double" } [ dup "complex-" prepend define-complex-type ] each
|
COMPLEX-TYPE: double complex-double
|
||||||
>>
|
|
||||||
|
|
||||||
<<
|
<<
|
||||||
! This overrides the fact that small structures are never returned
|
! This overrides the fact that small structures are never returned
|
||||||
|
|
|
@ -3,20 +3,25 @@
|
||||||
USING: functors2 ;
|
USING: functors2 ;
|
||||||
IN: alien.complex.functor
|
IN: alien.complex.functor
|
||||||
|
|
||||||
FUNCTOR: define-complex-type ( N: name T: name -- ) [[
|
SAME-FUNCTOR: complex-type ( N: existing-word T: name -- ) [[
|
||||||
|
USING: alien alien.c-types classes.struct kernel quotations ;
|
||||||
|
QUALIFIED: math
|
||||||
|
|
||||||
STRUCT: ${T}-class { real ${N}-type } { imaginary ${N}-type } ;
|
<<
|
||||||
|
STRUCT: ${T} { real ${N} } { imaginary ${N} } ;
|
||||||
|
|
||||||
: <${T}> ( z -- alien )
|
: <${T}> ( z -- alien )
|
||||||
>rect ${T}-class <struct-boa> >c-ptr ;
|
math:>rect ${T} <struct-boa> >c-ptr ;
|
||||||
|
|
||||||
: *${T} ( alien -- z )
|
: *${T} ( alien -- z )
|
||||||
T-class memory>struct [ real>> ] [ imaginary>> ] bi rect> ; inline
|
${T} memory>struct [ real>> ] [ imaginary>> ] bi math:rect> ; inline
|
||||||
|
|
||||||
${T}-class lookup-c-type
|
>>
|
||||||
<${T}> 1quotation >>unboxer-quot
|
|
||||||
*${T} 1quotation >>boxer-quot
|
\ ${T} lookup-c-type
|
||||||
complex >>boxed-class
|
[ <${T}> ] >>unboxer-quot
|
||||||
drop
|
[ *${T} ] >>boxer-quot
|
||||||
|
complex >>boxed-class
|
||||||
|
drop
|
||||||
|
|
||||||
]]
|
]]
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Slava Pestov
|
|
@ -0,0 +1,163 @@
|
||||||
|
! Copyright (C) 2009 Joe Groff.
|
||||||
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
|
USING: accessors arrays classes combinators
|
||||||
|
compiler.cfg.instructions compiler.cfg.registers
|
||||||
|
compiler.cfg.stacks compiler.cfg.stacks.local
|
||||||
|
compiler.tree.propagation.info cpu.architecture fry
|
||||||
|
generalizations kernel locals macros make math quotations
|
||||||
|
sequences sequences.generalizations ;
|
||||||
|
IN: compiler.cfg.intrinsics.simd.backend
|
||||||
|
|
||||||
|
! Selection of implementation based on available CPU instructions
|
||||||
|
|
||||||
|
GENERIC: insn-available? ( ## -- reps )
|
||||||
|
|
||||||
|
M: object insn-available? drop t ;
|
||||||
|
|
||||||
|
M: ##zero-vector insn-available? rep>> %zero-vector-reps member? ;
|
||||||
|
M: ##fill-vector insn-available? rep>> %fill-vector-reps member? ;
|
||||||
|
M: ##gather-vector-2 insn-available? rep>> %gather-vector-2-reps member? ;
|
||||||
|
M: ##gather-vector-4 insn-available? rep>> %gather-vector-4-reps member? ;
|
||||||
|
M: ##gather-int-vector-2 insn-available? rep>> %gather-int-vector-2-reps member? ;
|
||||||
|
M: ##gather-int-vector-4 insn-available? rep>> %gather-int-vector-4-reps member? ;
|
||||||
|
M: ##select-vector insn-available? rep>> %select-vector-reps member? ;
|
||||||
|
M: ##store-memory-imm insn-available? rep>> %alien-vector-reps member? ;
|
||||||
|
M: ##shuffle-vector insn-available? rep>> %shuffle-vector-reps member? ;
|
||||||
|
M: ##shuffle-vector-imm insn-available? rep>> %shuffle-vector-imm-reps member? ;
|
||||||
|
M: ##shuffle-vector-halves-imm insn-available? rep>> %shuffle-vector-halves-imm-reps member? ;
|
||||||
|
M: ##merge-vector-head insn-available? rep>> %merge-vector-reps member? ;
|
||||||
|
M: ##merge-vector-tail insn-available? rep>> %merge-vector-reps member? ;
|
||||||
|
M: ##float-pack-vector insn-available? rep>> %float-pack-vector-reps member? ;
|
||||||
|
M: ##signed-pack-vector insn-available? rep>> %signed-pack-vector-reps member? ;
|
||||||
|
M: ##unsigned-pack-vector insn-available? rep>> %unsigned-pack-vector-reps member? ;
|
||||||
|
M: ##unpack-vector-head insn-available? rep>> %unpack-vector-head-reps member? ;
|
||||||
|
M: ##unpack-vector-tail insn-available? rep>> %unpack-vector-tail-reps member? ;
|
||||||
|
M: ##tail>head-vector insn-available? rep>> %unpack-vector-head-reps member? ;
|
||||||
|
M: ##integer>float-vector insn-available? rep>> %integer>float-vector-reps member? ;
|
||||||
|
M: ##float>integer-vector insn-available? rep>> %float>integer-vector-reps member? ;
|
||||||
|
M: ##compare-vector insn-available? [ rep>> ] [ cc>> ] bi %compare-vector-reps member? ;
|
||||||
|
M: ##move-vector-mask insn-available? rep>> %move-vector-mask-reps member? ;
|
||||||
|
M: ##test-vector insn-available? rep>> %test-vector-reps member? ;
|
||||||
|
M: ##add-vector insn-available? rep>> %add-vector-reps member? ;
|
||||||
|
M: ##saturated-add-vector insn-available? rep>> %saturated-add-vector-reps member? ;
|
||||||
|
M: ##add-sub-vector insn-available? rep>> %add-sub-vector-reps member? ;
|
||||||
|
M: ##sub-vector insn-available? rep>> %sub-vector-reps member? ;
|
||||||
|
M: ##saturated-sub-vector insn-available? rep>> %saturated-sub-vector-reps member? ;
|
||||||
|
M: ##mul-vector insn-available? rep>> %mul-vector-reps member? ;
|
||||||
|
M: ##mul-high-vector insn-available? rep>> %mul-high-vector-reps member? ;
|
||||||
|
M: ##mul-horizontal-add-vector insn-available? rep>> %mul-horizontal-add-vector-reps member? ;
|
||||||
|
M: ##saturated-mul-vector insn-available? rep>> %saturated-mul-vector-reps member? ;
|
||||||
|
M: ##div-vector insn-available? rep>> %div-vector-reps member? ;
|
||||||
|
M: ##min-vector insn-available? rep>> %min-vector-reps member? ;
|
||||||
|
M: ##max-vector insn-available? rep>> %max-vector-reps member? ;
|
||||||
|
M: ##avg-vector insn-available? rep>> %avg-vector-reps member? ;
|
||||||
|
M: ##dot-vector insn-available? rep>> %dot-vector-reps member? ;
|
||||||
|
M: ##sad-vector insn-available? rep>> %sad-vector-reps member? ;
|
||||||
|
M: ##sqrt-vector insn-available? rep>> %sqrt-vector-reps member? ;
|
||||||
|
M: ##horizontal-add-vector insn-available? rep>> %horizontal-add-vector-reps member? ;
|
||||||
|
M: ##horizontal-sub-vector insn-available? rep>> %horizontal-sub-vector-reps member? ;
|
||||||
|
M: ##abs-vector insn-available? rep>> %abs-vector-reps member? ;
|
||||||
|
M: ##and-vector insn-available? rep>> %and-vector-reps member? ;
|
||||||
|
M: ##andn-vector insn-available? rep>> %andn-vector-reps member? ;
|
||||||
|
M: ##or-vector insn-available? rep>> %or-vector-reps member? ;
|
||||||
|
M: ##xor-vector insn-available? rep>> %xor-vector-reps member? ;
|
||||||
|
M: ##not-vector insn-available? rep>> %not-vector-reps member? ;
|
||||||
|
M: ##shl-vector insn-available? rep>> %shl-vector-reps member? ;
|
||||||
|
M: ##shr-vector insn-available? rep>> %shr-vector-reps member? ;
|
||||||
|
M: ##shl-vector-imm insn-available? rep>> %shl-vector-imm-reps member? ;
|
||||||
|
M: ##shr-vector-imm insn-available? rep>> %shr-vector-imm-reps member? ;
|
||||||
|
M: ##horizontal-shl-vector-imm insn-available? rep>> %horizontal-shl-vector-imm-reps member? ;
|
||||||
|
M: ##horizontal-shr-vector-imm insn-available? rep>> %horizontal-shr-vector-imm-reps member? ;
|
||||||
|
|
||||||
|
: [vector-op-checked] ( #dup quot -- quot )
|
||||||
|
'[ _ ndup [ @ ] { } make dup [ insn-available? ] all? ] ;
|
||||||
|
|
||||||
|
GENERIC#: >vector-op-cond 2 ( quot #pick #dup -- quotpair )
|
||||||
|
M:: callable >vector-op-cond ( quot #pick #dup -- quotpair )
|
||||||
|
#dup quot [vector-op-checked] '[ 2drop @ ]
|
||||||
|
#dup '[ % _ nnip ]
|
||||||
|
2array ;
|
||||||
|
|
||||||
|
M:: pair >vector-op-cond ( pair #pick #dup -- quotpair )
|
||||||
|
pair first2 :> ( class quot )
|
||||||
|
#pick class #dup quot [vector-op-checked]
|
||||||
|
'[ 2drop _ npick _ instance? _ [ f f f ] if ]
|
||||||
|
#dup '[ % _ nnip ]
|
||||||
|
2array ;
|
||||||
|
|
||||||
|
MACRO: v-vector-op ( trials -- quot )
|
||||||
|
[ 1 2 >vector-op-cond ] map '[ f f _ cond ] ;
|
||||||
|
MACRO: vl-vector-op ( trials -- quot )
|
||||||
|
[ 1 3 >vector-op-cond ] map '[ f f _ cond ] ;
|
||||||
|
MACRO: vvl-vector-op ( trials -- quot )
|
||||||
|
[ 1 4 >vector-op-cond ] map '[ f f _ cond ] ;
|
||||||
|
MACRO: vv-vector-op ( trials -- quot )
|
||||||
|
[ 1 3 >vector-op-cond ] map '[ f f _ cond ] ;
|
||||||
|
MACRO: vv-cc-vector-op ( trials -- quot )
|
||||||
|
[ 2 4 >vector-op-cond ] map '[ f f _ cond ] ;
|
||||||
|
MACRO: vvvv-vector-op ( trials -- quot )
|
||||||
|
[ 1 5 >vector-op-cond ] map '[ f f _ cond ] ;
|
||||||
|
|
||||||
|
! Intrinsic code emission
|
||||||
|
|
||||||
|
MACRO: check-elements ( quots -- quot )
|
||||||
|
[ length '[ _ firstn ] ]
|
||||||
|
[ '[ _ spread ] ]
|
||||||
|
[ length 1 - \ and <repetition> [ ] like ]
|
||||||
|
tri 3append ;
|
||||||
|
|
||||||
|
ERROR: bad-simd-intrinsic node ;
|
||||||
|
|
||||||
|
MACRO: if-literals-match ( quots -- quot )
|
||||||
|
[ length ] [ ] [ length ] tri
|
||||||
|
! n quots n
|
||||||
|
'[
|
||||||
|
! node quot
|
||||||
|
[
|
||||||
|
dup node-input-infos
|
||||||
|
_ tail-slice* [ literal>> ] map
|
||||||
|
dup _ check-elements
|
||||||
|
] dip
|
||||||
|
swap [
|
||||||
|
! node literals quot
|
||||||
|
[ _ firstn ] dip call
|
||||||
|
drop
|
||||||
|
] [ 2drop bad-simd-intrinsic ] if
|
||||||
|
] ;
|
||||||
|
|
||||||
|
CONSTANT: unary [ ds-drop ds-pop ]
|
||||||
|
CONSTANT: unary/param [ [ -2 <ds-loc> inc-stack ds-pop ] dip ]
|
||||||
|
CONSTANT: binary [ ds-drop 2inputs ]
|
||||||
|
CONSTANT: binary/param [ [ -2 <ds-loc> inc-stack 2inputs ] dip ]
|
||||||
|
CONSTANT: quaternary
|
||||||
|
[
|
||||||
|
ds-drop
|
||||||
|
d: 3 peek-loc
|
||||||
|
d: 2 peek-loc
|
||||||
|
d: 1 peek-loc
|
||||||
|
d: 0 peek-loc
|
||||||
|
-4 <ds-loc> inc-stack
|
||||||
|
]
|
||||||
|
|
||||||
|
:: emit-vector-op ( trials params-quot op-quot literal-preds -- quot )
|
||||||
|
params-quot trials op-quot literal-preds
|
||||||
|
'[ [ _ dip _ @ ds-push ] _ if-literals-match ] ;
|
||||||
|
|
||||||
|
MACRO: emit-v-vector-op ( trials -- quot )
|
||||||
|
unary [ v-vector-op ] { [ representation? ] } emit-vector-op ;
|
||||||
|
MACRO: emit-vl-vector-op ( trials literal-pred -- quot )
|
||||||
|
[ unary/param [ vl-vector-op ] { [ representation? ] } ] dip prefix emit-vector-op ;
|
||||||
|
MACRO: emit-vv-vector-op ( trials -- quot )
|
||||||
|
binary [ vv-vector-op ] { [ representation? ] } emit-vector-op ;
|
||||||
|
MACRO: emit-vvl-vector-op ( trials literal-pred -- quot )
|
||||||
|
[ binary/param [ vvl-vector-op ] { [ representation? ] } ] dip prefix emit-vector-op ;
|
||||||
|
MACRO: emit-vvvv-vector-op ( trials -- quot )
|
||||||
|
quaternary [ vvvv-vector-op ] { [ representation? ] } emit-vector-op ;
|
||||||
|
|
||||||
|
MACRO:: emit-vv-or-vl-vector-op ( var-trials imm-trials literal-pred -- quot )
|
||||||
|
literal-pred imm-trials literal-pred var-trials
|
||||||
|
'[
|
||||||
|
dup node-input-infos 2 tail-slice* first literal>> @
|
||||||
|
[ _ _ emit-vl-vector-op ]
|
||||||
|
[ _ emit-vv-vector-op ] if
|
||||||
|
] ;
|
|
@ -0,0 +1,8 @@
|
||||||
|
USING: compiler.cfg.instructions compiler.tree help.markup help.syntax
|
||||||
|
math.vectors ;
|
||||||
|
IN: compiler.cfg.intrinsics.simd
|
||||||
|
|
||||||
|
HELP: emit-simd-v+
|
||||||
|
{ $values { "node" node } }
|
||||||
|
{ $description "Emits instructions for SIMD vector addition." }
|
||||||
|
{ $see-also ##add-vector v+ } ;
|
|
@ -0,0 +1,630 @@
|
||||||
|
! Copyright (C) 2009 Joe Groff.
|
||||||
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
|
USING: accessors arrays assocs biassocs byte-arrays classes
|
||||||
|
compiler.cfg compiler.cfg.comparisons compiler.cfg.instructions
|
||||||
|
compiler.cfg.intrinsics.simd compiler.cfg.intrinsics.simd.backend
|
||||||
|
compiler.cfg.stacks.local compiler.test compiler.tree
|
||||||
|
compiler.tree.propagation.info cpu.architecture fry kernel locals make
|
||||||
|
namespaces sequences system tools.test words ;
|
||||||
|
IN: compiler.cfg.intrinsics.simd.tests
|
||||||
|
|
||||||
|
:: test-node ( rep -- node )
|
||||||
|
T{ #call
|
||||||
|
{ in-d { 1 2 3 4 } }
|
||||||
|
{ out-d { 5 } }
|
||||||
|
{ info H{
|
||||||
|
{ 1 T{ value-info-state { class byte-array } } }
|
||||||
|
{ 2 T{ value-info-state { class byte-array } } }
|
||||||
|
{ 3 T{ value-info-state { class byte-array } } }
|
||||||
|
{ 4 T{ value-info-state { class word } { literal? t } { literal rep } } }
|
||||||
|
{ 5 T{ value-info-state { class byte-array } } }
|
||||||
|
} }
|
||||||
|
} ;
|
||||||
|
|
||||||
|
:: test-node-literal ( lit rep -- node )
|
||||||
|
lit class-of :> lit-class
|
||||||
|
T{ #call
|
||||||
|
{ in-d { 1 2 3 4 } }
|
||||||
|
{ out-d { 5 } }
|
||||||
|
{ info H{
|
||||||
|
{ 1 T{ value-info-state { class byte-array } } }
|
||||||
|
{ 2 T{ value-info-state { class byte-array } } }
|
||||||
|
{ 3 T{ value-info-state { class lit-class } { literal? t } { literal lit } } }
|
||||||
|
{ 4 T{ value-info-state { class word } { literal? t } { literal rep } } }
|
||||||
|
{ 5 T{ value-info-state { class byte-array } } }
|
||||||
|
} }
|
||||||
|
} ;
|
||||||
|
|
||||||
|
: test-node-nonliteral-rep ( -- node )
|
||||||
|
T{ #call
|
||||||
|
{ in-d { 1 2 3 4 } }
|
||||||
|
{ out-d { 5 } }
|
||||||
|
{ info H{
|
||||||
|
{ 1 T{ value-info-state { class byte-array } } }
|
||||||
|
{ 2 T{ value-info-state { class byte-array } } }
|
||||||
|
{ 3 T{ value-info-state { class byte-array } } }
|
||||||
|
{ 4 T{ value-info-state { class object } } }
|
||||||
|
{ 5 T{ value-info-state { class byte-array } } }
|
||||||
|
} }
|
||||||
|
} ;
|
||||||
|
|
||||||
|
: test-compiler-env ( -- x )
|
||||||
|
H{ } clone
|
||||||
|
T{ basic-block } 0 0 0 0 height-state boa >>height
|
||||||
|
\ basic-block pick set-at
|
||||||
|
|
||||||
|
0 0 0 0 height-state boa \ height-state pick set-at
|
||||||
|
HS{ } clone \ local-peek-set pick set-at
|
||||||
|
H{ } clone \ replaces pick set-at
|
||||||
|
H{ } <biassoc> \ locs>vregs pick set-at ;
|
||||||
|
|
||||||
|
: make-classes ( quot -- seq )
|
||||||
|
{ } make [ class-of ] map ; inline
|
||||||
|
|
||||||
|
: test-emit ( cpu rep quot -- node )
|
||||||
|
[
|
||||||
|
[ new \ cpu ] 2dip '[
|
||||||
|
test-compiler-env [ _ test-node @ ] with-variables
|
||||||
|
] with-variable
|
||||||
|
] make-classes ; inline
|
||||||
|
|
||||||
|
: test-emit-literal ( cpu lit rep quot -- node )
|
||||||
|
[
|
||||||
|
[ new \ cpu ] 3dip '[
|
||||||
|
test-compiler-env [ _ _ test-node-literal @ ] with-variables
|
||||||
|
] with-variable
|
||||||
|
] make-classes ; inline
|
||||||
|
|
||||||
|
: test-emit-nonliteral-rep ( cpu quot -- node )
|
||||||
|
[
|
||||||
|
[ new \ cpu ] dip '[
|
||||||
|
test-compiler-env [ test-node-nonliteral-rep @ ] with-variables
|
||||||
|
] with-variable
|
||||||
|
] make-classes ; inline
|
||||||
|
|
||||||
|
CONSTANT: signed-reps
|
||||||
|
{ char-16-rep short-8-rep int-4-rep longlong-2-rep float-4-rep double-2-rep }
|
||||||
|
CONSTANT: all-reps
|
||||||
|
{
|
||||||
|
char-16-rep short-8-rep int-4-rep longlong-2-rep float-4-rep double-2-rep
|
||||||
|
uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep
|
||||||
|
}
|
||||||
|
|
||||||
|
TUPLE: scalar-cpu ;
|
||||||
|
|
||||||
|
TUPLE: simple-ops-cpu ;
|
||||||
|
M: simple-ops-cpu %zero-vector-reps all-reps ;
|
||||||
|
M: simple-ops-cpu %fill-vector-reps all-reps ;
|
||||||
|
M: simple-ops-cpu %add-vector-reps all-reps ;
|
||||||
|
M: simple-ops-cpu %sub-vector-reps all-reps ;
|
||||||
|
M: simple-ops-cpu %mul-vector-reps all-reps ;
|
||||||
|
M: simple-ops-cpu %div-vector-reps all-reps ;
|
||||||
|
M: simple-ops-cpu %andn-vector-reps all-reps ;
|
||||||
|
M: simple-ops-cpu %and-vector-reps all-reps ;
|
||||||
|
M: simple-ops-cpu %or-vector-reps all-reps ;
|
||||||
|
M: simple-ops-cpu %xor-vector-reps all-reps ;
|
||||||
|
M: simple-ops-cpu %merge-vector-reps all-reps ;
|
||||||
|
M: simple-ops-cpu %sqrt-vector-reps all-reps ;
|
||||||
|
M: simple-ops-cpu %move-vector-mask-reps all-reps ;
|
||||||
|
M: simple-ops-cpu %test-vector-reps all-reps ;
|
||||||
|
M: simple-ops-cpu %signed-pack-vector-reps all-reps ;
|
||||||
|
M: simple-ops-cpu %unsigned-pack-vector-reps all-reps ;
|
||||||
|
M: simple-ops-cpu %gather-vector-2-reps { longlong-2-rep ulonglong-2-rep double-2-rep } ;
|
||||||
|
M: simple-ops-cpu %gather-vector-4-reps { int-4-rep uint-4-rep float-4-rep } ;
|
||||||
|
M: simple-ops-cpu %alien-vector-reps all-reps ;
|
||||||
|
|
||||||
|
! v+
|
||||||
|
{ { ##add-vector } }
|
||||||
|
[ simple-ops-cpu float-4-rep [ emit-simd-v+ ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! v-
|
||||||
|
{ { ##sub-vector } }
|
||||||
|
[ simple-ops-cpu float-4-rep [ emit-simd-v- ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! vneg
|
||||||
|
{ { ##load-reference ##sub-vector } }
|
||||||
|
[ simple-ops-cpu float-4-rep [ emit-simd-vneg ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##zero-vector ##sub-vector } }
|
||||||
|
[ simple-ops-cpu int-4-rep [ emit-simd-vneg ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! v*
|
||||||
|
{ { ##mul-vector } }
|
||||||
|
[ simple-ops-cpu float-4-rep [ emit-simd-v* ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! v/
|
||||||
|
{ { ##div-vector } }
|
||||||
|
[ simple-ops-cpu float-4-rep [ emit-simd-v/ ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
TUPLE: addsub-cpu < simple-ops-cpu ;
|
||||||
|
M: addsub-cpu %add-sub-vector-reps { int-4-rep float-4-rep } ;
|
||||||
|
|
||||||
|
! v+-
|
||||||
|
{ { ##add-sub-vector } }
|
||||||
|
[ addsub-cpu float-4-rep [ emit-simd-v+- ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##load-reference ##xor-vector ##add-vector } }
|
||||||
|
[ simple-ops-cpu float-4-rep [ emit-simd-v+- ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##load-reference ##xor-vector ##sub-vector ##add-vector } }
|
||||||
|
[ simple-ops-cpu int-4-rep [ emit-simd-v+- ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
TUPLE: saturating-cpu < simple-ops-cpu ;
|
||||||
|
M: saturating-cpu %saturated-add-vector-reps { int-4-rep } ;
|
||||||
|
M: saturating-cpu %saturated-sub-vector-reps { int-4-rep } ;
|
||||||
|
M: saturating-cpu %saturated-mul-vector-reps { int-4-rep } ;
|
||||||
|
|
||||||
|
! vs+
|
||||||
|
{ { ##add-vector } }
|
||||||
|
[ simple-ops-cpu float-4-rep [ emit-simd-vs+ ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##add-vector } }
|
||||||
|
[ saturating-cpu float-4-rep [ emit-simd-vs+ ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##saturated-add-vector } }
|
||||||
|
[ saturating-cpu int-4-rep [ emit-simd-vs+ ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! vs-
|
||||||
|
{ { ##sub-vector } }
|
||||||
|
[ simple-ops-cpu float-4-rep [ emit-simd-vs- ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##sub-vector } }
|
||||||
|
[ saturating-cpu float-4-rep [ emit-simd-vs- ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##saturated-sub-vector } }
|
||||||
|
[ saturating-cpu int-4-rep [ emit-simd-vs- ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! vs*
|
||||||
|
{ { ##mul-vector } }
|
||||||
|
[ simple-ops-cpu float-4-rep [ emit-simd-vs* ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##mul-vector } }
|
||||||
|
[ saturating-cpu float-4-rep [ emit-simd-vs* ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##saturated-mul-vector } }
|
||||||
|
[ saturating-cpu int-4-rep [ emit-simd-vs* ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
TUPLE: minmax-cpu < simple-ops-cpu ;
|
||||||
|
M: minmax-cpu %min-vector-reps signed-reps ;
|
||||||
|
M: minmax-cpu %max-vector-reps signed-reps ;
|
||||||
|
M: minmax-cpu %compare-vector-reps { cc= cc/= } member? [ signed-reps ] [ { } ] if ;
|
||||||
|
M: minmax-cpu %compare-vector-ccs nip f 2array 1array f ;
|
||||||
|
|
||||||
|
TUPLE: compare-cpu < simple-ops-cpu ;
|
||||||
|
M: compare-cpu %compare-vector-reps drop signed-reps ;
|
||||||
|
M: compare-cpu %compare-vector-ccs nip f 2array 1array f ;
|
||||||
|
|
||||||
|
! vmin
|
||||||
|
{ { ##min-vector } }
|
||||||
|
[ minmax-cpu float-4-rep [ emit-simd-vmin ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##compare-vector ##and-vector ##andn-vector ##or-vector } }
|
||||||
|
[ compare-cpu float-4-rep [ emit-simd-vmin ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! vmax
|
||||||
|
{ { ##max-vector } }
|
||||||
|
[ minmax-cpu float-4-rep [ emit-simd-vmax ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##compare-vector ##and-vector ##andn-vector ##or-vector } }
|
||||||
|
[ compare-cpu float-4-rep [ emit-simd-vmax ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
TUPLE: dot-cpu < simple-ops-cpu ;
|
||||||
|
M: dot-cpu %dot-vector-reps { float-4-rep } ;
|
||||||
|
|
||||||
|
TUPLE: horizontal-cpu < simple-ops-cpu ;
|
||||||
|
M: horizontal-cpu %horizontal-add-vector-reps signed-reps ;
|
||||||
|
M: horizontal-cpu %unpack-vector-head-reps signed-reps ;
|
||||||
|
M: horizontal-cpu %unpack-vector-tail-reps signed-reps ;
|
||||||
|
|
||||||
|
! v.
|
||||||
|
{ { ##dot-vector } }
|
||||||
|
[ dot-cpu float-4-rep [ emit-simd-v. ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##mul-vector ##horizontal-add-vector ##horizontal-add-vector ##vector>scalar } }
|
||||||
|
[ horizontal-cpu float-4-rep [ emit-simd-v. ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ {
|
||||||
|
##mul-vector
|
||||||
|
##merge-vector-head ##merge-vector-tail ##add-vector
|
||||||
|
##merge-vector-head ##merge-vector-tail ##add-vector
|
||||||
|
##vector>scalar
|
||||||
|
} }
|
||||||
|
[ simple-ops-cpu float-4-rep [ emit-simd-v. ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! vsqrt
|
||||||
|
{ { ##sqrt-vector } }
|
||||||
|
[ simple-ops-cpu float-4-rep [ emit-simd-vsqrt ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! sum
|
||||||
|
{ { ##horizontal-add-vector ##vector>scalar } }
|
||||||
|
[ horizontal-cpu double-2-rep [ emit-simd-sum ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##horizontal-add-vector ##horizontal-add-vector ##vector>scalar } }
|
||||||
|
[ horizontal-cpu float-4-rep [ emit-simd-sum ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ {
|
||||||
|
##unpack-vector-head ##unpack-vector-tail ##add-vector
|
||||||
|
##horizontal-add-vector ##horizontal-add-vector
|
||||||
|
##vector>scalar
|
||||||
|
} }
|
||||||
|
[ horizontal-cpu short-8-rep [ emit-simd-sum ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ {
|
||||||
|
##unpack-vector-head ##unpack-vector-tail ##add-vector
|
||||||
|
##horizontal-add-vector ##horizontal-add-vector ##horizontal-add-vector
|
||||||
|
##vector>scalar
|
||||||
|
} }
|
||||||
|
[ horizontal-cpu char-16-rep [ emit-simd-sum ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
TUPLE: abs-cpu < simple-ops-cpu ;
|
||||||
|
M: abs-cpu %abs-vector-reps signed-reps ;
|
||||||
|
|
||||||
|
! vabs
|
||||||
|
{ { } }
|
||||||
|
[ simple-ops-cpu uint-4-rep [ emit-simd-vabs ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##abs-vector } }
|
||||||
|
[ abs-cpu float-4-rep [ emit-simd-vabs ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##load-reference ##andn-vector } }
|
||||||
|
[ simple-ops-cpu float-4-rep [ emit-simd-vabs ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##zero-vector ##sub-vector ##compare-vector ##and-vector ##andn-vector ##or-vector } }
|
||||||
|
[ compare-cpu int-4-rep [ emit-simd-vabs ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! vand
|
||||||
|
{ { ##and-vector } }
|
||||||
|
[ simple-ops-cpu float-4-rep [ emit-simd-vand ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! vandn
|
||||||
|
{ { ##andn-vector } }
|
||||||
|
[ simple-ops-cpu float-4-rep [ emit-simd-vandn ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! vor
|
||||||
|
{ { ##or-vector } }
|
||||||
|
[ simple-ops-cpu float-4-rep [ emit-simd-vor ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! vxor
|
||||||
|
{ { ##xor-vector } }
|
||||||
|
[ simple-ops-cpu float-4-rep [ emit-simd-vxor ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
TUPLE: not-cpu < simple-ops-cpu ;
|
||||||
|
M: not-cpu %not-vector-reps signed-reps ;
|
||||||
|
|
||||||
|
! vnot
|
||||||
|
{ { ##not-vector } }
|
||||||
|
[ not-cpu float-4-rep [ emit-simd-vnot ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##fill-vector ##xor-vector } }
|
||||||
|
[ simple-ops-cpu float-4-rep [ emit-simd-vnot ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
TUPLE: shift-cpu < simple-ops-cpu ;
|
||||||
|
M: shift-cpu %shl-vector-reps signed-reps ;
|
||||||
|
M: shift-cpu %shr-vector-reps signed-reps ;
|
||||||
|
|
||||||
|
TUPLE: shift-imm-cpu < simple-ops-cpu ;
|
||||||
|
M: shift-imm-cpu %shl-vector-imm-reps signed-reps ;
|
||||||
|
M: shift-imm-cpu %shr-vector-imm-reps signed-reps ;
|
||||||
|
|
||||||
|
TUPLE: horizontal-shift-cpu < simple-ops-cpu ;
|
||||||
|
M: horizontal-shift-cpu %horizontal-shl-vector-imm-reps signed-reps ;
|
||||||
|
M: horizontal-shift-cpu %horizontal-shr-vector-imm-reps signed-reps ;
|
||||||
|
|
||||||
|
! vlshift
|
||||||
|
{ { ##shl-vector-imm } }
|
||||||
|
[ shift-imm-cpu 2 int-4-rep [ emit-simd-vlshift ] test-emit-literal ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##shl-vector } }
|
||||||
|
[ shift-cpu int-4-rep [ emit-simd-vlshift ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! vrshift
|
||||||
|
{ { ##shr-vector-imm } }
|
||||||
|
[ shift-imm-cpu 2 int-4-rep [ emit-simd-vrshift ] test-emit-literal ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##shr-vector } }
|
||||||
|
[ shift-cpu int-4-rep [ emit-simd-vrshift ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! hlshift
|
||||||
|
{ { ##horizontal-shl-vector-imm } }
|
||||||
|
[ horizontal-shift-cpu 2 int-4-rep [ emit-simd-hlshift ] test-emit-literal ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! hrshift
|
||||||
|
{ { ##horizontal-shr-vector-imm } }
|
||||||
|
[ horizontal-shift-cpu 2 int-4-rep [ emit-simd-hrshift ] test-emit-literal ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
TUPLE: shuffle-imm-cpu < simple-ops-cpu ;
|
||||||
|
M: shuffle-imm-cpu %shuffle-vector-imm-reps signed-reps ;
|
||||||
|
|
||||||
|
TUPLE: shuffle-cpu < simple-ops-cpu ;
|
||||||
|
M: shuffle-cpu %shuffle-vector-reps signed-reps ;
|
||||||
|
|
||||||
|
! vshuffle-elements
|
||||||
|
{ { ##load-reference ##shuffle-vector } }
|
||||||
|
[ shuffle-cpu { 0 1 2 3 } int-4-rep [ emit-simd-vshuffle-elements ] test-emit-literal ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##shuffle-vector-imm } }
|
||||||
|
[ shuffle-imm-cpu { 0 1 2 3 } int-4-rep [ emit-simd-vshuffle-elements ] test-emit-literal ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! vshuffle-bytes
|
||||||
|
{ { ##shuffle-vector } }
|
||||||
|
[ shuffle-cpu int-4-rep [ emit-simd-vshuffle-bytes ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! vmerge-head
|
||||||
|
{ { ##merge-vector-head } }
|
||||||
|
[ simple-ops-cpu float-4-rep [ emit-simd-vmerge-head ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! vmerge-tail
|
||||||
|
{ { ##merge-vector-tail } }
|
||||||
|
[ simple-ops-cpu float-4-rep [ emit-simd-vmerge-tail ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! v<= etc.
|
||||||
|
{ { ##compare-vector } }
|
||||||
|
[ compare-cpu int-4-rep [ emit-simd-v<= ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##min-vector ##compare-vector } }
|
||||||
|
[ minmax-cpu int-4-rep [ emit-simd-v<= ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##load-reference ##xor-vector ##xor-vector ##compare-vector } }
|
||||||
|
[ compare-cpu uint-4-rep [ emit-simd-v<= ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! vany? etc.
|
||||||
|
{ { ##test-vector } }
|
||||||
|
[ simple-ops-cpu int-4-rep [ emit-simd-vany? ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
TUPLE: convert-cpu < simple-ops-cpu ;
|
||||||
|
M: convert-cpu %integer>float-vector-reps { int-4-rep } ;
|
||||||
|
M: convert-cpu %float>integer-vector-reps { float-4-rep } ;
|
||||||
|
|
||||||
|
! v>float
|
||||||
|
{ { } }
|
||||||
|
[ convert-cpu float-4-rep [ emit-simd-v>float ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##integer>float-vector } }
|
||||||
|
[ convert-cpu int-4-rep [ emit-simd-v>float ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! v>integer
|
||||||
|
{ { } }
|
||||||
|
[ convert-cpu int-4-rep [ emit-simd-v>integer ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##float>integer-vector } }
|
||||||
|
[ convert-cpu float-4-rep [ emit-simd-v>integer ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! vpack-signed
|
||||||
|
{ { ##signed-pack-vector } }
|
||||||
|
[ simple-ops-cpu int-4-rep [ emit-simd-vpack-signed ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! vpack-unsigned
|
||||||
|
{ { ##unsigned-pack-vector } }
|
||||||
|
[ simple-ops-cpu int-4-rep [ emit-simd-vpack-unsigned ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
TUPLE: unpack-head-cpu < simple-ops-cpu ;
|
||||||
|
M: unpack-head-cpu %unpack-vector-head-reps all-reps ;
|
||||||
|
TUPLE: unpack-cpu < unpack-head-cpu ;
|
||||||
|
M: unpack-cpu %unpack-vector-tail-reps all-reps ;
|
||||||
|
|
||||||
|
! vunpack-head
|
||||||
|
{ { ##unpack-vector-head } }
|
||||||
|
[ unpack-head-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##zero-vector ##merge-vector-head } }
|
||||||
|
[ simple-ops-cpu uint-4-rep [ emit-simd-vunpack-head ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##merge-vector-head ##shr-vector-imm } }
|
||||||
|
[ shift-imm-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##zero-vector ##compare-vector ##merge-vector-head } }
|
||||||
|
[ compare-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! vunpack-tail
|
||||||
|
{ { ##unpack-vector-tail } }
|
||||||
|
[ unpack-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##tail>head-vector ##unpack-vector-head } }
|
||||||
|
[ unpack-head-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##zero-vector ##merge-vector-tail } }
|
||||||
|
[ simple-ops-cpu uint-4-rep [ emit-simd-vunpack-tail ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##merge-vector-tail ##shr-vector-imm } }
|
||||||
|
[ shift-imm-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
{ { ##zero-vector ##compare-vector ##merge-vector-tail } }
|
||||||
|
[ compare-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! with
|
||||||
|
{ { ##scalar>vector ##shuffle-vector-imm } }
|
||||||
|
[ shuffle-imm-cpu float-4-rep [ emit-simd-with ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! gather-2
|
||||||
|
{ { ##gather-vector-2 } }
|
||||||
|
[ simple-ops-cpu double-2-rep [ emit-simd-gather-2 ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! gather-4
|
||||||
|
{ { ##gather-vector-4 } }
|
||||||
|
[ simple-ops-cpu float-4-rep [ emit-simd-gather-4 ] test-emit ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! select
|
||||||
|
{ { ##shuffle-vector-imm ##vector>scalar } }
|
||||||
|
[ shuffle-imm-cpu 1 float-4-rep [ emit-simd-select ] test-emit-literal ]
|
||||||
|
unit-test
|
||||||
|
|
||||||
|
! ^load-neg-zero-vector
|
||||||
|
{
|
||||||
|
V{
|
||||||
|
T{ ##load-reference
|
||||||
|
{ dst 1 }
|
||||||
|
{ obj B{ 0 0 0 128 0 0 0 128 0 0 0 128 0 0 0 128 } }
|
||||||
|
}
|
||||||
|
T{ ##load-reference
|
||||||
|
{ dst 2 }
|
||||||
|
{ obj B{ 0 0 0 0 0 0 0 128 0 0 0 0 0 0 0 128 } }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} [
|
||||||
|
[
|
||||||
|
{ float-4-rep double-2-rep } [ ^load-neg-zero-vector drop ] each
|
||||||
|
] V{ } make
|
||||||
|
] cfg-unit-test
|
||||||
|
|
||||||
|
! ^load-add-sub-vector
|
||||||
|
{
|
||||||
|
V{
|
||||||
|
T{ ##load-reference
|
||||||
|
{ dst 1 }
|
||||||
|
{ obj B{ 0 0 0 128 0 0 0 0 0 0 0 128 0 0 0 0 } }
|
||||||
|
}
|
||||||
|
T{ ##load-reference
|
||||||
|
{ dst 2 }
|
||||||
|
{ obj B{ 0 0 0 0 0 0 0 128 0 0 0 0 0 0 0 0 } }
|
||||||
|
}
|
||||||
|
T{ ##load-reference
|
||||||
|
{ dst 3 }
|
||||||
|
{ obj
|
||||||
|
B{ 255 0 255 0 255 0 255 0 255 0 255 0 255 0 255 0 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
T{ ##load-reference
|
||||||
|
{ dst 4 }
|
||||||
|
{ obj
|
||||||
|
B{ 255 255 0 0 255 255 0 0 255 255 0 0 255 255 0 0 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
T{ ##load-reference
|
||||||
|
{ dst 5 }
|
||||||
|
{ obj
|
||||||
|
B{ 255 255 255 255 0 0 0 0 255 255 255 255 0 0 0 0 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
T{ ##load-reference
|
||||||
|
{ dst 6 }
|
||||||
|
{ obj
|
||||||
|
B{ 255 255 255 255 255 255 255 255 0 0 0 0 0 0 0 0 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
float-4-rep
|
||||||
|
double-2-rep
|
||||||
|
char-16-rep
|
||||||
|
short-8-rep
|
||||||
|
int-4-rep
|
||||||
|
longlong-2-rep
|
||||||
|
} [ ^load-add-sub-vector drop ] each
|
||||||
|
] V{ } make
|
||||||
|
] cfg-unit-test
|
||||||
|
|
||||||
|
! ^load-half-vector
|
||||||
|
{
|
||||||
|
V{
|
||||||
|
T{ ##load-reference
|
||||||
|
{ dst 1 }
|
||||||
|
{ obj B{ 0 0 0 63 0 0 0 63 0 0 0 63 0 0 0 63 } }
|
||||||
|
}
|
||||||
|
T{ ##load-reference
|
||||||
|
{ dst 2 }
|
||||||
|
{ obj B{ 0 0 0 0 0 0 224 63 0 0 0 0 0 0 224 63 } }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} [
|
||||||
|
[
|
||||||
|
{ float-4-rep double-2-rep } [ ^load-half-vector drop ] each
|
||||||
|
] V{ } make
|
||||||
|
] cfg-unit-test
|
||||||
|
|
||||||
|
! sign-bit-mask
|
||||||
|
{
|
||||||
|
{
|
||||||
|
B{ 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 }
|
||||||
|
B{ 0 128 0 128 0 128 0 128 0 128 0 128 0 128 0 128 }
|
||||||
|
B{ 0 0 0 128 0 0 0 128 0 0 0 128 0 0 0 128 }
|
||||||
|
B{ 0 0 0 0 0 0 0 128 0 0 0 0 0 0 0 128 }
|
||||||
|
}
|
||||||
|
} [
|
||||||
|
{ char-16-rep short-8-rep int-4-rep longlong-2-rep } [ sign-bit-mask ] map
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
|
||||||
|
! test with nonliteral/invalid reps
|
||||||
|
[ simple-ops-cpu [ emit-simd-v+ ] test-emit-nonliteral-rep ]
|
||||||
|
[ bad-simd-intrinsic? ] must-fail-with
|
||||||
|
|
||||||
|
[ simple-ops-cpu f [ emit-simd-v+ ] test-emit ]
|
||||||
|
[ bad-simd-intrinsic? ] must-fail-with
|
||||||
|
|
||||||
|
[ simple-ops-cpu 3 [ emit-simd-v+ ] test-emit ]
|
||||||
|
[ bad-simd-intrinsic? ] must-fail-with
|
|
@ -0,0 +1,719 @@
|
||||||
|
! Copyright (C) 2009 Slava Pestov, Joe Groff.
|
||||||
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
|
USING: accessors alien.c-types arrays assocs byte-arrays combinators
|
||||||
|
combinators.short-circuit compiler.cfg.comparisons
|
||||||
|
compiler.cfg.hats compiler.cfg.instructions
|
||||||
|
compiler.cfg.intrinsics compiler.cfg.intrinsics.alien
|
||||||
|
compiler.cfg.intrinsics.simd.backend compiler.cfg.stacks
|
||||||
|
cpu.architecture fry kernel layouts locals math math.vectors
|
||||||
|
math.vectors.simd.intrinsics sequences specialized-arrays ;
|
||||||
|
FROM: alien.c-types => heap-size char short int longlong float double ;
|
||||||
|
SPECIALIZED-ARRAYS: char uchar short ushort int uint longlong ulonglong float double ;
|
||||||
|
IN: compiler.cfg.intrinsics.simd
|
||||||
|
|
||||||
|
! compound vector ops
|
||||||
|
|
||||||
|
CONSTANT: rep>bit-mask {
|
||||||
|
{
|
||||||
|
char-16-rep uchar-array{
|
||||||
|
0x80 0x80 0x80 0x80
|
||||||
|
0x80 0x80 0x80 0x80
|
||||||
|
0x80 0x80 0x80 0x80
|
||||||
|
0x80 0x80 0x80 0x80
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
short-8-rep ushort-array{
|
||||||
|
0x8000 0x8000 0x8000 0x8000
|
||||||
|
0x8000 0x8000 0x8000 0x8000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
int-4-rep uint-array{
|
||||||
|
0x8000,0000 0x8000,0000
|
||||||
|
0x8000,0000 0x8000,0000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
longlong-2-rep ulonglong-array{
|
||||||
|
0x8000,0000,0000,0000
|
||||||
|
0x8000,0000,0000,0000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
: sign-bit-mask ( rep -- byte-array )
|
||||||
|
signed-rep rep>bit-mask at underlying>> ;
|
||||||
|
|
||||||
|
CONSTANT: rep>neg-zero {
|
||||||
|
{ float-4-rep float-array{ -0.0 -0.0 -0.0 -0.0 } }
|
||||||
|
{ double-2-rep double-array{ -0.0 -0.0 } }
|
||||||
|
}
|
||||||
|
|
||||||
|
: ^load-neg-zero-vector ( rep -- dst )
|
||||||
|
rep>neg-zero at underlying>> ^^load-literal ;
|
||||||
|
|
||||||
|
CONSTANT: rep>add-sub {
|
||||||
|
{ float-4-rep float-array{ -0.0 0.0 -0.0 0.0 } }
|
||||||
|
{ double-2-rep double-array{ -0.0 0.0 } }
|
||||||
|
{ char-16-rep char-array{ -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 } }
|
||||||
|
{ short-8-rep short-array{ -1 0 -1 0 -1 0 -1 0 } }
|
||||||
|
{ int-4-rep int-array{ -1 0 -1 0 } }
|
||||||
|
{ longlong-2-rep longlong-array{ -1 0 } }
|
||||||
|
}
|
||||||
|
|
||||||
|
: ^load-add-sub-vector ( rep -- dst )
|
||||||
|
signed-rep rep>add-sub at underlying>> ^^load-literal ;
|
||||||
|
|
||||||
|
CONSTANT: rep>half {
|
||||||
|
{ float-4-rep float-array{ 0.5 0.5 0.5 0.5 } }
|
||||||
|
{ double-2-rep double-array{ 0.5 0.5 } }
|
||||||
|
}
|
||||||
|
|
||||||
|
: ^load-half-vector ( rep -- dst )
|
||||||
|
rep>half at underlying>> ^^load-literal ;
|
||||||
|
|
||||||
|
: >variable-shuffle ( shuffle rep -- shuffle' )
|
||||||
|
rep-component-type heap-size
|
||||||
|
[ dup <repetition> >byte-array ]
|
||||||
|
[ <iota> >byte-array ] bi
|
||||||
|
'[ _ n*v _ v+ ] map concat ;
|
||||||
|
|
||||||
|
: ^load-immediate-shuffle ( shuffle rep -- dst )
|
||||||
|
>variable-shuffle ^^load-literal ;
|
||||||
|
|
||||||
|
:: ^blend-vector ( mask true false rep -- dst )
|
||||||
|
true mask rep ^^and-vector
|
||||||
|
mask false rep ^^andn-vector
|
||||||
|
rep ^^or-vector ;
|
||||||
|
|
||||||
|
: ^not-vector ( src rep -- dst )
|
||||||
|
{
|
||||||
|
[ ^^not-vector ]
|
||||||
|
[ [ ^^fill-vector ] [ ^^xor-vector ] bi ]
|
||||||
|
} v-vector-op ;
|
||||||
|
|
||||||
|
:: ^swap-compare-vector ( src1 src2 rep {cc,swap} -- dst )
|
||||||
|
{cc,swap} first2 :> ( cc swap? )
|
||||||
|
swap?
|
||||||
|
[ src2 src1 rep cc ^^compare-vector ]
|
||||||
|
[ src1 src2 rep cc ^^compare-vector ] if ;
|
||||||
|
|
||||||
|
:: ^(compare-vector) ( src1 src2 rep orig-cc -- dst )
|
||||||
|
rep orig-cc %compare-vector-ccs :> ( ccs not? )
|
||||||
|
|
||||||
|
ccs empty?
|
||||||
|
[ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ]
|
||||||
|
[
|
||||||
|
ccs unclip :> ( rest-ccs first-cc )
|
||||||
|
src1 src2 rep first-cc ^swap-compare-vector :> first-dst
|
||||||
|
|
||||||
|
rest-ccs first-dst
|
||||||
|
[ [ src1 src2 rep ] dip ^swap-compare-vector rep ^^or-vector ]
|
||||||
|
reduce
|
||||||
|
|
||||||
|
not? [ rep ^not-vector ] when
|
||||||
|
] if ;
|
||||||
|
|
||||||
|
:: ^minmax-compare-vector ( src1 src2 rep cc -- dst )
|
||||||
|
cc order-cc {
|
||||||
|
{ cc< [ src1 src2 rep ^^max-vector src1 rep cc/= ^(compare-vector) ] }
|
||||||
|
{ cc<= [ src1 src2 rep ^^min-vector src1 rep cc= ^(compare-vector) ] }
|
||||||
|
{ cc> [ src1 src2 rep ^^min-vector src1 rep cc/= ^(compare-vector) ] }
|
||||||
|
{ cc>= [ src1 src2 rep ^^max-vector src1 rep cc= ^(compare-vector) ] }
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
: ^compare-vector ( src1 src2 rep cc -- dst )
|
||||||
|
{
|
||||||
|
[ ^(compare-vector) ]
|
||||||
|
[ ^minmax-compare-vector ]
|
||||||
|
{ unsigned-int-vector-rep |[ src1 src2 rep cc |
|
||||||
|
rep sign-bit-mask ^^load-literal :> sign-bits
|
||||||
|
src1 sign-bits rep ^^xor-vector
|
||||||
|
src2 sign-bits rep ^^xor-vector
|
||||||
|
rep signed-rep cc ^(compare-vector)
|
||||||
|
] }
|
||||||
|
} vv-cc-vector-op ;
|
||||||
|
|
||||||
|
: ^unpack-vector-head ( src rep -- dst )
|
||||||
|
{
|
||||||
|
[ ^^unpack-vector-head ]
|
||||||
|
{ unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-head ] bi ] }
|
||||||
|
{ signed-int-vector-rep |[ src rep |
|
||||||
|
src src rep ^^merge-vector-head :> merged
|
||||||
|
rep rep-component-type heap-size 8 * :> bits
|
||||||
|
merged bits rep widen-vector-rep ^^shr-vector-imm
|
||||||
|
] }
|
||||||
|
{ signed-int-vector-rep |[ src rep |
|
||||||
|
rep ^^zero-vector :> zero
|
||||||
|
zero src rep cc> ^compare-vector :> sign
|
||||||
|
src sign rep ^^merge-vector-head
|
||||||
|
] }
|
||||||
|
} v-vector-op ;
|
||||||
|
|
||||||
|
: ^unpack-vector-tail ( src rep -- dst )
|
||||||
|
{
|
||||||
|
[ ^^unpack-vector-tail ]
|
||||||
|
[ [ ^^tail>head-vector ] [ ^^unpack-vector-head ] bi ]
|
||||||
|
{ unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-tail ] bi ] }
|
||||||
|
{ signed-int-vector-rep |[ src rep |
|
||||||
|
src src rep ^^merge-vector-tail :> merged
|
||||||
|
rep rep-component-type heap-size 8 * :> bits
|
||||||
|
merged bits rep widen-vector-rep ^^shr-vector-imm
|
||||||
|
] }
|
||||||
|
{ signed-int-vector-rep |[ src rep |
|
||||||
|
rep ^^zero-vector :> zero
|
||||||
|
zero src rep cc> ^compare-vector :> sign
|
||||||
|
src sign rep ^^merge-vector-tail
|
||||||
|
] }
|
||||||
|
} v-vector-op ;
|
||||||
|
|
||||||
|
PREDICATE: fixnum-vector-rep < int-vector-rep
|
||||||
|
rep-component-type heap-size cell < ;
|
||||||
|
|
||||||
|
: ^(sum-vector-2) ( src rep -- dst )
|
||||||
|
{
|
||||||
|
[ dupd ^^horizontal-add-vector ]
|
||||||
|
|[ src rep |
|
||||||
|
src src rep ^^merge-vector-head :> head
|
||||||
|
src src rep ^^merge-vector-tail :> tail
|
||||||
|
head tail rep ^^add-vector
|
||||||
|
]
|
||||||
|
} v-vector-op ;
|
||||||
|
|
||||||
|
: ^(sum-vector-4) ( src rep -- dst )
|
||||||
|
{
|
||||||
|
[
|
||||||
|
[ dupd ^^horizontal-add-vector ]
|
||||||
|
[ dupd ^^horizontal-add-vector ] bi
|
||||||
|
]
|
||||||
|
|[ src rep |
|
||||||
|
src src rep ^^merge-vector-head :> head
|
||||||
|
src src rep ^^merge-vector-tail :> tail
|
||||||
|
head tail rep ^^add-vector :> src'
|
||||||
|
|
||||||
|
rep widen-vector-rep :> rep'
|
||||||
|
src' src' rep' ^^merge-vector-head :> head'
|
||||||
|
src' src' rep' ^^merge-vector-tail :> tail'
|
||||||
|
head' tail' rep ^^add-vector
|
||||||
|
]
|
||||||
|
} v-vector-op ;
|
||||||
|
|
||||||
|
: ^(sum-vector-8) ( src rep -- dst )
|
||||||
|
{
|
||||||
|
[
|
||||||
|
[ dupd ^^horizontal-add-vector ]
|
||||||
|
[ dupd ^^horizontal-add-vector ]
|
||||||
|
[ dupd ^^horizontal-add-vector ] tri
|
||||||
|
]
|
||||||
|
|[ src rep |
|
||||||
|
src src rep ^^merge-vector-head :> head
|
||||||
|
src src rep ^^merge-vector-tail :> tail
|
||||||
|
head tail rep ^^add-vector :> src'
|
||||||
|
|
||||||
|
rep widen-vector-rep :> rep'
|
||||||
|
src' src' rep' ^^merge-vector-head :> head'
|
||||||
|
src' src' rep' ^^merge-vector-tail :> tail'
|
||||||
|
head' tail' rep ^^add-vector :> src''
|
||||||
|
|
||||||
|
rep' widen-vector-rep :> rep''
|
||||||
|
src'' src'' rep'' ^^merge-vector-head :> head''
|
||||||
|
src'' src'' rep'' ^^merge-vector-tail :> tail''
|
||||||
|
head'' tail'' rep ^^add-vector
|
||||||
|
]
|
||||||
|
} v-vector-op ;
|
||||||
|
|
||||||
|
: ^(sum-vector-16) ( src rep -- dst )
|
||||||
|
{
|
||||||
|
[
|
||||||
|
{
|
||||||
|
[ dupd ^^horizontal-add-vector ]
|
||||||
|
[ dupd ^^horizontal-add-vector ]
|
||||||
|
[ dupd ^^horizontal-add-vector ]
|
||||||
|
[ dupd ^^horizontal-add-vector ]
|
||||||
|
} cleave
|
||||||
|
]
|
||||||
|
|[ src rep |
|
||||||
|
src src rep ^^merge-vector-head :> head
|
||||||
|
src src rep ^^merge-vector-tail :> tail
|
||||||
|
head tail rep ^^add-vector :> src'
|
||||||
|
|
||||||
|
rep widen-vector-rep :> rep'
|
||||||
|
src' src' rep' ^^merge-vector-head :> head'
|
||||||
|
src' src' rep' ^^merge-vector-tail :> tail'
|
||||||
|
head' tail' rep ^^add-vector :> src''
|
||||||
|
|
||||||
|
rep' widen-vector-rep :> rep''
|
||||||
|
src'' src'' rep'' ^^merge-vector-head :> head''
|
||||||
|
src'' src'' rep'' ^^merge-vector-tail :> tail''
|
||||||
|
head'' tail'' rep ^^add-vector :> src'''
|
||||||
|
|
||||||
|
rep'' widen-vector-rep :> rep'''
|
||||||
|
src''' src''' rep''' ^^merge-vector-head :> head'''
|
||||||
|
src''' src''' rep''' ^^merge-vector-tail :> tail'''
|
||||||
|
head''' tail''' rep ^^add-vector
|
||||||
|
]
|
||||||
|
} v-vector-op ;
|
||||||
|
|
||||||
|
: ^(sum-vector) ( src rep -- dst )
|
||||||
|
[
|
||||||
|
dup rep-length {
|
||||||
|
{ 2 [ ^(sum-vector-2) ] }
|
||||||
|
{ 4 [ ^(sum-vector-4) ] }
|
||||||
|
{ 8 [ ^(sum-vector-8) ] }
|
||||||
|
{ 16 [ ^(sum-vector-16) ] }
|
||||||
|
} case
|
||||||
|
] [ ^^vector>scalar ] bi ;
|
||||||
|
|
||||||
|
: ^sum-vector ( src rep -- dst )
|
||||||
|
{
|
||||||
|
{ float-vector-rep [ ^(sum-vector) ] }
|
||||||
|
{ fixnum-vector-rep |[ src rep |
|
||||||
|
src rep ^unpack-vector-head :> head
|
||||||
|
src rep ^unpack-vector-tail :> tail
|
||||||
|
rep widen-vector-rep :> wide-rep
|
||||||
|
head tail wide-rep ^^add-vector wide-rep
|
||||||
|
^(sum-vector)
|
||||||
|
] }
|
||||||
|
} v-vector-op ;
|
||||||
|
|
||||||
|
: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
|
||||||
|
|
||||||
|
: ^shuffle-vector-imm ( src1 shuffle rep -- dst )
|
||||||
|
[ rep-length 0 pad-tail ] keep {
|
||||||
|
[ ^^shuffle-vector-imm ]
|
||||||
|
[ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] bi ]
|
||||||
|
} vl-vector-op ;
|
||||||
|
|
||||||
|
: ^shuffle-2-vectors-imm ( src1 src2 shuffle rep -- dst )
|
||||||
|
[ rep-length 0 pad-tail ] keep {
|
||||||
|
{ double-2-rep |[ src1 src2 shuffle rep |
|
||||||
|
shuffle first2 [ 4 mod ] bi@ :> ( i j )
|
||||||
|
{
|
||||||
|
{ [ i j [ 2 < ] both? ] [
|
||||||
|
src1 shuffle rep ^shuffle-vector-imm
|
||||||
|
] }
|
||||||
|
{ [ i j [ 2 >= ] both? ] [
|
||||||
|
src2 shuffle [ 2 - ] map rep ^shuffle-vector-imm
|
||||||
|
] }
|
||||||
|
{ [ i 2 < ] [
|
||||||
|
src1 src2 i j 2 - 2array rep ^^shuffle-vector-halves-imm
|
||||||
|
] }
|
||||||
|
! [ j 2 < ]
|
||||||
|
[ src2 src1 i 2 - j 2array rep ^^shuffle-vector-halves-imm ]
|
||||||
|
} cond
|
||||||
|
] }
|
||||||
|
} vvl-vector-op ;
|
||||||
|
|
||||||
|
: ^broadcast-vector ( src n rep -- dst )
|
||||||
|
[ rep-length swap <array> ] keep
|
||||||
|
^shuffle-vector-imm ;
|
||||||
|
|
||||||
|
: ^with-vector ( src rep -- dst )
|
||||||
|
[ ^^scalar>vector ] keep [ 0 ] dip ^broadcast-vector ;
|
||||||
|
|
||||||
|
: ^select-vector ( src n rep -- dst )
|
||||||
|
{
|
||||||
|
[ ^^select-vector ]
|
||||||
|
[ [ ^broadcast-vector ] keep ^^vector>scalar ]
|
||||||
|
} vl-vector-op ;
|
||||||
|
|
||||||
|
! intrinsic emitters
|
||||||
|
|
||||||
|
: emit-simd-v+ ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^add-vector ]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-v- ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^sub-vector ]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vneg ( node -- )
|
||||||
|
{
|
||||||
|
{ float-vector-rep [ [ ^load-neg-zero-vector swap ] [ ^^sub-vector ] bi ] }
|
||||||
|
{ int-vector-rep [ [ ^^zero-vector swap ] [ ^^sub-vector ] bi ] }
|
||||||
|
} emit-v-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-v+- ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^add-sub-vector ]
|
||||||
|
{ float-vector-rep |[ src1 src2 rep |
|
||||||
|
rep ^load-add-sub-vector :> signs
|
||||||
|
src2 signs rep ^^xor-vector :> src2'
|
||||||
|
src1 src2' rep ^^add-vector
|
||||||
|
] }
|
||||||
|
{ int-vector-rep |[ src1 src2 rep |
|
||||||
|
rep ^load-add-sub-vector :> signs
|
||||||
|
src2 signs rep ^^xor-vector :> src2'
|
||||||
|
src2' signs rep ^^sub-vector :> src2''
|
||||||
|
src1 src2'' rep ^^add-vector
|
||||||
|
] }
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vs+ ( node -- )
|
||||||
|
{
|
||||||
|
{ float-vector-rep [ ^^add-vector ] }
|
||||||
|
{ int-vector-rep [ ^^saturated-add-vector ] }
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vs- ( node -- )
|
||||||
|
{
|
||||||
|
{ float-vector-rep [ ^^sub-vector ] }
|
||||||
|
{ int-vector-rep [ ^^saturated-sub-vector ] }
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vs* ( node -- )
|
||||||
|
{
|
||||||
|
{ float-vector-rep [ ^^mul-vector ] }
|
||||||
|
{ int-vector-rep [ ^^saturated-mul-vector ] }
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-v* ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^mul-vector ]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-v*high ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^mul-high-vector ]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-v*hs+ ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^mul-horizontal-add-vector ]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-v/ ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^div-vector ]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vmin ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^min-vector ]
|
||||||
|
[
|
||||||
|
[ cc< ^compare-vector ]
|
||||||
|
[ ^blend-vector ] 3bi
|
||||||
|
]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vmax ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^max-vector ]
|
||||||
|
[
|
||||||
|
[ cc> ^compare-vector ]
|
||||||
|
[ ^blend-vector ] 3bi
|
||||||
|
]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vavg ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^avg-vector ]
|
||||||
|
{ float-vector-rep |[ src1 src2 rep |
|
||||||
|
src1 src2 rep ^^add-vector
|
||||||
|
rep ^load-half-vector rep ^^mul-vector
|
||||||
|
] }
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-v. ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^dot-vector ]
|
||||||
|
{ float-vector-rep [ [ ^^mul-vector ] [ ^sum-vector ] bi ] }
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vsad ( node -- )
|
||||||
|
{
|
||||||
|
[
|
||||||
|
[ ^^sad-vector dup { 2 3 0 1 } int-4-rep ^^shuffle-vector-imm int-4-rep ^^add-vector ]
|
||||||
|
[ widen-vector-rep ^^vector>scalar ] bi
|
||||||
|
]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vsqrt ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^sqrt-vector ]
|
||||||
|
} emit-v-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-sum ( node -- )
|
||||||
|
{
|
||||||
|
[ ^sum-vector ]
|
||||||
|
} emit-v-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vabs ( node -- )
|
||||||
|
{
|
||||||
|
{ unsigned-int-vector-rep [ drop ] }
|
||||||
|
[ ^^abs-vector ]
|
||||||
|
{ float-vector-rep [ [ ^load-neg-zero-vector ] [ swapd ^^andn-vector ] bi ] }
|
||||||
|
{ int-vector-rep |[ src rep |
|
||||||
|
rep ^^zero-vector :> zero
|
||||||
|
zero src rep ^^sub-vector :> -src
|
||||||
|
zero src rep cc> ^compare-vector :> sign
|
||||||
|
sign -src src rep ^blend-vector
|
||||||
|
] }
|
||||||
|
} emit-v-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vand ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^and-vector ]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vandn ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^andn-vector ]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vor ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^or-vector ]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vxor ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^xor-vector ]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vnot ( node -- )
|
||||||
|
{
|
||||||
|
[ ^not-vector ]
|
||||||
|
} emit-v-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vlshift ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^shl-vector ]
|
||||||
|
} {
|
||||||
|
[ ^^shl-vector-imm ]
|
||||||
|
} [ integer? ] emit-vv-or-vl-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vrshift ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^shr-vector ]
|
||||||
|
} {
|
||||||
|
[ ^^shr-vector-imm ]
|
||||||
|
} [ integer? ] emit-vv-or-vl-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-hlshift ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^horizontal-shl-vector-imm ]
|
||||||
|
} [ integer? ] emit-vl-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-hrshift ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^horizontal-shr-vector-imm ]
|
||||||
|
} [ integer? ] emit-vl-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vshuffle-elements ( node -- )
|
||||||
|
{
|
||||||
|
[ ^shuffle-vector-imm ]
|
||||||
|
} [ shuffle? ] emit-vl-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vshuffle2-elements ( node -- )
|
||||||
|
{
|
||||||
|
[ ^shuffle-2-vectors-imm ]
|
||||||
|
} [ shuffle? ] emit-vvl-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vshuffle-bytes ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^shuffle-vector ]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vmerge-head ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^merge-vector-head ]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vmerge-tail ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^merge-vector-tail ]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-v<= ( node -- )
|
||||||
|
{
|
||||||
|
[ cc<= ^compare-vector ]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
: emit-simd-v< ( node -- )
|
||||||
|
{
|
||||||
|
[ cc< ^compare-vector ]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
: emit-simd-v= ( node -- )
|
||||||
|
{
|
||||||
|
[ cc= ^compare-vector ]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
: emit-simd-v> ( node -- )
|
||||||
|
{
|
||||||
|
[ cc> ^compare-vector ]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
: emit-simd-v>= ( node -- )
|
||||||
|
{
|
||||||
|
[ cc>= ^compare-vector ]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
: emit-simd-vunordered? ( node -- )
|
||||||
|
{
|
||||||
|
[ cc/<>= ^compare-vector ]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vany? ( node -- )
|
||||||
|
{
|
||||||
|
[ vcc-any ^^test-vector ]
|
||||||
|
} emit-v-vector-op ;
|
||||||
|
: emit-simd-vall? ( node -- )
|
||||||
|
{
|
||||||
|
[ vcc-all ^^test-vector ]
|
||||||
|
} emit-v-vector-op ;
|
||||||
|
: emit-simd-vnone? ( node -- )
|
||||||
|
{
|
||||||
|
[ vcc-none ^^test-vector ]
|
||||||
|
} emit-v-vector-op ;
|
||||||
|
: emit-simd-vgetmask ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^move-vector-mask ]
|
||||||
|
} emit-v-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-v>float ( node -- )
|
||||||
|
{
|
||||||
|
{ float-vector-rep [ drop ] }
|
||||||
|
{ int-vector-rep [ ^^integer>float-vector ] }
|
||||||
|
} emit-v-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-v>integer ( node -- )
|
||||||
|
{
|
||||||
|
{ float-vector-rep [ ^^float>integer-vector ] }
|
||||||
|
{ int-vector-rep [ drop ] }
|
||||||
|
} emit-v-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vpack-signed ( node -- )
|
||||||
|
{
|
||||||
|
{ double-2-rep |[ src1 src2 rep |
|
||||||
|
src1 double-2-rep ^^float-pack-vector :> dst-head
|
||||||
|
src2 double-2-rep ^^float-pack-vector :> dst-tail
|
||||||
|
dst-head dst-tail { 0 1 0 1 } float-4-rep ^^shuffle-vector-halves-imm
|
||||||
|
] }
|
||||||
|
{ int-vector-rep [ ^^signed-pack-vector ] }
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vpack-unsigned ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^unsigned-pack-vector ]
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vunpack-head ( node -- )
|
||||||
|
{
|
||||||
|
[ ^unpack-vector-head ]
|
||||||
|
} emit-v-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-vunpack-tail ( node -- )
|
||||||
|
{
|
||||||
|
[ ^unpack-vector-tail ]
|
||||||
|
} emit-v-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-with ( node -- )
|
||||||
|
{
|
||||||
|
{ fixnum-vector-rep [ ^with-vector ] }
|
||||||
|
{ float-vector-rep [ ^with-vector ] }
|
||||||
|
} emit-v-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-gather-2 ( node -- )
|
||||||
|
{
|
||||||
|
{ fixnum-vector-rep [ ^^gather-int-vector-2 ] }
|
||||||
|
{ fixnum-vector-rep [ ^^gather-vector-2 ] }
|
||||||
|
{ float-vector-rep [ ^^gather-vector-2 ] }
|
||||||
|
} emit-vv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-gather-4 ( node -- )
|
||||||
|
{
|
||||||
|
{ fixnum-vector-rep [ ^^gather-int-vector-4 ] }
|
||||||
|
{ fixnum-vector-rep [ ^^gather-vector-4 ] }
|
||||||
|
{ float-vector-rep [ ^^gather-vector-4 ] }
|
||||||
|
} emit-vvvv-vector-op ;
|
||||||
|
|
||||||
|
: emit-simd-select ( node -- )
|
||||||
|
{
|
||||||
|
{ fixnum-vector-rep [ ^select-vector ] }
|
||||||
|
{ float-vector-rep [ ^select-vector ] }
|
||||||
|
} [ integer? ] emit-vl-vector-op ;
|
||||||
|
|
||||||
|
: emit-alien-vector ( block node -- block' )
|
||||||
|
dup [
|
||||||
|
'[
|
||||||
|
ds-drop prepare-load-memory
|
||||||
|
_ f ^^load-memory-imm ds-push
|
||||||
|
]
|
||||||
|
[ inline-load-memory? ] inline-accessor
|
||||||
|
] with { [ %alien-vector-reps member? ] } if-literals-match ;
|
||||||
|
|
||||||
|
: emit-set-alien-vector ( block node -- block' )
|
||||||
|
dup [
|
||||||
|
'[
|
||||||
|
ds-drop prepare-store-memory
|
||||||
|
_ f ##store-memory-imm,
|
||||||
|
]
|
||||||
|
[ byte-array inline-store-memory? ] inline-accessor
|
||||||
|
] with { [ %alien-vector-reps member? ] } if-literals-match ;
|
||||||
|
|
||||||
|
: enable-simd ( -- )
|
||||||
|
{
|
||||||
|
{ (simd-v+) [ emit-simd-v+ ] }
|
||||||
|
{ (simd-v-) [ emit-simd-v- ] }
|
||||||
|
{ (simd-vneg) [ emit-simd-vneg ] }
|
||||||
|
{ (simd-v+-) [ emit-simd-v+- ] }
|
||||||
|
{ (simd-vs+) [ emit-simd-vs+ ] }
|
||||||
|
{ (simd-vs-) [ emit-simd-vs- ] }
|
||||||
|
{ (simd-vs*) [ emit-simd-vs* ] }
|
||||||
|
{ (simd-v*) [ emit-simd-v* ] }
|
||||||
|
{ (simd-v*high) [ emit-simd-v*high ] }
|
||||||
|
{ (simd-v*hs+) [ emit-simd-v*hs+ ] }
|
||||||
|
{ (simd-v/) [ emit-simd-v/ ] }
|
||||||
|
{ (simd-vmin) [ emit-simd-vmin ] }
|
||||||
|
{ (simd-vmax) [ emit-simd-vmax ] }
|
||||||
|
{ (simd-vavg) [ emit-simd-vavg ] }
|
||||||
|
{ (simd-v.) [ emit-simd-v. ] }
|
||||||
|
{ (simd-vsad) [ emit-simd-vsad ] }
|
||||||
|
{ (simd-vsqrt) [ emit-simd-vsqrt ] }
|
||||||
|
{ (simd-sum) [ emit-simd-sum ] }
|
||||||
|
{ (simd-vabs) [ emit-simd-vabs ] }
|
||||||
|
{ (simd-vbitand) [ emit-simd-vand ] }
|
||||||
|
{ (simd-vbitandn) [ emit-simd-vandn ] }
|
||||||
|
{ (simd-vbitor) [ emit-simd-vor ] }
|
||||||
|
{ (simd-vbitxor) [ emit-simd-vxor ] }
|
||||||
|
{ (simd-vbitnot) [ emit-simd-vnot ] }
|
||||||
|
{ (simd-vand) [ emit-simd-vand ] }
|
||||||
|
{ (simd-vandn) [ emit-simd-vandn ] }
|
||||||
|
{ (simd-vor) [ emit-simd-vor ] }
|
||||||
|
{ (simd-vxor) [ emit-simd-vxor ] }
|
||||||
|
{ (simd-vnot) [ emit-simd-vnot ] }
|
||||||
|
{ (simd-vlshift) [ emit-simd-vlshift ] }
|
||||||
|
{ (simd-vrshift) [ emit-simd-vrshift ] }
|
||||||
|
{ (simd-hlshift) [ emit-simd-hlshift ] }
|
||||||
|
{ (simd-hrshift) [ emit-simd-hrshift ] }
|
||||||
|
{ (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] }
|
||||||
|
{ (simd-vshuffle2-elements) [ emit-simd-vshuffle2-elements ] }
|
||||||
|
{ (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] }
|
||||||
|
{ (simd-vmerge-head) [ emit-simd-vmerge-head ] }
|
||||||
|
{ (simd-vmerge-tail) [ emit-simd-vmerge-tail ] }
|
||||||
|
{ (simd-v<=) [ emit-simd-v<= ] }
|
||||||
|
{ (simd-v<) [ emit-simd-v< ] }
|
||||||
|
{ (simd-v=) [ emit-simd-v= ] }
|
||||||
|
{ (simd-v>) [ emit-simd-v> ] }
|
||||||
|
{ (simd-v>=) [ emit-simd-v>= ] }
|
||||||
|
{ (simd-vunordered?) [ emit-simd-vunordered? ] }
|
||||||
|
{ (simd-vany?) [ emit-simd-vany? ] }
|
||||||
|
{ (simd-vall?) [ emit-simd-vall? ] }
|
||||||
|
{ (simd-vnone?) [ emit-simd-vnone? ] }
|
||||||
|
{ (simd-v>float) [ emit-simd-v>float ] }
|
||||||
|
{ (simd-v>integer) [ emit-simd-v>integer ] }
|
||||||
|
{ (simd-vpack-signed) [ emit-simd-vpack-signed ] }
|
||||||
|
{ (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] }
|
||||||
|
{ (simd-vunpack-head) [ emit-simd-vunpack-head ] }
|
||||||
|
{ (simd-vunpack-tail) [ emit-simd-vunpack-tail ] }
|
||||||
|
{ (simd-with) [ emit-simd-with ] }
|
||||||
|
{ (simd-gather-2) [ emit-simd-gather-2 ] }
|
||||||
|
{ (simd-gather-4) [ emit-simd-gather-4 ] }
|
||||||
|
{ (simd-select) [ emit-simd-select ] }
|
||||||
|
{ alien-vector [ emit-alien-vector ] }
|
||||||
|
{ set-alien-vector [ emit-set-alien-vector ] }
|
||||||
|
{ assert-positive [ drop ] }
|
||||||
|
{ (simd-vgetmask) [ emit-simd-vgetmask ] }
|
||||||
|
} enable-intrinsics ;
|
||||||
|
|
||||||
|
enable-simd
|
|
@ -0,0 +1,135 @@
|
||||||
|
! Copyright (C) 2009 Slava Pestov.
|
||||||
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
|
USING: accessors byte-arrays combinators compiler.cfg.builder
|
||||||
|
compiler.tree.propagation.info compiler.tree.propagation.nodes
|
||||||
|
continuations cpu.architecture fry kernel layouts math
|
||||||
|
math.intervals math.vectors.simd.intrinsics namespaces sequences
|
||||||
|
words ;
|
||||||
|
IN: compiler.tree.propagation.simd
|
||||||
|
|
||||||
|
CONSTANT: vector>vector-intrinsics
|
||||||
|
{
|
||||||
|
(simd-v+)
|
||||||
|
(simd-v-)
|
||||||
|
(simd-vneg)
|
||||||
|
(simd-v+-)
|
||||||
|
(simd-vs+)
|
||||||
|
(simd-vs-)
|
||||||
|
(simd-vs*)
|
||||||
|
(simd-v*)
|
||||||
|
(simd-v*high)
|
||||||
|
(simd-v*hs+)
|
||||||
|
(simd-v/)
|
||||||
|
(simd-vmin)
|
||||||
|
(simd-vmax)
|
||||||
|
(simd-vavg)
|
||||||
|
(simd-vsqrt)
|
||||||
|
(simd-vabs)
|
||||||
|
(simd-vbitand)
|
||||||
|
(simd-vbitandn)
|
||||||
|
(simd-vbitor)
|
||||||
|
(simd-vbitxor)
|
||||||
|
(simd-vbitnot)
|
||||||
|
(simd-vand)
|
||||||
|
(simd-vandn)
|
||||||
|
(simd-vor)
|
||||||
|
(simd-vxor)
|
||||||
|
(simd-vnot)
|
||||||
|
(simd-vlshift)
|
||||||
|
(simd-vrshift)
|
||||||
|
(simd-hlshift)
|
||||||
|
(simd-hrshift)
|
||||||
|
(simd-vshuffle-elements)
|
||||||
|
(simd-vshuffle2-elements)
|
||||||
|
(simd-vshuffle-bytes)
|
||||||
|
(simd-vmerge-head)
|
||||||
|
(simd-vmerge-tail)
|
||||||
|
(simd-v<=)
|
||||||
|
(simd-v<)
|
||||||
|
(simd-v=)
|
||||||
|
(simd-v>)
|
||||||
|
(simd-v>=)
|
||||||
|
(simd-vunordered?)
|
||||||
|
(simd-v>float)
|
||||||
|
(simd-v>integer)
|
||||||
|
(simd-vpack-signed)
|
||||||
|
(simd-vpack-unsigned)
|
||||||
|
(simd-vunpack-head)
|
||||||
|
(simd-vunpack-tail)
|
||||||
|
(simd-with)
|
||||||
|
(simd-gather-2)
|
||||||
|
(simd-gather-4)
|
||||||
|
alien-vector
|
||||||
|
}
|
||||||
|
|
||||||
|
CONSTANT: vector-other-intrinsics
|
||||||
|
{
|
||||||
|
(simd-v.)
|
||||||
|
(simd-vsad)
|
||||||
|
(simd-sum)
|
||||||
|
(simd-vany?)
|
||||||
|
(simd-vall?)
|
||||||
|
(simd-vnone?)
|
||||||
|
(simd-vgetmask)
|
||||||
|
(simd-select)
|
||||||
|
set-alien-vector
|
||||||
|
}
|
||||||
|
|
||||||
|
: vector-intrinsics ( -- x )
|
||||||
|
vector>vector-intrinsics vector-other-intrinsics append ;
|
||||||
|
|
||||||
|
vector>vector-intrinsics [ { byte-array } "default-output-classes" set-word-prop ] each
|
||||||
|
|
||||||
|
: scalar-output-class ( rep -- class )
|
||||||
|
dup literal?>> [
|
||||||
|
literal>> scalar-rep-of {
|
||||||
|
{ float-rep [ float ] }
|
||||||
|
{ double-rep [ float ] }
|
||||||
|
{ longlong-scalar-rep [ integer ] }
|
||||||
|
{ ulonglong-scalar-rep [ integer ] }
|
||||||
|
{ int-scalar-rep [ cell 8 = [ fixnum ] [ integer ] if ] }
|
||||||
|
{ uint-scalar-rep [ cell 8 = [ fixnum ] [ integer ] if ] }
|
||||||
|
[ drop fixnum ]
|
||||||
|
} case
|
||||||
|
] [ drop real ] if
|
||||||
|
<class-info> ;
|
||||||
|
|
||||||
|
\ (simd-sum) [ nip scalar-output-class ] "outputs" set-word-prop
|
||||||
|
|
||||||
|
\ (simd-v.) [ 2nip scalar-output-class ] "outputs" set-word-prop
|
||||||
|
|
||||||
|
{
|
||||||
|
(simd-vany?)
|
||||||
|
(simd-vall?)
|
||||||
|
(simd-vnone?)
|
||||||
|
} [ { boolean } "default-output-classes" set-word-prop ] each
|
||||||
|
|
||||||
|
\ (simd-select) [ 2nip scalar-output-class ] "outputs" set-word-prop
|
||||||
|
|
||||||
|
\ assert-positive [
|
||||||
|
real [0,inf] <class/interval-info> value-info-intersect
|
||||||
|
] "outputs" set-word-prop
|
||||||
|
|
||||||
|
\ (simd-vgetmask) { fixnum } "default-output-classes" set-word-prop
|
||||||
|
|
||||||
|
: clone-with-value-infos ( node -- node' )
|
||||||
|
clone dup in-d>> extract-value-info >>info ;
|
||||||
|
|
||||||
|
: try-intrinsic ( node intrinsic-quot -- ? )
|
||||||
|
'[
|
||||||
|
_ clone-with-value-infos
|
||||||
|
_ with-dummy-cfg-builder
|
||||||
|
t
|
||||||
|
] [ drop f ] recover ;
|
||||||
|
|
||||||
|
: inline-unless-intrinsic ( word -- )
|
||||||
|
dup '[
|
||||||
|
_ swap over "intrinsic" word-prop
|
||||||
|
"always-inline-simd-intrinsics" get not swap and
|
||||||
|
! word node intrinsic
|
||||||
|
[ try-intrinsic [ drop f ] [ def>> ] if ]
|
||||||
|
[ drop def>> ] if*
|
||||||
|
]
|
||||||
|
"custom-inlining" set-word-prop ;
|
||||||
|
|
||||||
|
vector-intrinsics [ inline-unless-intrinsic ] each
|
Loading…
Reference in New Issue