alien.complex: Fix functor
parent
1ca1a9b6b3
commit
1e9b407037
|
@ -1,12 +1,12 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors alien.c-types alien.complex.functor kernel
|
||||
sequences ;
|
||||
USING: accessors alien alien.c-types alien.complex.functor
|
||||
classes.struct kernel math quotations ;
|
||||
FROM: alien.c-types => float double ;
|
||||
IN: alien.complex
|
||||
|
||||
<<
|
||||
{ "float" "double" } [ dup "complex-" prepend define-complex-type ] each
|
||||
>>
|
||||
COMPLEX-TYPE: float complex-float
|
||||
COMPLEX-TYPE: double complex-double
|
||||
|
||||
<<
|
||||
! This overrides the fact that small structures are never returned
|
||||
|
|
|
@ -3,19 +3,24 @@
|
|||
USING: functors2 ;
|
||||
IN: alien.complex.functor
|
||||
|
||||
FUNCTOR: define-complex-type ( N: name T: name -- ) [[
|
||||
SAME-FUNCTOR: complex-type ( N: existing-word T: name -- ) [[
|
||||
USING: alien alien.c-types classes.struct kernel quotations ;
|
||||
QUALIFIED: math
|
||||
|
||||
STRUCT: ${T}-class { real ${N}-type } { imaginary ${N}-type } ;
|
||||
<<
|
||||
STRUCT: ${T} { real ${N} } { imaginary ${N} } ;
|
||||
|
||||
: <${T}> ( z -- alien )
|
||||
>rect ${T}-class <struct-boa> >c-ptr ;
|
||||
math:>rect ${T} <struct-boa> >c-ptr ;
|
||||
|
||||
: *${T} ( alien -- z )
|
||||
T-class memory>struct [ real>> ] [ imaginary>> ] bi rect> ; inline
|
||||
${T} memory>struct [ real>> ] [ imaginary>> ] bi math:rect> ; inline
|
||||
|
||||
${T}-class lookup-c-type
|
||||
<${T}> 1quotation >>unboxer-quot
|
||||
*${T} 1quotation >>boxer-quot
|
||||
>>
|
||||
|
||||
\ ${T} lookup-c-type
|
||||
[ <${T}> ] >>unboxer-quot
|
||||
[ *${T} ] >>boxer-quot
|
||||
complex >>boxed-class
|
||||
drop
|
||||
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Slava Pestov
|
|
@ -0,0 +1,163 @@
|
|||
! Copyright (C) 2009 Joe Groff.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays classes combinators
|
||||
compiler.cfg.instructions compiler.cfg.registers
|
||||
compiler.cfg.stacks compiler.cfg.stacks.local
|
||||
compiler.tree.propagation.info cpu.architecture fry
|
||||
generalizations kernel locals macros make math quotations
|
||||
sequences sequences.generalizations ;
|
||||
IN: compiler.cfg.intrinsics.simd.backend
|
||||
|
||||
! Selection of implementation based on available CPU instructions
|
||||
|
||||
GENERIC: insn-available? ( ## -- reps )
|
||||
|
||||
M: object insn-available? drop t ;
|
||||
|
||||
M: ##zero-vector insn-available? rep>> %zero-vector-reps member? ;
|
||||
M: ##fill-vector insn-available? rep>> %fill-vector-reps member? ;
|
||||
M: ##gather-vector-2 insn-available? rep>> %gather-vector-2-reps member? ;
|
||||
M: ##gather-vector-4 insn-available? rep>> %gather-vector-4-reps member? ;
|
||||
M: ##gather-int-vector-2 insn-available? rep>> %gather-int-vector-2-reps member? ;
|
||||
M: ##gather-int-vector-4 insn-available? rep>> %gather-int-vector-4-reps member? ;
|
||||
M: ##select-vector insn-available? rep>> %select-vector-reps member? ;
|
||||
M: ##store-memory-imm insn-available? rep>> %alien-vector-reps member? ;
|
||||
M: ##shuffle-vector insn-available? rep>> %shuffle-vector-reps member? ;
|
||||
M: ##shuffle-vector-imm insn-available? rep>> %shuffle-vector-imm-reps member? ;
|
||||
M: ##shuffle-vector-halves-imm insn-available? rep>> %shuffle-vector-halves-imm-reps member? ;
|
||||
M: ##merge-vector-head insn-available? rep>> %merge-vector-reps member? ;
|
||||
M: ##merge-vector-tail insn-available? rep>> %merge-vector-reps member? ;
|
||||
M: ##float-pack-vector insn-available? rep>> %float-pack-vector-reps member? ;
|
||||
M: ##signed-pack-vector insn-available? rep>> %signed-pack-vector-reps member? ;
|
||||
M: ##unsigned-pack-vector insn-available? rep>> %unsigned-pack-vector-reps member? ;
|
||||
M: ##unpack-vector-head insn-available? rep>> %unpack-vector-head-reps member? ;
|
||||
M: ##unpack-vector-tail insn-available? rep>> %unpack-vector-tail-reps member? ;
|
||||
M: ##tail>head-vector insn-available? rep>> %unpack-vector-head-reps member? ;
|
||||
M: ##integer>float-vector insn-available? rep>> %integer>float-vector-reps member? ;
|
||||
M: ##float>integer-vector insn-available? rep>> %float>integer-vector-reps member? ;
|
||||
M: ##compare-vector insn-available? [ rep>> ] [ cc>> ] bi %compare-vector-reps member? ;
|
||||
M: ##move-vector-mask insn-available? rep>> %move-vector-mask-reps member? ;
|
||||
M: ##test-vector insn-available? rep>> %test-vector-reps member? ;
|
||||
M: ##add-vector insn-available? rep>> %add-vector-reps member? ;
|
||||
M: ##saturated-add-vector insn-available? rep>> %saturated-add-vector-reps member? ;
|
||||
M: ##add-sub-vector insn-available? rep>> %add-sub-vector-reps member? ;
|
||||
M: ##sub-vector insn-available? rep>> %sub-vector-reps member? ;
|
||||
M: ##saturated-sub-vector insn-available? rep>> %saturated-sub-vector-reps member? ;
|
||||
M: ##mul-vector insn-available? rep>> %mul-vector-reps member? ;
|
||||
M: ##mul-high-vector insn-available? rep>> %mul-high-vector-reps member? ;
|
||||
M: ##mul-horizontal-add-vector insn-available? rep>> %mul-horizontal-add-vector-reps member? ;
|
||||
M: ##saturated-mul-vector insn-available? rep>> %saturated-mul-vector-reps member? ;
|
||||
M: ##div-vector insn-available? rep>> %div-vector-reps member? ;
|
||||
M: ##min-vector insn-available? rep>> %min-vector-reps member? ;
|
||||
M: ##max-vector insn-available? rep>> %max-vector-reps member? ;
|
||||
M: ##avg-vector insn-available? rep>> %avg-vector-reps member? ;
|
||||
M: ##dot-vector insn-available? rep>> %dot-vector-reps member? ;
|
||||
M: ##sad-vector insn-available? rep>> %sad-vector-reps member? ;
|
||||
M: ##sqrt-vector insn-available? rep>> %sqrt-vector-reps member? ;
|
||||
M: ##horizontal-add-vector insn-available? rep>> %horizontal-add-vector-reps member? ;
|
||||
M: ##horizontal-sub-vector insn-available? rep>> %horizontal-sub-vector-reps member? ;
|
||||
M: ##abs-vector insn-available? rep>> %abs-vector-reps member? ;
|
||||
M: ##and-vector insn-available? rep>> %and-vector-reps member? ;
|
||||
M: ##andn-vector insn-available? rep>> %andn-vector-reps member? ;
|
||||
M: ##or-vector insn-available? rep>> %or-vector-reps member? ;
|
||||
M: ##xor-vector insn-available? rep>> %xor-vector-reps member? ;
|
||||
M: ##not-vector insn-available? rep>> %not-vector-reps member? ;
|
||||
M: ##shl-vector insn-available? rep>> %shl-vector-reps member? ;
|
||||
M: ##shr-vector insn-available? rep>> %shr-vector-reps member? ;
|
||||
M: ##shl-vector-imm insn-available? rep>> %shl-vector-imm-reps member? ;
|
||||
M: ##shr-vector-imm insn-available? rep>> %shr-vector-imm-reps member? ;
|
||||
M: ##horizontal-shl-vector-imm insn-available? rep>> %horizontal-shl-vector-imm-reps member? ;
|
||||
M: ##horizontal-shr-vector-imm insn-available? rep>> %horizontal-shr-vector-imm-reps member? ;
|
||||
|
||||
: [vector-op-checked] ( #dup quot -- quot )
|
||||
'[ _ ndup [ @ ] { } make dup [ insn-available? ] all? ] ;
|
||||
|
||||
GENERIC#: >vector-op-cond 2 ( quot #pick #dup -- quotpair )
|
||||
M:: callable >vector-op-cond ( quot #pick #dup -- quotpair )
|
||||
#dup quot [vector-op-checked] '[ 2drop @ ]
|
||||
#dup '[ % _ nnip ]
|
||||
2array ;
|
||||
|
||||
M:: pair >vector-op-cond ( pair #pick #dup -- quotpair )
|
||||
pair first2 :> ( class quot )
|
||||
#pick class #dup quot [vector-op-checked]
|
||||
'[ 2drop _ npick _ instance? _ [ f f f ] if ]
|
||||
#dup '[ % _ nnip ]
|
||||
2array ;
|
||||
|
||||
MACRO: v-vector-op ( trials -- quot )
|
||||
[ 1 2 >vector-op-cond ] map '[ f f _ cond ] ;
|
||||
MACRO: vl-vector-op ( trials -- quot )
|
||||
[ 1 3 >vector-op-cond ] map '[ f f _ cond ] ;
|
||||
MACRO: vvl-vector-op ( trials -- quot )
|
||||
[ 1 4 >vector-op-cond ] map '[ f f _ cond ] ;
|
||||
MACRO: vv-vector-op ( trials -- quot )
|
||||
[ 1 3 >vector-op-cond ] map '[ f f _ cond ] ;
|
||||
MACRO: vv-cc-vector-op ( trials -- quot )
|
||||
[ 2 4 >vector-op-cond ] map '[ f f _ cond ] ;
|
||||
MACRO: vvvv-vector-op ( trials -- quot )
|
||||
[ 1 5 >vector-op-cond ] map '[ f f _ cond ] ;
|
||||
|
||||
! Intrinsic code emission
|
||||
|
||||
MACRO: check-elements ( quots -- quot )
|
||||
[ length '[ _ firstn ] ]
|
||||
[ '[ _ spread ] ]
|
||||
[ length 1 - \ and <repetition> [ ] like ]
|
||||
tri 3append ;
|
||||
|
||||
ERROR: bad-simd-intrinsic node ;
|
||||
|
||||
MACRO: if-literals-match ( quots -- quot )
|
||||
[ length ] [ ] [ length ] tri
|
||||
! n quots n
|
||||
'[
|
||||
! node quot
|
||||
[
|
||||
dup node-input-infos
|
||||
_ tail-slice* [ literal>> ] map
|
||||
dup _ check-elements
|
||||
] dip
|
||||
swap [
|
||||
! node literals quot
|
||||
[ _ firstn ] dip call
|
||||
drop
|
||||
] [ 2drop bad-simd-intrinsic ] if
|
||||
] ;
|
||||
|
||||
CONSTANT: unary [ ds-drop ds-pop ]
|
||||
CONSTANT: unary/param [ [ -2 <ds-loc> inc-stack ds-pop ] dip ]
|
||||
CONSTANT: binary [ ds-drop 2inputs ]
|
||||
CONSTANT: binary/param [ [ -2 <ds-loc> inc-stack 2inputs ] dip ]
|
||||
CONSTANT: quaternary
|
||||
[
|
||||
ds-drop
|
||||
d: 3 peek-loc
|
||||
d: 2 peek-loc
|
||||
d: 1 peek-loc
|
||||
d: 0 peek-loc
|
||||
-4 <ds-loc> inc-stack
|
||||
]
|
||||
|
||||
:: emit-vector-op ( trials params-quot op-quot literal-preds -- quot )
|
||||
params-quot trials op-quot literal-preds
|
||||
'[ [ _ dip _ @ ds-push ] _ if-literals-match ] ;
|
||||
|
||||
MACRO: emit-v-vector-op ( trials -- quot )
|
||||
unary [ v-vector-op ] { [ representation? ] } emit-vector-op ;
|
||||
MACRO: emit-vl-vector-op ( trials literal-pred -- quot )
|
||||
[ unary/param [ vl-vector-op ] { [ representation? ] } ] dip prefix emit-vector-op ;
|
||||
MACRO: emit-vv-vector-op ( trials -- quot )
|
||||
binary [ vv-vector-op ] { [ representation? ] } emit-vector-op ;
|
||||
MACRO: emit-vvl-vector-op ( trials literal-pred -- quot )
|
||||
[ binary/param [ vvl-vector-op ] { [ representation? ] } ] dip prefix emit-vector-op ;
|
||||
MACRO: emit-vvvv-vector-op ( trials -- quot )
|
||||
quaternary [ vvvv-vector-op ] { [ representation? ] } emit-vector-op ;
|
||||
|
||||
MACRO:: emit-vv-or-vl-vector-op ( var-trials imm-trials literal-pred -- quot )
|
||||
literal-pred imm-trials literal-pred var-trials
|
||||
'[
|
||||
dup node-input-infos 2 tail-slice* first literal>> @
|
||||
[ _ _ emit-vl-vector-op ]
|
||||
[ _ emit-vv-vector-op ] if
|
||||
] ;
|
|
@ -0,0 +1,8 @@
|
|||
USING: compiler.cfg.instructions compiler.tree help.markup help.syntax
|
||||
math.vectors ;
|
||||
IN: compiler.cfg.intrinsics.simd
|
||||
|
||||
HELP: emit-simd-v+
|
||||
{ $values { "node" node } }
|
||||
{ $description "Emits instructions for SIMD vector addition." }
|
||||
{ $see-also ##add-vector v+ } ;
|
|
@ -0,0 +1,630 @@
|
|||
! Copyright (C) 2009 Joe Groff.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs biassocs byte-arrays classes
|
||||
compiler.cfg compiler.cfg.comparisons compiler.cfg.instructions
|
||||
compiler.cfg.intrinsics.simd compiler.cfg.intrinsics.simd.backend
|
||||
compiler.cfg.stacks.local compiler.test compiler.tree
|
||||
compiler.tree.propagation.info cpu.architecture fry kernel locals make
|
||||
namespaces sequences system tools.test words ;
|
||||
IN: compiler.cfg.intrinsics.simd.tests
|
||||
|
||||
:: test-node ( rep -- node )
|
||||
T{ #call
|
||||
{ in-d { 1 2 3 4 } }
|
||||
{ out-d { 5 } }
|
||||
{ info H{
|
||||
{ 1 T{ value-info-state { class byte-array } } }
|
||||
{ 2 T{ value-info-state { class byte-array } } }
|
||||
{ 3 T{ value-info-state { class byte-array } } }
|
||||
{ 4 T{ value-info-state { class word } { literal? t } { literal rep } } }
|
||||
{ 5 T{ value-info-state { class byte-array } } }
|
||||
} }
|
||||
} ;
|
||||
|
||||
:: test-node-literal ( lit rep -- node )
|
||||
lit class-of :> lit-class
|
||||
T{ #call
|
||||
{ in-d { 1 2 3 4 } }
|
||||
{ out-d { 5 } }
|
||||
{ info H{
|
||||
{ 1 T{ value-info-state { class byte-array } } }
|
||||
{ 2 T{ value-info-state { class byte-array } } }
|
||||
{ 3 T{ value-info-state { class lit-class } { literal? t } { literal lit } } }
|
||||
{ 4 T{ value-info-state { class word } { literal? t } { literal rep } } }
|
||||
{ 5 T{ value-info-state { class byte-array } } }
|
||||
} }
|
||||
} ;
|
||||
|
||||
: test-node-nonliteral-rep ( -- node )
|
||||
T{ #call
|
||||
{ in-d { 1 2 3 4 } }
|
||||
{ out-d { 5 } }
|
||||
{ info H{
|
||||
{ 1 T{ value-info-state { class byte-array } } }
|
||||
{ 2 T{ value-info-state { class byte-array } } }
|
||||
{ 3 T{ value-info-state { class byte-array } } }
|
||||
{ 4 T{ value-info-state { class object } } }
|
||||
{ 5 T{ value-info-state { class byte-array } } }
|
||||
} }
|
||||
} ;
|
||||
|
||||
: test-compiler-env ( -- x )
|
||||
H{ } clone
|
||||
T{ basic-block } 0 0 0 0 height-state boa >>height
|
||||
\ basic-block pick set-at
|
||||
|
||||
0 0 0 0 height-state boa \ height-state pick set-at
|
||||
HS{ } clone \ local-peek-set pick set-at
|
||||
H{ } clone \ replaces pick set-at
|
||||
H{ } <biassoc> \ locs>vregs pick set-at ;
|
||||
|
||||
: make-classes ( quot -- seq )
|
||||
{ } make [ class-of ] map ; inline
|
||||
|
||||
: test-emit ( cpu rep quot -- node )
|
||||
[
|
||||
[ new \ cpu ] 2dip '[
|
||||
test-compiler-env [ _ test-node @ ] with-variables
|
||||
] with-variable
|
||||
] make-classes ; inline
|
||||
|
||||
: test-emit-literal ( cpu lit rep quot -- node )
|
||||
[
|
||||
[ new \ cpu ] 3dip '[
|
||||
test-compiler-env [ _ _ test-node-literal @ ] with-variables
|
||||
] with-variable
|
||||
] make-classes ; inline
|
||||
|
||||
: test-emit-nonliteral-rep ( cpu quot -- node )
|
||||
[
|
||||
[ new \ cpu ] dip '[
|
||||
test-compiler-env [ test-node-nonliteral-rep @ ] with-variables
|
||||
] with-variable
|
||||
] make-classes ; inline
|
||||
|
||||
CONSTANT: signed-reps
|
||||
{ char-16-rep short-8-rep int-4-rep longlong-2-rep float-4-rep double-2-rep }
|
||||
CONSTANT: all-reps
|
||||
{
|
||||
char-16-rep short-8-rep int-4-rep longlong-2-rep float-4-rep double-2-rep
|
||||
uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep
|
||||
}
|
||||
|
||||
TUPLE: scalar-cpu ;
|
||||
|
||||
TUPLE: simple-ops-cpu ;
|
||||
M: simple-ops-cpu %zero-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %fill-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %add-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %sub-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %mul-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %div-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %andn-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %and-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %or-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %xor-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %merge-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %sqrt-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %move-vector-mask-reps all-reps ;
|
||||
M: simple-ops-cpu %test-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %signed-pack-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %unsigned-pack-vector-reps all-reps ;
|
||||
M: simple-ops-cpu %gather-vector-2-reps { longlong-2-rep ulonglong-2-rep double-2-rep } ;
|
||||
M: simple-ops-cpu %gather-vector-4-reps { int-4-rep uint-4-rep float-4-rep } ;
|
||||
M: simple-ops-cpu %alien-vector-reps all-reps ;
|
||||
|
||||
! v+
|
||||
{ { ##add-vector } }
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-v+ ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! v-
|
||||
{ { ##sub-vector } }
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-v- ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vneg
|
||||
{ { ##load-reference ##sub-vector } }
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vneg ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##zero-vector ##sub-vector } }
|
||||
[ simple-ops-cpu int-4-rep [ emit-simd-vneg ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! v*
|
||||
{ { ##mul-vector } }
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-v* ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! v/
|
||||
{ { ##div-vector } }
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-v/ ] test-emit ]
|
||||
unit-test
|
||||
|
||||
TUPLE: addsub-cpu < simple-ops-cpu ;
|
||||
M: addsub-cpu %add-sub-vector-reps { int-4-rep float-4-rep } ;
|
||||
|
||||
! v+-
|
||||
{ { ##add-sub-vector } }
|
||||
[ addsub-cpu float-4-rep [ emit-simd-v+- ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##load-reference ##xor-vector ##add-vector } }
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-v+- ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##load-reference ##xor-vector ##sub-vector ##add-vector } }
|
||||
[ simple-ops-cpu int-4-rep [ emit-simd-v+- ] test-emit ]
|
||||
unit-test
|
||||
|
||||
TUPLE: saturating-cpu < simple-ops-cpu ;
|
||||
M: saturating-cpu %saturated-add-vector-reps { int-4-rep } ;
|
||||
M: saturating-cpu %saturated-sub-vector-reps { int-4-rep } ;
|
||||
M: saturating-cpu %saturated-mul-vector-reps { int-4-rep } ;
|
||||
|
||||
! vs+
|
||||
{ { ##add-vector } }
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vs+ ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##add-vector } }
|
||||
[ saturating-cpu float-4-rep [ emit-simd-vs+ ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##saturated-add-vector } }
|
||||
[ saturating-cpu int-4-rep [ emit-simd-vs+ ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vs-
|
||||
{ { ##sub-vector } }
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vs- ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##sub-vector } }
|
||||
[ saturating-cpu float-4-rep [ emit-simd-vs- ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##saturated-sub-vector } }
|
||||
[ saturating-cpu int-4-rep [ emit-simd-vs- ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vs*
|
||||
{ { ##mul-vector } }
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vs* ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##mul-vector } }
|
||||
[ saturating-cpu float-4-rep [ emit-simd-vs* ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##saturated-mul-vector } }
|
||||
[ saturating-cpu int-4-rep [ emit-simd-vs* ] test-emit ]
|
||||
unit-test
|
||||
|
||||
TUPLE: minmax-cpu < simple-ops-cpu ;
|
||||
M: minmax-cpu %min-vector-reps signed-reps ;
|
||||
M: minmax-cpu %max-vector-reps signed-reps ;
|
||||
M: minmax-cpu %compare-vector-reps { cc= cc/= } member? [ signed-reps ] [ { } ] if ;
|
||||
M: minmax-cpu %compare-vector-ccs nip f 2array 1array f ;
|
||||
|
||||
TUPLE: compare-cpu < simple-ops-cpu ;
|
||||
M: compare-cpu %compare-vector-reps drop signed-reps ;
|
||||
M: compare-cpu %compare-vector-ccs nip f 2array 1array f ;
|
||||
|
||||
! vmin
|
||||
{ { ##min-vector } }
|
||||
[ minmax-cpu float-4-rep [ emit-simd-vmin ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##compare-vector ##and-vector ##andn-vector ##or-vector } }
|
||||
[ compare-cpu float-4-rep [ emit-simd-vmin ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vmax
|
||||
{ { ##max-vector } }
|
||||
[ minmax-cpu float-4-rep [ emit-simd-vmax ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##compare-vector ##and-vector ##andn-vector ##or-vector } }
|
||||
[ compare-cpu float-4-rep [ emit-simd-vmax ] test-emit ]
|
||||
unit-test
|
||||
|
||||
TUPLE: dot-cpu < simple-ops-cpu ;
|
||||
M: dot-cpu %dot-vector-reps { float-4-rep } ;
|
||||
|
||||
TUPLE: horizontal-cpu < simple-ops-cpu ;
|
||||
M: horizontal-cpu %horizontal-add-vector-reps signed-reps ;
|
||||
M: horizontal-cpu %unpack-vector-head-reps signed-reps ;
|
||||
M: horizontal-cpu %unpack-vector-tail-reps signed-reps ;
|
||||
|
||||
! v.
|
||||
{ { ##dot-vector } }
|
||||
[ dot-cpu float-4-rep [ emit-simd-v. ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##mul-vector ##horizontal-add-vector ##horizontal-add-vector ##vector>scalar } }
|
||||
[ horizontal-cpu float-4-rep [ emit-simd-v. ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ {
|
||||
##mul-vector
|
||||
##merge-vector-head ##merge-vector-tail ##add-vector
|
||||
##merge-vector-head ##merge-vector-tail ##add-vector
|
||||
##vector>scalar
|
||||
} }
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-v. ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vsqrt
|
||||
{ { ##sqrt-vector } }
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vsqrt ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! sum
|
||||
{ { ##horizontal-add-vector ##vector>scalar } }
|
||||
[ horizontal-cpu double-2-rep [ emit-simd-sum ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##horizontal-add-vector ##horizontal-add-vector ##vector>scalar } }
|
||||
[ horizontal-cpu float-4-rep [ emit-simd-sum ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ {
|
||||
##unpack-vector-head ##unpack-vector-tail ##add-vector
|
||||
##horizontal-add-vector ##horizontal-add-vector
|
||||
##vector>scalar
|
||||
} }
|
||||
[ horizontal-cpu short-8-rep [ emit-simd-sum ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ {
|
||||
##unpack-vector-head ##unpack-vector-tail ##add-vector
|
||||
##horizontal-add-vector ##horizontal-add-vector ##horizontal-add-vector
|
||||
##vector>scalar
|
||||
} }
|
||||
[ horizontal-cpu char-16-rep [ emit-simd-sum ] test-emit ]
|
||||
unit-test
|
||||
|
||||
TUPLE: abs-cpu < simple-ops-cpu ;
|
||||
M: abs-cpu %abs-vector-reps signed-reps ;
|
||||
|
||||
! vabs
|
||||
{ { } }
|
||||
[ simple-ops-cpu uint-4-rep [ emit-simd-vabs ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##abs-vector } }
|
||||
[ abs-cpu float-4-rep [ emit-simd-vabs ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##load-reference ##andn-vector } }
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vabs ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##zero-vector ##sub-vector ##compare-vector ##and-vector ##andn-vector ##or-vector } }
|
||||
[ compare-cpu int-4-rep [ emit-simd-vabs ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vand
|
||||
{ { ##and-vector } }
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vand ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vandn
|
||||
{ { ##andn-vector } }
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vandn ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vor
|
||||
{ { ##or-vector } }
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vor ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vxor
|
||||
{ { ##xor-vector } }
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vxor ] test-emit ]
|
||||
unit-test
|
||||
|
||||
TUPLE: not-cpu < simple-ops-cpu ;
|
||||
M: not-cpu %not-vector-reps signed-reps ;
|
||||
|
||||
! vnot
|
||||
{ { ##not-vector } }
|
||||
[ not-cpu float-4-rep [ emit-simd-vnot ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##fill-vector ##xor-vector } }
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vnot ] test-emit ]
|
||||
unit-test
|
||||
|
||||
TUPLE: shift-cpu < simple-ops-cpu ;
|
||||
M: shift-cpu %shl-vector-reps signed-reps ;
|
||||
M: shift-cpu %shr-vector-reps signed-reps ;
|
||||
|
||||
TUPLE: shift-imm-cpu < simple-ops-cpu ;
|
||||
M: shift-imm-cpu %shl-vector-imm-reps signed-reps ;
|
||||
M: shift-imm-cpu %shr-vector-imm-reps signed-reps ;
|
||||
|
||||
TUPLE: horizontal-shift-cpu < simple-ops-cpu ;
|
||||
M: horizontal-shift-cpu %horizontal-shl-vector-imm-reps signed-reps ;
|
||||
M: horizontal-shift-cpu %horizontal-shr-vector-imm-reps signed-reps ;
|
||||
|
||||
! vlshift
|
||||
{ { ##shl-vector-imm } }
|
||||
[ shift-imm-cpu 2 int-4-rep [ emit-simd-vlshift ] test-emit-literal ]
|
||||
unit-test
|
||||
|
||||
{ { ##shl-vector } }
|
||||
[ shift-cpu int-4-rep [ emit-simd-vlshift ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vrshift
|
||||
{ { ##shr-vector-imm } }
|
||||
[ shift-imm-cpu 2 int-4-rep [ emit-simd-vrshift ] test-emit-literal ]
|
||||
unit-test
|
||||
|
||||
{ { ##shr-vector } }
|
||||
[ shift-cpu int-4-rep [ emit-simd-vrshift ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! hlshift
|
||||
{ { ##horizontal-shl-vector-imm } }
|
||||
[ horizontal-shift-cpu 2 int-4-rep [ emit-simd-hlshift ] test-emit-literal ]
|
||||
unit-test
|
||||
|
||||
! hrshift
|
||||
{ { ##horizontal-shr-vector-imm } }
|
||||
[ horizontal-shift-cpu 2 int-4-rep [ emit-simd-hrshift ] test-emit-literal ]
|
||||
unit-test
|
||||
|
||||
TUPLE: shuffle-imm-cpu < simple-ops-cpu ;
|
||||
M: shuffle-imm-cpu %shuffle-vector-imm-reps signed-reps ;
|
||||
|
||||
TUPLE: shuffle-cpu < simple-ops-cpu ;
|
||||
M: shuffle-cpu %shuffle-vector-reps signed-reps ;
|
||||
|
||||
! vshuffle-elements
|
||||
{ { ##load-reference ##shuffle-vector } }
|
||||
[ shuffle-cpu { 0 1 2 3 } int-4-rep [ emit-simd-vshuffle-elements ] test-emit-literal ]
|
||||
unit-test
|
||||
|
||||
{ { ##shuffle-vector-imm } }
|
||||
[ shuffle-imm-cpu { 0 1 2 3 } int-4-rep [ emit-simd-vshuffle-elements ] test-emit-literal ]
|
||||
unit-test
|
||||
|
||||
! vshuffle-bytes
|
||||
{ { ##shuffle-vector } }
|
||||
[ shuffle-cpu int-4-rep [ emit-simd-vshuffle-bytes ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vmerge-head
|
||||
{ { ##merge-vector-head } }
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vmerge-head ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vmerge-tail
|
||||
{ { ##merge-vector-tail } }
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-vmerge-tail ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! v<= etc.
|
||||
{ { ##compare-vector } }
|
||||
[ compare-cpu int-4-rep [ emit-simd-v<= ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##min-vector ##compare-vector } }
|
||||
[ minmax-cpu int-4-rep [ emit-simd-v<= ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##load-reference ##xor-vector ##xor-vector ##compare-vector } }
|
||||
[ compare-cpu uint-4-rep [ emit-simd-v<= ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vany? etc.
|
||||
{ { ##test-vector } }
|
||||
[ simple-ops-cpu int-4-rep [ emit-simd-vany? ] test-emit ]
|
||||
unit-test
|
||||
|
||||
TUPLE: convert-cpu < simple-ops-cpu ;
|
||||
M: convert-cpu %integer>float-vector-reps { int-4-rep } ;
|
||||
M: convert-cpu %float>integer-vector-reps { float-4-rep } ;
|
||||
|
||||
! v>float
|
||||
{ { } }
|
||||
[ convert-cpu float-4-rep [ emit-simd-v>float ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##integer>float-vector } }
|
||||
[ convert-cpu int-4-rep [ emit-simd-v>float ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! v>integer
|
||||
{ { } }
|
||||
[ convert-cpu int-4-rep [ emit-simd-v>integer ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##float>integer-vector } }
|
||||
[ convert-cpu float-4-rep [ emit-simd-v>integer ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vpack-signed
|
||||
{ { ##signed-pack-vector } }
|
||||
[ simple-ops-cpu int-4-rep [ emit-simd-vpack-signed ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vpack-unsigned
|
||||
{ { ##unsigned-pack-vector } }
|
||||
[ simple-ops-cpu int-4-rep [ emit-simd-vpack-unsigned ] test-emit ]
|
||||
unit-test
|
||||
|
||||
TUPLE: unpack-head-cpu < simple-ops-cpu ;
|
||||
M: unpack-head-cpu %unpack-vector-head-reps all-reps ;
|
||||
TUPLE: unpack-cpu < unpack-head-cpu ;
|
||||
M: unpack-cpu %unpack-vector-tail-reps all-reps ;
|
||||
|
||||
! vunpack-head
|
||||
{ { ##unpack-vector-head } }
|
||||
[ unpack-head-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##zero-vector ##merge-vector-head } }
|
||||
[ simple-ops-cpu uint-4-rep [ emit-simd-vunpack-head ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##merge-vector-head ##shr-vector-imm } }
|
||||
[ shift-imm-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##zero-vector ##compare-vector ##merge-vector-head } }
|
||||
[ compare-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! vunpack-tail
|
||||
{ { ##unpack-vector-tail } }
|
||||
[ unpack-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##tail>head-vector ##unpack-vector-head } }
|
||||
[ unpack-head-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##zero-vector ##merge-vector-tail } }
|
||||
[ simple-ops-cpu uint-4-rep [ emit-simd-vunpack-tail ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##merge-vector-tail ##shr-vector-imm } }
|
||||
[ shift-imm-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
|
||||
unit-test
|
||||
|
||||
{ { ##zero-vector ##compare-vector ##merge-vector-tail } }
|
||||
[ compare-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! with
|
||||
{ { ##scalar>vector ##shuffle-vector-imm } }
|
||||
[ shuffle-imm-cpu float-4-rep [ emit-simd-with ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! gather-2
|
||||
{ { ##gather-vector-2 } }
|
||||
[ simple-ops-cpu double-2-rep [ emit-simd-gather-2 ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! gather-4
|
||||
{ { ##gather-vector-4 } }
|
||||
[ simple-ops-cpu float-4-rep [ emit-simd-gather-4 ] test-emit ]
|
||||
unit-test
|
||||
|
||||
! select
|
||||
{ { ##shuffle-vector-imm ##vector>scalar } }
|
||||
[ shuffle-imm-cpu 1 float-4-rep [ emit-simd-select ] test-emit-literal ]
|
||||
unit-test
|
||||
|
||||
! ^load-neg-zero-vector
|
||||
{
|
||||
V{
|
||||
T{ ##load-reference
|
||||
{ dst 1 }
|
||||
{ obj B{ 0 0 0 128 0 0 0 128 0 0 0 128 0 0 0 128 } }
|
||||
}
|
||||
T{ ##load-reference
|
||||
{ dst 2 }
|
||||
{ obj B{ 0 0 0 0 0 0 0 128 0 0 0 0 0 0 0 128 } }
|
||||
}
|
||||
}
|
||||
} [
|
||||
[
|
||||
{ float-4-rep double-2-rep } [ ^load-neg-zero-vector drop ] each
|
||||
] V{ } make
|
||||
] cfg-unit-test
|
||||
|
||||
! ^load-add-sub-vector
|
||||
{
|
||||
V{
|
||||
T{ ##load-reference
|
||||
{ dst 1 }
|
||||
{ obj B{ 0 0 0 128 0 0 0 0 0 0 0 128 0 0 0 0 } }
|
||||
}
|
||||
T{ ##load-reference
|
||||
{ dst 2 }
|
||||
{ obj B{ 0 0 0 0 0 0 0 128 0 0 0 0 0 0 0 0 } }
|
||||
}
|
||||
T{ ##load-reference
|
||||
{ dst 3 }
|
||||
{ obj
|
||||
B{ 255 0 255 0 255 0 255 0 255 0 255 0 255 0 255 0 }
|
||||
}
|
||||
}
|
||||
T{ ##load-reference
|
||||
{ dst 4 }
|
||||
{ obj
|
||||
B{ 255 255 0 0 255 255 0 0 255 255 0 0 255 255 0 0 }
|
||||
}
|
||||
}
|
||||
T{ ##load-reference
|
||||
{ dst 5 }
|
||||
{ obj
|
||||
B{ 255 255 255 255 0 0 0 0 255 255 255 255 0 0 0 0 }
|
||||
}
|
||||
}
|
||||
T{ ##load-reference
|
||||
{ dst 6 }
|
||||
{ obj
|
||||
B{ 255 255 255 255 255 255 255 255 0 0 0 0 0 0 0 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
} [
|
||||
[
|
||||
{
|
||||
float-4-rep
|
||||
double-2-rep
|
||||
char-16-rep
|
||||
short-8-rep
|
||||
int-4-rep
|
||||
longlong-2-rep
|
||||
} [ ^load-add-sub-vector drop ] each
|
||||
] V{ } make
|
||||
] cfg-unit-test
|
||||
|
||||
! ^load-half-vector
|
||||
{
|
||||
V{
|
||||
T{ ##load-reference
|
||||
{ dst 1 }
|
||||
{ obj B{ 0 0 0 63 0 0 0 63 0 0 0 63 0 0 0 63 } }
|
||||
}
|
||||
T{ ##load-reference
|
||||
{ dst 2 }
|
||||
{ obj B{ 0 0 0 0 0 0 224 63 0 0 0 0 0 0 224 63 } }
|
||||
}
|
||||
}
|
||||
} [
|
||||
[
|
||||
{ float-4-rep double-2-rep } [ ^load-half-vector drop ] each
|
||||
] V{ } make
|
||||
] cfg-unit-test
|
||||
|
||||
! sign-bit-mask
|
||||
{
|
||||
{
|
||||
B{ 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 }
|
||||
B{ 0 128 0 128 0 128 0 128 0 128 0 128 0 128 0 128 }
|
||||
B{ 0 0 0 128 0 0 0 128 0 0 0 128 0 0 0 128 }
|
||||
B{ 0 0 0 0 0 0 0 128 0 0 0 0 0 0 0 128 }
|
||||
}
|
||||
} [
|
||||
{ char-16-rep short-8-rep int-4-rep longlong-2-rep } [ sign-bit-mask ] map
|
||||
] unit-test
|
||||
|
||||
|
||||
! test with nonliteral/invalid reps
|
||||
[ simple-ops-cpu [ emit-simd-v+ ] test-emit-nonliteral-rep ]
|
||||
[ bad-simd-intrinsic? ] must-fail-with
|
||||
|
||||
[ simple-ops-cpu f [ emit-simd-v+ ] test-emit ]
|
||||
[ bad-simd-intrinsic? ] must-fail-with
|
||||
|
||||
[ simple-ops-cpu 3 [ emit-simd-v+ ] test-emit ]
|
||||
[ bad-simd-intrinsic? ] must-fail-with
|
|
@ -0,0 +1,719 @@
|
|||
! Copyright (C) 2009 Slava Pestov, Joe Groff.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors alien.c-types arrays assocs byte-arrays combinators
|
||||
combinators.short-circuit compiler.cfg.comparisons
|
||||
compiler.cfg.hats compiler.cfg.instructions
|
||||
compiler.cfg.intrinsics compiler.cfg.intrinsics.alien
|
||||
compiler.cfg.intrinsics.simd.backend compiler.cfg.stacks
|
||||
cpu.architecture fry kernel layouts locals math math.vectors
|
||||
math.vectors.simd.intrinsics sequences specialized-arrays ;
|
||||
FROM: alien.c-types => heap-size char short int longlong float double ;
|
||||
SPECIALIZED-ARRAYS: char uchar short ushort int uint longlong ulonglong float double ;
|
||||
IN: compiler.cfg.intrinsics.simd
|
||||
|
||||
! compound vector ops
|
||||
|
||||
CONSTANT: rep>bit-mask {
|
||||
{
|
||||
char-16-rep uchar-array{
|
||||
0x80 0x80 0x80 0x80
|
||||
0x80 0x80 0x80 0x80
|
||||
0x80 0x80 0x80 0x80
|
||||
0x80 0x80 0x80 0x80
|
||||
}
|
||||
}
|
||||
{
|
||||
short-8-rep ushort-array{
|
||||
0x8000 0x8000 0x8000 0x8000
|
||||
0x8000 0x8000 0x8000 0x8000
|
||||
}
|
||||
}
|
||||
{
|
||||
int-4-rep uint-array{
|
||||
0x8000,0000 0x8000,0000
|
||||
0x8000,0000 0x8000,0000
|
||||
}
|
||||
}
|
||||
{
|
||||
longlong-2-rep ulonglong-array{
|
||||
0x8000,0000,0000,0000
|
||||
0x8000,0000,0000,0000
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
: sign-bit-mask ( rep -- byte-array )
|
||||
signed-rep rep>bit-mask at underlying>> ;
|
||||
|
||||
CONSTANT: rep>neg-zero {
|
||||
{ float-4-rep float-array{ -0.0 -0.0 -0.0 -0.0 } }
|
||||
{ double-2-rep double-array{ -0.0 -0.0 } }
|
||||
}
|
||||
|
||||
: ^load-neg-zero-vector ( rep -- dst )
|
||||
rep>neg-zero at underlying>> ^^load-literal ;
|
||||
|
||||
CONSTANT: rep>add-sub {
|
||||
{ float-4-rep float-array{ -0.0 0.0 -0.0 0.0 } }
|
||||
{ double-2-rep double-array{ -0.0 0.0 } }
|
||||
{ char-16-rep char-array{ -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 } }
|
||||
{ short-8-rep short-array{ -1 0 -1 0 -1 0 -1 0 } }
|
||||
{ int-4-rep int-array{ -1 0 -1 0 } }
|
||||
{ longlong-2-rep longlong-array{ -1 0 } }
|
||||
}
|
||||
|
||||
: ^load-add-sub-vector ( rep -- dst )
|
||||
signed-rep rep>add-sub at underlying>> ^^load-literal ;
|
||||
|
||||
CONSTANT: rep>half {
|
||||
{ float-4-rep float-array{ 0.5 0.5 0.5 0.5 } }
|
||||
{ double-2-rep double-array{ 0.5 0.5 } }
|
||||
}
|
||||
|
||||
: ^load-half-vector ( rep -- dst )
|
||||
rep>half at underlying>> ^^load-literal ;
|
||||
|
||||
: >variable-shuffle ( shuffle rep -- shuffle' )
|
||||
rep-component-type heap-size
|
||||
[ dup <repetition> >byte-array ]
|
||||
[ <iota> >byte-array ] bi
|
||||
'[ _ n*v _ v+ ] map concat ;
|
||||
|
||||
: ^load-immediate-shuffle ( shuffle rep -- dst )
|
||||
>variable-shuffle ^^load-literal ;
|
||||
|
||||
:: ^blend-vector ( mask true false rep -- dst )
|
||||
true mask rep ^^and-vector
|
||||
mask false rep ^^andn-vector
|
||||
rep ^^or-vector ;
|
||||
|
||||
: ^not-vector ( src rep -- dst )
|
||||
{
|
||||
[ ^^not-vector ]
|
||||
[ [ ^^fill-vector ] [ ^^xor-vector ] bi ]
|
||||
} v-vector-op ;
|
||||
|
||||
:: ^swap-compare-vector ( src1 src2 rep {cc,swap} -- dst )
|
||||
{cc,swap} first2 :> ( cc swap? )
|
||||
swap?
|
||||
[ src2 src1 rep cc ^^compare-vector ]
|
||||
[ src1 src2 rep cc ^^compare-vector ] if ;
|
||||
|
||||
:: ^(compare-vector) ( src1 src2 rep orig-cc -- dst )
|
||||
rep orig-cc %compare-vector-ccs :> ( ccs not? )
|
||||
|
||||
ccs empty?
|
||||
[ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ]
|
||||
[
|
||||
ccs unclip :> ( rest-ccs first-cc )
|
||||
src1 src2 rep first-cc ^swap-compare-vector :> first-dst
|
||||
|
||||
rest-ccs first-dst
|
||||
[ [ src1 src2 rep ] dip ^swap-compare-vector rep ^^or-vector ]
|
||||
reduce
|
||||
|
||||
not? [ rep ^not-vector ] when
|
||||
] if ;
|
||||
|
||||
:: ^minmax-compare-vector ( src1 src2 rep cc -- dst )
|
||||
cc order-cc {
|
||||
{ cc< [ src1 src2 rep ^^max-vector src1 rep cc/= ^(compare-vector) ] }
|
||||
{ cc<= [ src1 src2 rep ^^min-vector src1 rep cc= ^(compare-vector) ] }
|
||||
{ cc> [ src1 src2 rep ^^min-vector src1 rep cc/= ^(compare-vector) ] }
|
||||
{ cc>= [ src1 src2 rep ^^max-vector src1 rep cc= ^(compare-vector) ] }
|
||||
} case ;
|
||||
|
||||
: ^compare-vector ( src1 src2 rep cc -- dst )
|
||||
{
|
||||
[ ^(compare-vector) ]
|
||||
[ ^minmax-compare-vector ]
|
||||
{ unsigned-int-vector-rep |[ src1 src2 rep cc |
|
||||
rep sign-bit-mask ^^load-literal :> sign-bits
|
||||
src1 sign-bits rep ^^xor-vector
|
||||
src2 sign-bits rep ^^xor-vector
|
||||
rep signed-rep cc ^(compare-vector)
|
||||
] }
|
||||
} vv-cc-vector-op ;
|
||||
|
||||
: ^unpack-vector-head ( src rep -- dst )
|
||||
{
|
||||
[ ^^unpack-vector-head ]
|
||||
{ unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-head ] bi ] }
|
||||
{ signed-int-vector-rep |[ src rep |
|
||||
src src rep ^^merge-vector-head :> merged
|
||||
rep rep-component-type heap-size 8 * :> bits
|
||||
merged bits rep widen-vector-rep ^^shr-vector-imm
|
||||
] }
|
||||
{ signed-int-vector-rep |[ src rep |
|
||||
rep ^^zero-vector :> zero
|
||||
zero src rep cc> ^compare-vector :> sign
|
||||
src sign rep ^^merge-vector-head
|
||||
] }
|
||||
} v-vector-op ;
|
||||
|
||||
: ^unpack-vector-tail ( src rep -- dst )
|
||||
{
|
||||
[ ^^unpack-vector-tail ]
|
||||
[ [ ^^tail>head-vector ] [ ^^unpack-vector-head ] bi ]
|
||||
{ unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-tail ] bi ] }
|
||||
{ signed-int-vector-rep |[ src rep |
|
||||
src src rep ^^merge-vector-tail :> merged
|
||||
rep rep-component-type heap-size 8 * :> bits
|
||||
merged bits rep widen-vector-rep ^^shr-vector-imm
|
||||
] }
|
||||
{ signed-int-vector-rep |[ src rep |
|
||||
rep ^^zero-vector :> zero
|
||||
zero src rep cc> ^compare-vector :> sign
|
||||
src sign rep ^^merge-vector-tail
|
||||
] }
|
||||
} v-vector-op ;
|
||||
|
||||
PREDICATE: fixnum-vector-rep < int-vector-rep
|
||||
rep-component-type heap-size cell < ;
|
||||
|
||||
: ^(sum-vector-2) ( src rep -- dst )
|
||||
{
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
|[ src rep |
|
||||
src src rep ^^merge-vector-head :> head
|
||||
src src rep ^^merge-vector-tail :> tail
|
||||
head tail rep ^^add-vector
|
||||
]
|
||||
} v-vector-op ;
|
||||
|
||||
: ^(sum-vector-4) ( src rep -- dst )
|
||||
{
|
||||
[
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
[ dupd ^^horizontal-add-vector ] bi
|
||||
]
|
||||
|[ src rep |
|
||||
src src rep ^^merge-vector-head :> head
|
||||
src src rep ^^merge-vector-tail :> tail
|
||||
head tail rep ^^add-vector :> src'
|
||||
|
||||
rep widen-vector-rep :> rep'
|
||||
src' src' rep' ^^merge-vector-head :> head'
|
||||
src' src' rep' ^^merge-vector-tail :> tail'
|
||||
head' tail' rep ^^add-vector
|
||||
]
|
||||
} v-vector-op ;
|
||||
|
||||
: ^(sum-vector-8) ( src rep -- dst )
|
||||
{
|
||||
[
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
[ dupd ^^horizontal-add-vector ] tri
|
||||
]
|
||||
|[ src rep |
|
||||
src src rep ^^merge-vector-head :> head
|
||||
src src rep ^^merge-vector-tail :> tail
|
||||
head tail rep ^^add-vector :> src'
|
||||
|
||||
rep widen-vector-rep :> rep'
|
||||
src' src' rep' ^^merge-vector-head :> head'
|
||||
src' src' rep' ^^merge-vector-tail :> tail'
|
||||
head' tail' rep ^^add-vector :> src''
|
||||
|
||||
rep' widen-vector-rep :> rep''
|
||||
src'' src'' rep'' ^^merge-vector-head :> head''
|
||||
src'' src'' rep'' ^^merge-vector-tail :> tail''
|
||||
head'' tail'' rep ^^add-vector
|
||||
]
|
||||
} v-vector-op ;
|
||||
|
||||
: ^(sum-vector-16) ( src rep -- dst )
|
||||
{
|
||||
[
|
||||
{
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
[ dupd ^^horizontal-add-vector ]
|
||||
} cleave
|
||||
]
|
||||
|[ src rep |
|
||||
src src rep ^^merge-vector-head :> head
|
||||
src src rep ^^merge-vector-tail :> tail
|
||||
head tail rep ^^add-vector :> src'
|
||||
|
||||
rep widen-vector-rep :> rep'
|
||||
src' src' rep' ^^merge-vector-head :> head'
|
||||
src' src' rep' ^^merge-vector-tail :> tail'
|
||||
head' tail' rep ^^add-vector :> src''
|
||||
|
||||
rep' widen-vector-rep :> rep''
|
||||
src'' src'' rep'' ^^merge-vector-head :> head''
|
||||
src'' src'' rep'' ^^merge-vector-tail :> tail''
|
||||
head'' tail'' rep ^^add-vector :> src'''
|
||||
|
||||
rep'' widen-vector-rep :> rep'''
|
||||
src''' src''' rep''' ^^merge-vector-head :> head'''
|
||||
src''' src''' rep''' ^^merge-vector-tail :> tail'''
|
||||
head''' tail''' rep ^^add-vector
|
||||
]
|
||||
} v-vector-op ;
|
||||
|
||||
: ^(sum-vector) ( src rep -- dst )
|
||||
[
|
||||
dup rep-length {
|
||||
{ 2 [ ^(sum-vector-2) ] }
|
||||
{ 4 [ ^(sum-vector-4) ] }
|
||||
{ 8 [ ^(sum-vector-8) ] }
|
||||
{ 16 [ ^(sum-vector-16) ] }
|
||||
} case
|
||||
] [ ^^vector>scalar ] bi ;
|
||||
|
||||
: ^sum-vector ( src rep -- dst )
|
||||
{
|
||||
{ float-vector-rep [ ^(sum-vector) ] }
|
||||
{ fixnum-vector-rep |[ src rep |
|
||||
src rep ^unpack-vector-head :> head
|
||||
src rep ^unpack-vector-tail :> tail
|
||||
rep widen-vector-rep :> wide-rep
|
||||
head tail wide-rep ^^add-vector wide-rep
|
||||
^(sum-vector)
|
||||
] }
|
||||
} v-vector-op ;
|
||||
|
||||
: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
|
||||
|
||||
: ^shuffle-vector-imm ( src1 shuffle rep -- dst )
|
||||
[ rep-length 0 pad-tail ] keep {
|
||||
[ ^^shuffle-vector-imm ]
|
||||
[ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] bi ]
|
||||
} vl-vector-op ;
|
||||
|
||||
: ^shuffle-2-vectors-imm ( src1 src2 shuffle rep -- dst )
|
||||
[ rep-length 0 pad-tail ] keep {
|
||||
{ double-2-rep |[ src1 src2 shuffle rep |
|
||||
shuffle first2 [ 4 mod ] bi@ :> ( i j )
|
||||
{
|
||||
{ [ i j [ 2 < ] both? ] [
|
||||
src1 shuffle rep ^shuffle-vector-imm
|
||||
] }
|
||||
{ [ i j [ 2 >= ] both? ] [
|
||||
src2 shuffle [ 2 - ] map rep ^shuffle-vector-imm
|
||||
] }
|
||||
{ [ i 2 < ] [
|
||||
src1 src2 i j 2 - 2array rep ^^shuffle-vector-halves-imm
|
||||
] }
|
||||
! [ j 2 < ]
|
||||
[ src2 src1 i 2 - j 2array rep ^^shuffle-vector-halves-imm ]
|
||||
} cond
|
||||
] }
|
||||
} vvl-vector-op ;
|
||||
|
||||
: ^broadcast-vector ( src n rep -- dst )
|
||||
[ rep-length swap <array> ] keep
|
||||
^shuffle-vector-imm ;
|
||||
|
||||
: ^with-vector ( src rep -- dst )
|
||||
[ ^^scalar>vector ] keep [ 0 ] dip ^broadcast-vector ;
|
||||
|
||||
: ^select-vector ( src n rep -- dst )
|
||||
{
|
||||
[ ^^select-vector ]
|
||||
[ [ ^broadcast-vector ] keep ^^vector>scalar ]
|
||||
} vl-vector-op ;
|
||||
|
||||
! intrinsic emitters
|
||||
|
||||
: emit-simd-v+ ( node -- )
|
||||
{
|
||||
[ ^^add-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-v- ( node -- )
|
||||
{
|
||||
[ ^^sub-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vneg ( node -- )
|
||||
{
|
||||
{ float-vector-rep [ [ ^load-neg-zero-vector swap ] [ ^^sub-vector ] bi ] }
|
||||
{ int-vector-rep [ [ ^^zero-vector swap ] [ ^^sub-vector ] bi ] }
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-v+- ( node -- )
|
||||
{
|
||||
[ ^^add-sub-vector ]
|
||||
{ float-vector-rep |[ src1 src2 rep |
|
||||
rep ^load-add-sub-vector :> signs
|
||||
src2 signs rep ^^xor-vector :> src2'
|
||||
src1 src2' rep ^^add-vector
|
||||
] }
|
||||
{ int-vector-rep |[ src1 src2 rep |
|
||||
rep ^load-add-sub-vector :> signs
|
||||
src2 signs rep ^^xor-vector :> src2'
|
||||
src2' signs rep ^^sub-vector :> src2''
|
||||
src1 src2'' rep ^^add-vector
|
||||
] }
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vs+ ( node -- )
|
||||
{
|
||||
{ float-vector-rep [ ^^add-vector ] }
|
||||
{ int-vector-rep [ ^^saturated-add-vector ] }
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vs- ( node -- )
|
||||
{
|
||||
{ float-vector-rep [ ^^sub-vector ] }
|
||||
{ int-vector-rep [ ^^saturated-sub-vector ] }
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vs* ( node -- )
|
||||
{
|
||||
{ float-vector-rep [ ^^mul-vector ] }
|
||||
{ int-vector-rep [ ^^saturated-mul-vector ] }
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-v* ( node -- )
|
||||
{
|
||||
[ ^^mul-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-v*high ( node -- )
|
||||
{
|
||||
[ ^^mul-high-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-v*hs+ ( node -- )
|
||||
{
|
||||
[ ^^mul-horizontal-add-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-v/ ( node -- )
|
||||
{
|
||||
[ ^^div-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vmin ( node -- )
|
||||
{
|
||||
[ ^^min-vector ]
|
||||
[
|
||||
[ cc< ^compare-vector ]
|
||||
[ ^blend-vector ] 3bi
|
||||
]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vmax ( node -- )
|
||||
{
|
||||
[ ^^max-vector ]
|
||||
[
|
||||
[ cc> ^compare-vector ]
|
||||
[ ^blend-vector ] 3bi
|
||||
]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vavg ( node -- )
|
||||
{
|
||||
[ ^^avg-vector ]
|
||||
{ float-vector-rep |[ src1 src2 rep |
|
||||
src1 src2 rep ^^add-vector
|
||||
rep ^load-half-vector rep ^^mul-vector
|
||||
] }
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-v. ( node -- )
|
||||
{
|
||||
[ ^^dot-vector ]
|
||||
{ float-vector-rep [ [ ^^mul-vector ] [ ^sum-vector ] bi ] }
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vsad ( node -- )
|
||||
{
|
||||
[
|
||||
[ ^^sad-vector dup { 2 3 0 1 } int-4-rep ^^shuffle-vector-imm int-4-rep ^^add-vector ]
|
||||
[ widen-vector-rep ^^vector>scalar ] bi
|
||||
]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vsqrt ( node -- )
|
||||
{
|
||||
[ ^^sqrt-vector ]
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-sum ( node -- )
|
||||
{
|
||||
[ ^sum-vector ]
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-vabs ( node -- )
|
||||
{
|
||||
{ unsigned-int-vector-rep [ drop ] }
|
||||
[ ^^abs-vector ]
|
||||
{ float-vector-rep [ [ ^load-neg-zero-vector ] [ swapd ^^andn-vector ] bi ] }
|
||||
{ int-vector-rep |[ src rep |
|
||||
rep ^^zero-vector :> zero
|
||||
zero src rep ^^sub-vector :> -src
|
||||
zero src rep cc> ^compare-vector :> sign
|
||||
sign -src src rep ^blend-vector
|
||||
] }
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-vand ( node -- )
|
||||
{
|
||||
[ ^^and-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vandn ( node -- )
|
||||
{
|
||||
[ ^^andn-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vor ( node -- )
|
||||
{
|
||||
[ ^^or-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vxor ( node -- )
|
||||
{
|
||||
[ ^^xor-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vnot ( node -- )
|
||||
{
|
||||
[ ^not-vector ]
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-vlshift ( node -- )
|
||||
{
|
||||
[ ^^shl-vector ]
|
||||
} {
|
||||
[ ^^shl-vector-imm ]
|
||||
} [ integer? ] emit-vv-or-vl-vector-op ;
|
||||
|
||||
: emit-simd-vrshift ( node -- )
|
||||
{
|
||||
[ ^^shr-vector ]
|
||||
} {
|
||||
[ ^^shr-vector-imm ]
|
||||
} [ integer? ] emit-vv-or-vl-vector-op ;
|
||||
|
||||
: emit-simd-hlshift ( node -- )
|
||||
{
|
||||
[ ^^horizontal-shl-vector-imm ]
|
||||
} [ integer? ] emit-vl-vector-op ;
|
||||
|
||||
: emit-simd-hrshift ( node -- )
|
||||
{
|
||||
[ ^^horizontal-shr-vector-imm ]
|
||||
} [ integer? ] emit-vl-vector-op ;
|
||||
|
||||
: emit-simd-vshuffle-elements ( node -- )
|
||||
{
|
||||
[ ^shuffle-vector-imm ]
|
||||
} [ shuffle? ] emit-vl-vector-op ;
|
||||
|
||||
: emit-simd-vshuffle2-elements ( node -- )
|
||||
{
|
||||
[ ^shuffle-2-vectors-imm ]
|
||||
} [ shuffle? ] emit-vvl-vector-op ;
|
||||
|
||||
: emit-simd-vshuffle-bytes ( node -- )
|
||||
{
|
||||
[ ^^shuffle-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vmerge-head ( node -- )
|
||||
{
|
||||
[ ^^merge-vector-head ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vmerge-tail ( node -- )
|
||||
{
|
||||
[ ^^merge-vector-tail ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-v<= ( node -- )
|
||||
{
|
||||
[ cc<= ^compare-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
: emit-simd-v< ( node -- )
|
||||
{
|
||||
[ cc< ^compare-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
: emit-simd-v= ( node -- )
|
||||
{
|
||||
[ cc= ^compare-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
: emit-simd-v> ( node -- )
|
||||
{
|
||||
[ cc> ^compare-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
: emit-simd-v>= ( node -- )
|
||||
{
|
||||
[ cc>= ^compare-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
: emit-simd-vunordered? ( node -- )
|
||||
{
|
||||
[ cc/<>= ^compare-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vany? ( node -- )
|
||||
{
|
||||
[ vcc-any ^^test-vector ]
|
||||
} emit-v-vector-op ;
|
||||
: emit-simd-vall? ( node -- )
|
||||
{
|
||||
[ vcc-all ^^test-vector ]
|
||||
} emit-v-vector-op ;
|
||||
: emit-simd-vnone? ( node -- )
|
||||
{
|
||||
[ vcc-none ^^test-vector ]
|
||||
} emit-v-vector-op ;
|
||||
: emit-simd-vgetmask ( node -- )
|
||||
{
|
||||
[ ^^move-vector-mask ]
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-v>float ( node -- )
|
||||
{
|
||||
{ float-vector-rep [ drop ] }
|
||||
{ int-vector-rep [ ^^integer>float-vector ] }
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-v>integer ( node -- )
|
||||
{
|
||||
{ float-vector-rep [ ^^float>integer-vector ] }
|
||||
{ int-vector-rep [ drop ] }
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-vpack-signed ( node -- )
|
||||
{
|
||||
{ double-2-rep |[ src1 src2 rep |
|
||||
src1 double-2-rep ^^float-pack-vector :> dst-head
|
||||
src2 double-2-rep ^^float-pack-vector :> dst-tail
|
||||
dst-head dst-tail { 0 1 0 1 } float-4-rep ^^shuffle-vector-halves-imm
|
||||
] }
|
||||
{ int-vector-rep [ ^^signed-pack-vector ] }
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vpack-unsigned ( node -- )
|
||||
{
|
||||
[ ^^unsigned-pack-vector ]
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-vunpack-head ( node -- )
|
||||
{
|
||||
[ ^unpack-vector-head ]
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-vunpack-tail ( node -- )
|
||||
{
|
||||
[ ^unpack-vector-tail ]
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-with ( node -- )
|
||||
{
|
||||
{ fixnum-vector-rep [ ^with-vector ] }
|
||||
{ float-vector-rep [ ^with-vector ] }
|
||||
} emit-v-vector-op ;
|
||||
|
||||
: emit-simd-gather-2 ( node -- )
|
||||
{
|
||||
{ fixnum-vector-rep [ ^^gather-int-vector-2 ] }
|
||||
{ fixnum-vector-rep [ ^^gather-vector-2 ] }
|
||||
{ float-vector-rep [ ^^gather-vector-2 ] }
|
||||
} emit-vv-vector-op ;
|
||||
|
||||
: emit-simd-gather-4 ( node -- )
|
||||
{
|
||||
{ fixnum-vector-rep [ ^^gather-int-vector-4 ] }
|
||||
{ fixnum-vector-rep [ ^^gather-vector-4 ] }
|
||||
{ float-vector-rep [ ^^gather-vector-4 ] }
|
||||
} emit-vvvv-vector-op ;
|
||||
|
||||
: emit-simd-select ( node -- )
|
||||
{
|
||||
{ fixnum-vector-rep [ ^select-vector ] }
|
||||
{ float-vector-rep [ ^select-vector ] }
|
||||
} [ integer? ] emit-vl-vector-op ;
|
||||
|
||||
: emit-alien-vector ( block node -- block' )
|
||||
dup [
|
||||
'[
|
||||
ds-drop prepare-load-memory
|
||||
_ f ^^load-memory-imm ds-push
|
||||
]
|
||||
[ inline-load-memory? ] inline-accessor
|
||||
] with { [ %alien-vector-reps member? ] } if-literals-match ;
|
||||
|
||||
: emit-set-alien-vector ( block node -- block' )
|
||||
dup [
|
||||
'[
|
||||
ds-drop prepare-store-memory
|
||||
_ f ##store-memory-imm,
|
||||
]
|
||||
[ byte-array inline-store-memory? ] inline-accessor
|
||||
] with { [ %alien-vector-reps member? ] } if-literals-match ;
|
||||
|
||||
: enable-simd ( -- )
|
||||
{
|
||||
{ (simd-v+) [ emit-simd-v+ ] }
|
||||
{ (simd-v-) [ emit-simd-v- ] }
|
||||
{ (simd-vneg) [ emit-simd-vneg ] }
|
||||
{ (simd-v+-) [ emit-simd-v+- ] }
|
||||
{ (simd-vs+) [ emit-simd-vs+ ] }
|
||||
{ (simd-vs-) [ emit-simd-vs- ] }
|
||||
{ (simd-vs*) [ emit-simd-vs* ] }
|
||||
{ (simd-v*) [ emit-simd-v* ] }
|
||||
{ (simd-v*high) [ emit-simd-v*high ] }
|
||||
{ (simd-v*hs+) [ emit-simd-v*hs+ ] }
|
||||
{ (simd-v/) [ emit-simd-v/ ] }
|
||||
{ (simd-vmin) [ emit-simd-vmin ] }
|
||||
{ (simd-vmax) [ emit-simd-vmax ] }
|
||||
{ (simd-vavg) [ emit-simd-vavg ] }
|
||||
{ (simd-v.) [ emit-simd-v. ] }
|
||||
{ (simd-vsad) [ emit-simd-vsad ] }
|
||||
{ (simd-vsqrt) [ emit-simd-vsqrt ] }
|
||||
{ (simd-sum) [ emit-simd-sum ] }
|
||||
{ (simd-vabs) [ emit-simd-vabs ] }
|
||||
{ (simd-vbitand) [ emit-simd-vand ] }
|
||||
{ (simd-vbitandn) [ emit-simd-vandn ] }
|
||||
{ (simd-vbitor) [ emit-simd-vor ] }
|
||||
{ (simd-vbitxor) [ emit-simd-vxor ] }
|
||||
{ (simd-vbitnot) [ emit-simd-vnot ] }
|
||||
{ (simd-vand) [ emit-simd-vand ] }
|
||||
{ (simd-vandn) [ emit-simd-vandn ] }
|
||||
{ (simd-vor) [ emit-simd-vor ] }
|
||||
{ (simd-vxor) [ emit-simd-vxor ] }
|
||||
{ (simd-vnot) [ emit-simd-vnot ] }
|
||||
{ (simd-vlshift) [ emit-simd-vlshift ] }
|
||||
{ (simd-vrshift) [ emit-simd-vrshift ] }
|
||||
{ (simd-hlshift) [ emit-simd-hlshift ] }
|
||||
{ (simd-hrshift) [ emit-simd-hrshift ] }
|
||||
{ (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] }
|
||||
{ (simd-vshuffle2-elements) [ emit-simd-vshuffle2-elements ] }
|
||||
{ (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] }
|
||||
{ (simd-vmerge-head) [ emit-simd-vmerge-head ] }
|
||||
{ (simd-vmerge-tail) [ emit-simd-vmerge-tail ] }
|
||||
{ (simd-v<=) [ emit-simd-v<= ] }
|
||||
{ (simd-v<) [ emit-simd-v< ] }
|
||||
{ (simd-v=) [ emit-simd-v= ] }
|
||||
{ (simd-v>) [ emit-simd-v> ] }
|
||||
{ (simd-v>=) [ emit-simd-v>= ] }
|
||||
{ (simd-vunordered?) [ emit-simd-vunordered? ] }
|
||||
{ (simd-vany?) [ emit-simd-vany? ] }
|
||||
{ (simd-vall?) [ emit-simd-vall? ] }
|
||||
{ (simd-vnone?) [ emit-simd-vnone? ] }
|
||||
{ (simd-v>float) [ emit-simd-v>float ] }
|
||||
{ (simd-v>integer) [ emit-simd-v>integer ] }
|
||||
{ (simd-vpack-signed) [ emit-simd-vpack-signed ] }
|
||||
{ (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] }
|
||||
{ (simd-vunpack-head) [ emit-simd-vunpack-head ] }
|
||||
{ (simd-vunpack-tail) [ emit-simd-vunpack-tail ] }
|
||||
{ (simd-with) [ emit-simd-with ] }
|
||||
{ (simd-gather-2) [ emit-simd-gather-2 ] }
|
||||
{ (simd-gather-4) [ emit-simd-gather-4 ] }
|
||||
{ (simd-select) [ emit-simd-select ] }
|
||||
{ alien-vector [ emit-alien-vector ] }
|
||||
{ set-alien-vector [ emit-set-alien-vector ] }
|
||||
{ assert-positive [ drop ] }
|
||||
{ (simd-vgetmask) [ emit-simd-vgetmask ] }
|
||||
} enable-intrinsics ;
|
||||
|
||||
enable-simd
|
|
@ -0,0 +1,135 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors byte-arrays combinators compiler.cfg.builder
|
||||
compiler.tree.propagation.info compiler.tree.propagation.nodes
|
||||
continuations cpu.architecture fry kernel layouts math
|
||||
math.intervals math.vectors.simd.intrinsics namespaces sequences
|
||||
words ;
|
||||
IN: compiler.tree.propagation.simd
|
||||
|
||||
CONSTANT: vector>vector-intrinsics
|
||||
{
|
||||
(simd-v+)
|
||||
(simd-v-)
|
||||
(simd-vneg)
|
||||
(simd-v+-)
|
||||
(simd-vs+)
|
||||
(simd-vs-)
|
||||
(simd-vs*)
|
||||
(simd-v*)
|
||||
(simd-v*high)
|
||||
(simd-v*hs+)
|
||||
(simd-v/)
|
||||
(simd-vmin)
|
||||
(simd-vmax)
|
||||
(simd-vavg)
|
||||
(simd-vsqrt)
|
||||
(simd-vabs)
|
||||
(simd-vbitand)
|
||||
(simd-vbitandn)
|
||||
(simd-vbitor)
|
||||
(simd-vbitxor)
|
||||
(simd-vbitnot)
|
||||
(simd-vand)
|
||||
(simd-vandn)
|
||||
(simd-vor)
|
||||
(simd-vxor)
|
||||
(simd-vnot)
|
||||
(simd-vlshift)
|
||||
(simd-vrshift)
|
||||
(simd-hlshift)
|
||||
(simd-hrshift)
|
||||
(simd-vshuffle-elements)
|
||||
(simd-vshuffle2-elements)
|
||||
(simd-vshuffle-bytes)
|
||||
(simd-vmerge-head)
|
||||
(simd-vmerge-tail)
|
||||
(simd-v<=)
|
||||
(simd-v<)
|
||||
(simd-v=)
|
||||
(simd-v>)
|
||||
(simd-v>=)
|
||||
(simd-vunordered?)
|
||||
(simd-v>float)
|
||||
(simd-v>integer)
|
||||
(simd-vpack-signed)
|
||||
(simd-vpack-unsigned)
|
||||
(simd-vunpack-head)
|
||||
(simd-vunpack-tail)
|
||||
(simd-with)
|
||||
(simd-gather-2)
|
||||
(simd-gather-4)
|
||||
alien-vector
|
||||
}
|
||||
|
||||
CONSTANT: vector-other-intrinsics
|
||||
{
|
||||
(simd-v.)
|
||||
(simd-vsad)
|
||||
(simd-sum)
|
||||
(simd-vany?)
|
||||
(simd-vall?)
|
||||
(simd-vnone?)
|
||||
(simd-vgetmask)
|
||||
(simd-select)
|
||||
set-alien-vector
|
||||
}
|
||||
|
||||
: vector-intrinsics ( -- x )
|
||||
vector>vector-intrinsics vector-other-intrinsics append ;
|
||||
|
||||
vector>vector-intrinsics [ { byte-array } "default-output-classes" set-word-prop ] each
|
||||
|
||||
: scalar-output-class ( rep -- class )
|
||||
dup literal?>> [
|
||||
literal>> scalar-rep-of {
|
||||
{ float-rep [ float ] }
|
||||
{ double-rep [ float ] }
|
||||
{ longlong-scalar-rep [ integer ] }
|
||||
{ ulonglong-scalar-rep [ integer ] }
|
||||
{ int-scalar-rep [ cell 8 = [ fixnum ] [ integer ] if ] }
|
||||
{ uint-scalar-rep [ cell 8 = [ fixnum ] [ integer ] if ] }
|
||||
[ drop fixnum ]
|
||||
} case
|
||||
] [ drop real ] if
|
||||
<class-info> ;
|
||||
|
||||
\ (simd-sum) [ nip scalar-output-class ] "outputs" set-word-prop
|
||||
|
||||
\ (simd-v.) [ 2nip scalar-output-class ] "outputs" set-word-prop
|
||||
|
||||
{
|
||||
(simd-vany?)
|
||||
(simd-vall?)
|
||||
(simd-vnone?)
|
||||
} [ { boolean } "default-output-classes" set-word-prop ] each
|
||||
|
||||
\ (simd-select) [ 2nip scalar-output-class ] "outputs" set-word-prop
|
||||
|
||||
\ assert-positive [
|
||||
real [0,inf] <class/interval-info> value-info-intersect
|
||||
] "outputs" set-word-prop
|
||||
|
||||
\ (simd-vgetmask) { fixnum } "default-output-classes" set-word-prop
|
||||
|
||||
: clone-with-value-infos ( node -- node' )
|
||||
clone dup in-d>> extract-value-info >>info ;
|
||||
|
||||
: try-intrinsic ( node intrinsic-quot -- ? )
|
||||
'[
|
||||
_ clone-with-value-infos
|
||||
_ with-dummy-cfg-builder
|
||||
t
|
||||
] [ drop f ] recover ;
|
||||
|
||||
: inline-unless-intrinsic ( word -- )
|
||||
dup '[
|
||||
_ swap over "intrinsic" word-prop
|
||||
"always-inline-simd-intrinsics" get not swap and
|
||||
! word node intrinsic
|
||||
[ try-intrinsic [ drop f ] [ def>> ] if ]
|
||||
[ drop def>> ] if*
|
||||
]
|
||||
"custom-inlining" set-word-prop ;
|
||||
|
||||
vector-intrinsics [ inline-unless-intrinsic ] each
|
Loading…
Reference in New Issue