alien.complex: Fix functor

modern-harvey2
Doug Coleman 2017-12-26 12:04:00 -08:00
parent 1ca1a9b6b3
commit 1e9b407037
8 changed files with 1677 additions and 16 deletions

View File

@ -1,12 +1,12 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien.c-types alien.complex.functor kernel
sequences ;
USING: accessors alien alien.c-types alien.complex.functor
classes.struct kernel math quotations ;
FROM: alien.c-types => float double ;
IN: alien.complex
<<
{ "float" "double" } [ dup "complex-" prepend define-complex-type ] each
>>
COMPLEX-TYPE: float complex-float
COMPLEX-TYPE: double complex-double
<<
! This overrides the fact that small structures are never returned

View File

@ -3,20 +3,25 @@
USING: functors2 ;
IN: alien.complex.functor
FUNCTOR: define-complex-type ( N: name T: name -- ) [[
SAME-FUNCTOR: complex-type ( N: existing-word T: name -- ) [[
USING: alien alien.c-types classes.struct kernel quotations ;
QUALIFIED: math
STRUCT: ${T}-class { real ${N}-type } { imaginary ${N}-type } ;
<<
STRUCT: ${T} { real ${N} } { imaginary ${N} } ;
: <${T}> ( z -- alien )
>rect ${T}-class <struct-boa> >c-ptr ;
: <${T}> ( z -- alien )
math:>rect ${T} <struct-boa> >c-ptr ;
: *${T} ( alien -- z )
T-class memory>struct [ real>> ] [ imaginary>> ] bi rect> ; inline
: *${T} ( alien -- z )
${T} memory>struct [ real>> ] [ imaginary>> ] bi math:rect> ; inline
${T}-class lookup-c-type
<${T}> 1quotation >>unboxer-quot
*${T} 1quotation >>boxer-quot
complex >>boxed-class
drop
>>
\ ${T} lookup-c-type
[ <${T}> ] >>unboxer-quot
[ *${T} ] >>boxer-quot
complex >>boxed-class
drop
]]

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,163 @@
! Copyright (C) 2009 Joe Groff.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays classes combinators
compiler.cfg.instructions compiler.cfg.registers
compiler.cfg.stacks compiler.cfg.stacks.local
compiler.tree.propagation.info cpu.architecture fry
generalizations kernel locals macros make math quotations
sequences sequences.generalizations ;
IN: compiler.cfg.intrinsics.simd.backend
! Selection of implementation based on available CPU instructions
GENERIC: insn-available? ( ## -- reps )
M: object insn-available? drop t ;
M: ##zero-vector insn-available? rep>> %zero-vector-reps member? ;
M: ##fill-vector insn-available? rep>> %fill-vector-reps member? ;
M: ##gather-vector-2 insn-available? rep>> %gather-vector-2-reps member? ;
M: ##gather-vector-4 insn-available? rep>> %gather-vector-4-reps member? ;
M: ##gather-int-vector-2 insn-available? rep>> %gather-int-vector-2-reps member? ;
M: ##gather-int-vector-4 insn-available? rep>> %gather-int-vector-4-reps member? ;
M: ##select-vector insn-available? rep>> %select-vector-reps member? ;
M: ##store-memory-imm insn-available? rep>> %alien-vector-reps member? ;
M: ##shuffle-vector insn-available? rep>> %shuffle-vector-reps member? ;
M: ##shuffle-vector-imm insn-available? rep>> %shuffle-vector-imm-reps member? ;
M: ##shuffle-vector-halves-imm insn-available? rep>> %shuffle-vector-halves-imm-reps member? ;
M: ##merge-vector-head insn-available? rep>> %merge-vector-reps member? ;
M: ##merge-vector-tail insn-available? rep>> %merge-vector-reps member? ;
M: ##float-pack-vector insn-available? rep>> %float-pack-vector-reps member? ;
M: ##signed-pack-vector insn-available? rep>> %signed-pack-vector-reps member? ;
M: ##unsigned-pack-vector insn-available? rep>> %unsigned-pack-vector-reps member? ;
M: ##unpack-vector-head insn-available? rep>> %unpack-vector-head-reps member? ;
M: ##unpack-vector-tail insn-available? rep>> %unpack-vector-tail-reps member? ;
M: ##tail>head-vector insn-available? rep>> %unpack-vector-head-reps member? ;
M: ##integer>float-vector insn-available? rep>> %integer>float-vector-reps member? ;
M: ##float>integer-vector insn-available? rep>> %float>integer-vector-reps member? ;
M: ##compare-vector insn-available? [ rep>> ] [ cc>> ] bi %compare-vector-reps member? ;
M: ##move-vector-mask insn-available? rep>> %move-vector-mask-reps member? ;
M: ##test-vector insn-available? rep>> %test-vector-reps member? ;
M: ##add-vector insn-available? rep>> %add-vector-reps member? ;
M: ##saturated-add-vector insn-available? rep>> %saturated-add-vector-reps member? ;
M: ##add-sub-vector insn-available? rep>> %add-sub-vector-reps member? ;
M: ##sub-vector insn-available? rep>> %sub-vector-reps member? ;
M: ##saturated-sub-vector insn-available? rep>> %saturated-sub-vector-reps member? ;
M: ##mul-vector insn-available? rep>> %mul-vector-reps member? ;
M: ##mul-high-vector insn-available? rep>> %mul-high-vector-reps member? ;
M: ##mul-horizontal-add-vector insn-available? rep>> %mul-horizontal-add-vector-reps member? ;
M: ##saturated-mul-vector insn-available? rep>> %saturated-mul-vector-reps member? ;
M: ##div-vector insn-available? rep>> %div-vector-reps member? ;
M: ##min-vector insn-available? rep>> %min-vector-reps member? ;
M: ##max-vector insn-available? rep>> %max-vector-reps member? ;
M: ##avg-vector insn-available? rep>> %avg-vector-reps member? ;
M: ##dot-vector insn-available? rep>> %dot-vector-reps member? ;
M: ##sad-vector insn-available? rep>> %sad-vector-reps member? ;
M: ##sqrt-vector insn-available? rep>> %sqrt-vector-reps member? ;
M: ##horizontal-add-vector insn-available? rep>> %horizontal-add-vector-reps member? ;
M: ##horizontal-sub-vector insn-available? rep>> %horizontal-sub-vector-reps member? ;
M: ##abs-vector insn-available? rep>> %abs-vector-reps member? ;
M: ##and-vector insn-available? rep>> %and-vector-reps member? ;
M: ##andn-vector insn-available? rep>> %andn-vector-reps member? ;
M: ##or-vector insn-available? rep>> %or-vector-reps member? ;
M: ##xor-vector insn-available? rep>> %xor-vector-reps member? ;
M: ##not-vector insn-available? rep>> %not-vector-reps member? ;
M: ##shl-vector insn-available? rep>> %shl-vector-reps member? ;
M: ##shr-vector insn-available? rep>> %shr-vector-reps member? ;
M: ##shl-vector-imm insn-available? rep>> %shl-vector-imm-reps member? ;
M: ##shr-vector-imm insn-available? rep>> %shr-vector-imm-reps member? ;
M: ##horizontal-shl-vector-imm insn-available? rep>> %horizontal-shl-vector-imm-reps member? ;
M: ##horizontal-shr-vector-imm insn-available? rep>> %horizontal-shr-vector-imm-reps member? ;
: [vector-op-checked] ( #dup quot -- quot )
'[ _ ndup [ @ ] { } make dup [ insn-available? ] all? ] ;
GENERIC#: >vector-op-cond 2 ( quot #pick #dup -- quotpair )
M:: callable >vector-op-cond ( quot #pick #dup -- quotpair )
#dup quot [vector-op-checked] '[ 2drop @ ]
#dup '[ % _ nnip ]
2array ;
M:: pair >vector-op-cond ( pair #pick #dup -- quotpair )
pair first2 :> ( class quot )
#pick class #dup quot [vector-op-checked]
'[ 2drop _ npick _ instance? _ [ f f f ] if ]
#dup '[ % _ nnip ]
2array ;
MACRO: v-vector-op ( trials -- quot )
[ 1 2 >vector-op-cond ] map '[ f f _ cond ] ;
MACRO: vl-vector-op ( trials -- quot )
[ 1 3 >vector-op-cond ] map '[ f f _ cond ] ;
MACRO: vvl-vector-op ( trials -- quot )
[ 1 4 >vector-op-cond ] map '[ f f _ cond ] ;
MACRO: vv-vector-op ( trials -- quot )
[ 1 3 >vector-op-cond ] map '[ f f _ cond ] ;
MACRO: vv-cc-vector-op ( trials -- quot )
[ 2 4 >vector-op-cond ] map '[ f f _ cond ] ;
MACRO: vvvv-vector-op ( trials -- quot )
[ 1 5 >vector-op-cond ] map '[ f f _ cond ] ;
! Intrinsic code emission
MACRO: check-elements ( quots -- quot )
[ length '[ _ firstn ] ]
[ '[ _ spread ] ]
[ length 1 - \ and <repetition> [ ] like ]
tri 3append ;
ERROR: bad-simd-intrinsic node ;
MACRO: if-literals-match ( quots -- quot )
[ length ] [ ] [ length ] tri
! n quots n
'[
! node quot
[
dup node-input-infos
_ tail-slice* [ literal>> ] map
dup _ check-elements
] dip
swap [
! node literals quot
[ _ firstn ] dip call
drop
] [ 2drop bad-simd-intrinsic ] if
] ;
CONSTANT: unary [ ds-drop ds-pop ]
CONSTANT: unary/param [ [ -2 <ds-loc> inc-stack ds-pop ] dip ]
CONSTANT: binary [ ds-drop 2inputs ]
CONSTANT: binary/param [ [ -2 <ds-loc> inc-stack 2inputs ] dip ]
CONSTANT: quaternary
[
ds-drop
d: 3 peek-loc
d: 2 peek-loc
d: 1 peek-loc
d: 0 peek-loc
-4 <ds-loc> inc-stack
]
:: emit-vector-op ( trials params-quot op-quot literal-preds -- quot )
params-quot trials op-quot literal-preds
'[ [ _ dip _ @ ds-push ] _ if-literals-match ] ;
MACRO: emit-v-vector-op ( trials -- quot )
unary [ v-vector-op ] { [ representation? ] } emit-vector-op ;
MACRO: emit-vl-vector-op ( trials literal-pred -- quot )
[ unary/param [ vl-vector-op ] { [ representation? ] } ] dip prefix emit-vector-op ;
MACRO: emit-vv-vector-op ( trials -- quot )
binary [ vv-vector-op ] { [ representation? ] } emit-vector-op ;
MACRO: emit-vvl-vector-op ( trials literal-pred -- quot )
[ binary/param [ vvl-vector-op ] { [ representation? ] } ] dip prefix emit-vector-op ;
MACRO: emit-vvvv-vector-op ( trials -- quot )
quaternary [ vvvv-vector-op ] { [ representation? ] } emit-vector-op ;
MACRO:: emit-vv-or-vl-vector-op ( var-trials imm-trials literal-pred -- quot )
literal-pred imm-trials literal-pred var-trials
'[
dup node-input-infos 2 tail-slice* first literal>> @
[ _ _ emit-vl-vector-op ]
[ _ emit-vv-vector-op ] if
] ;

View File

@ -0,0 +1,8 @@
USING: compiler.cfg.instructions compiler.tree help.markup help.syntax
math.vectors ;
IN: compiler.cfg.intrinsics.simd
HELP: emit-simd-v+
{ $values { "node" node } }
{ $description "Emits instructions for SIMD vector addition." }
{ $see-also ##add-vector v+ } ;

View File

@ -0,0 +1,630 @@
! Copyright (C) 2009 Joe Groff.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs biassocs byte-arrays classes
compiler.cfg compiler.cfg.comparisons compiler.cfg.instructions
compiler.cfg.intrinsics.simd compiler.cfg.intrinsics.simd.backend
compiler.cfg.stacks.local compiler.test compiler.tree
compiler.tree.propagation.info cpu.architecture fry kernel locals make
namespaces sequences system tools.test words ;
IN: compiler.cfg.intrinsics.simd.tests
:: test-node ( rep -- node )
T{ #call
{ in-d { 1 2 3 4 } }
{ out-d { 5 } }
{ info H{
{ 1 T{ value-info-state { class byte-array } } }
{ 2 T{ value-info-state { class byte-array } } }
{ 3 T{ value-info-state { class byte-array } } }
{ 4 T{ value-info-state { class word } { literal? t } { literal rep } } }
{ 5 T{ value-info-state { class byte-array } } }
} }
} ;
:: test-node-literal ( lit rep -- node )
lit class-of :> lit-class
T{ #call
{ in-d { 1 2 3 4 } }
{ out-d { 5 } }
{ info H{
{ 1 T{ value-info-state { class byte-array } } }
{ 2 T{ value-info-state { class byte-array } } }
{ 3 T{ value-info-state { class lit-class } { literal? t } { literal lit } } }
{ 4 T{ value-info-state { class word } { literal? t } { literal rep } } }
{ 5 T{ value-info-state { class byte-array } } }
} }
} ;
: test-node-nonliteral-rep ( -- node )
T{ #call
{ in-d { 1 2 3 4 } }
{ out-d { 5 } }
{ info H{
{ 1 T{ value-info-state { class byte-array } } }
{ 2 T{ value-info-state { class byte-array } } }
{ 3 T{ value-info-state { class byte-array } } }
{ 4 T{ value-info-state { class object } } }
{ 5 T{ value-info-state { class byte-array } } }
} }
} ;
: test-compiler-env ( -- x )
H{ } clone
T{ basic-block } 0 0 0 0 height-state boa >>height
\ basic-block pick set-at
0 0 0 0 height-state boa \ height-state pick set-at
HS{ } clone \ local-peek-set pick set-at
H{ } clone \ replaces pick set-at
H{ } <biassoc> \ locs>vregs pick set-at ;
: make-classes ( quot -- seq )
{ } make [ class-of ] map ; inline
: test-emit ( cpu rep quot -- node )
[
[ new \ cpu ] 2dip '[
test-compiler-env [ _ test-node @ ] with-variables
] with-variable
] make-classes ; inline
: test-emit-literal ( cpu lit rep quot -- node )
[
[ new \ cpu ] 3dip '[
test-compiler-env [ _ _ test-node-literal @ ] with-variables
] with-variable
] make-classes ; inline
: test-emit-nonliteral-rep ( cpu quot -- node )
[
[ new \ cpu ] dip '[
test-compiler-env [ test-node-nonliteral-rep @ ] with-variables
] with-variable
] make-classes ; inline
CONSTANT: signed-reps
{ char-16-rep short-8-rep int-4-rep longlong-2-rep float-4-rep double-2-rep }
CONSTANT: all-reps
{
char-16-rep short-8-rep int-4-rep longlong-2-rep float-4-rep double-2-rep
uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep
}
TUPLE: scalar-cpu ;
TUPLE: simple-ops-cpu ;
M: simple-ops-cpu %zero-vector-reps all-reps ;
M: simple-ops-cpu %fill-vector-reps all-reps ;
M: simple-ops-cpu %add-vector-reps all-reps ;
M: simple-ops-cpu %sub-vector-reps all-reps ;
M: simple-ops-cpu %mul-vector-reps all-reps ;
M: simple-ops-cpu %div-vector-reps all-reps ;
M: simple-ops-cpu %andn-vector-reps all-reps ;
M: simple-ops-cpu %and-vector-reps all-reps ;
M: simple-ops-cpu %or-vector-reps all-reps ;
M: simple-ops-cpu %xor-vector-reps all-reps ;
M: simple-ops-cpu %merge-vector-reps all-reps ;
M: simple-ops-cpu %sqrt-vector-reps all-reps ;
M: simple-ops-cpu %move-vector-mask-reps all-reps ;
M: simple-ops-cpu %test-vector-reps all-reps ;
M: simple-ops-cpu %signed-pack-vector-reps all-reps ;
M: simple-ops-cpu %unsigned-pack-vector-reps all-reps ;
M: simple-ops-cpu %gather-vector-2-reps { longlong-2-rep ulonglong-2-rep double-2-rep } ;
M: simple-ops-cpu %gather-vector-4-reps { int-4-rep uint-4-rep float-4-rep } ;
M: simple-ops-cpu %alien-vector-reps all-reps ;
! v+
{ { ##add-vector } }
[ simple-ops-cpu float-4-rep [ emit-simd-v+ ] test-emit ]
unit-test
! v-
{ { ##sub-vector } }
[ simple-ops-cpu float-4-rep [ emit-simd-v- ] test-emit ]
unit-test
! vneg
{ { ##load-reference ##sub-vector } }
[ simple-ops-cpu float-4-rep [ emit-simd-vneg ] test-emit ]
unit-test
{ { ##zero-vector ##sub-vector } }
[ simple-ops-cpu int-4-rep [ emit-simd-vneg ] test-emit ]
unit-test
! v*
{ { ##mul-vector } }
[ simple-ops-cpu float-4-rep [ emit-simd-v* ] test-emit ]
unit-test
! v/
{ { ##div-vector } }
[ simple-ops-cpu float-4-rep [ emit-simd-v/ ] test-emit ]
unit-test
TUPLE: addsub-cpu < simple-ops-cpu ;
M: addsub-cpu %add-sub-vector-reps { int-4-rep float-4-rep } ;
! v+-
{ { ##add-sub-vector } }
[ addsub-cpu float-4-rep [ emit-simd-v+- ] test-emit ]
unit-test
{ { ##load-reference ##xor-vector ##add-vector } }
[ simple-ops-cpu float-4-rep [ emit-simd-v+- ] test-emit ]
unit-test
{ { ##load-reference ##xor-vector ##sub-vector ##add-vector } }
[ simple-ops-cpu int-4-rep [ emit-simd-v+- ] test-emit ]
unit-test
TUPLE: saturating-cpu < simple-ops-cpu ;
M: saturating-cpu %saturated-add-vector-reps { int-4-rep } ;
M: saturating-cpu %saturated-sub-vector-reps { int-4-rep } ;
M: saturating-cpu %saturated-mul-vector-reps { int-4-rep } ;
! vs+
{ { ##add-vector } }
[ simple-ops-cpu float-4-rep [ emit-simd-vs+ ] test-emit ]
unit-test
{ { ##add-vector } }
[ saturating-cpu float-4-rep [ emit-simd-vs+ ] test-emit ]
unit-test
{ { ##saturated-add-vector } }
[ saturating-cpu int-4-rep [ emit-simd-vs+ ] test-emit ]
unit-test
! vs-
{ { ##sub-vector } }
[ simple-ops-cpu float-4-rep [ emit-simd-vs- ] test-emit ]
unit-test
{ { ##sub-vector } }
[ saturating-cpu float-4-rep [ emit-simd-vs- ] test-emit ]
unit-test
{ { ##saturated-sub-vector } }
[ saturating-cpu int-4-rep [ emit-simd-vs- ] test-emit ]
unit-test
! vs*
{ { ##mul-vector } }
[ simple-ops-cpu float-4-rep [ emit-simd-vs* ] test-emit ]
unit-test
{ { ##mul-vector } }
[ saturating-cpu float-4-rep [ emit-simd-vs* ] test-emit ]
unit-test
{ { ##saturated-mul-vector } }
[ saturating-cpu int-4-rep [ emit-simd-vs* ] test-emit ]
unit-test
TUPLE: minmax-cpu < simple-ops-cpu ;
M: minmax-cpu %min-vector-reps signed-reps ;
M: minmax-cpu %max-vector-reps signed-reps ;
M: minmax-cpu %compare-vector-reps { cc= cc/= } member? [ signed-reps ] [ { } ] if ;
M: minmax-cpu %compare-vector-ccs nip f 2array 1array f ;
TUPLE: compare-cpu < simple-ops-cpu ;
M: compare-cpu %compare-vector-reps drop signed-reps ;
M: compare-cpu %compare-vector-ccs nip f 2array 1array f ;
! vmin
{ { ##min-vector } }
[ minmax-cpu float-4-rep [ emit-simd-vmin ] test-emit ]
unit-test
{ { ##compare-vector ##and-vector ##andn-vector ##or-vector } }
[ compare-cpu float-4-rep [ emit-simd-vmin ] test-emit ]
unit-test
! vmax
{ { ##max-vector } }
[ minmax-cpu float-4-rep [ emit-simd-vmax ] test-emit ]
unit-test
{ { ##compare-vector ##and-vector ##andn-vector ##or-vector } }
[ compare-cpu float-4-rep [ emit-simd-vmax ] test-emit ]
unit-test
TUPLE: dot-cpu < simple-ops-cpu ;
M: dot-cpu %dot-vector-reps { float-4-rep } ;
TUPLE: horizontal-cpu < simple-ops-cpu ;
M: horizontal-cpu %horizontal-add-vector-reps signed-reps ;
M: horizontal-cpu %unpack-vector-head-reps signed-reps ;
M: horizontal-cpu %unpack-vector-tail-reps signed-reps ;
! v.
{ { ##dot-vector } }
[ dot-cpu float-4-rep [ emit-simd-v. ] test-emit ]
unit-test
{ { ##mul-vector ##horizontal-add-vector ##horizontal-add-vector ##vector>scalar } }
[ horizontal-cpu float-4-rep [ emit-simd-v. ] test-emit ]
unit-test
{ {
##mul-vector
##merge-vector-head ##merge-vector-tail ##add-vector
##merge-vector-head ##merge-vector-tail ##add-vector
##vector>scalar
} }
[ simple-ops-cpu float-4-rep [ emit-simd-v. ] test-emit ]
unit-test
! vsqrt
{ { ##sqrt-vector } }
[ simple-ops-cpu float-4-rep [ emit-simd-vsqrt ] test-emit ]
unit-test
! sum
{ { ##horizontal-add-vector ##vector>scalar } }
[ horizontal-cpu double-2-rep [ emit-simd-sum ] test-emit ]
unit-test
{ { ##horizontal-add-vector ##horizontal-add-vector ##vector>scalar } }
[ horizontal-cpu float-4-rep [ emit-simd-sum ] test-emit ]
unit-test
{ {
##unpack-vector-head ##unpack-vector-tail ##add-vector
##horizontal-add-vector ##horizontal-add-vector
##vector>scalar
} }
[ horizontal-cpu short-8-rep [ emit-simd-sum ] test-emit ]
unit-test
{ {
##unpack-vector-head ##unpack-vector-tail ##add-vector
##horizontal-add-vector ##horizontal-add-vector ##horizontal-add-vector
##vector>scalar
} }
[ horizontal-cpu char-16-rep [ emit-simd-sum ] test-emit ]
unit-test
TUPLE: abs-cpu < simple-ops-cpu ;
M: abs-cpu %abs-vector-reps signed-reps ;
! vabs
{ { } }
[ simple-ops-cpu uint-4-rep [ emit-simd-vabs ] test-emit ]
unit-test
{ { ##abs-vector } }
[ abs-cpu float-4-rep [ emit-simd-vabs ] test-emit ]
unit-test
{ { ##load-reference ##andn-vector } }
[ simple-ops-cpu float-4-rep [ emit-simd-vabs ] test-emit ]
unit-test
{ { ##zero-vector ##sub-vector ##compare-vector ##and-vector ##andn-vector ##or-vector } }
[ compare-cpu int-4-rep [ emit-simd-vabs ] test-emit ]
unit-test
! vand
{ { ##and-vector } }
[ simple-ops-cpu float-4-rep [ emit-simd-vand ] test-emit ]
unit-test
! vandn
{ { ##andn-vector } }
[ simple-ops-cpu float-4-rep [ emit-simd-vandn ] test-emit ]
unit-test
! vor
{ { ##or-vector } }
[ simple-ops-cpu float-4-rep [ emit-simd-vor ] test-emit ]
unit-test
! vxor
{ { ##xor-vector } }
[ simple-ops-cpu float-4-rep [ emit-simd-vxor ] test-emit ]
unit-test
TUPLE: not-cpu < simple-ops-cpu ;
M: not-cpu %not-vector-reps signed-reps ;
! vnot
{ { ##not-vector } }
[ not-cpu float-4-rep [ emit-simd-vnot ] test-emit ]
unit-test
{ { ##fill-vector ##xor-vector } }
[ simple-ops-cpu float-4-rep [ emit-simd-vnot ] test-emit ]
unit-test
TUPLE: shift-cpu < simple-ops-cpu ;
M: shift-cpu %shl-vector-reps signed-reps ;
M: shift-cpu %shr-vector-reps signed-reps ;
TUPLE: shift-imm-cpu < simple-ops-cpu ;
M: shift-imm-cpu %shl-vector-imm-reps signed-reps ;
M: shift-imm-cpu %shr-vector-imm-reps signed-reps ;
TUPLE: horizontal-shift-cpu < simple-ops-cpu ;
M: horizontal-shift-cpu %horizontal-shl-vector-imm-reps signed-reps ;
M: horizontal-shift-cpu %horizontal-shr-vector-imm-reps signed-reps ;
! vlshift
{ { ##shl-vector-imm } }
[ shift-imm-cpu 2 int-4-rep [ emit-simd-vlshift ] test-emit-literal ]
unit-test
{ { ##shl-vector } }
[ shift-cpu int-4-rep [ emit-simd-vlshift ] test-emit ]
unit-test
! vrshift
{ { ##shr-vector-imm } }
[ shift-imm-cpu 2 int-4-rep [ emit-simd-vrshift ] test-emit-literal ]
unit-test
{ { ##shr-vector } }
[ shift-cpu int-4-rep [ emit-simd-vrshift ] test-emit ]
unit-test
! hlshift
{ { ##horizontal-shl-vector-imm } }
[ horizontal-shift-cpu 2 int-4-rep [ emit-simd-hlshift ] test-emit-literal ]
unit-test
! hrshift
{ { ##horizontal-shr-vector-imm } }
[ horizontal-shift-cpu 2 int-4-rep [ emit-simd-hrshift ] test-emit-literal ]
unit-test
TUPLE: shuffle-imm-cpu < simple-ops-cpu ;
M: shuffle-imm-cpu %shuffle-vector-imm-reps signed-reps ;
TUPLE: shuffle-cpu < simple-ops-cpu ;
M: shuffle-cpu %shuffle-vector-reps signed-reps ;
! vshuffle-elements
{ { ##load-reference ##shuffle-vector } }
[ shuffle-cpu { 0 1 2 3 } int-4-rep [ emit-simd-vshuffle-elements ] test-emit-literal ]
unit-test
{ { ##shuffle-vector-imm } }
[ shuffle-imm-cpu { 0 1 2 3 } int-4-rep [ emit-simd-vshuffle-elements ] test-emit-literal ]
unit-test
! vshuffle-bytes
{ { ##shuffle-vector } }
[ shuffle-cpu int-4-rep [ emit-simd-vshuffle-bytes ] test-emit ]
unit-test
! vmerge-head
{ { ##merge-vector-head } }
[ simple-ops-cpu float-4-rep [ emit-simd-vmerge-head ] test-emit ]
unit-test
! vmerge-tail
{ { ##merge-vector-tail } }
[ simple-ops-cpu float-4-rep [ emit-simd-vmerge-tail ] test-emit ]
unit-test
! v<= etc.
{ { ##compare-vector } }
[ compare-cpu int-4-rep [ emit-simd-v<= ] test-emit ]
unit-test
{ { ##min-vector ##compare-vector } }
[ minmax-cpu int-4-rep [ emit-simd-v<= ] test-emit ]
unit-test
{ { ##load-reference ##xor-vector ##xor-vector ##compare-vector } }
[ compare-cpu uint-4-rep [ emit-simd-v<= ] test-emit ]
unit-test
! vany? etc.
{ { ##test-vector } }
[ simple-ops-cpu int-4-rep [ emit-simd-vany? ] test-emit ]
unit-test
TUPLE: convert-cpu < simple-ops-cpu ;
M: convert-cpu %integer>float-vector-reps { int-4-rep } ;
M: convert-cpu %float>integer-vector-reps { float-4-rep } ;
! v>float
{ { } }
[ convert-cpu float-4-rep [ emit-simd-v>float ] test-emit ]
unit-test
{ { ##integer>float-vector } }
[ convert-cpu int-4-rep [ emit-simd-v>float ] test-emit ]
unit-test
! v>integer
{ { } }
[ convert-cpu int-4-rep [ emit-simd-v>integer ] test-emit ]
unit-test
{ { ##float>integer-vector } }
[ convert-cpu float-4-rep [ emit-simd-v>integer ] test-emit ]
unit-test
! vpack-signed
{ { ##signed-pack-vector } }
[ simple-ops-cpu int-4-rep [ emit-simd-vpack-signed ] test-emit ]
unit-test
! vpack-unsigned
{ { ##unsigned-pack-vector } }
[ simple-ops-cpu int-4-rep [ emit-simd-vpack-unsigned ] test-emit ]
unit-test
TUPLE: unpack-head-cpu < simple-ops-cpu ;
M: unpack-head-cpu %unpack-vector-head-reps all-reps ;
TUPLE: unpack-cpu < unpack-head-cpu ;
M: unpack-cpu %unpack-vector-tail-reps all-reps ;
! vunpack-head
{ { ##unpack-vector-head } }
[ unpack-head-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ]
unit-test
{ { ##zero-vector ##merge-vector-head } }
[ simple-ops-cpu uint-4-rep [ emit-simd-vunpack-head ] test-emit ]
unit-test
{ { ##merge-vector-head ##shr-vector-imm } }
[ shift-imm-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ]
unit-test
{ { ##zero-vector ##compare-vector ##merge-vector-head } }
[ compare-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ]
unit-test
! vunpack-tail
{ { ##unpack-vector-tail } }
[ unpack-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
unit-test
{ { ##tail>head-vector ##unpack-vector-head } }
[ unpack-head-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
unit-test
{ { ##zero-vector ##merge-vector-tail } }
[ simple-ops-cpu uint-4-rep [ emit-simd-vunpack-tail ] test-emit ]
unit-test
{ { ##merge-vector-tail ##shr-vector-imm } }
[ shift-imm-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
unit-test
{ { ##zero-vector ##compare-vector ##merge-vector-tail } }
[ compare-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
unit-test
! with
{ { ##scalar>vector ##shuffle-vector-imm } }
[ shuffle-imm-cpu float-4-rep [ emit-simd-with ] test-emit ]
unit-test
! gather-2
{ { ##gather-vector-2 } }
[ simple-ops-cpu double-2-rep [ emit-simd-gather-2 ] test-emit ]
unit-test
! gather-4
{ { ##gather-vector-4 } }
[ simple-ops-cpu float-4-rep [ emit-simd-gather-4 ] test-emit ]
unit-test
! select
{ { ##shuffle-vector-imm ##vector>scalar } }
[ shuffle-imm-cpu 1 float-4-rep [ emit-simd-select ] test-emit-literal ]
unit-test
! ^load-neg-zero-vector
{
V{
T{ ##load-reference
{ dst 1 }
{ obj B{ 0 0 0 128 0 0 0 128 0 0 0 128 0 0 0 128 } }
}
T{ ##load-reference
{ dst 2 }
{ obj B{ 0 0 0 0 0 0 0 128 0 0 0 0 0 0 0 128 } }
}
}
} [
[
{ float-4-rep double-2-rep } [ ^load-neg-zero-vector drop ] each
] V{ } make
] cfg-unit-test
! ^load-add-sub-vector
{
V{
T{ ##load-reference
{ dst 1 }
{ obj B{ 0 0 0 128 0 0 0 0 0 0 0 128 0 0 0 0 } }
}
T{ ##load-reference
{ dst 2 }
{ obj B{ 0 0 0 0 0 0 0 128 0 0 0 0 0 0 0 0 } }
}
T{ ##load-reference
{ dst 3 }
{ obj
B{ 255 0 255 0 255 0 255 0 255 0 255 0 255 0 255 0 }
}
}
T{ ##load-reference
{ dst 4 }
{ obj
B{ 255 255 0 0 255 255 0 0 255 255 0 0 255 255 0 0 }
}
}
T{ ##load-reference
{ dst 5 }
{ obj
B{ 255 255 255 255 0 0 0 0 255 255 255 255 0 0 0 0 }
}
}
T{ ##load-reference
{ dst 6 }
{ obj
B{ 255 255 255 255 255 255 255 255 0 0 0 0 0 0 0 0 }
}
}
}
} [
[
{
float-4-rep
double-2-rep
char-16-rep
short-8-rep
int-4-rep
longlong-2-rep
} [ ^load-add-sub-vector drop ] each
] V{ } make
] cfg-unit-test
! ^load-half-vector
{
V{
T{ ##load-reference
{ dst 1 }
{ obj B{ 0 0 0 63 0 0 0 63 0 0 0 63 0 0 0 63 } }
}
T{ ##load-reference
{ dst 2 }
{ obj B{ 0 0 0 0 0 0 224 63 0 0 0 0 0 0 224 63 } }
}
}
} [
[
{ float-4-rep double-2-rep } [ ^load-half-vector drop ] each
] V{ } make
] cfg-unit-test
! sign-bit-mask
{
{
B{ 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 }
B{ 0 128 0 128 0 128 0 128 0 128 0 128 0 128 0 128 }
B{ 0 0 0 128 0 0 0 128 0 0 0 128 0 0 0 128 }
B{ 0 0 0 0 0 0 0 128 0 0 0 0 0 0 0 128 }
}
} [
{ char-16-rep short-8-rep int-4-rep longlong-2-rep } [ sign-bit-mask ] map
] unit-test
! test with nonliteral/invalid reps
[ simple-ops-cpu [ emit-simd-v+ ] test-emit-nonliteral-rep ]
[ bad-simd-intrinsic? ] must-fail-with
[ simple-ops-cpu f [ emit-simd-v+ ] test-emit ]
[ bad-simd-intrinsic? ] must-fail-with
[ simple-ops-cpu 3 [ emit-simd-v+ ] test-emit ]
[ bad-simd-intrinsic? ] must-fail-with

View File

@ -0,0 +1,719 @@
! Copyright (C) 2009 Slava Pestov, Joe Groff.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien.c-types arrays assocs byte-arrays combinators
combinators.short-circuit compiler.cfg.comparisons
compiler.cfg.hats compiler.cfg.instructions
compiler.cfg.intrinsics compiler.cfg.intrinsics.alien
compiler.cfg.intrinsics.simd.backend compiler.cfg.stacks
cpu.architecture fry kernel layouts locals math math.vectors
math.vectors.simd.intrinsics sequences specialized-arrays ;
FROM: alien.c-types => heap-size char short int longlong float double ;
SPECIALIZED-ARRAYS: char uchar short ushort int uint longlong ulonglong float double ;
IN: compiler.cfg.intrinsics.simd
! compound vector ops
CONSTANT: rep>bit-mask {
{
char-16-rep uchar-array{
0x80 0x80 0x80 0x80
0x80 0x80 0x80 0x80
0x80 0x80 0x80 0x80
0x80 0x80 0x80 0x80
}
}
{
short-8-rep ushort-array{
0x8000 0x8000 0x8000 0x8000
0x8000 0x8000 0x8000 0x8000
}
}
{
int-4-rep uint-array{
0x8000,0000 0x8000,0000
0x8000,0000 0x8000,0000
}
}
{
longlong-2-rep ulonglong-array{
0x8000,0000,0000,0000
0x8000,0000,0000,0000
}
}
}
: sign-bit-mask ( rep -- byte-array )
signed-rep rep>bit-mask at underlying>> ;
CONSTANT: rep>neg-zero {
{ float-4-rep float-array{ -0.0 -0.0 -0.0 -0.0 } }
{ double-2-rep double-array{ -0.0 -0.0 } }
}
: ^load-neg-zero-vector ( rep -- dst )
rep>neg-zero at underlying>> ^^load-literal ;
CONSTANT: rep>add-sub {
{ float-4-rep float-array{ -0.0 0.0 -0.0 0.0 } }
{ double-2-rep double-array{ -0.0 0.0 } }
{ char-16-rep char-array{ -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 } }
{ short-8-rep short-array{ -1 0 -1 0 -1 0 -1 0 } }
{ int-4-rep int-array{ -1 0 -1 0 } }
{ longlong-2-rep longlong-array{ -1 0 } }
}
: ^load-add-sub-vector ( rep -- dst )
signed-rep rep>add-sub at underlying>> ^^load-literal ;
CONSTANT: rep>half {
{ float-4-rep float-array{ 0.5 0.5 0.5 0.5 } }
{ double-2-rep double-array{ 0.5 0.5 } }
}
: ^load-half-vector ( rep -- dst )
rep>half at underlying>> ^^load-literal ;
: >variable-shuffle ( shuffle rep -- shuffle' )
rep-component-type heap-size
[ dup <repetition> >byte-array ]
[ <iota> >byte-array ] bi
'[ _ n*v _ v+ ] map concat ;
: ^load-immediate-shuffle ( shuffle rep -- dst )
>variable-shuffle ^^load-literal ;
:: ^blend-vector ( mask true false rep -- dst )
true mask rep ^^and-vector
mask false rep ^^andn-vector
rep ^^or-vector ;
: ^not-vector ( src rep -- dst )
{
[ ^^not-vector ]
[ [ ^^fill-vector ] [ ^^xor-vector ] bi ]
} v-vector-op ;
:: ^swap-compare-vector ( src1 src2 rep {cc,swap} -- dst )
{cc,swap} first2 :> ( cc swap? )
swap?
[ src2 src1 rep cc ^^compare-vector ]
[ src1 src2 rep cc ^^compare-vector ] if ;
:: ^(compare-vector) ( src1 src2 rep orig-cc -- dst )
rep orig-cc %compare-vector-ccs :> ( ccs not? )
ccs empty?
[ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ]
[
ccs unclip :> ( rest-ccs first-cc )
src1 src2 rep first-cc ^swap-compare-vector :> first-dst
rest-ccs first-dst
[ [ src1 src2 rep ] dip ^swap-compare-vector rep ^^or-vector ]
reduce
not? [ rep ^not-vector ] when
] if ;
:: ^minmax-compare-vector ( src1 src2 rep cc -- dst )
cc order-cc {
{ cc< [ src1 src2 rep ^^max-vector src1 rep cc/= ^(compare-vector) ] }
{ cc<= [ src1 src2 rep ^^min-vector src1 rep cc= ^(compare-vector) ] }
{ cc> [ src1 src2 rep ^^min-vector src1 rep cc/= ^(compare-vector) ] }
{ cc>= [ src1 src2 rep ^^max-vector src1 rep cc= ^(compare-vector) ] }
} case ;
: ^compare-vector ( src1 src2 rep cc -- dst )
{
[ ^(compare-vector) ]
[ ^minmax-compare-vector ]
{ unsigned-int-vector-rep |[ src1 src2 rep cc |
rep sign-bit-mask ^^load-literal :> sign-bits
src1 sign-bits rep ^^xor-vector
src2 sign-bits rep ^^xor-vector
rep signed-rep cc ^(compare-vector)
] }
} vv-cc-vector-op ;
: ^unpack-vector-head ( src rep -- dst )
{
[ ^^unpack-vector-head ]
{ unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-head ] bi ] }
{ signed-int-vector-rep |[ src rep |
src src rep ^^merge-vector-head :> merged
rep rep-component-type heap-size 8 * :> bits
merged bits rep widen-vector-rep ^^shr-vector-imm
] }
{ signed-int-vector-rep |[ src rep |
rep ^^zero-vector :> zero
zero src rep cc> ^compare-vector :> sign
src sign rep ^^merge-vector-head
] }
} v-vector-op ;
: ^unpack-vector-tail ( src rep -- dst )
{
[ ^^unpack-vector-tail ]
[ [ ^^tail>head-vector ] [ ^^unpack-vector-head ] bi ]
{ unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-tail ] bi ] }
{ signed-int-vector-rep |[ src rep |
src src rep ^^merge-vector-tail :> merged
rep rep-component-type heap-size 8 * :> bits
merged bits rep widen-vector-rep ^^shr-vector-imm
] }
{ signed-int-vector-rep |[ src rep |
rep ^^zero-vector :> zero
zero src rep cc> ^compare-vector :> sign
src sign rep ^^merge-vector-tail
] }
} v-vector-op ;
PREDICATE: fixnum-vector-rep < int-vector-rep
rep-component-type heap-size cell < ;
: ^(sum-vector-2) ( src rep -- dst )
{
[ dupd ^^horizontal-add-vector ]
|[ src rep |
src src rep ^^merge-vector-head :> head
src src rep ^^merge-vector-tail :> tail
head tail rep ^^add-vector
]
} v-vector-op ;
: ^(sum-vector-4) ( src rep -- dst )
{
[
[ dupd ^^horizontal-add-vector ]
[ dupd ^^horizontal-add-vector ] bi
]
|[ src rep |
src src rep ^^merge-vector-head :> head
src src rep ^^merge-vector-tail :> tail
head tail rep ^^add-vector :> src'
rep widen-vector-rep :> rep'
src' src' rep' ^^merge-vector-head :> head'
src' src' rep' ^^merge-vector-tail :> tail'
head' tail' rep ^^add-vector
]
} v-vector-op ;
: ^(sum-vector-8) ( src rep -- dst )
{
[
[ dupd ^^horizontal-add-vector ]
[ dupd ^^horizontal-add-vector ]
[ dupd ^^horizontal-add-vector ] tri
]
|[ src rep |
src src rep ^^merge-vector-head :> head
src src rep ^^merge-vector-tail :> tail
head tail rep ^^add-vector :> src'
rep widen-vector-rep :> rep'
src' src' rep' ^^merge-vector-head :> head'
src' src' rep' ^^merge-vector-tail :> tail'
head' tail' rep ^^add-vector :> src''
rep' widen-vector-rep :> rep''
src'' src'' rep'' ^^merge-vector-head :> head''
src'' src'' rep'' ^^merge-vector-tail :> tail''
head'' tail'' rep ^^add-vector
]
} v-vector-op ;
: ^(sum-vector-16) ( src rep -- dst )
{
[
{
[ dupd ^^horizontal-add-vector ]
[ dupd ^^horizontal-add-vector ]
[ dupd ^^horizontal-add-vector ]
[ dupd ^^horizontal-add-vector ]
} cleave
]
|[ src rep |
src src rep ^^merge-vector-head :> head
src src rep ^^merge-vector-tail :> tail
head tail rep ^^add-vector :> src'
rep widen-vector-rep :> rep'
src' src' rep' ^^merge-vector-head :> head'
src' src' rep' ^^merge-vector-tail :> tail'
head' tail' rep ^^add-vector :> src''
rep' widen-vector-rep :> rep''
src'' src'' rep'' ^^merge-vector-head :> head''
src'' src'' rep'' ^^merge-vector-tail :> tail''
head'' tail'' rep ^^add-vector :> src'''
rep'' widen-vector-rep :> rep'''
src''' src''' rep''' ^^merge-vector-head :> head'''
src''' src''' rep''' ^^merge-vector-tail :> tail'''
head''' tail''' rep ^^add-vector
]
} v-vector-op ;
: ^(sum-vector) ( src rep -- dst )
[
dup rep-length {
{ 2 [ ^(sum-vector-2) ] }
{ 4 [ ^(sum-vector-4) ] }
{ 8 [ ^(sum-vector-8) ] }
{ 16 [ ^(sum-vector-16) ] }
} case
] [ ^^vector>scalar ] bi ;
: ^sum-vector ( src rep -- dst )
{
{ float-vector-rep [ ^(sum-vector) ] }
{ fixnum-vector-rep |[ src rep |
src rep ^unpack-vector-head :> head
src rep ^unpack-vector-tail :> tail
rep widen-vector-rep :> wide-rep
head tail wide-rep ^^add-vector wide-rep
^(sum-vector)
] }
} v-vector-op ;
: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
: ^shuffle-vector-imm ( src1 shuffle rep -- dst )
[ rep-length 0 pad-tail ] keep {
[ ^^shuffle-vector-imm ]
[ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] bi ]
} vl-vector-op ;
: ^shuffle-2-vectors-imm ( src1 src2 shuffle rep -- dst )
[ rep-length 0 pad-tail ] keep {
{ double-2-rep |[ src1 src2 shuffle rep |
shuffle first2 [ 4 mod ] bi@ :> ( i j )
{
{ [ i j [ 2 < ] both? ] [
src1 shuffle rep ^shuffle-vector-imm
] }
{ [ i j [ 2 >= ] both? ] [
src2 shuffle [ 2 - ] map rep ^shuffle-vector-imm
] }
{ [ i 2 < ] [
src1 src2 i j 2 - 2array rep ^^shuffle-vector-halves-imm
] }
! [ j 2 < ]
[ src2 src1 i 2 - j 2array rep ^^shuffle-vector-halves-imm ]
} cond
] }
} vvl-vector-op ;
: ^broadcast-vector ( src n rep -- dst )
[ rep-length swap <array> ] keep
^shuffle-vector-imm ;
: ^with-vector ( src rep -- dst )
[ ^^scalar>vector ] keep [ 0 ] dip ^broadcast-vector ;
: ^select-vector ( src n rep -- dst )
{
[ ^^select-vector ]
[ [ ^broadcast-vector ] keep ^^vector>scalar ]
} vl-vector-op ;
! intrinsic emitters
: emit-simd-v+ ( node -- )
{
[ ^^add-vector ]
} emit-vv-vector-op ;
: emit-simd-v- ( node -- )
{
[ ^^sub-vector ]
} emit-vv-vector-op ;
: emit-simd-vneg ( node -- )
{
{ float-vector-rep [ [ ^load-neg-zero-vector swap ] [ ^^sub-vector ] bi ] }
{ int-vector-rep [ [ ^^zero-vector swap ] [ ^^sub-vector ] bi ] }
} emit-v-vector-op ;
: emit-simd-v+- ( node -- )
{
[ ^^add-sub-vector ]
{ float-vector-rep |[ src1 src2 rep |
rep ^load-add-sub-vector :> signs
src2 signs rep ^^xor-vector :> src2'
src1 src2' rep ^^add-vector
] }
{ int-vector-rep |[ src1 src2 rep |
rep ^load-add-sub-vector :> signs
src2 signs rep ^^xor-vector :> src2'
src2' signs rep ^^sub-vector :> src2''
src1 src2'' rep ^^add-vector
] }
} emit-vv-vector-op ;
: emit-simd-vs+ ( node -- )
{
{ float-vector-rep [ ^^add-vector ] }
{ int-vector-rep [ ^^saturated-add-vector ] }
} emit-vv-vector-op ;
: emit-simd-vs- ( node -- )
{
{ float-vector-rep [ ^^sub-vector ] }
{ int-vector-rep [ ^^saturated-sub-vector ] }
} emit-vv-vector-op ;
: emit-simd-vs* ( node -- )
{
{ float-vector-rep [ ^^mul-vector ] }
{ int-vector-rep [ ^^saturated-mul-vector ] }
} emit-vv-vector-op ;
: emit-simd-v* ( node -- )
{
[ ^^mul-vector ]
} emit-vv-vector-op ;
: emit-simd-v*high ( node -- )
{
[ ^^mul-high-vector ]
} emit-vv-vector-op ;
: emit-simd-v*hs+ ( node -- )
{
[ ^^mul-horizontal-add-vector ]
} emit-vv-vector-op ;
: emit-simd-v/ ( node -- )
{
[ ^^div-vector ]
} emit-vv-vector-op ;
: emit-simd-vmin ( node -- )
{
[ ^^min-vector ]
[
[ cc< ^compare-vector ]
[ ^blend-vector ] 3bi
]
} emit-vv-vector-op ;
: emit-simd-vmax ( node -- )
{
[ ^^max-vector ]
[
[ cc> ^compare-vector ]
[ ^blend-vector ] 3bi
]
} emit-vv-vector-op ;
: emit-simd-vavg ( node -- )
{
[ ^^avg-vector ]
{ float-vector-rep |[ src1 src2 rep |
src1 src2 rep ^^add-vector
rep ^load-half-vector rep ^^mul-vector
] }
} emit-vv-vector-op ;
: emit-simd-v. ( node -- )
{
[ ^^dot-vector ]
{ float-vector-rep [ [ ^^mul-vector ] [ ^sum-vector ] bi ] }
} emit-vv-vector-op ;
: emit-simd-vsad ( node -- )
{
[
[ ^^sad-vector dup { 2 3 0 1 } int-4-rep ^^shuffle-vector-imm int-4-rep ^^add-vector ]
[ widen-vector-rep ^^vector>scalar ] bi
]
} emit-vv-vector-op ;
: emit-simd-vsqrt ( node -- )
{
[ ^^sqrt-vector ]
} emit-v-vector-op ;
: emit-simd-sum ( node -- )
{
[ ^sum-vector ]
} emit-v-vector-op ;
: emit-simd-vabs ( node -- )
{
{ unsigned-int-vector-rep [ drop ] }
[ ^^abs-vector ]
{ float-vector-rep [ [ ^load-neg-zero-vector ] [ swapd ^^andn-vector ] bi ] }
{ int-vector-rep |[ src rep |
rep ^^zero-vector :> zero
zero src rep ^^sub-vector :> -src
zero src rep cc> ^compare-vector :> sign
sign -src src rep ^blend-vector
] }
} emit-v-vector-op ;
: emit-simd-vand ( node -- )
{
[ ^^and-vector ]
} emit-vv-vector-op ;
: emit-simd-vandn ( node -- )
{
[ ^^andn-vector ]
} emit-vv-vector-op ;
: emit-simd-vor ( node -- )
{
[ ^^or-vector ]
} emit-vv-vector-op ;
: emit-simd-vxor ( node -- )
{
[ ^^xor-vector ]
} emit-vv-vector-op ;
: emit-simd-vnot ( node -- )
{
[ ^not-vector ]
} emit-v-vector-op ;
: emit-simd-vlshift ( node -- )
{
[ ^^shl-vector ]
} {
[ ^^shl-vector-imm ]
} [ integer? ] emit-vv-or-vl-vector-op ;
: emit-simd-vrshift ( node -- )
{
[ ^^shr-vector ]
} {
[ ^^shr-vector-imm ]
} [ integer? ] emit-vv-or-vl-vector-op ;
: emit-simd-hlshift ( node -- )
{
[ ^^horizontal-shl-vector-imm ]
} [ integer? ] emit-vl-vector-op ;
: emit-simd-hrshift ( node -- )
{
[ ^^horizontal-shr-vector-imm ]
} [ integer? ] emit-vl-vector-op ;
: emit-simd-vshuffle-elements ( node -- )
{
[ ^shuffle-vector-imm ]
} [ shuffle? ] emit-vl-vector-op ;
: emit-simd-vshuffle2-elements ( node -- )
{
[ ^shuffle-2-vectors-imm ]
} [ shuffle? ] emit-vvl-vector-op ;
: emit-simd-vshuffle-bytes ( node -- )
{
[ ^^shuffle-vector ]
} emit-vv-vector-op ;
: emit-simd-vmerge-head ( node -- )
{
[ ^^merge-vector-head ]
} emit-vv-vector-op ;
: emit-simd-vmerge-tail ( node -- )
{
[ ^^merge-vector-tail ]
} emit-vv-vector-op ;
: emit-simd-v<= ( node -- )
{
[ cc<= ^compare-vector ]
} emit-vv-vector-op ;
: emit-simd-v< ( node -- )
{
[ cc< ^compare-vector ]
} emit-vv-vector-op ;
: emit-simd-v= ( node -- )
{
[ cc= ^compare-vector ]
} emit-vv-vector-op ;
: emit-simd-v> ( node -- )
{
[ cc> ^compare-vector ]
} emit-vv-vector-op ;
: emit-simd-v>= ( node -- )
{
[ cc>= ^compare-vector ]
} emit-vv-vector-op ;
: emit-simd-vunordered? ( node -- )
{
[ cc/<>= ^compare-vector ]
} emit-vv-vector-op ;
: emit-simd-vany? ( node -- )
{
[ vcc-any ^^test-vector ]
} emit-v-vector-op ;
: emit-simd-vall? ( node -- )
{
[ vcc-all ^^test-vector ]
} emit-v-vector-op ;
: emit-simd-vnone? ( node -- )
{
[ vcc-none ^^test-vector ]
} emit-v-vector-op ;
: emit-simd-vgetmask ( node -- )
{
[ ^^move-vector-mask ]
} emit-v-vector-op ;
: emit-simd-v>float ( node -- )
{
{ float-vector-rep [ drop ] }
{ int-vector-rep [ ^^integer>float-vector ] }
} emit-v-vector-op ;
: emit-simd-v>integer ( node -- )
{
{ float-vector-rep [ ^^float>integer-vector ] }
{ int-vector-rep [ drop ] }
} emit-v-vector-op ;
: emit-simd-vpack-signed ( node -- )
{
{ double-2-rep |[ src1 src2 rep |
src1 double-2-rep ^^float-pack-vector :> dst-head
src2 double-2-rep ^^float-pack-vector :> dst-tail
dst-head dst-tail { 0 1 0 1 } float-4-rep ^^shuffle-vector-halves-imm
] }
{ int-vector-rep [ ^^signed-pack-vector ] }
} emit-vv-vector-op ;
: emit-simd-vpack-unsigned ( node -- )
{
[ ^^unsigned-pack-vector ]
} emit-vv-vector-op ;
: emit-simd-vunpack-head ( node -- )
{
[ ^unpack-vector-head ]
} emit-v-vector-op ;
: emit-simd-vunpack-tail ( node -- )
{
[ ^unpack-vector-tail ]
} emit-v-vector-op ;
: emit-simd-with ( node -- )
{
{ fixnum-vector-rep [ ^with-vector ] }
{ float-vector-rep [ ^with-vector ] }
} emit-v-vector-op ;
: emit-simd-gather-2 ( node -- )
{
{ fixnum-vector-rep [ ^^gather-int-vector-2 ] }
{ fixnum-vector-rep [ ^^gather-vector-2 ] }
{ float-vector-rep [ ^^gather-vector-2 ] }
} emit-vv-vector-op ;
: emit-simd-gather-4 ( node -- )
{
{ fixnum-vector-rep [ ^^gather-int-vector-4 ] }
{ fixnum-vector-rep [ ^^gather-vector-4 ] }
{ float-vector-rep [ ^^gather-vector-4 ] }
} emit-vvvv-vector-op ;
: emit-simd-select ( node -- )
{
{ fixnum-vector-rep [ ^select-vector ] }
{ float-vector-rep [ ^select-vector ] }
} [ integer? ] emit-vl-vector-op ;
: emit-alien-vector ( block node -- block' )
dup [
'[
ds-drop prepare-load-memory
_ f ^^load-memory-imm ds-push
]
[ inline-load-memory? ] inline-accessor
] with { [ %alien-vector-reps member? ] } if-literals-match ;
: emit-set-alien-vector ( block node -- block' )
dup [
'[
ds-drop prepare-store-memory
_ f ##store-memory-imm,
]
[ byte-array inline-store-memory? ] inline-accessor
] with { [ %alien-vector-reps member? ] } if-literals-match ;
: enable-simd ( -- )
{
{ (simd-v+) [ emit-simd-v+ ] }
{ (simd-v-) [ emit-simd-v- ] }
{ (simd-vneg) [ emit-simd-vneg ] }
{ (simd-v+-) [ emit-simd-v+- ] }
{ (simd-vs+) [ emit-simd-vs+ ] }
{ (simd-vs-) [ emit-simd-vs- ] }
{ (simd-vs*) [ emit-simd-vs* ] }
{ (simd-v*) [ emit-simd-v* ] }
{ (simd-v*high) [ emit-simd-v*high ] }
{ (simd-v*hs+) [ emit-simd-v*hs+ ] }
{ (simd-v/) [ emit-simd-v/ ] }
{ (simd-vmin) [ emit-simd-vmin ] }
{ (simd-vmax) [ emit-simd-vmax ] }
{ (simd-vavg) [ emit-simd-vavg ] }
{ (simd-v.) [ emit-simd-v. ] }
{ (simd-vsad) [ emit-simd-vsad ] }
{ (simd-vsqrt) [ emit-simd-vsqrt ] }
{ (simd-sum) [ emit-simd-sum ] }
{ (simd-vabs) [ emit-simd-vabs ] }
{ (simd-vbitand) [ emit-simd-vand ] }
{ (simd-vbitandn) [ emit-simd-vandn ] }
{ (simd-vbitor) [ emit-simd-vor ] }
{ (simd-vbitxor) [ emit-simd-vxor ] }
{ (simd-vbitnot) [ emit-simd-vnot ] }
{ (simd-vand) [ emit-simd-vand ] }
{ (simd-vandn) [ emit-simd-vandn ] }
{ (simd-vor) [ emit-simd-vor ] }
{ (simd-vxor) [ emit-simd-vxor ] }
{ (simd-vnot) [ emit-simd-vnot ] }
{ (simd-vlshift) [ emit-simd-vlshift ] }
{ (simd-vrshift) [ emit-simd-vrshift ] }
{ (simd-hlshift) [ emit-simd-hlshift ] }
{ (simd-hrshift) [ emit-simd-hrshift ] }
{ (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] }
{ (simd-vshuffle2-elements) [ emit-simd-vshuffle2-elements ] }
{ (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] }
{ (simd-vmerge-head) [ emit-simd-vmerge-head ] }
{ (simd-vmerge-tail) [ emit-simd-vmerge-tail ] }
{ (simd-v<=) [ emit-simd-v<= ] }
{ (simd-v<) [ emit-simd-v< ] }
{ (simd-v=) [ emit-simd-v= ] }
{ (simd-v>) [ emit-simd-v> ] }
{ (simd-v>=) [ emit-simd-v>= ] }
{ (simd-vunordered?) [ emit-simd-vunordered? ] }
{ (simd-vany?) [ emit-simd-vany? ] }
{ (simd-vall?) [ emit-simd-vall? ] }
{ (simd-vnone?) [ emit-simd-vnone? ] }
{ (simd-v>float) [ emit-simd-v>float ] }
{ (simd-v>integer) [ emit-simd-v>integer ] }
{ (simd-vpack-signed) [ emit-simd-vpack-signed ] }
{ (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] }
{ (simd-vunpack-head) [ emit-simd-vunpack-head ] }
{ (simd-vunpack-tail) [ emit-simd-vunpack-tail ] }
{ (simd-with) [ emit-simd-with ] }
{ (simd-gather-2) [ emit-simd-gather-2 ] }
{ (simd-gather-4) [ emit-simd-gather-4 ] }
{ (simd-select) [ emit-simd-select ] }
{ alien-vector [ emit-alien-vector ] }
{ set-alien-vector [ emit-set-alien-vector ] }
{ assert-positive [ drop ] }
{ (simd-vgetmask) [ emit-simd-vgetmask ] }
} enable-intrinsics ;
enable-simd

View File

@ -0,0 +1,135 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors byte-arrays combinators compiler.cfg.builder
compiler.tree.propagation.info compiler.tree.propagation.nodes
continuations cpu.architecture fry kernel layouts math
math.intervals math.vectors.simd.intrinsics namespaces sequences
words ;
IN: compiler.tree.propagation.simd
CONSTANT: vector>vector-intrinsics
{
(simd-v+)
(simd-v-)
(simd-vneg)
(simd-v+-)
(simd-vs+)
(simd-vs-)
(simd-vs*)
(simd-v*)
(simd-v*high)
(simd-v*hs+)
(simd-v/)
(simd-vmin)
(simd-vmax)
(simd-vavg)
(simd-vsqrt)
(simd-vabs)
(simd-vbitand)
(simd-vbitandn)
(simd-vbitor)
(simd-vbitxor)
(simd-vbitnot)
(simd-vand)
(simd-vandn)
(simd-vor)
(simd-vxor)
(simd-vnot)
(simd-vlshift)
(simd-vrshift)
(simd-hlshift)
(simd-hrshift)
(simd-vshuffle-elements)
(simd-vshuffle2-elements)
(simd-vshuffle-bytes)
(simd-vmerge-head)
(simd-vmerge-tail)
(simd-v<=)
(simd-v<)
(simd-v=)
(simd-v>)
(simd-v>=)
(simd-vunordered?)
(simd-v>float)
(simd-v>integer)
(simd-vpack-signed)
(simd-vpack-unsigned)
(simd-vunpack-head)
(simd-vunpack-tail)
(simd-with)
(simd-gather-2)
(simd-gather-4)
alien-vector
}
CONSTANT: vector-other-intrinsics
{
(simd-v.)
(simd-vsad)
(simd-sum)
(simd-vany?)
(simd-vall?)
(simd-vnone?)
(simd-vgetmask)
(simd-select)
set-alien-vector
}
: vector-intrinsics ( -- x )
vector>vector-intrinsics vector-other-intrinsics append ;
vector>vector-intrinsics [ { byte-array } "default-output-classes" set-word-prop ] each
: scalar-output-class ( rep -- class )
dup literal?>> [
literal>> scalar-rep-of {
{ float-rep [ float ] }
{ double-rep [ float ] }
{ longlong-scalar-rep [ integer ] }
{ ulonglong-scalar-rep [ integer ] }
{ int-scalar-rep [ cell 8 = [ fixnum ] [ integer ] if ] }
{ uint-scalar-rep [ cell 8 = [ fixnum ] [ integer ] if ] }
[ drop fixnum ]
} case
] [ drop real ] if
<class-info> ;
\ (simd-sum) [ nip scalar-output-class ] "outputs" set-word-prop
\ (simd-v.) [ 2nip scalar-output-class ] "outputs" set-word-prop
{
(simd-vany?)
(simd-vall?)
(simd-vnone?)
} [ { boolean } "default-output-classes" set-word-prop ] each
\ (simd-select) [ 2nip scalar-output-class ] "outputs" set-word-prop
\ assert-positive [
real [0,inf] <class/interval-info> value-info-intersect
] "outputs" set-word-prop
\ (simd-vgetmask) { fixnum } "default-output-classes" set-word-prop
: clone-with-value-infos ( node -- node' )
clone dup in-d>> extract-value-info >>info ;
: try-intrinsic ( node intrinsic-quot -- ? )
'[
_ clone-with-value-infos
_ with-dummy-cfg-builder
t
] [ drop f ] recover ;
: inline-unless-intrinsic ( word -- )
dup '[
_ swap over "intrinsic" word-prop
"always-inline-simd-intrinsics" get not swap and
! word node intrinsic
[ try-intrinsic [ drop f ] [ def>> ] if ]
[ drop def>> ] if*
]
"custom-inlining" set-word-prop ;
vector-intrinsics [ inline-unless-intrinsic ] each