diff --git a/basis/alien/complex/complex.factor b/basis/alien/complex/complex.factor index 8b2cdc1c72..a12cc8c517 100644 --- a/basis/alien/complex/complex.factor +++ b/basis/alien/complex/complex.factor @@ -1,12 +1,12 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors alien.c-types alien.complex.functor kernel -sequences ; +USING: accessors alien alien.c-types alien.complex.functor +classes.struct kernel math quotations ; +FROM: alien.c-types => float double ; IN: alien.complex -<< -{ "float" "double" } [ dup "complex-" prepend define-complex-type ] each ->> +COMPLEX-TYPE: float complex-float +COMPLEX-TYPE: double complex-double << ! This overrides the fact that small structures are never returned diff --git a/basis/alien/complex/functor/functor.factor b/basis/alien/complex/functor/functor.factor index 141dc0d608..405fff61f9 100644 --- a/basis/alien/complex/functor/functor.factor +++ b/basis/alien/complex/functor/functor.factor @@ -3,20 +3,25 @@ USING: functors2 ; IN: alien.complex.functor -FUNCTOR: define-complex-type ( N: name T: name -- ) [[ +SAME-FUNCTOR: complex-type ( N: existing-word T: name -- ) [[ + USING: alien alien.c-types classes.struct kernel quotations ; + QUALIFIED: math -STRUCT: ${T}-class { real ${N}-type } { imaginary ${N}-type } ; + << + STRUCT: ${T} { real ${N} } { imaginary ${N} } ; -: <${T}> ( z -- alien ) - >rect ${T}-class >c-ptr ; + : <${T}> ( z -- alien ) + math:>rect ${T} >c-ptr ; -: *${T} ( alien -- z ) - T-class memory>struct [ real>> ] [ imaginary>> ] bi rect> ; inline + : *${T} ( alien -- z ) + ${T} memory>struct [ real>> ] [ imaginary>> ] bi math:rect> ; inline -${T}-class lookup-c-type -<${T}> 1quotation >>unboxer-quot -*${T} 1quotation >>boxer-quot -complex >>boxed-class -drop + >> + + \ ${T} lookup-c-type + [ <${T}> ] >>unboxer-quot + [ *${T} ] >>boxer-quot + complex >>boxed-class + drop ]] diff --git a/removed/compiler/cfg/intrinsics/simd/authors.txt b/removed/compiler/cfg/intrinsics/simd/authors.txt new file mode 100644 index 0000000000..1901f27a24 --- /dev/null +++ b/removed/compiler/cfg/intrinsics/simd/authors.txt @@ -0,0 +1 @@ +Slava Pestov diff --git a/removed/compiler/cfg/intrinsics/simd/backend/backend.factor b/removed/compiler/cfg/intrinsics/simd/backend/backend.factor new file mode 100644 index 0000000000..efbfed68c0 --- /dev/null +++ b/removed/compiler/cfg/intrinsics/simd/backend/backend.factor @@ -0,0 +1,163 @@ +! Copyright (C) 2009 Joe Groff. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors arrays classes combinators +compiler.cfg.instructions compiler.cfg.registers +compiler.cfg.stacks compiler.cfg.stacks.local +compiler.tree.propagation.info cpu.architecture fry +generalizations kernel locals macros make math quotations +sequences sequences.generalizations ; +IN: compiler.cfg.intrinsics.simd.backend + +! Selection of implementation based on available CPU instructions + +GENERIC: insn-available? ( ## -- reps ) + +M: object insn-available? drop t ; + +M: ##zero-vector insn-available? rep>> %zero-vector-reps member? ; +M: ##fill-vector insn-available? rep>> %fill-vector-reps member? ; +M: ##gather-vector-2 insn-available? rep>> %gather-vector-2-reps member? ; +M: ##gather-vector-4 insn-available? rep>> %gather-vector-4-reps member? ; +M: ##gather-int-vector-2 insn-available? rep>> %gather-int-vector-2-reps member? ; +M: ##gather-int-vector-4 insn-available? rep>> %gather-int-vector-4-reps member? ; +M: ##select-vector insn-available? rep>> %select-vector-reps member? ; +M: ##store-memory-imm insn-available? rep>> %alien-vector-reps member? ; +M: ##shuffle-vector insn-available? rep>> %shuffle-vector-reps member? ; +M: ##shuffle-vector-imm insn-available? rep>> %shuffle-vector-imm-reps member? ; +M: ##shuffle-vector-halves-imm insn-available? rep>> %shuffle-vector-halves-imm-reps member? ; +M: ##merge-vector-head insn-available? rep>> %merge-vector-reps member? ; +M: ##merge-vector-tail insn-available? rep>> %merge-vector-reps member? ; +M: ##float-pack-vector insn-available? rep>> %float-pack-vector-reps member? ; +M: ##signed-pack-vector insn-available? rep>> %signed-pack-vector-reps member? ; +M: ##unsigned-pack-vector insn-available? rep>> %unsigned-pack-vector-reps member? ; +M: ##unpack-vector-head insn-available? rep>> %unpack-vector-head-reps member? ; +M: ##unpack-vector-tail insn-available? rep>> %unpack-vector-tail-reps member? ; +M: ##tail>head-vector insn-available? rep>> %unpack-vector-head-reps member? ; +M: ##integer>float-vector insn-available? rep>> %integer>float-vector-reps member? ; +M: ##float>integer-vector insn-available? rep>> %float>integer-vector-reps member? ; +M: ##compare-vector insn-available? [ rep>> ] [ cc>> ] bi %compare-vector-reps member? ; +M: ##move-vector-mask insn-available? rep>> %move-vector-mask-reps member? ; +M: ##test-vector insn-available? rep>> %test-vector-reps member? ; +M: ##add-vector insn-available? rep>> %add-vector-reps member? ; +M: ##saturated-add-vector insn-available? rep>> %saturated-add-vector-reps member? ; +M: ##add-sub-vector insn-available? rep>> %add-sub-vector-reps member? ; +M: ##sub-vector insn-available? rep>> %sub-vector-reps member? ; +M: ##saturated-sub-vector insn-available? rep>> %saturated-sub-vector-reps member? ; +M: ##mul-vector insn-available? rep>> %mul-vector-reps member? ; +M: ##mul-high-vector insn-available? rep>> %mul-high-vector-reps member? ; +M: ##mul-horizontal-add-vector insn-available? rep>> %mul-horizontal-add-vector-reps member? ; +M: ##saturated-mul-vector insn-available? rep>> %saturated-mul-vector-reps member? ; +M: ##div-vector insn-available? rep>> %div-vector-reps member? ; +M: ##min-vector insn-available? rep>> %min-vector-reps member? ; +M: ##max-vector insn-available? rep>> %max-vector-reps member? ; +M: ##avg-vector insn-available? rep>> %avg-vector-reps member? ; +M: ##dot-vector insn-available? rep>> %dot-vector-reps member? ; +M: ##sad-vector insn-available? rep>> %sad-vector-reps member? ; +M: ##sqrt-vector insn-available? rep>> %sqrt-vector-reps member? ; +M: ##horizontal-add-vector insn-available? rep>> %horizontal-add-vector-reps member? ; +M: ##horizontal-sub-vector insn-available? rep>> %horizontal-sub-vector-reps member? ; +M: ##abs-vector insn-available? rep>> %abs-vector-reps member? ; +M: ##and-vector insn-available? rep>> %and-vector-reps member? ; +M: ##andn-vector insn-available? rep>> %andn-vector-reps member? ; +M: ##or-vector insn-available? rep>> %or-vector-reps member? ; +M: ##xor-vector insn-available? rep>> %xor-vector-reps member? ; +M: ##not-vector insn-available? rep>> %not-vector-reps member? ; +M: ##shl-vector insn-available? rep>> %shl-vector-reps member? ; +M: ##shr-vector insn-available? rep>> %shr-vector-reps member? ; +M: ##shl-vector-imm insn-available? rep>> %shl-vector-imm-reps member? ; +M: ##shr-vector-imm insn-available? rep>> %shr-vector-imm-reps member? ; +M: ##horizontal-shl-vector-imm insn-available? rep>> %horizontal-shl-vector-imm-reps member? ; +M: ##horizontal-shr-vector-imm insn-available? rep>> %horizontal-shr-vector-imm-reps member? ; + +: [vector-op-checked] ( #dup quot -- quot ) + '[ _ ndup [ @ ] { } make dup [ insn-available? ] all? ] ; + +GENERIC#: >vector-op-cond 2 ( quot #pick #dup -- quotpair ) +M:: callable >vector-op-cond ( quot #pick #dup -- quotpair ) + #dup quot [vector-op-checked] '[ 2drop @ ] + #dup '[ % _ nnip ] + 2array ; + +M:: pair >vector-op-cond ( pair #pick #dup -- quotpair ) + pair first2 :> ( class quot ) + #pick class #dup quot [vector-op-checked] + '[ 2drop _ npick _ instance? _ [ f f f ] if ] + #dup '[ % _ nnip ] + 2array ; + +MACRO: v-vector-op ( trials -- quot ) + [ 1 2 >vector-op-cond ] map '[ f f _ cond ] ; +MACRO: vl-vector-op ( trials -- quot ) + [ 1 3 >vector-op-cond ] map '[ f f _ cond ] ; +MACRO: vvl-vector-op ( trials -- quot ) + [ 1 4 >vector-op-cond ] map '[ f f _ cond ] ; +MACRO: vv-vector-op ( trials -- quot ) + [ 1 3 >vector-op-cond ] map '[ f f _ cond ] ; +MACRO: vv-cc-vector-op ( trials -- quot ) + [ 2 4 >vector-op-cond ] map '[ f f _ cond ] ; +MACRO: vvvv-vector-op ( trials -- quot ) + [ 1 5 >vector-op-cond ] map '[ f f _ cond ] ; + +! Intrinsic code emission + +MACRO: check-elements ( quots -- quot ) + [ length '[ _ firstn ] ] + [ '[ _ spread ] ] + [ length 1 - \ and [ ] like ] + tri 3append ; + +ERROR: bad-simd-intrinsic node ; + +MACRO: if-literals-match ( quots -- quot ) + [ length ] [ ] [ length ] tri + ! n quots n + '[ + ! node quot + [ + dup node-input-infos + _ tail-slice* [ literal>> ] map + dup _ check-elements + ] dip + swap [ + ! node literals quot + [ _ firstn ] dip call + drop + ] [ 2drop bad-simd-intrinsic ] if + ] ; + +CONSTANT: unary [ ds-drop ds-pop ] +CONSTANT: unary/param [ [ -2 inc-stack ds-pop ] dip ] +CONSTANT: binary [ ds-drop 2inputs ] +CONSTANT: binary/param [ [ -2 inc-stack 2inputs ] dip ] +CONSTANT: quaternary + [ + ds-drop + d: 3 peek-loc + d: 2 peek-loc + d: 1 peek-loc + d: 0 peek-loc + -4 inc-stack + ] + +:: emit-vector-op ( trials params-quot op-quot literal-preds -- quot ) + params-quot trials op-quot literal-preds + '[ [ _ dip _ @ ds-push ] _ if-literals-match ] ; + +MACRO: emit-v-vector-op ( trials -- quot ) + unary [ v-vector-op ] { [ representation? ] } emit-vector-op ; +MACRO: emit-vl-vector-op ( trials literal-pred -- quot ) + [ unary/param [ vl-vector-op ] { [ representation? ] } ] dip prefix emit-vector-op ; +MACRO: emit-vv-vector-op ( trials -- quot ) + binary [ vv-vector-op ] { [ representation? ] } emit-vector-op ; +MACRO: emit-vvl-vector-op ( trials literal-pred -- quot ) + [ binary/param [ vvl-vector-op ] { [ representation? ] } ] dip prefix emit-vector-op ; +MACRO: emit-vvvv-vector-op ( trials -- quot ) + quaternary [ vvvv-vector-op ] { [ representation? ] } emit-vector-op ; + +MACRO:: emit-vv-or-vl-vector-op ( var-trials imm-trials literal-pred -- quot ) + literal-pred imm-trials literal-pred var-trials + '[ + dup node-input-infos 2 tail-slice* first literal>> @ + [ _ _ emit-vl-vector-op ] + [ _ emit-vv-vector-op ] if + ] ; diff --git a/removed/compiler/cfg/intrinsics/simd/simd-docs.factor b/removed/compiler/cfg/intrinsics/simd/simd-docs.factor new file mode 100644 index 0000000000..20ac11f618 --- /dev/null +++ b/removed/compiler/cfg/intrinsics/simd/simd-docs.factor @@ -0,0 +1,8 @@ +USING: compiler.cfg.instructions compiler.tree help.markup help.syntax +math.vectors ; +IN: compiler.cfg.intrinsics.simd + +HELP: emit-simd-v+ +{ $values { "node" node } } +{ $description "Emits instructions for SIMD vector addition." } +{ $see-also ##add-vector v+ } ; diff --git a/removed/compiler/cfg/intrinsics/simd/simd-tests.factor b/removed/compiler/cfg/intrinsics/simd/simd-tests.factor new file mode 100644 index 0000000000..3f3915a170 --- /dev/null +++ b/removed/compiler/cfg/intrinsics/simd/simd-tests.factor @@ -0,0 +1,630 @@ +! Copyright (C) 2009 Joe Groff. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors arrays assocs biassocs byte-arrays classes +compiler.cfg compiler.cfg.comparisons compiler.cfg.instructions +compiler.cfg.intrinsics.simd compiler.cfg.intrinsics.simd.backend +compiler.cfg.stacks.local compiler.test compiler.tree +compiler.tree.propagation.info cpu.architecture fry kernel locals make +namespaces sequences system tools.test words ; +IN: compiler.cfg.intrinsics.simd.tests + +:: test-node ( rep -- node ) + T{ #call + { in-d { 1 2 3 4 } } + { out-d { 5 } } + { info H{ + { 1 T{ value-info-state { class byte-array } } } + { 2 T{ value-info-state { class byte-array } } } + { 3 T{ value-info-state { class byte-array } } } + { 4 T{ value-info-state { class word } { literal? t } { literal rep } } } + { 5 T{ value-info-state { class byte-array } } } + } } + } ; + +:: test-node-literal ( lit rep -- node ) + lit class-of :> lit-class + T{ #call + { in-d { 1 2 3 4 } } + { out-d { 5 } } + { info H{ + { 1 T{ value-info-state { class byte-array } } } + { 2 T{ value-info-state { class byte-array } } } + { 3 T{ value-info-state { class lit-class } { literal? t } { literal lit } } } + { 4 T{ value-info-state { class word } { literal? t } { literal rep } } } + { 5 T{ value-info-state { class byte-array } } } + } } + } ; + +: test-node-nonliteral-rep ( -- node ) + T{ #call + { in-d { 1 2 3 4 } } + { out-d { 5 } } + { info H{ + { 1 T{ value-info-state { class byte-array } } } + { 2 T{ value-info-state { class byte-array } } } + { 3 T{ value-info-state { class byte-array } } } + { 4 T{ value-info-state { class object } } } + { 5 T{ value-info-state { class byte-array } } } + } } + } ; + +: test-compiler-env ( -- x ) + H{ } clone + T{ basic-block } 0 0 0 0 height-state boa >>height + \ basic-block pick set-at + + 0 0 0 0 height-state boa \ height-state pick set-at + HS{ } clone \ local-peek-set pick set-at + H{ } clone \ replaces pick set-at + H{ } \ locs>vregs pick set-at ; + +: make-classes ( quot -- seq ) + { } make [ class-of ] map ; inline + +: test-emit ( cpu rep quot -- node ) + [ + [ new \ cpu ] 2dip '[ + test-compiler-env [ _ test-node @ ] with-variables + ] with-variable + ] make-classes ; inline + +: test-emit-literal ( cpu lit rep quot -- node ) + [ + [ new \ cpu ] 3dip '[ + test-compiler-env [ _ _ test-node-literal @ ] with-variables + ] with-variable + ] make-classes ; inline + +: test-emit-nonliteral-rep ( cpu quot -- node ) + [ + [ new \ cpu ] dip '[ + test-compiler-env [ test-node-nonliteral-rep @ ] with-variables + ] with-variable + ] make-classes ; inline + +CONSTANT: signed-reps + { char-16-rep short-8-rep int-4-rep longlong-2-rep float-4-rep double-2-rep } +CONSTANT: all-reps + { + char-16-rep short-8-rep int-4-rep longlong-2-rep float-4-rep double-2-rep + uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep + } + +TUPLE: scalar-cpu ; + +TUPLE: simple-ops-cpu ; +M: simple-ops-cpu %zero-vector-reps all-reps ; +M: simple-ops-cpu %fill-vector-reps all-reps ; +M: simple-ops-cpu %add-vector-reps all-reps ; +M: simple-ops-cpu %sub-vector-reps all-reps ; +M: simple-ops-cpu %mul-vector-reps all-reps ; +M: simple-ops-cpu %div-vector-reps all-reps ; +M: simple-ops-cpu %andn-vector-reps all-reps ; +M: simple-ops-cpu %and-vector-reps all-reps ; +M: simple-ops-cpu %or-vector-reps all-reps ; +M: simple-ops-cpu %xor-vector-reps all-reps ; +M: simple-ops-cpu %merge-vector-reps all-reps ; +M: simple-ops-cpu %sqrt-vector-reps all-reps ; +M: simple-ops-cpu %move-vector-mask-reps all-reps ; +M: simple-ops-cpu %test-vector-reps all-reps ; +M: simple-ops-cpu %signed-pack-vector-reps all-reps ; +M: simple-ops-cpu %unsigned-pack-vector-reps all-reps ; +M: simple-ops-cpu %gather-vector-2-reps { longlong-2-rep ulonglong-2-rep double-2-rep } ; +M: simple-ops-cpu %gather-vector-4-reps { int-4-rep uint-4-rep float-4-rep } ; +M: simple-ops-cpu %alien-vector-reps all-reps ; + +! v+ +{ { ##add-vector } } +[ simple-ops-cpu float-4-rep [ emit-simd-v+ ] test-emit ] +unit-test + +! v- +{ { ##sub-vector } } +[ simple-ops-cpu float-4-rep [ emit-simd-v- ] test-emit ] +unit-test + +! vneg +{ { ##load-reference ##sub-vector } } +[ simple-ops-cpu float-4-rep [ emit-simd-vneg ] test-emit ] +unit-test + +{ { ##zero-vector ##sub-vector } } +[ simple-ops-cpu int-4-rep [ emit-simd-vneg ] test-emit ] +unit-test + +! v* +{ { ##mul-vector } } +[ simple-ops-cpu float-4-rep [ emit-simd-v* ] test-emit ] +unit-test + +! v/ +{ { ##div-vector } } +[ simple-ops-cpu float-4-rep [ emit-simd-v/ ] test-emit ] +unit-test + +TUPLE: addsub-cpu < simple-ops-cpu ; +M: addsub-cpu %add-sub-vector-reps { int-4-rep float-4-rep } ; + +! v+- +{ { ##add-sub-vector } } +[ addsub-cpu float-4-rep [ emit-simd-v+- ] test-emit ] +unit-test + +{ { ##load-reference ##xor-vector ##add-vector } } +[ simple-ops-cpu float-4-rep [ emit-simd-v+- ] test-emit ] +unit-test + +{ { ##load-reference ##xor-vector ##sub-vector ##add-vector } } +[ simple-ops-cpu int-4-rep [ emit-simd-v+- ] test-emit ] +unit-test + +TUPLE: saturating-cpu < simple-ops-cpu ; +M: saturating-cpu %saturated-add-vector-reps { int-4-rep } ; +M: saturating-cpu %saturated-sub-vector-reps { int-4-rep } ; +M: saturating-cpu %saturated-mul-vector-reps { int-4-rep } ; + +! vs+ +{ { ##add-vector } } +[ simple-ops-cpu float-4-rep [ emit-simd-vs+ ] test-emit ] +unit-test + +{ { ##add-vector } } +[ saturating-cpu float-4-rep [ emit-simd-vs+ ] test-emit ] +unit-test + +{ { ##saturated-add-vector } } +[ saturating-cpu int-4-rep [ emit-simd-vs+ ] test-emit ] +unit-test + +! vs- +{ { ##sub-vector } } +[ simple-ops-cpu float-4-rep [ emit-simd-vs- ] test-emit ] +unit-test + +{ { ##sub-vector } } +[ saturating-cpu float-4-rep [ emit-simd-vs- ] test-emit ] +unit-test + +{ { ##saturated-sub-vector } } +[ saturating-cpu int-4-rep [ emit-simd-vs- ] test-emit ] +unit-test + +! vs* +{ { ##mul-vector } } +[ simple-ops-cpu float-4-rep [ emit-simd-vs* ] test-emit ] +unit-test + +{ { ##mul-vector } } +[ saturating-cpu float-4-rep [ emit-simd-vs* ] test-emit ] +unit-test + +{ { ##saturated-mul-vector } } +[ saturating-cpu int-4-rep [ emit-simd-vs* ] test-emit ] +unit-test + +TUPLE: minmax-cpu < simple-ops-cpu ; +M: minmax-cpu %min-vector-reps signed-reps ; +M: minmax-cpu %max-vector-reps signed-reps ; +M: minmax-cpu %compare-vector-reps { cc= cc/= } member? [ signed-reps ] [ { } ] if ; +M: minmax-cpu %compare-vector-ccs nip f 2array 1array f ; + +TUPLE: compare-cpu < simple-ops-cpu ; +M: compare-cpu %compare-vector-reps drop signed-reps ; +M: compare-cpu %compare-vector-ccs nip f 2array 1array f ; + +! vmin +{ { ##min-vector } } +[ minmax-cpu float-4-rep [ emit-simd-vmin ] test-emit ] +unit-test + +{ { ##compare-vector ##and-vector ##andn-vector ##or-vector } } +[ compare-cpu float-4-rep [ emit-simd-vmin ] test-emit ] +unit-test + +! vmax +{ { ##max-vector } } +[ minmax-cpu float-4-rep [ emit-simd-vmax ] test-emit ] +unit-test + +{ { ##compare-vector ##and-vector ##andn-vector ##or-vector } } +[ compare-cpu float-4-rep [ emit-simd-vmax ] test-emit ] +unit-test + +TUPLE: dot-cpu < simple-ops-cpu ; +M: dot-cpu %dot-vector-reps { float-4-rep } ; + +TUPLE: horizontal-cpu < simple-ops-cpu ; +M: horizontal-cpu %horizontal-add-vector-reps signed-reps ; +M: horizontal-cpu %unpack-vector-head-reps signed-reps ; +M: horizontal-cpu %unpack-vector-tail-reps signed-reps ; + +! v. +{ { ##dot-vector } } +[ dot-cpu float-4-rep [ emit-simd-v. ] test-emit ] +unit-test + +{ { ##mul-vector ##horizontal-add-vector ##horizontal-add-vector ##vector>scalar } } +[ horizontal-cpu float-4-rep [ emit-simd-v. ] test-emit ] +unit-test + +{ { + ##mul-vector + ##merge-vector-head ##merge-vector-tail ##add-vector + ##merge-vector-head ##merge-vector-tail ##add-vector + ##vector>scalar +} } +[ simple-ops-cpu float-4-rep [ emit-simd-v. ] test-emit ] +unit-test + +! vsqrt +{ { ##sqrt-vector } } +[ simple-ops-cpu float-4-rep [ emit-simd-vsqrt ] test-emit ] +unit-test + +! sum +{ { ##horizontal-add-vector ##vector>scalar } } +[ horizontal-cpu double-2-rep [ emit-simd-sum ] test-emit ] +unit-test + +{ { ##horizontal-add-vector ##horizontal-add-vector ##vector>scalar } } +[ horizontal-cpu float-4-rep [ emit-simd-sum ] test-emit ] +unit-test + +{ { + ##unpack-vector-head ##unpack-vector-tail ##add-vector + ##horizontal-add-vector ##horizontal-add-vector + ##vector>scalar +} } +[ horizontal-cpu short-8-rep [ emit-simd-sum ] test-emit ] +unit-test + +{ { + ##unpack-vector-head ##unpack-vector-tail ##add-vector + ##horizontal-add-vector ##horizontal-add-vector ##horizontal-add-vector + ##vector>scalar +} } +[ horizontal-cpu char-16-rep [ emit-simd-sum ] test-emit ] +unit-test + +TUPLE: abs-cpu < simple-ops-cpu ; +M: abs-cpu %abs-vector-reps signed-reps ; + +! vabs +{ { } } +[ simple-ops-cpu uint-4-rep [ emit-simd-vabs ] test-emit ] +unit-test + +{ { ##abs-vector } } +[ abs-cpu float-4-rep [ emit-simd-vabs ] test-emit ] +unit-test + +{ { ##load-reference ##andn-vector } } +[ simple-ops-cpu float-4-rep [ emit-simd-vabs ] test-emit ] +unit-test + +{ { ##zero-vector ##sub-vector ##compare-vector ##and-vector ##andn-vector ##or-vector } } +[ compare-cpu int-4-rep [ emit-simd-vabs ] test-emit ] +unit-test + +! vand +{ { ##and-vector } } +[ simple-ops-cpu float-4-rep [ emit-simd-vand ] test-emit ] +unit-test + +! vandn +{ { ##andn-vector } } +[ simple-ops-cpu float-4-rep [ emit-simd-vandn ] test-emit ] +unit-test + +! vor +{ { ##or-vector } } +[ simple-ops-cpu float-4-rep [ emit-simd-vor ] test-emit ] +unit-test + +! vxor +{ { ##xor-vector } } +[ simple-ops-cpu float-4-rep [ emit-simd-vxor ] test-emit ] +unit-test + +TUPLE: not-cpu < simple-ops-cpu ; +M: not-cpu %not-vector-reps signed-reps ; + +! vnot +{ { ##not-vector } } +[ not-cpu float-4-rep [ emit-simd-vnot ] test-emit ] +unit-test + +{ { ##fill-vector ##xor-vector } } +[ simple-ops-cpu float-4-rep [ emit-simd-vnot ] test-emit ] +unit-test + +TUPLE: shift-cpu < simple-ops-cpu ; +M: shift-cpu %shl-vector-reps signed-reps ; +M: shift-cpu %shr-vector-reps signed-reps ; + +TUPLE: shift-imm-cpu < simple-ops-cpu ; +M: shift-imm-cpu %shl-vector-imm-reps signed-reps ; +M: shift-imm-cpu %shr-vector-imm-reps signed-reps ; + +TUPLE: horizontal-shift-cpu < simple-ops-cpu ; +M: horizontal-shift-cpu %horizontal-shl-vector-imm-reps signed-reps ; +M: horizontal-shift-cpu %horizontal-shr-vector-imm-reps signed-reps ; + +! vlshift +{ { ##shl-vector-imm } } +[ shift-imm-cpu 2 int-4-rep [ emit-simd-vlshift ] test-emit-literal ] +unit-test + +{ { ##shl-vector } } +[ shift-cpu int-4-rep [ emit-simd-vlshift ] test-emit ] +unit-test + +! vrshift +{ { ##shr-vector-imm } } +[ shift-imm-cpu 2 int-4-rep [ emit-simd-vrshift ] test-emit-literal ] +unit-test + +{ { ##shr-vector } } +[ shift-cpu int-4-rep [ emit-simd-vrshift ] test-emit ] +unit-test + +! hlshift +{ { ##horizontal-shl-vector-imm } } +[ horizontal-shift-cpu 2 int-4-rep [ emit-simd-hlshift ] test-emit-literal ] +unit-test + +! hrshift +{ { ##horizontal-shr-vector-imm } } +[ horizontal-shift-cpu 2 int-4-rep [ emit-simd-hrshift ] test-emit-literal ] +unit-test + +TUPLE: shuffle-imm-cpu < simple-ops-cpu ; +M: shuffle-imm-cpu %shuffle-vector-imm-reps signed-reps ; + +TUPLE: shuffle-cpu < simple-ops-cpu ; +M: shuffle-cpu %shuffle-vector-reps signed-reps ; + +! vshuffle-elements +{ { ##load-reference ##shuffle-vector } } +[ shuffle-cpu { 0 1 2 3 } int-4-rep [ emit-simd-vshuffle-elements ] test-emit-literal ] +unit-test + +{ { ##shuffle-vector-imm } } +[ shuffle-imm-cpu { 0 1 2 3 } int-4-rep [ emit-simd-vshuffle-elements ] test-emit-literal ] +unit-test + +! vshuffle-bytes +{ { ##shuffle-vector } } +[ shuffle-cpu int-4-rep [ emit-simd-vshuffle-bytes ] test-emit ] +unit-test + +! vmerge-head +{ { ##merge-vector-head } } +[ simple-ops-cpu float-4-rep [ emit-simd-vmerge-head ] test-emit ] +unit-test + +! vmerge-tail +{ { ##merge-vector-tail } } +[ simple-ops-cpu float-4-rep [ emit-simd-vmerge-tail ] test-emit ] +unit-test + +! v<= etc. +{ { ##compare-vector } } +[ compare-cpu int-4-rep [ emit-simd-v<= ] test-emit ] +unit-test + +{ { ##min-vector ##compare-vector } } +[ minmax-cpu int-4-rep [ emit-simd-v<= ] test-emit ] +unit-test + +{ { ##load-reference ##xor-vector ##xor-vector ##compare-vector } } +[ compare-cpu uint-4-rep [ emit-simd-v<= ] test-emit ] +unit-test + +! vany? etc. +{ { ##test-vector } } +[ simple-ops-cpu int-4-rep [ emit-simd-vany? ] test-emit ] +unit-test + +TUPLE: convert-cpu < simple-ops-cpu ; +M: convert-cpu %integer>float-vector-reps { int-4-rep } ; +M: convert-cpu %float>integer-vector-reps { float-4-rep } ; + +! v>float +{ { } } +[ convert-cpu float-4-rep [ emit-simd-v>float ] test-emit ] +unit-test + +{ { ##integer>float-vector } } +[ convert-cpu int-4-rep [ emit-simd-v>float ] test-emit ] +unit-test + +! v>integer +{ { } } +[ convert-cpu int-4-rep [ emit-simd-v>integer ] test-emit ] +unit-test + +{ { ##float>integer-vector } } +[ convert-cpu float-4-rep [ emit-simd-v>integer ] test-emit ] +unit-test + +! vpack-signed +{ { ##signed-pack-vector } } +[ simple-ops-cpu int-4-rep [ emit-simd-vpack-signed ] test-emit ] +unit-test + +! vpack-unsigned +{ { ##unsigned-pack-vector } } +[ simple-ops-cpu int-4-rep [ emit-simd-vpack-unsigned ] test-emit ] +unit-test + +TUPLE: unpack-head-cpu < simple-ops-cpu ; +M: unpack-head-cpu %unpack-vector-head-reps all-reps ; +TUPLE: unpack-cpu < unpack-head-cpu ; +M: unpack-cpu %unpack-vector-tail-reps all-reps ; + +! vunpack-head +{ { ##unpack-vector-head } } +[ unpack-head-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ] +unit-test + +{ { ##zero-vector ##merge-vector-head } } +[ simple-ops-cpu uint-4-rep [ emit-simd-vunpack-head ] test-emit ] +unit-test + +{ { ##merge-vector-head ##shr-vector-imm } } +[ shift-imm-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ] +unit-test + +{ { ##zero-vector ##compare-vector ##merge-vector-head } } +[ compare-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ] +unit-test + +! vunpack-tail +{ { ##unpack-vector-tail } } +[ unpack-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ] +unit-test + +{ { ##tail>head-vector ##unpack-vector-head } } +[ unpack-head-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ] +unit-test + +{ { ##zero-vector ##merge-vector-tail } } +[ simple-ops-cpu uint-4-rep [ emit-simd-vunpack-tail ] test-emit ] +unit-test + +{ { ##merge-vector-tail ##shr-vector-imm } } +[ shift-imm-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ] +unit-test + +{ { ##zero-vector ##compare-vector ##merge-vector-tail } } +[ compare-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ] +unit-test + +! with +{ { ##scalar>vector ##shuffle-vector-imm } } +[ shuffle-imm-cpu float-4-rep [ emit-simd-with ] test-emit ] +unit-test + +! gather-2 +{ { ##gather-vector-2 } } +[ simple-ops-cpu double-2-rep [ emit-simd-gather-2 ] test-emit ] +unit-test + +! gather-4 +{ { ##gather-vector-4 } } +[ simple-ops-cpu float-4-rep [ emit-simd-gather-4 ] test-emit ] +unit-test + +! select +{ { ##shuffle-vector-imm ##vector>scalar } } +[ shuffle-imm-cpu 1 float-4-rep [ emit-simd-select ] test-emit-literal ] +unit-test + +! ^load-neg-zero-vector +{ + V{ + T{ ##load-reference + { dst 1 } + { obj B{ 0 0 0 128 0 0 0 128 0 0 0 128 0 0 0 128 } } + } + T{ ##load-reference + { dst 2 } + { obj B{ 0 0 0 0 0 0 0 128 0 0 0 0 0 0 0 128 } } + } + } +} [ + [ + { float-4-rep double-2-rep } [ ^load-neg-zero-vector drop ] each + ] V{ } make +] cfg-unit-test + +! ^load-add-sub-vector +{ + V{ + T{ ##load-reference + { dst 1 } + { obj B{ 0 0 0 128 0 0 0 0 0 0 0 128 0 0 0 0 } } + } + T{ ##load-reference + { dst 2 } + { obj B{ 0 0 0 0 0 0 0 128 0 0 0 0 0 0 0 0 } } + } + T{ ##load-reference + { dst 3 } + { obj + B{ 255 0 255 0 255 0 255 0 255 0 255 0 255 0 255 0 } + } + } + T{ ##load-reference + { dst 4 } + { obj + B{ 255 255 0 0 255 255 0 0 255 255 0 0 255 255 0 0 } + } + } + T{ ##load-reference + { dst 5 } + { obj + B{ 255 255 255 255 0 0 0 0 255 255 255 255 0 0 0 0 } + } + } + T{ ##load-reference + { dst 6 } + { obj + B{ 255 255 255 255 255 255 255 255 0 0 0 0 0 0 0 0 } + } + } + } +} [ + [ + { + float-4-rep + double-2-rep + char-16-rep + short-8-rep + int-4-rep + longlong-2-rep + } [ ^load-add-sub-vector drop ] each + ] V{ } make +] cfg-unit-test + +! ^load-half-vector +{ + V{ + T{ ##load-reference + { dst 1 } + { obj B{ 0 0 0 63 0 0 0 63 0 0 0 63 0 0 0 63 } } + } + T{ ##load-reference + { dst 2 } + { obj B{ 0 0 0 0 0 0 224 63 0 0 0 0 0 0 224 63 } } + } + } +} [ + [ + { float-4-rep double-2-rep } [ ^load-half-vector drop ] each + ] V{ } make +] cfg-unit-test + +! sign-bit-mask +{ + { + B{ 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 } + B{ 0 128 0 128 0 128 0 128 0 128 0 128 0 128 0 128 } + B{ 0 0 0 128 0 0 0 128 0 0 0 128 0 0 0 128 } + B{ 0 0 0 0 0 0 0 128 0 0 0 0 0 0 0 128 } + } +} [ + { char-16-rep short-8-rep int-4-rep longlong-2-rep } [ sign-bit-mask ] map +] unit-test + + +! test with nonliteral/invalid reps +[ simple-ops-cpu [ emit-simd-v+ ] test-emit-nonliteral-rep ] +[ bad-simd-intrinsic? ] must-fail-with + +[ simple-ops-cpu f [ emit-simd-v+ ] test-emit ] +[ bad-simd-intrinsic? ] must-fail-with + +[ simple-ops-cpu 3 [ emit-simd-v+ ] test-emit ] +[ bad-simd-intrinsic? ] must-fail-with diff --git a/removed/compiler/cfg/intrinsics/simd/simd.factor b/removed/compiler/cfg/intrinsics/simd/simd.factor new file mode 100644 index 0000000000..2d285b92d4 --- /dev/null +++ b/removed/compiler/cfg/intrinsics/simd/simd.factor @@ -0,0 +1,719 @@ +! Copyright (C) 2009 Slava Pestov, Joe Groff. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors alien.c-types arrays assocs byte-arrays combinators +combinators.short-circuit compiler.cfg.comparisons +compiler.cfg.hats compiler.cfg.instructions +compiler.cfg.intrinsics compiler.cfg.intrinsics.alien +compiler.cfg.intrinsics.simd.backend compiler.cfg.stacks +cpu.architecture fry kernel layouts locals math math.vectors +math.vectors.simd.intrinsics sequences specialized-arrays ; +FROM: alien.c-types => heap-size char short int longlong float double ; +SPECIALIZED-ARRAYS: char uchar short ushort int uint longlong ulonglong float double ; +IN: compiler.cfg.intrinsics.simd + +! compound vector ops + +CONSTANT: rep>bit-mask { + { + char-16-rep uchar-array{ + 0x80 0x80 0x80 0x80 + 0x80 0x80 0x80 0x80 + 0x80 0x80 0x80 0x80 + 0x80 0x80 0x80 0x80 + } + } + { + short-8-rep ushort-array{ + 0x8000 0x8000 0x8000 0x8000 + 0x8000 0x8000 0x8000 0x8000 + } + } + { + int-4-rep uint-array{ + 0x8000,0000 0x8000,0000 + 0x8000,0000 0x8000,0000 + } + } + { + longlong-2-rep ulonglong-array{ + 0x8000,0000,0000,0000 + 0x8000,0000,0000,0000 + } + } +} + +: sign-bit-mask ( rep -- byte-array ) + signed-rep rep>bit-mask at underlying>> ; + +CONSTANT: rep>neg-zero { + { float-4-rep float-array{ -0.0 -0.0 -0.0 -0.0 } } + { double-2-rep double-array{ -0.0 -0.0 } } +} + +: ^load-neg-zero-vector ( rep -- dst ) + rep>neg-zero at underlying>> ^^load-literal ; + +CONSTANT: rep>add-sub { + { float-4-rep float-array{ -0.0 0.0 -0.0 0.0 } } + { double-2-rep double-array{ -0.0 0.0 } } + { char-16-rep char-array{ -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 } } + { short-8-rep short-array{ -1 0 -1 0 -1 0 -1 0 } } + { int-4-rep int-array{ -1 0 -1 0 } } + { longlong-2-rep longlong-array{ -1 0 } } +} + +: ^load-add-sub-vector ( rep -- dst ) + signed-rep rep>add-sub at underlying>> ^^load-literal ; + +CONSTANT: rep>half { + { float-4-rep float-array{ 0.5 0.5 0.5 0.5 } } + { double-2-rep double-array{ 0.5 0.5 } } +} + +: ^load-half-vector ( rep -- dst ) + rep>half at underlying>> ^^load-literal ; + +: >variable-shuffle ( shuffle rep -- shuffle' ) + rep-component-type heap-size + [ dup >byte-array ] + [ >byte-array ] bi + '[ _ n*v _ v+ ] map concat ; + +: ^load-immediate-shuffle ( shuffle rep -- dst ) + >variable-shuffle ^^load-literal ; + +:: ^blend-vector ( mask true false rep -- dst ) + true mask rep ^^and-vector + mask false rep ^^andn-vector + rep ^^or-vector ; + +: ^not-vector ( src rep -- dst ) + { + [ ^^not-vector ] + [ [ ^^fill-vector ] [ ^^xor-vector ] bi ] + } v-vector-op ; + +:: ^swap-compare-vector ( src1 src2 rep {cc,swap} -- dst ) + {cc,swap} first2 :> ( cc swap? ) + swap? + [ src2 src1 rep cc ^^compare-vector ] + [ src1 src2 rep cc ^^compare-vector ] if ; + +:: ^(compare-vector) ( src1 src2 rep orig-cc -- dst ) + rep orig-cc %compare-vector-ccs :> ( ccs not? ) + + ccs empty? + [ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ] + [ + ccs unclip :> ( rest-ccs first-cc ) + src1 src2 rep first-cc ^swap-compare-vector :> first-dst + + rest-ccs first-dst + [ [ src1 src2 rep ] dip ^swap-compare-vector rep ^^or-vector ] + reduce + + not? [ rep ^not-vector ] when + ] if ; + +:: ^minmax-compare-vector ( src1 src2 rep cc -- dst ) + cc order-cc { + { cc< [ src1 src2 rep ^^max-vector src1 rep cc/= ^(compare-vector) ] } + { cc<= [ src1 src2 rep ^^min-vector src1 rep cc= ^(compare-vector) ] } + { cc> [ src1 src2 rep ^^min-vector src1 rep cc/= ^(compare-vector) ] } + { cc>= [ src1 src2 rep ^^max-vector src1 rep cc= ^(compare-vector) ] } + } case ; + +: ^compare-vector ( src1 src2 rep cc -- dst ) + { + [ ^(compare-vector) ] + [ ^minmax-compare-vector ] + { unsigned-int-vector-rep |[ src1 src2 rep cc | + rep sign-bit-mask ^^load-literal :> sign-bits + src1 sign-bits rep ^^xor-vector + src2 sign-bits rep ^^xor-vector + rep signed-rep cc ^(compare-vector) + ] } + } vv-cc-vector-op ; + +: ^unpack-vector-head ( src rep -- dst ) + { + [ ^^unpack-vector-head ] + { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-head ] bi ] } + { signed-int-vector-rep |[ src rep | + src src rep ^^merge-vector-head :> merged + rep rep-component-type heap-size 8 * :> bits + merged bits rep widen-vector-rep ^^shr-vector-imm + ] } + { signed-int-vector-rep |[ src rep | + rep ^^zero-vector :> zero + zero src rep cc> ^compare-vector :> sign + src sign rep ^^merge-vector-head + ] } + } v-vector-op ; + +: ^unpack-vector-tail ( src rep -- dst ) + { + [ ^^unpack-vector-tail ] + [ [ ^^tail>head-vector ] [ ^^unpack-vector-head ] bi ] + { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-tail ] bi ] } + { signed-int-vector-rep |[ src rep | + src src rep ^^merge-vector-tail :> merged + rep rep-component-type heap-size 8 * :> bits + merged bits rep widen-vector-rep ^^shr-vector-imm + ] } + { signed-int-vector-rep |[ src rep | + rep ^^zero-vector :> zero + zero src rep cc> ^compare-vector :> sign + src sign rep ^^merge-vector-tail + ] } + } v-vector-op ; + +PREDICATE: fixnum-vector-rep < int-vector-rep + rep-component-type heap-size cell < ; + +: ^(sum-vector-2) ( src rep -- dst ) + { + [ dupd ^^horizontal-add-vector ] + |[ src rep | + src src rep ^^merge-vector-head :> head + src src rep ^^merge-vector-tail :> tail + head tail rep ^^add-vector + ] + } v-vector-op ; + +: ^(sum-vector-4) ( src rep -- dst ) + { + [ + [ dupd ^^horizontal-add-vector ] + [ dupd ^^horizontal-add-vector ] bi + ] + |[ src rep | + src src rep ^^merge-vector-head :> head + src src rep ^^merge-vector-tail :> tail + head tail rep ^^add-vector :> src' + + rep widen-vector-rep :> rep' + src' src' rep' ^^merge-vector-head :> head' + src' src' rep' ^^merge-vector-tail :> tail' + head' tail' rep ^^add-vector + ] + } v-vector-op ; + +: ^(sum-vector-8) ( src rep -- dst ) + { + [ + [ dupd ^^horizontal-add-vector ] + [ dupd ^^horizontal-add-vector ] + [ dupd ^^horizontal-add-vector ] tri + ] + |[ src rep | + src src rep ^^merge-vector-head :> head + src src rep ^^merge-vector-tail :> tail + head tail rep ^^add-vector :> src' + + rep widen-vector-rep :> rep' + src' src' rep' ^^merge-vector-head :> head' + src' src' rep' ^^merge-vector-tail :> tail' + head' tail' rep ^^add-vector :> src'' + + rep' widen-vector-rep :> rep'' + src'' src'' rep'' ^^merge-vector-head :> head'' + src'' src'' rep'' ^^merge-vector-tail :> tail'' + head'' tail'' rep ^^add-vector + ] + } v-vector-op ; + +: ^(sum-vector-16) ( src rep -- dst ) + { + [ + { + [ dupd ^^horizontal-add-vector ] + [ dupd ^^horizontal-add-vector ] + [ dupd ^^horizontal-add-vector ] + [ dupd ^^horizontal-add-vector ] + } cleave + ] + |[ src rep | + src src rep ^^merge-vector-head :> head + src src rep ^^merge-vector-tail :> tail + head tail rep ^^add-vector :> src' + + rep widen-vector-rep :> rep' + src' src' rep' ^^merge-vector-head :> head' + src' src' rep' ^^merge-vector-tail :> tail' + head' tail' rep ^^add-vector :> src'' + + rep' widen-vector-rep :> rep'' + src'' src'' rep'' ^^merge-vector-head :> head'' + src'' src'' rep'' ^^merge-vector-tail :> tail'' + head'' tail'' rep ^^add-vector :> src''' + + rep'' widen-vector-rep :> rep''' + src''' src''' rep''' ^^merge-vector-head :> head''' + src''' src''' rep''' ^^merge-vector-tail :> tail''' + head''' tail''' rep ^^add-vector + ] + } v-vector-op ; + +: ^(sum-vector) ( src rep -- dst ) + [ + dup rep-length { + { 2 [ ^(sum-vector-2) ] } + { 4 [ ^(sum-vector-4) ] } + { 8 [ ^(sum-vector-8) ] } + { 16 [ ^(sum-vector-16) ] } + } case + ] [ ^^vector>scalar ] bi ; + +: ^sum-vector ( src rep -- dst ) + { + { float-vector-rep [ ^(sum-vector) ] } + { fixnum-vector-rep |[ src rep | + src rep ^unpack-vector-head :> head + src rep ^unpack-vector-tail :> tail + rep widen-vector-rep :> wide-rep + head tail wide-rep ^^add-vector wide-rep + ^(sum-vector) + ] } + } v-vector-op ; + +: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ; + +: ^shuffle-vector-imm ( src1 shuffle rep -- dst ) + [ rep-length 0 pad-tail ] keep { + [ ^^shuffle-vector-imm ] + [ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] bi ] + } vl-vector-op ; + +: ^shuffle-2-vectors-imm ( src1 src2 shuffle rep -- dst ) + [ rep-length 0 pad-tail ] keep { + { double-2-rep |[ src1 src2 shuffle rep | + shuffle first2 [ 4 mod ] bi@ :> ( i j ) + { + { [ i j [ 2 < ] both? ] [ + src1 shuffle rep ^shuffle-vector-imm + ] } + { [ i j [ 2 >= ] both? ] [ + src2 shuffle [ 2 - ] map rep ^shuffle-vector-imm + ] } + { [ i 2 < ] [ + src1 src2 i j 2 - 2array rep ^^shuffle-vector-halves-imm + ] } + ! [ j 2 < ] + [ src2 src1 i 2 - j 2array rep ^^shuffle-vector-halves-imm ] + } cond + ] } + } vvl-vector-op ; + +: ^broadcast-vector ( src n rep -- dst ) + [ rep-length swap ] keep + ^shuffle-vector-imm ; + +: ^with-vector ( src rep -- dst ) + [ ^^scalar>vector ] keep [ 0 ] dip ^broadcast-vector ; + +: ^select-vector ( src n rep -- dst ) + { + [ ^^select-vector ] + [ [ ^broadcast-vector ] keep ^^vector>scalar ] + } vl-vector-op ; + +! intrinsic emitters + +: emit-simd-v+ ( node -- ) + { + [ ^^add-vector ] + } emit-vv-vector-op ; + +: emit-simd-v- ( node -- ) + { + [ ^^sub-vector ] + } emit-vv-vector-op ; + +: emit-simd-vneg ( node -- ) + { + { float-vector-rep [ [ ^load-neg-zero-vector swap ] [ ^^sub-vector ] bi ] } + { int-vector-rep [ [ ^^zero-vector swap ] [ ^^sub-vector ] bi ] } + } emit-v-vector-op ; + +: emit-simd-v+- ( node -- ) + { + [ ^^add-sub-vector ] + { float-vector-rep |[ src1 src2 rep | + rep ^load-add-sub-vector :> signs + src2 signs rep ^^xor-vector :> src2' + src1 src2' rep ^^add-vector + ] } + { int-vector-rep |[ src1 src2 rep | + rep ^load-add-sub-vector :> signs + src2 signs rep ^^xor-vector :> src2' + src2' signs rep ^^sub-vector :> src2'' + src1 src2'' rep ^^add-vector + ] } + } emit-vv-vector-op ; + +: emit-simd-vs+ ( node -- ) + { + { float-vector-rep [ ^^add-vector ] } + { int-vector-rep [ ^^saturated-add-vector ] } + } emit-vv-vector-op ; + +: emit-simd-vs- ( node -- ) + { + { float-vector-rep [ ^^sub-vector ] } + { int-vector-rep [ ^^saturated-sub-vector ] } + } emit-vv-vector-op ; + +: emit-simd-vs* ( node -- ) + { + { float-vector-rep [ ^^mul-vector ] } + { int-vector-rep [ ^^saturated-mul-vector ] } + } emit-vv-vector-op ; + +: emit-simd-v* ( node -- ) + { + [ ^^mul-vector ] + } emit-vv-vector-op ; + +: emit-simd-v*high ( node -- ) + { + [ ^^mul-high-vector ] + } emit-vv-vector-op ; + +: emit-simd-v*hs+ ( node -- ) + { + [ ^^mul-horizontal-add-vector ] + } emit-vv-vector-op ; + +: emit-simd-v/ ( node -- ) + { + [ ^^div-vector ] + } emit-vv-vector-op ; + +: emit-simd-vmin ( node -- ) + { + [ ^^min-vector ] + [ + [ cc< ^compare-vector ] + [ ^blend-vector ] 3bi + ] + } emit-vv-vector-op ; + +: emit-simd-vmax ( node -- ) + { + [ ^^max-vector ] + [ + [ cc> ^compare-vector ] + [ ^blend-vector ] 3bi + ] + } emit-vv-vector-op ; + +: emit-simd-vavg ( node -- ) + { + [ ^^avg-vector ] + { float-vector-rep |[ src1 src2 rep | + src1 src2 rep ^^add-vector + rep ^load-half-vector rep ^^mul-vector + ] } + } emit-vv-vector-op ; + +: emit-simd-v. ( node -- ) + { + [ ^^dot-vector ] + { float-vector-rep [ [ ^^mul-vector ] [ ^sum-vector ] bi ] } + } emit-vv-vector-op ; + +: emit-simd-vsad ( node -- ) + { + [ + [ ^^sad-vector dup { 2 3 0 1 } int-4-rep ^^shuffle-vector-imm int-4-rep ^^add-vector ] + [ widen-vector-rep ^^vector>scalar ] bi + ] + } emit-vv-vector-op ; + +: emit-simd-vsqrt ( node -- ) + { + [ ^^sqrt-vector ] + } emit-v-vector-op ; + +: emit-simd-sum ( node -- ) + { + [ ^sum-vector ] + } emit-v-vector-op ; + +: emit-simd-vabs ( node -- ) + { + { unsigned-int-vector-rep [ drop ] } + [ ^^abs-vector ] + { float-vector-rep [ [ ^load-neg-zero-vector ] [ swapd ^^andn-vector ] bi ] } + { int-vector-rep |[ src rep | + rep ^^zero-vector :> zero + zero src rep ^^sub-vector :> -src + zero src rep cc> ^compare-vector :> sign + sign -src src rep ^blend-vector + ] } + } emit-v-vector-op ; + +: emit-simd-vand ( node -- ) + { + [ ^^and-vector ] + } emit-vv-vector-op ; + +: emit-simd-vandn ( node -- ) + { + [ ^^andn-vector ] + } emit-vv-vector-op ; + +: emit-simd-vor ( node -- ) + { + [ ^^or-vector ] + } emit-vv-vector-op ; + +: emit-simd-vxor ( node -- ) + { + [ ^^xor-vector ] + } emit-vv-vector-op ; + +: emit-simd-vnot ( node -- ) + { + [ ^not-vector ] + } emit-v-vector-op ; + +: emit-simd-vlshift ( node -- ) + { + [ ^^shl-vector ] + } { + [ ^^shl-vector-imm ] + } [ integer? ] emit-vv-or-vl-vector-op ; + +: emit-simd-vrshift ( node -- ) + { + [ ^^shr-vector ] + } { + [ ^^shr-vector-imm ] + } [ integer? ] emit-vv-or-vl-vector-op ; + +: emit-simd-hlshift ( node -- ) + { + [ ^^horizontal-shl-vector-imm ] + } [ integer? ] emit-vl-vector-op ; + +: emit-simd-hrshift ( node -- ) + { + [ ^^horizontal-shr-vector-imm ] + } [ integer? ] emit-vl-vector-op ; + +: emit-simd-vshuffle-elements ( node -- ) + { + [ ^shuffle-vector-imm ] + } [ shuffle? ] emit-vl-vector-op ; + +: emit-simd-vshuffle2-elements ( node -- ) + { + [ ^shuffle-2-vectors-imm ] + } [ shuffle? ] emit-vvl-vector-op ; + +: emit-simd-vshuffle-bytes ( node -- ) + { + [ ^^shuffle-vector ] + } emit-vv-vector-op ; + +: emit-simd-vmerge-head ( node -- ) + { + [ ^^merge-vector-head ] + } emit-vv-vector-op ; + +: emit-simd-vmerge-tail ( node -- ) + { + [ ^^merge-vector-tail ] + } emit-vv-vector-op ; + +: emit-simd-v<= ( node -- ) + { + [ cc<= ^compare-vector ] + } emit-vv-vector-op ; +: emit-simd-v< ( node -- ) + { + [ cc< ^compare-vector ] + } emit-vv-vector-op ; +: emit-simd-v= ( node -- ) + { + [ cc= ^compare-vector ] + } emit-vv-vector-op ; +: emit-simd-v> ( node -- ) + { + [ cc> ^compare-vector ] + } emit-vv-vector-op ; +: emit-simd-v>= ( node -- ) + { + [ cc>= ^compare-vector ] + } emit-vv-vector-op ; +: emit-simd-vunordered? ( node -- ) + { + [ cc/<>= ^compare-vector ] + } emit-vv-vector-op ; + +: emit-simd-vany? ( node -- ) + { + [ vcc-any ^^test-vector ] + } emit-v-vector-op ; +: emit-simd-vall? ( node -- ) + { + [ vcc-all ^^test-vector ] + } emit-v-vector-op ; +: emit-simd-vnone? ( node -- ) + { + [ vcc-none ^^test-vector ] + } emit-v-vector-op ; +: emit-simd-vgetmask ( node -- ) + { + [ ^^move-vector-mask ] + } emit-v-vector-op ; + +: emit-simd-v>float ( node -- ) + { + { float-vector-rep [ drop ] } + { int-vector-rep [ ^^integer>float-vector ] } + } emit-v-vector-op ; + +: emit-simd-v>integer ( node -- ) + { + { float-vector-rep [ ^^float>integer-vector ] } + { int-vector-rep [ drop ] } + } emit-v-vector-op ; + +: emit-simd-vpack-signed ( node -- ) + { + { double-2-rep |[ src1 src2 rep | + src1 double-2-rep ^^float-pack-vector :> dst-head + src2 double-2-rep ^^float-pack-vector :> dst-tail + dst-head dst-tail { 0 1 0 1 } float-4-rep ^^shuffle-vector-halves-imm + ] } + { int-vector-rep [ ^^signed-pack-vector ] } + } emit-vv-vector-op ; + +: emit-simd-vpack-unsigned ( node -- ) + { + [ ^^unsigned-pack-vector ] + } emit-vv-vector-op ; + +: emit-simd-vunpack-head ( node -- ) + { + [ ^unpack-vector-head ] + } emit-v-vector-op ; + +: emit-simd-vunpack-tail ( node -- ) + { + [ ^unpack-vector-tail ] + } emit-v-vector-op ; + +: emit-simd-with ( node -- ) + { + { fixnum-vector-rep [ ^with-vector ] } + { float-vector-rep [ ^with-vector ] } + } emit-v-vector-op ; + +: emit-simd-gather-2 ( node -- ) + { + { fixnum-vector-rep [ ^^gather-int-vector-2 ] } + { fixnum-vector-rep [ ^^gather-vector-2 ] } + { float-vector-rep [ ^^gather-vector-2 ] } + } emit-vv-vector-op ; + +: emit-simd-gather-4 ( node -- ) + { + { fixnum-vector-rep [ ^^gather-int-vector-4 ] } + { fixnum-vector-rep [ ^^gather-vector-4 ] } + { float-vector-rep [ ^^gather-vector-4 ] } + } emit-vvvv-vector-op ; + +: emit-simd-select ( node -- ) + { + { fixnum-vector-rep [ ^select-vector ] } + { float-vector-rep [ ^select-vector ] } + } [ integer? ] emit-vl-vector-op ; + +: emit-alien-vector ( block node -- block' ) + dup [ + '[ + ds-drop prepare-load-memory + _ f ^^load-memory-imm ds-push + ] + [ inline-load-memory? ] inline-accessor + ] with { [ %alien-vector-reps member? ] } if-literals-match ; + +: emit-set-alien-vector ( block node -- block' ) + dup [ + '[ + ds-drop prepare-store-memory + _ f ##store-memory-imm, + ] + [ byte-array inline-store-memory? ] inline-accessor + ] with { [ %alien-vector-reps member? ] } if-literals-match ; + +: enable-simd ( -- ) + { + { (simd-v+) [ emit-simd-v+ ] } + { (simd-v-) [ emit-simd-v- ] } + { (simd-vneg) [ emit-simd-vneg ] } + { (simd-v+-) [ emit-simd-v+- ] } + { (simd-vs+) [ emit-simd-vs+ ] } + { (simd-vs-) [ emit-simd-vs- ] } + { (simd-vs*) [ emit-simd-vs* ] } + { (simd-v*) [ emit-simd-v* ] } + { (simd-v*high) [ emit-simd-v*high ] } + { (simd-v*hs+) [ emit-simd-v*hs+ ] } + { (simd-v/) [ emit-simd-v/ ] } + { (simd-vmin) [ emit-simd-vmin ] } + { (simd-vmax) [ emit-simd-vmax ] } + { (simd-vavg) [ emit-simd-vavg ] } + { (simd-v.) [ emit-simd-v. ] } + { (simd-vsad) [ emit-simd-vsad ] } + { (simd-vsqrt) [ emit-simd-vsqrt ] } + { (simd-sum) [ emit-simd-sum ] } + { (simd-vabs) [ emit-simd-vabs ] } + { (simd-vbitand) [ emit-simd-vand ] } + { (simd-vbitandn) [ emit-simd-vandn ] } + { (simd-vbitor) [ emit-simd-vor ] } + { (simd-vbitxor) [ emit-simd-vxor ] } + { (simd-vbitnot) [ emit-simd-vnot ] } + { (simd-vand) [ emit-simd-vand ] } + { (simd-vandn) [ emit-simd-vandn ] } + { (simd-vor) [ emit-simd-vor ] } + { (simd-vxor) [ emit-simd-vxor ] } + { (simd-vnot) [ emit-simd-vnot ] } + { (simd-vlshift) [ emit-simd-vlshift ] } + { (simd-vrshift) [ emit-simd-vrshift ] } + { (simd-hlshift) [ emit-simd-hlshift ] } + { (simd-hrshift) [ emit-simd-hrshift ] } + { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] } + { (simd-vshuffle2-elements) [ emit-simd-vshuffle2-elements ] } + { (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] } + { (simd-vmerge-head) [ emit-simd-vmerge-head ] } + { (simd-vmerge-tail) [ emit-simd-vmerge-tail ] } + { (simd-v<=) [ emit-simd-v<= ] } + { (simd-v<) [ emit-simd-v< ] } + { (simd-v=) [ emit-simd-v= ] } + { (simd-v>) [ emit-simd-v> ] } + { (simd-v>=) [ emit-simd-v>= ] } + { (simd-vunordered?) [ emit-simd-vunordered? ] } + { (simd-vany?) [ emit-simd-vany? ] } + { (simd-vall?) [ emit-simd-vall? ] } + { (simd-vnone?) [ emit-simd-vnone? ] } + { (simd-v>float) [ emit-simd-v>float ] } + { (simd-v>integer) [ emit-simd-v>integer ] } + { (simd-vpack-signed) [ emit-simd-vpack-signed ] } + { (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] } + { (simd-vunpack-head) [ emit-simd-vunpack-head ] } + { (simd-vunpack-tail) [ emit-simd-vunpack-tail ] } + { (simd-with) [ emit-simd-with ] } + { (simd-gather-2) [ emit-simd-gather-2 ] } + { (simd-gather-4) [ emit-simd-gather-4 ] } + { (simd-select) [ emit-simd-select ] } + { alien-vector [ emit-alien-vector ] } + { set-alien-vector [ emit-set-alien-vector ] } + { assert-positive [ drop ] } + { (simd-vgetmask) [ emit-simd-vgetmask ] } + } enable-intrinsics ; + +enable-simd diff --git a/removed/compiler/tree/propagation/simd/simd.factor b/removed/compiler/tree/propagation/simd/simd.factor new file mode 100644 index 0000000000..dc4b852017 --- /dev/null +++ b/removed/compiler/tree/propagation/simd/simd.factor @@ -0,0 +1,135 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors byte-arrays combinators compiler.cfg.builder +compiler.tree.propagation.info compiler.tree.propagation.nodes +continuations cpu.architecture fry kernel layouts math +math.intervals math.vectors.simd.intrinsics namespaces sequences +words ; +IN: compiler.tree.propagation.simd + +CONSTANT: vector>vector-intrinsics + { + (simd-v+) + (simd-v-) + (simd-vneg) + (simd-v+-) + (simd-vs+) + (simd-vs-) + (simd-vs*) + (simd-v*) + (simd-v*high) + (simd-v*hs+) + (simd-v/) + (simd-vmin) + (simd-vmax) + (simd-vavg) + (simd-vsqrt) + (simd-vabs) + (simd-vbitand) + (simd-vbitandn) + (simd-vbitor) + (simd-vbitxor) + (simd-vbitnot) + (simd-vand) + (simd-vandn) + (simd-vor) + (simd-vxor) + (simd-vnot) + (simd-vlshift) + (simd-vrshift) + (simd-hlshift) + (simd-hrshift) + (simd-vshuffle-elements) + (simd-vshuffle2-elements) + (simd-vshuffle-bytes) + (simd-vmerge-head) + (simd-vmerge-tail) + (simd-v<=) + (simd-v<) + (simd-v=) + (simd-v>) + (simd-v>=) + (simd-vunordered?) + (simd-v>float) + (simd-v>integer) + (simd-vpack-signed) + (simd-vpack-unsigned) + (simd-vunpack-head) + (simd-vunpack-tail) + (simd-with) + (simd-gather-2) + (simd-gather-4) + alien-vector + } + +CONSTANT: vector-other-intrinsics + { + (simd-v.) + (simd-vsad) + (simd-sum) + (simd-vany?) + (simd-vall?) + (simd-vnone?) + (simd-vgetmask) + (simd-select) + set-alien-vector + } + +: vector-intrinsics ( -- x ) + vector>vector-intrinsics vector-other-intrinsics append ; + +vector>vector-intrinsics [ { byte-array } "default-output-classes" set-word-prop ] each + +: scalar-output-class ( rep -- class ) + dup literal?>> [ + literal>> scalar-rep-of { + { float-rep [ float ] } + { double-rep [ float ] } + { longlong-scalar-rep [ integer ] } + { ulonglong-scalar-rep [ integer ] } + { int-scalar-rep [ cell 8 = [ fixnum ] [ integer ] if ] } + { uint-scalar-rep [ cell 8 = [ fixnum ] [ integer ] if ] } + [ drop fixnum ] + } case + ] [ drop real ] if + ; + +\ (simd-sum) [ nip scalar-output-class ] "outputs" set-word-prop + +\ (simd-v.) [ 2nip scalar-output-class ] "outputs" set-word-prop + +{ + (simd-vany?) + (simd-vall?) + (simd-vnone?) +} [ { boolean } "default-output-classes" set-word-prop ] each + +\ (simd-select) [ 2nip scalar-output-class ] "outputs" set-word-prop + +\ assert-positive [ + real [0,inf] value-info-intersect +] "outputs" set-word-prop + +\ (simd-vgetmask) { fixnum } "default-output-classes" set-word-prop + +: clone-with-value-infos ( node -- node' ) + clone dup in-d>> extract-value-info >>info ; + +: try-intrinsic ( node intrinsic-quot -- ? ) + '[ + _ clone-with-value-infos + _ with-dummy-cfg-builder + t + ] [ drop f ] recover ; + +: inline-unless-intrinsic ( word -- ) + dup '[ + _ swap over "intrinsic" word-prop + "always-inline-simd-intrinsics" get not swap and + ! word node intrinsic + [ try-intrinsic [ drop f ] [ def>> ] if ] + [ drop def>> ] if* + ] + "custom-inlining" set-word-prop ; + +vector-intrinsics [ inline-unless-intrinsic ] each