Merge branch 'master' of git://factorcode.org/git/factor

db4
Doug Coleman 2009-12-02 20:11:08 -06:00
commit 0a48b946b6
67 changed files with 2631 additions and 2049 deletions

View File

@ -538,6 +538,9 @@ M: ulonglong-2-rep rep-component-type drop ulonglong ;
M: float-4-rep rep-component-type drop float ;
M: double-2-rep rep-component-type drop double ;
: rep-length ( rep -- n )
16 swap rep-component-type heap-size /i ; foldable
: (unsigned-interval) ( bytes -- from to ) [ 0 ] dip 8 * 2^ 1 - ; foldable
: unsigned-interval ( c-type -- from to ) heap-size (unsigned-interval) ; foldable
: (signed-interval) ( bytes -- from to ) 8 * 1 - 2^ [ neg ] [ 1 - ] bi ; foldable
@ -550,4 +553,6 @@ M: double-2-rep rep-component-type drop double ;
{ [ dup { uchar ushort uint ulong ulonglong } member-eq? ] [ unsigned-interval ] }
} cond ; foldable
: c-type-clamp ( value c-type -- value' ) c-type-interval clamp ; inline
: c-type-clamp ( value c-type -- value' )
dup { float double } member-eq?
[ drop ] [ c-type-interval clamp ] if ; inline

View File

@ -189,9 +189,6 @@ M: struct-c-type c-struct? drop t ;
\ cleave [ ] 2sequence
\ output>array [ ] 2sequence ;
: define-inline-method ( class generic quot -- )
[ create-method-in ] dip [ define ] [ drop make-inline ] 2bi ;
: (define-struct-slot-values-method) ( class -- )
[ \ struct-slot-values ] [ struct-slot-values-quot ] bi
define-inline-method ;

View File

@ -45,6 +45,12 @@ SYMBOL: loops
end-stack-analysis
] with-scope ; inline
: with-dummy-cfg-builder ( node quot -- )
[
[ V{ } clone procedures ] 2dip
'[ _ t t [ _ call( node -- ) ] with-cfg-builder ] with-variable
] { } make drop ;
GENERIC: emit-node ( node -- )
: emit-nodes ( nodes -- )

View File

@ -408,13 +408,13 @@ use: src1 src2
literal: rep ;
PURE-INSN: ##horizontal-add-vector
def: dst/scalar-rep
use: src
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##horizontal-sub-vector
def: dst/scalar-rep
use: src
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##horizontal-shl-vector-imm

View File

@ -7,7 +7,6 @@ compiler.cfg.intrinsics.alien
compiler.cfg.intrinsics.allot
compiler.cfg.intrinsics.fixnum
compiler.cfg.intrinsics.float
compiler.cfg.intrinsics.simd
compiler.cfg.intrinsics.slots
compiler.cfg.intrinsics.misc
compiler.cfg.comparisons ;
@ -23,7 +22,6 @@ QUALIFIED: classes.tuple.private
QUALIFIED: math.private
QUALIFIED: math.integers.private
QUALIFIED: math.floats.private
QUALIFIED: math.vectors.simd.intrinsics
QUALIFIED: math.libm
IN: compiler.cfg.intrinsics
@ -152,64 +150,5 @@ IN: compiler.cfg.intrinsics
{ math.integers.private:fixnum-log2 [ drop emit-fixnum-log2 ] }
} enable-intrinsics ;
: enable-simd ( -- )
{
{ math.vectors.simd.intrinsics:assert-positive [ drop ] }
{ math.vectors.simd.intrinsics:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vs+) [ [ ^^saturated-add-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vs-) [ [ ^^saturated-sub-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vneg) [ [ generate-neg-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vs*) [ [ ^^saturated-mul-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vmin) [ [ generate-min-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vmax) [ [ generate-max-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v.) [ [ ^^dot-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vabs) [ [ generate-abs-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vbitandn) [ [ ^^andn-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vbitnot) [ [ generate-not-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vand) [ [ ^^and-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vandn) [ [ ^^andn-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vor) [ [ ^^or-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vnot) [ [ generate-not-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v<=) [ [ cc<= generate-compare-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v<) [ [ cc< generate-compare-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v=) [ [ cc= generate-compare-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v>) [ [ cc> generate-compare-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v>=) [ [ cc>= generate-compare-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vunordered?) [ [ cc/<>= generate-compare-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vany?) [ [ vcc-any ^^test-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vall?) [ [ vcc-all ^^test-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vnone?) [ [ vcc-none ^^test-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vlshift) [ [ ^^shl-vector-imm ] [ ^^shl-vector ] emit-shift-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vrshift) [ [ ^^shr-vector-imm ] [ ^^shr-vector ] emit-shift-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-hlshift) [ [ ^^horizontal-shl-vector-imm ] emit-shift-vector-imm-op ] }
{ math.vectors.simd.intrinsics:(simd-hrshift) [ [ ^^horizontal-shr-vector-imm ] emit-shift-vector-imm-op ] }
{ math.vectors.simd.intrinsics:(simd-with) [ [ ^^with-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
{ math.vectors.simd.intrinsics:(simd-vshuffle-elements) [ emit-shuffle-vector ] }
{ math.vectors.simd.intrinsics:(simd-vshuffle-bytes) [ emit-shuffle-vector-var ] }
{ math.vectors.simd.intrinsics:(simd-(vmerge-head)) [ [ ^^merge-vector-head ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-(vmerge-tail)) [ [ ^^merge-vector-tail ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-(v>float)) [ [ ^^integer>float-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-(v>integer)) [ [ ^^float>integer-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-(vpack-signed)) [ [ ^^signed-pack-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-(vpack-unsigned)) [ [ ^^unsigned-pack-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-(vunpack-head)) [ [ generate-unpack-vector-head ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-(vunpack-tail)) [ [ generate-unpack-vector-tail ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] }
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
{ math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] }
} enable-intrinsics ;
: emit-intrinsic ( node word -- )
"intrinsic" word-prop call( node -- ) ;

View File

@ -0,0 +1,206 @@
! (c)2009 Joe Groff bsd license
USING: accessors arrays assocs classes combinators
combinators.short-circuit compiler.cfg.builder.blocks
compiler.cfg.registers compiler.cfg.stacks
compiler.cfg.stacks.local compiler.tree.propagation.info
cpu.architecture effects fry generalizations
kernel locals macros math namespaces quotations sequences
splitting stack-checker words ;
IN: compiler.cfg.intrinsics.simd.backend
! Selection of implementation based on available CPU instructions
: can-has? ( quot -- ? )
[ t \ can-has? ] dip '[ @ drop \ can-has? get ] with-variable ; inline
: can-has-rep? ( rep reps -- )
member? \ can-has? [ and ] change ; inline
GENERIC: create-can-has ( word -- word' )
PREDICATE: hat-word < word
{
[ name>> { [ "^" head? ] [ "##" head? ] } 1|| ]
[ vocabulary>> { "compiler.cfg.intrinsics.simd" "compiler.cfg.hats" } member? ]
} 1&& ;
PREDICATE: vector-op-word < hat-word
name>> "-vector" swap subseq? ;
: reps-word ( word -- word' )
name>> "^^" ?head drop "##" ?head drop
"%" "-reps" surround "cpu.architecture" lookup ;
SYMBOL: blub
:: can-has-^^-quot ( word def effect -- quot )
effect in>> { "rep" } split1 [ length ] bi@ 1 +
word reps-word 1quotation
effect out>> length blub <array> >quotation
'[ [ _ ndrop ] _ ndip @ can-has-rep? @ ] ;
:: can-has-^-quot ( word def effect -- quot )
def create-can-has first ;
: map-concat-like ( seq quot -- seq' )
'[ _ map ] [ concat-as ] bi ; inline
M: object create-can-has 1quotation ;
M: array create-can-has
[ create-can-has ] map-concat-like 1quotation ;
M: callable create-can-has
[ create-can-has ] map-concat-like 1quotation ;
: (can-has-word) ( word -- word' )
name>> "can-has-" prepend "compiler.cfg.intrinsics.simd.backend" lookup ;
: (can-has-quot) ( word -- quot )
[ ] [ def>> ] [ stack-effect ] tri {
{ [ pick name>> "^^" head? ] [ can-has-^^-quot ] }
{ [ pick name>> "##" head? ] [ can-has-^^-quot ] }
{ [ pick name>> "^" head? ] [ can-has-^-quot ] }
} cond ;
: (can-has-nop-quot) ( word -- quot )
stack-effect in>> length '[ _ ndrop blub ] ;
DEFER: can-has-words
M: word create-can-has
can-has-words ?at drop 1quotation ;
M: hat-word create-can-has
(can-has-nop-quot) ;
M: vector-op-word create-can-has
dup (can-has-word) [ 1quotation ] [ (can-has-quot) ] ?if ;
GENERIC# >can-has-cond 2 ( quot #pick #dup -- quotpair )
M:: callable >can-has-cond ( quot #pick #dup -- quotpair )
#dup quot create-can-has '[ _ ndup @ can-has? ] quot 2array ;
M:: pair >can-has-cond ( pair #pick #dup -- quotpair )
pair first2 :> ( class quot )
#pick class #dup quot create-can-has
'[ _ npick _ instance? [ _ ndup @ can-has? ] dip and ]
quot 2array ;
MACRO: v-vector-op ( trials -- )
[ 1 2 >can-has-cond ] map '[ _ cond ] ;
MACRO: vl-vector-op ( trials -- )
[ 1 3 >can-has-cond ] map '[ _ cond ] ;
MACRO: vv-vector-op ( trials -- )
[ 1 3 >can-has-cond ] map '[ _ cond ] ;
MACRO: vv-cc-vector-op ( trials -- )
[ 2 4 >can-has-cond ] map '[ _ cond ] ;
MACRO: vvvv-vector-op ( trials -- )
[ 1 5 >can-has-cond ] map '[ _ cond ] ;
! Special-case conditional instructions
: can-has-^(compare-vector) ( src1 src2 rep cc -- dst )
[ 2drop ] 2dip %compare-vector-reps member?
\ can-has? [ and ] change
blub ;
: can-has-^^test-vector ( src rep vcc -- dst )
[ drop ] 2dip drop %test-vector-reps member?
\ can-has? [ and ] change
blub ;
MACRO: can-has-case ( cases -- )
dup first second infer in>> length 1 +
'[ _ ndrop f ] suffix '[ _ case ] ;
GENERIC# >can-has-trial 1 ( obj #pick -- quot )
M: callable >can-has-trial
drop '[ _ can-has? ] ;
M: pair >can-has-trial
swap first2 dup infer in>> length
'[ _ npick _ instance? [ _ can-has? ] [ _ ndrop blub ] if ] ;
MACRO: can-has-vector-op ( trials #pick #dup -- )
[ '[ _ >can-has-trial ] map ] dip '[ _ _ n|| \ can-has? [ and ] change blub ] ;
: can-has-v-vector-op ( trials -- ? )
1 2 can-has-vector-op ; inline
: can-has-vv-vector-op ( trials -- ? )
1 3 can-has-vector-op ; inline
: can-has-vv-cc-vector-op ( trials -- ? )
2 4 can-has-vector-op ; inline
: can-has-vvvv-vector-op ( trials -- ? )
1 5 can-has-vector-op ; inline
CONSTANT: can-has-words
H{
{ case can-has-case }
{ v-vector-op can-has-v-vector-op }
{ vl-vector-op can-has-vv-vector-op }
{ vv-vector-op can-has-vv-vector-op }
{ vv-cc-vector-op can-has-vv-cc-vector-op }
{ vvvv-vector-op can-has-vvvv-vector-op }
}
! Intrinsic code emission
MACRO: check-elements ( quots -- )
[ length '[ _ firstn ] ]
[ '[ _ spread ] ]
[ length 1 - \ and <repetition> [ ] like ]
tri 3append ;
ERROR: bad-simd-intrinsic node ;
MACRO: if-literals-match ( quots -- )
[ length ] [ ] [ length ] tri
! n quots n
'[
! node quot
[
dup node-input-infos
_ tail-slice* [ literal>> ] map
dup _ check-elements
] dip
swap [
! node literals quot
[ _ firstn ] dip call
drop
] [ 2drop bad-simd-intrinsic ] if
] ;
CONSTANT: [unary] [ ds-drop ds-pop ]
CONSTANT: [unary/param] [ [ -2 inc-d ds-pop ] dip ]
CONSTANT: [binary] [ ds-drop 2inputs ]
CONSTANT: [quaternary]
[
ds-drop
D 3 peek-loc
D 2 peek-loc
D 1 peek-loc
D 0 peek-loc
-4 inc-d
]
:: [emit-vector-op] ( trials params-quot op-quot literal-preds -- quot )
params-quot trials op-quot literal-preds
'[ [ _ dip _ @ ds-push ] _ if-literals-match ] ;
MACRO: emit-v-vector-op ( trials -- )
[unary] [ v-vector-op ] { [ representation? ] } [emit-vector-op] ;
MACRO: emit-vl-vector-op ( trials literal-pred -- )
[ [unary/param] [ vl-vector-op ] { [ representation? ] } ] dip prefix [emit-vector-op] ;
MACRO: emit-vv-vector-op ( trials -- )
[binary] [ vv-vector-op ] { [ representation? ] } [emit-vector-op] ;
MACRO: emit-vvvv-vector-op ( trials -- )
[quaternary] [ vvvv-vector-op ] { [ representation? ] } [emit-vector-op] ;
MACRO:: emit-vv-or-vl-vector-op ( var-trials imm-trials literal-pred -- )
literal-pred imm-trials literal-pred var-trials
'[
dup node-input-infos 2 tail-slice* first literal>> @
[ _ _ emit-vl-vector-op ]
[ _ emit-vv-vector-op ] if
] ;

View File

@ -0,0 +1,544 @@
! (c)2009 Joe Groff bsd license
USING: arrays assocs biassocs byte-arrays byte-arrays.hex
classes compiler.cfg compiler.cfg.comparisons compiler.cfg.instructions
compiler.cfg.intrinsics.simd compiler.cfg.intrinsics.simd.backend
compiler.cfg.registers compiler.cfg.stacks.height
compiler.cfg.stacks.local compiler.tree compiler.tree.propagation.info
cpu.architecture fry hashtables kernel locals make namespaces sequences
system tools.test words ;
IN: compiler.cfg.intrinsics.simd.tests
:: test-node ( rep -- node )
T{ #call
{ in-d { 1 2 3 4 } }
{ out-d { 5 } }
{ info H{
{ 1 T{ value-info { class byte-array } } }
{ 2 T{ value-info { class byte-array } } }
{ 3 T{ value-info { class byte-array } } }
{ 4 T{ value-info { class word } { literal? t } { literal rep } } }
{ 5 T{ value-info { class byte-array } } }
} }
} ;
:: test-node-literal ( lit rep -- node )
lit class :> lit-class
T{ #call
{ in-d { 1 2 3 4 } }
{ out-d { 5 } }
{ info H{
{ 1 T{ value-info { class byte-array } } }
{ 2 T{ value-info { class byte-array } } }
{ 3 T{ value-info { class lit-class } { literal? t } { literal lit } } }
{ 4 T{ value-info { class word } { literal? t } { literal rep } } }
{ 5 T{ value-info { class byte-array } } }
} }
} ;
: test-node-nonliteral-rep ( -- node )
T{ #call
{ in-d { 1 2 3 4 } }
{ out-d { 5 } }
{ info H{
{ 1 T{ value-info { class byte-array } } }
{ 2 T{ value-info { class byte-array } } }
{ 3 T{ value-info { class byte-array } } }
{ 4 T{ value-info { class object } } }
{ 5 T{ value-info { class byte-array } } }
} }
} ;
: test-compiler-env ( -- x )
H{ } clone
T{ basic-block { id 0 } }
[ \ basic-block pick set-at ]
[ 0 swap associate \ ds-heights pick set-at ]
[ 0 swap associate \ rs-heights pick set-at ] tri
T{ current-height { d 0 } { r 0 } { emit-d 0 } { emit-r 0 } } \ current-height pick set-at
H{ } clone \ local-peek-set pick set-at
H{ } clone \ replace-mapping pick set-at
H{ } <biassoc> \ locs>vregs pick set-at
H{ } clone \ peek-sets pick set-at
H{ } clone \ replace-sets pick set-at
H{ } clone \ kill-sets pick set-at ;
: make-classes ( quot -- seq )
{ } make [ class ] map ; inline
: test-emit ( cpu rep quot -- node )
[
[ new \ cpu ] 2dip '[
test-compiler-env [ _ test-node @ ] bind
] with-variable
] make-classes ; inline
: test-emit-literal ( cpu lit rep quot -- node )
[
[ new \ cpu ] 3dip '[
test-compiler-env [ _ _ test-node-literal @ ] bind
] with-variable
] make-classes ; inline
: test-emit-nonliteral-rep ( cpu quot -- node )
[
[ new \ cpu ] dip '[
test-compiler-env [ test-node-nonliteral-rep @ ] bind
] with-variable
] make-classes ; inline
CONSTANT: signed-reps
{ char-16-rep short-8-rep int-4-rep longlong-2-rep float-4-rep double-2-rep }
CONSTANT: all-reps
{
char-16-rep short-8-rep int-4-rep longlong-2-rep float-4-rep double-2-rep
uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep
}
TUPLE: scalar-cpu ;
TUPLE: simple-ops-cpu ;
M: simple-ops-cpu %zero-vector-reps all-reps ;
M: simple-ops-cpu %fill-vector-reps all-reps ;
M: simple-ops-cpu %add-vector-reps all-reps ;
M: simple-ops-cpu %sub-vector-reps all-reps ;
M: simple-ops-cpu %mul-vector-reps all-reps ;
M: simple-ops-cpu %div-vector-reps all-reps ;
M: simple-ops-cpu %andn-vector-reps all-reps ;
M: simple-ops-cpu %and-vector-reps all-reps ;
M: simple-ops-cpu %or-vector-reps all-reps ;
M: simple-ops-cpu %xor-vector-reps all-reps ;
M: simple-ops-cpu %merge-vector-reps all-reps ;
M: simple-ops-cpu %sqrt-vector-reps all-reps ;
M: simple-ops-cpu %test-vector-reps all-reps ;
M: simple-ops-cpu %signed-pack-vector-reps all-reps ;
M: simple-ops-cpu %unsigned-pack-vector-reps all-reps ;
M: simple-ops-cpu %gather-vector-2-reps { longlong-2-rep ulonglong-2-rep double-2-rep } ;
M: simple-ops-cpu %gather-vector-4-reps { int-4-rep uint-4-rep float-4-rep } ;
M: simple-ops-cpu %alien-vector-reps all-reps ;
! v+
[ { ##add-vector } ]
[ simple-ops-cpu float-4-rep [ emit-simd-v+ ] test-emit ]
unit-test
! v-
[ { ##sub-vector } ]
[ simple-ops-cpu float-4-rep [ emit-simd-v- ] test-emit ]
unit-test
! vneg
[ { ##load-constant ##sub-vector } ]
[ simple-ops-cpu float-4-rep [ emit-simd-vneg ] test-emit ]
unit-test
[ { ##zero-vector ##sub-vector } ]
[ simple-ops-cpu int-4-rep [ emit-simd-vneg ] test-emit ]
unit-test
! v*
[ { ##mul-vector } ]
[ simple-ops-cpu float-4-rep [ emit-simd-v* ] test-emit ]
unit-test
! v/
[ { ##div-vector } ]
[ simple-ops-cpu float-4-rep [ emit-simd-v/ ] test-emit ]
unit-test
TUPLE: addsub-cpu < simple-ops-cpu ;
M: addsub-cpu %add-sub-vector-reps { int-4-rep float-4-rep } ;
! v+-
[ { ##add-sub-vector } ]
[ addsub-cpu float-4-rep [ emit-simd-v+- ] test-emit ]
unit-test
[ { ##load-constant ##xor-vector ##add-vector } ]
[ simple-ops-cpu float-4-rep [ emit-simd-v+- ] test-emit ]
unit-test
[ { ##load-constant ##xor-vector ##sub-vector ##add-vector } ]
[ simple-ops-cpu int-4-rep [ emit-simd-v+- ] test-emit ]
unit-test
TUPLE: saturating-cpu < simple-ops-cpu ;
M: saturating-cpu %saturated-add-vector-reps { int-4-rep } ;
M: saturating-cpu %saturated-sub-vector-reps { int-4-rep } ;
M: saturating-cpu %saturated-mul-vector-reps { int-4-rep } ;
! vs+
[ { ##add-vector } ]
[ simple-ops-cpu float-4-rep [ emit-simd-vs+ ] test-emit ]
unit-test
[ { ##add-vector } ]
[ saturating-cpu float-4-rep [ emit-simd-vs+ ] test-emit ]
unit-test
[ { ##saturated-add-vector } ]
[ saturating-cpu int-4-rep [ emit-simd-vs+ ] test-emit ]
unit-test
! vs-
[ { ##sub-vector } ]
[ simple-ops-cpu float-4-rep [ emit-simd-vs- ] test-emit ]
unit-test
[ { ##sub-vector } ]
[ saturating-cpu float-4-rep [ emit-simd-vs- ] test-emit ]
unit-test
[ { ##saturated-sub-vector } ]
[ saturating-cpu int-4-rep [ emit-simd-vs- ] test-emit ]
unit-test
! vs*
[ { ##mul-vector } ]
[ simple-ops-cpu float-4-rep [ emit-simd-vs* ] test-emit ]
unit-test
[ { ##mul-vector } ]
[ saturating-cpu float-4-rep [ emit-simd-vs* ] test-emit ]
unit-test
[ { ##saturated-mul-vector } ]
[ saturating-cpu int-4-rep [ emit-simd-vs* ] test-emit ]
unit-test
TUPLE: minmax-cpu < simple-ops-cpu ;
M: minmax-cpu %min-vector-reps signed-reps ;
M: minmax-cpu %max-vector-reps signed-reps ;
M: minmax-cpu %compare-vector-reps { cc= cc/= } member? [ signed-reps ] [ { } ] if ;
M: minmax-cpu %compare-vector-ccs nip f 2array 1array f ;
TUPLE: compare-cpu < simple-ops-cpu ;
M: compare-cpu %compare-vector-reps drop signed-reps ;
M: compare-cpu %compare-vector-ccs nip f 2array 1array f ;
! vmin
[ { ##min-vector } ]
[ minmax-cpu float-4-rep [ emit-simd-vmin ] test-emit ]
unit-test
[ { ##compare-vector ##and-vector ##andn-vector ##or-vector } ]
[ compare-cpu float-4-rep [ emit-simd-vmin ] test-emit ]
unit-test
! vmax
[ { ##max-vector } ]
[ minmax-cpu float-4-rep [ emit-simd-vmax ] test-emit ]
unit-test
[ { ##compare-vector ##and-vector ##andn-vector ##or-vector } ]
[ compare-cpu float-4-rep [ emit-simd-vmax ] test-emit ]
unit-test
TUPLE: dot-cpu < simple-ops-cpu ;
M: dot-cpu %dot-vector-reps { float-4-rep } ;
TUPLE: horizontal-cpu < simple-ops-cpu ;
M: horizontal-cpu %horizontal-add-vector-reps signed-reps ;
M: horizontal-cpu %unpack-vector-head-reps signed-reps ;
M: horizontal-cpu %unpack-vector-tail-reps signed-reps ;
! v.
[ { ##dot-vector } ]
[ dot-cpu float-4-rep [ emit-simd-v. ] test-emit ]
unit-test
[ { ##mul-vector ##horizontal-add-vector ##horizontal-add-vector ##vector>scalar } ]
[ horizontal-cpu float-4-rep [ emit-simd-v. ] test-emit ]
unit-test
[ {
##mul-vector
##merge-vector-head ##merge-vector-tail ##add-vector
##merge-vector-head ##merge-vector-tail ##add-vector
##vector>scalar
} ]
[ simple-ops-cpu float-4-rep [ emit-simd-v. ] test-emit ]
unit-test
! vsqrt
[ { ##sqrt-vector } ]
[ simple-ops-cpu float-4-rep [ emit-simd-vsqrt ] test-emit ]
unit-test
! sum
[ { ##horizontal-add-vector ##vector>scalar } ]
[ horizontal-cpu double-2-rep [ emit-simd-sum ] test-emit ]
unit-test
[ { ##horizontal-add-vector ##horizontal-add-vector ##vector>scalar } ]
[ horizontal-cpu float-4-rep [ emit-simd-sum ] test-emit ]
unit-test
[ {
##unpack-vector-head ##unpack-vector-tail ##add-vector
##horizontal-add-vector
##vector>scalar
} ]
[ horizontal-cpu int-4-rep [ emit-simd-sum ] test-emit ]
unit-test
[ {
##unpack-vector-head ##unpack-vector-tail ##add-vector
##horizontal-add-vector ##horizontal-add-vector
##vector>scalar
} ]
[ horizontal-cpu short-8-rep [ emit-simd-sum ] test-emit ]
unit-test
[ {
##unpack-vector-head ##unpack-vector-tail ##add-vector
##horizontal-add-vector ##horizontal-add-vector ##horizontal-add-vector
##vector>scalar
} ]
[ horizontal-cpu char-16-rep [ emit-simd-sum ] test-emit ]
unit-test
TUPLE: abs-cpu < simple-ops-cpu ;
M: abs-cpu %abs-vector-reps signed-reps ;
! vabs
[ { } ]
[ simple-ops-cpu uint-4-rep [ emit-simd-vabs ] test-emit ]
unit-test
[ { ##abs-vector } ]
[ abs-cpu float-4-rep [ emit-simd-vabs ] test-emit ]
unit-test
[ { ##load-constant ##andn-vector } ]
[ simple-ops-cpu float-4-rep [ emit-simd-vabs ] test-emit ]
unit-test
[ { ##zero-vector ##sub-vector ##compare-vector ##and-vector ##andn-vector ##or-vector } ]
[ compare-cpu int-4-rep [ emit-simd-vabs ] test-emit ]
unit-test
! vand
[ { ##and-vector } ]
[ simple-ops-cpu float-4-rep [ emit-simd-vand ] test-emit ]
unit-test
! vandn
[ { ##andn-vector } ]
[ simple-ops-cpu float-4-rep [ emit-simd-vandn ] test-emit ]
unit-test
! vor
[ { ##or-vector } ]
[ simple-ops-cpu float-4-rep [ emit-simd-vor ] test-emit ]
unit-test
! vxor
[ { ##xor-vector } ]
[ simple-ops-cpu float-4-rep [ emit-simd-vxor ] test-emit ]
unit-test
TUPLE: not-cpu < simple-ops-cpu ;
M: not-cpu %not-vector-reps signed-reps ;
! vnot
[ { ##not-vector } ]
[ not-cpu float-4-rep [ emit-simd-vnot ] test-emit ]
unit-test
[ { ##fill-vector ##xor-vector } ]
[ simple-ops-cpu float-4-rep [ emit-simd-vnot ] test-emit ]
unit-test
TUPLE: shift-cpu < simple-ops-cpu ;
M: shift-cpu %shl-vector-reps signed-reps ;
M: shift-cpu %shr-vector-reps signed-reps ;
TUPLE: shift-imm-cpu < simple-ops-cpu ;
M: shift-imm-cpu %shl-vector-imm-reps signed-reps ;
M: shift-imm-cpu %shr-vector-imm-reps signed-reps ;
TUPLE: horizontal-shift-cpu < simple-ops-cpu ;
M: horizontal-shift-cpu %horizontal-shl-vector-imm-reps signed-reps ;
M: horizontal-shift-cpu %horizontal-shr-vector-imm-reps signed-reps ;
! vlshift
[ { ##shl-vector-imm } ]
[ shift-imm-cpu 2 int-4-rep [ emit-simd-vlshift ] test-emit-literal ]
unit-test
[ { ##shl-vector } ]
[ shift-cpu int-4-rep [ emit-simd-vlshift ] test-emit ]
unit-test
! vrshift
[ { ##shr-vector-imm } ]
[ shift-imm-cpu 2 int-4-rep [ emit-simd-vrshift ] test-emit-literal ]
unit-test
[ { ##shr-vector } ]
[ shift-cpu int-4-rep [ emit-simd-vrshift ] test-emit ]
unit-test
! hlshift
[ { ##horizontal-shl-vector-imm } ]
[ horizontal-shift-cpu 2 int-4-rep [ emit-simd-hlshift ] test-emit-literal ]
unit-test
! hrshift
[ { ##horizontal-shr-vector-imm } ]
[ horizontal-shift-cpu 2 int-4-rep [ emit-simd-hrshift ] test-emit-literal ]
unit-test
TUPLE: shuffle-imm-cpu < simple-ops-cpu ;
M: shuffle-imm-cpu %shuffle-vector-imm-reps signed-reps ;
TUPLE: shuffle-cpu < simple-ops-cpu ;
M: shuffle-cpu %shuffle-vector-reps signed-reps ;
! vshuffle-elements
[ { ##load-constant ##shuffle-vector } ]
[ shuffle-cpu { 0 1 2 3 } int-4-rep [ emit-simd-vshuffle-elements ] test-emit-literal ]
unit-test
[ { ##shuffle-vector-imm } ]
[ shuffle-imm-cpu { 0 1 2 3 } int-4-rep [ emit-simd-vshuffle-elements ] test-emit-literal ]
unit-test
! vshuffle-bytes
[ { ##shuffle-vector } ]
[ shuffle-cpu int-4-rep [ emit-simd-vshuffle-bytes ] test-emit ]
unit-test
! vmerge-head
[ { ##merge-vector-head } ]
[ simple-ops-cpu float-4-rep [ emit-simd-vmerge-head ] test-emit ]
unit-test
! vmerge-tail
[ { ##merge-vector-tail } ]
[ simple-ops-cpu float-4-rep [ emit-simd-vmerge-tail ] test-emit ]
unit-test
! v<= etc.
[ { ##compare-vector } ]
[ compare-cpu int-4-rep [ emit-simd-v<= ] test-emit ]
unit-test
[ { ##min-vector ##compare-vector } ]
[ minmax-cpu int-4-rep [ emit-simd-v<= ] test-emit ]
unit-test
[ { ##load-constant ##xor-vector ##xor-vector ##compare-vector } ]
[ compare-cpu uint-4-rep [ emit-simd-v<= ] test-emit ]
unit-test
! vany? etc.
[ { ##test-vector } ]
[ simple-ops-cpu int-4-rep [ emit-simd-vany? ] test-emit ]
unit-test
TUPLE: convert-cpu < simple-ops-cpu ;
M: convert-cpu %integer>float-vector-reps { int-4-rep } ;
M: convert-cpu %float>integer-vector-reps { float-4-rep } ;
! v>float
[ { } ]
[ convert-cpu float-4-rep [ emit-simd-v>float ] test-emit ]
unit-test
[ { ##integer>float-vector } ]
[ convert-cpu int-4-rep [ emit-simd-v>float ] test-emit ]
unit-test
! v>integer
[ { } ]
[ convert-cpu int-4-rep [ emit-simd-v>integer ] test-emit ]
unit-test
[ { ##float>integer-vector } ]
[ convert-cpu float-4-rep [ emit-simd-v>integer ] test-emit ]
unit-test
! vpack-signed
[ { ##signed-pack-vector } ]
[ simple-ops-cpu int-4-rep [ emit-simd-vpack-signed ] test-emit ]
unit-test
! vpack-unsigned
[ { ##unsigned-pack-vector } ]
[ simple-ops-cpu int-4-rep [ emit-simd-vpack-unsigned ] test-emit ]
unit-test
TUPLE: unpack-head-cpu < simple-ops-cpu ;
M: unpack-head-cpu %unpack-vector-head-reps all-reps ;
TUPLE: unpack-cpu < unpack-head-cpu ;
M: unpack-cpu %unpack-vector-tail-reps all-reps ;
! vunpack-head
[ { ##unpack-vector-head } ]
[ unpack-head-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ]
unit-test
[ { ##zero-vector ##merge-vector-head } ]
[ simple-ops-cpu uint-4-rep [ emit-simd-vunpack-head ] test-emit ]
unit-test
[ { ##merge-vector-head ##shr-vector-imm } ]
[ shift-imm-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ]
unit-test
[ { ##zero-vector ##compare-vector ##merge-vector-head } ]
[ compare-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ]
unit-test
! vunpack-tail
[ { ##unpack-vector-tail } ]
[ unpack-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
unit-test
[ { ##tail>head-vector ##unpack-vector-head } ]
[ unpack-head-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
unit-test
[ { ##zero-vector ##merge-vector-tail } ]
[ simple-ops-cpu uint-4-rep [ emit-simd-vunpack-tail ] test-emit ]
unit-test
[ { ##merge-vector-tail ##shr-vector-imm } ]
[ shift-imm-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
unit-test
[ { ##zero-vector ##compare-vector ##merge-vector-tail } ]
[ compare-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
unit-test
! with
[ { ##scalar>vector ##shuffle-vector-imm } ]
[ shuffle-imm-cpu int-4-rep [ emit-simd-with ] test-emit ]
unit-test
! gather-2
[ { ##gather-vector-2 } ]
[ simple-ops-cpu longlong-2-rep [ emit-simd-gather-2 ] test-emit ]
unit-test
! gather-4
[ { ##gather-vector-4 } ]
[ simple-ops-cpu int-4-rep [ emit-simd-gather-4 ] test-emit ]
unit-test
! select
[ { ##shuffle-vector-imm ##vector>scalar } ]
[ shuffle-imm-cpu 1 int-4-rep [ emit-simd-select ] test-emit-literal ]
unit-test
! test with nonliteral/invalid reps
[ simple-ops-cpu [ emit-simd-v+ ] test-emit-nonliteral-rep ]
[ bad-simd-intrinsic? ] must-fail-with
[ simple-ops-cpu f [ emit-simd-v+ ] test-emit ]
[ bad-simd-intrinsic? ] must-fail-with
[ simple-ops-cpu 3 [ emit-simd-v+ ] test-emit ]
[ bad-simd-intrinsic? ] must-fail-with

View File

@ -1,189 +1,26 @@
! Copyright (C) 2009 Slava Pestov.
! Copyright (C) 2009 Slava Pestov, Joe Groff.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien byte-arrays fry classes.algebra
cpu.architecture kernel math sequences math.vectors
math.vectors.simd.intrinsics macros generalizations combinators
combinators.short-circuit arrays locals
compiler.tree.propagation.info compiler.cfg.builder.blocks
USING: accessors alien alien.c-types byte-arrays fry
classes.algebra cpu.architecture kernel math sequences
math.vectors math.vectors.simd.intrinsics
macros generalizations combinators combinators.short-circuit
arrays locals compiler.tree.propagation.info
compiler.cfg.builder.blocks
compiler.cfg.comparisons
compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
compiler.cfg.instructions compiler.cfg.registers
compiler.cfg.intrinsics
compiler.cfg.intrinsics.alien
compiler.cfg.intrinsics.simd.backend
specialized-arrays ;
FROM: alien.c-types => heap-size uchar ushort uint ulonglong float double ;
SPECIALIZED-ARRAYS: uchar ushort uint ulonglong float double ;
FROM: alien.c-types => heap-size char short int longlong float double ;
SPECIALIZED-ARRAYS: char uchar short ushort int uint longlong ulonglong float double ;
IN: compiler.cfg.intrinsics.simd
MACRO: check-elements ( quots -- )
[ length '[ _ firstn ] ]
[ '[ _ spread ] ]
[ length 1 - \ and <repetition> [ ] like ]
tri 3append ;
MACRO: if-literals-match ( quots -- )
[ length ] [ ] [ length ] tri
! n quots n
'[
! node quot
[
dup node-input-infos
_ tail-slice* [ literal>> ] map
dup _ check-elements
] dip
swap [
! node literals quot
[ _ firstn ] dip call
drop
] [ 2drop emit-primitive ] if
] ;
: emit-vector-op ( node quot: ( rep -- ) -- )
{ [ representation? ] } if-literals-match ; inline
: [binary] ( quot -- quot' )
'[ [ ds-drop 2inputs ] dip @ ds-push ] ; inline
: emit-binary-vector-op ( node quot -- )
[binary] emit-vector-op ; inline
: [unary] ( quot -- quot' )
'[ [ ds-drop ds-pop ] dip @ ds-push ] ; inline
: emit-unary-vector-op ( node quot -- )
[unary] emit-vector-op ; inline
: [unary/param] ( quot -- quot' )
'[ [ -2 inc-d ds-pop ] 2dip @ ds-push ] ; inline
: emit-shift-vector-imm-op ( node quot -- )
[unary/param]
{ [ integer? ] [ representation? ] } if-literals-match ; inline
:: emit-shift-vector-op ( node imm-quot var-quot -- )
node node-input-infos 2 tail-slice* first literal>> integer?
[ node imm-quot emit-shift-vector-imm-op ]
[ node var-quot emit-binary-vector-op ] if ; inline
: emit-gather-vector-2 ( node -- )
[ ^^gather-vector-2 ] emit-binary-vector-op ;
: emit-gather-vector-4 ( node -- )
[
ds-drop
[
D 3 peek-loc
D 2 peek-loc
D 1 peek-loc
D 0 peek-loc
-4 inc-d
] dip
^^gather-vector-4
ds-push
] emit-vector-op ;
: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
: >variable-shuffle ( shuffle rep -- shuffle' )
rep-component-type heap-size
[ dup <repetition> >byte-array ]
[ iota >byte-array ] bi
'[ _ n*v _ v+ ] map concat ;
: generate-shuffle-vector-imm ( src shuffle rep -- dst )
dup %shuffle-vector-imm-reps member?
[ ^^shuffle-vector-imm ]
[
[ >variable-shuffle ^^load-constant ] keep
^^shuffle-vector
] if ;
: emit-shuffle-vector-imm ( node -- )
! Pad the permutation with zeroes if it's too short, since we
! can't throw an error at this point.
[ [ rep-components 0 pad-tail ] keep generate-shuffle-vector-imm ] [unary/param]
{ [ shuffle? ] [ representation? ] } if-literals-match ;
: emit-shuffle-vector-var ( node -- )
[ ^^shuffle-vector ] [binary]
{ [ %shuffle-vector-reps member? ] } if-literals-match ;
: emit-shuffle-vector ( node -- )
dup node-input-infos {
[ length 3 = ]
[ first class>> byte-array class<= ]
[ second class>> byte-array class<= ]
[ third literal>> representation? ]
} 1&& [ emit-shuffle-vector-var ] [ emit-shuffle-vector-imm ] if ;
: ^^broadcast-vector ( src n rep -- dst )
[ rep-components swap <array> ] keep
generate-shuffle-vector-imm ;
: emit-broadcast-vector ( node -- )
[ ^^broadcast-vector ] [unary/param]
{ [ integer? ] [ representation? ] } if-literals-match ;
: ^^with-vector ( src rep -- dst )
[ ^^scalar>vector ] keep [ 0 ] dip ^^broadcast-vector ;
: ^^select-vector ( src n rep -- dst )
[ ^^broadcast-vector ] keep ^^vector>scalar ;
: emit-select-vector ( node -- )
[ ^^select-vector ] [unary/param]
{ [ integer? ] [ representation? ] } if-literals-match ; inline
: emit-alien-vector-op ( node quot: ( rep -- ) -- )
{ [ %alien-vector-reps member? ] } if-literals-match ; inline
: emit-alien-vector ( node -- )
dup [
'[
ds-drop prepare-alien-getter
_ ^^alien-vector ds-push
]
[ inline-alien-getter? ] inline-alien
] with emit-alien-vector-op ;
: emit-set-alien-vector ( node -- )
dup [
'[
ds-drop prepare-alien-setter ds-pop
_ ##set-alien-vector
]
[ byte-array inline-alien-setter? ]
inline-alien
] with emit-alien-vector-op ;
: generate-not-vector ( src rep -- dst )
dup %not-vector-reps member?
[ ^^not-vector ]
[ [ ^^fill-vector ] [ ^^xor-vector ] bi ] if ;
:: ((generate-compare-vector)) ( src1 src2 rep {cc,swap} -- dst )
{cc,swap} first2 :> ( cc swap? )
swap?
[ src2 src1 rep cc ^^compare-vector ]
[ src1 src2 rep cc ^^compare-vector ] if ;
:: (generate-compare-vector) ( src1 src2 rep orig-cc -- dst )
rep orig-cc %compare-vector-ccs :> ( ccs not? )
ccs empty?
[ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ]
[
ccs unclip :> ( rest-ccs first-cc )
src1 src2 rep first-cc ((generate-compare-vector)) :> first-dst
rest-ccs first-dst
[ [ src1 src2 rep ] dip ((generate-compare-vector)) rep ^^or-vector ]
reduce
not? [ rep generate-not-vector ] when
] if ;
! compound vector ops
: sign-bit-mask ( rep -- byte-array )
unsign-rep {
signed-rep {
{ char-16-rep [ uchar-array{
HEX: 80 HEX: 80 HEX: 80 HEX: 80
HEX: 80 HEX: 80 HEX: 80 HEX: 80
@ -204,150 +41,584 @@ MACRO: if-literals-match ( quots -- )
} underlying>> ] }
} case ;
:: (generate-minmax-compare-vector) ( src1 src2 rep orig-cc -- dst )
orig-cc order-cc {
{ cc< [ src1 src2 rep ^^max-vector src1 rep cc/= (generate-compare-vector) ] }
{ cc<= [ src1 src2 rep ^^min-vector src1 rep cc= (generate-compare-vector) ] }
{ cc> [ src1 src2 rep ^^min-vector src1 rep cc/= (generate-compare-vector) ] }
{ cc>= [ src1 src2 rep ^^max-vector src1 rep cc= (generate-compare-vector) ] }
} case ;
:: generate-compare-vector ( src1 src2 rep orig-cc -- dst )
: ^load-neg-zero-vector ( rep -- dst )
{
{
[ rep orig-cc %compare-vector-reps member? ]
[ src1 src2 rep orig-cc (generate-compare-vector) ]
}
{
[ rep %min-vector-reps member? ]
[ src1 src2 rep orig-cc (generate-minmax-compare-vector) ]
}
{
[ rep unsign-rep orig-cc %compare-vector-reps member? ]
[
rep sign-bit-mask ^^load-constant :> sign-bits
src1 sign-bits rep ^^xor-vector
src2 sign-bits rep ^^xor-vector
rep unsign-rep orig-cc (generate-compare-vector)
]
}
} cond ;
:: generate-unpack-vector-head ( src rep -- dst )
{
{
[ rep %unpack-vector-head-reps member? ]
[ src rep ^^unpack-vector-head ]
}
{
[ rep unsigned-int-vector-rep? ]
[
rep ^^zero-vector :> zero
src zero rep ^^merge-vector-head
]
}
{
[ rep widen-vector-rep %shr-vector-imm-reps member? ]
[
src src rep ^^merge-vector-head
rep rep-component-type
heap-size 8 * rep widen-vector-rep ^^shr-vector-imm
]
}
[
rep ^^zero-vector :> zero
zero src rep cc> ^^compare-vector :> sign
src sign rep ^^merge-vector-head
]
} cond ;
:: generate-unpack-vector-tail ( src rep -- dst )
{
{
[ rep %unpack-vector-tail-reps member? ]
[ src rep ^^unpack-vector-tail ]
}
{
[ rep %unpack-vector-head-reps member? ]
[
src rep ^^tail>head-vector :> tail
tail rep ^^unpack-vector-head
]
}
{
[ rep unsigned-int-vector-rep? ]
[
rep ^^zero-vector :> zero
src zero rep ^^merge-vector-tail
]
}
{
[ rep widen-vector-rep %shr-vector-imm-reps member? ]
[
src src rep ^^merge-vector-tail
rep rep-component-type
heap-size 8 * rep widen-vector-rep ^^shr-vector-imm
]
}
[
rep ^^zero-vector :> zero
zero src rep cc> ^^compare-vector :> sign
src sign rep ^^merge-vector-tail
]
} cond ;
:: generate-load-neg-zero-vector ( rep -- dst )
rep {
{ float-4-rep [ float-array{ -0.0 -0.0 -0.0 -0.0 } underlying>> ^^load-constant ] }
{ double-2-rep [ double-array{ -0.0 -0.0 } underlying>> ^^load-constant ] }
[ drop rep ^^zero-vector ]
} case ;
:: generate-neg-vector ( src rep -- dst )
rep generate-load-neg-zero-vector
src rep ^^sub-vector ;
: ^load-add-sub-vector ( rep -- dst )
signed-rep {
{ float-4-rep [ float-array{ -0.0 0.0 -0.0 0.0 } underlying>> ^^load-constant ] }
{ double-2-rep [ double-array{ -0.0 0.0 } underlying>> ^^load-constant ] }
{ char-16-rep [ char-array{ -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 } underlying>> ^^load-constant ] }
{ short-8-rep [ short-array{ -1 0 -1 0 -1 0 -1 0 } underlying>> ^^load-constant ] }
{ int-4-rep [ int-array{ -1 0 -1 0 } underlying>> ^^load-constant ] }
{ longlong-2-rep [ longlong-array{ -1 0 } underlying>> ^^load-constant ] }
} case ;
:: generate-blend-vector ( mask true false rep -- dst )
mask true rep ^^and-vector
: >variable-shuffle ( shuffle rep -- shuffle' )
rep-component-type heap-size
[ dup <repetition> >byte-array ]
[ iota >byte-array ] bi
'[ _ n*v _ v+ ] map concat ;
: ^load-immediate-shuffle ( shuffle rep -- dst )
>variable-shuffle ^^load-constant ;
:: ^blend-vector ( mask true false rep -- dst )
true mask rep ^^and-vector
mask false rep ^^andn-vector
rep ^^or-vector ;
:: generate-abs-vector ( src rep -- dst )
: ^not-vector ( src rep -- dst )
{
{
[ rep unsigned-int-vector-rep? ]
[ src ]
}
{
[ rep %abs-vector-reps member? ]
[ src rep ^^abs-vector ]
}
{
[ rep float-vector-rep? ]
[
rep generate-load-neg-zero-vector
src rep ^^andn-vector
]
}
[
[ ^^not-vector ]
[ [ ^^fill-vector ] [ ^^xor-vector ] bi ]
} v-vector-op ;
:: ^((compare-vector)) ( src1 src2 rep {cc,swap} -- dst )
{cc,swap} first2 :> ( cc swap? )
swap?
[ src2 src1 rep cc ^^compare-vector ]
[ src1 src2 rep cc ^^compare-vector ] if ;
:: ^(compare-vector) ( src1 src2 rep orig-cc -- dst )
rep orig-cc %compare-vector-ccs :> ( ccs not? )
ccs empty?
[ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ]
[
ccs unclip :> ( rest-ccs first-cc )
src1 src2 rep first-cc ^((compare-vector)) :> first-dst
rest-ccs first-dst
[ [ src1 src2 rep ] dip ^((compare-vector)) rep ^^or-vector ]
reduce
not? [ rep ^not-vector ] when
] if ;
:: ^minmax-compare-vector ( src1 src2 rep cc -- dst )
cc order-cc {
{ cc< [ src1 src2 rep ^^max-vector src1 rep cc/= ^(compare-vector) ] }
{ cc<= [ src1 src2 rep ^^min-vector src1 rep cc= ^(compare-vector) ] }
{ cc> [ src1 src2 rep ^^min-vector src1 rep cc/= ^(compare-vector) ] }
{ cc>= [ src1 src2 rep ^^max-vector src1 rep cc= ^(compare-vector) ] }
} case ;
: ^compare-vector ( src1 src2 rep cc -- dst )
{
[ ^(compare-vector) ]
[ ^minmax-compare-vector ]
{ unsigned-int-vector-rep [| src1 src2 rep cc |
rep sign-bit-mask ^^load-constant :> sign-bits
src1 sign-bits rep ^^xor-vector
src2 sign-bits rep ^^xor-vector
rep signed-rep cc ^(compare-vector)
] }
} vv-cc-vector-op ;
: ^unpack-vector-head ( src rep -- dst )
{
[ ^^unpack-vector-head ]
{ unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-head ] bi ] }
{ signed-int-vector-rep [| src rep |
src src rep ^^merge-vector-head :> merged
rep rep-component-type heap-size 8 * :> bits
merged bits rep widen-vector-rep ^^shr-vector-imm
] }
{ signed-int-vector-rep [| src rep |
rep ^^zero-vector :> zero
zero src rep cc> ^compare-vector :> sign
src sign rep ^^merge-vector-head
] }
} v-vector-op ;
: ^unpack-vector-tail ( src rep -- dst )
{
[ ^^unpack-vector-tail ]
[ [ ^^tail>head-vector ] [ ^^unpack-vector-head ] bi ]
{ unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-tail ] bi ] }
{ signed-int-vector-rep [| src rep |
src src rep ^^merge-vector-tail :> merged
rep rep-component-type heap-size 8 * :> bits
merged bits rep widen-vector-rep ^^shr-vector-imm
] }
{ signed-int-vector-rep [| src rep |
rep ^^zero-vector :> zero
zero src rep cc> ^compare-vector :> sign
src sign rep ^^merge-vector-tail
] }
} v-vector-op ;
: ^(sum-vector-2) ( src rep -- dst )
{
[ dupd ^^horizontal-add-vector ]
[| src rep |
src src rep ^^merge-vector-head :> head
src src rep ^^merge-vector-tail :> tail
head tail rep ^^add-vector
]
} v-vector-op ;
: ^(sum-vector-4) ( src rep -- dst )
{
[
[ dupd ^^horizontal-add-vector ]
[ dupd ^^horizontal-add-vector ] bi
]
[| src rep |
src src rep ^^merge-vector-head :> head
src src rep ^^merge-vector-tail :> tail
head tail rep ^^add-vector :> src'
rep widen-vector-rep :> rep'
src' src' rep' ^^merge-vector-head :> head'
src' src' rep' ^^merge-vector-tail :> tail'
head' tail' rep ^^add-vector
]
} v-vector-op ;
: ^(sum-vector-8) ( src rep -- dst )
{
[
[ dupd ^^horizontal-add-vector ]
[ dupd ^^horizontal-add-vector ]
[ dupd ^^horizontal-add-vector ] tri
]
[| src rep |
src src rep ^^merge-vector-head :> head
src src rep ^^merge-vector-tail :> tail
head tail rep ^^add-vector :> src'
rep widen-vector-rep :> rep'
src' src' rep' ^^merge-vector-head :> head'
src' src' rep' ^^merge-vector-tail :> tail'
head' tail' rep ^^add-vector :> src''
rep' widen-vector-rep :> rep''
src'' src'' rep'' ^^merge-vector-head :> head''
src'' src'' rep'' ^^merge-vector-tail :> tail''
head'' tail'' rep ^^add-vector
]
} v-vector-op ;
: ^(sum-vector-16) ( src rep -- dst )
{
[
{
[ dupd ^^horizontal-add-vector ]
[ dupd ^^horizontal-add-vector ]
[ dupd ^^horizontal-add-vector ]
[ dupd ^^horizontal-add-vector ]
} cleave
]
[| src rep |
src src rep ^^merge-vector-head :> head
src src rep ^^merge-vector-tail :> tail
head tail rep ^^add-vector :> src'
rep widen-vector-rep :> rep'
src' src' rep' ^^merge-vector-head :> head'
src' src' rep' ^^merge-vector-tail :> tail'
head' tail' rep ^^add-vector :> src''
rep' widen-vector-rep :> rep''
src'' src'' rep'' ^^merge-vector-head :> head''
src'' src'' rep'' ^^merge-vector-tail :> tail''
head'' tail'' rep ^^add-vector :> src'''
rep'' widen-vector-rep :> rep'''
src''' src''' rep''' ^^merge-vector-head :> head'''
src''' src''' rep''' ^^merge-vector-tail :> tail'''
head''' tail''' rep ^^add-vector
]
} v-vector-op ;
: ^(sum-vector) ( src rep -- dst )
[
dup rep-length {
{ 2 [ ^(sum-vector-2) ] }
{ 4 [ ^(sum-vector-4) ] }
{ 8 [ ^(sum-vector-8) ] }
{ 16 [ ^(sum-vector-16) ] }
} case
] [ ^^vector>scalar ] bi ;
: ^sum-vector ( src rep -- dst )
{
{ float-vector-rep [ ^(sum-vector) ] }
{ int-vector-rep [| src rep |
src rep ^unpack-vector-head :> head
src rep ^unpack-vector-tail :> tail
rep widen-vector-rep :> wide-rep
head tail wide-rep ^^add-vector wide-rep
^(sum-vector)
] }
} v-vector-op ;
: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
: ^shuffle-vector-imm ( src1 shuffle rep -- dst )
[ rep-length 0 pad-tail ] keep {
[ ^^shuffle-vector-imm ]
[ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] bi ]
} vl-vector-op ;
: ^broadcast-vector ( src n rep -- dst )
[ rep-length swap <array> ] keep
^shuffle-vector-imm ;
: ^with-vector ( src rep -- dst )
[ ^^scalar>vector ] keep [ 0 ] dip ^broadcast-vector ;
: ^select-vector ( src n rep -- dst )
[ ^broadcast-vector ] keep ^^vector>scalar ;
! intrinsic emitters
: emit-simd-v+ ( node -- )
{
[ ^^add-vector ]
} emit-vv-vector-op ;
: emit-simd-v- ( node -- )
{
[ ^^sub-vector ]
} emit-vv-vector-op ;
: emit-simd-vneg ( node -- )
{
{ float-vector-rep [ [ ^load-neg-zero-vector swap ] [ ^^sub-vector ] bi ] }
{ int-vector-rep [ [ ^^zero-vector swap ] [ ^^sub-vector ] bi ] }
} emit-v-vector-op ;
: emit-simd-v+- ( node -- )
{
[ ^^add-sub-vector ]
{ float-vector-rep [| src1 src2 rep |
rep ^load-add-sub-vector :> signs
src2 signs rep ^^xor-vector :> src2'
src1 src2' rep ^^add-vector
] }
{ int-vector-rep [| src1 src2 rep |
rep ^load-add-sub-vector :> signs
src2 signs rep ^^xor-vector :> src2'
src2' signs rep ^^sub-vector :> src2''
src1 src2'' rep ^^add-vector
] }
} emit-vv-vector-op ;
: emit-simd-vs+ ( node -- )
{
{ float-vector-rep [ ^^add-vector ] }
{ int-vector-rep [ ^^saturated-add-vector ] }
} emit-vv-vector-op ;
: emit-simd-vs- ( node -- )
{
{ float-vector-rep [ ^^sub-vector ] }
{ int-vector-rep [ ^^saturated-sub-vector ] }
} emit-vv-vector-op ;
: emit-simd-vs* ( node -- )
{
{ float-vector-rep [ ^^mul-vector ] }
{ int-vector-rep [ ^^saturated-mul-vector ] }
} emit-vv-vector-op ;
: emit-simd-v* ( node -- )
{
[ ^^mul-vector ]
} emit-vv-vector-op ;
: emit-simd-v/ ( node -- )
{
[ ^^div-vector ]
} emit-vv-vector-op ;
: emit-simd-vmin ( node -- )
{
[ ^^min-vector ]
[
[ cc< ^compare-vector ]
[ ^blend-vector ] 3bi
]
} emit-vv-vector-op ;
: emit-simd-vmax ( node -- )
{
[ ^^max-vector ]
[
[ cc> ^compare-vector ]
[ ^blend-vector ] 3bi
]
} emit-vv-vector-op ;
: emit-simd-v. ( node -- )
{
[ ^^dot-vector ]
{ float-vector-rep [ [ ^^mul-vector ] [ ^sum-vector ] bi ] }
} emit-vv-vector-op ;
: emit-simd-vsqrt ( node -- )
{
[ ^^sqrt-vector ]
} emit-v-vector-op ;
: emit-simd-sum ( node -- )
{
[ ^sum-vector ]
} emit-v-vector-op ;
: emit-simd-vabs ( node -- )
{
{ unsigned-int-vector-rep [ drop ] }
[ ^^abs-vector ]
{ float-vector-rep [ [ ^load-neg-zero-vector ] [ swapd ^^andn-vector ] bi ] }
{ int-vector-rep [| src rep |
rep ^^zero-vector :> zero
zero src rep ^^sub-vector :> -src
zero src rep cc> ^^compare-vector :> sign
sign -src src rep generate-blend-vector
zero src rep cc> ^compare-vector :> sign
sign -src src rep ^blend-vector
] }
} emit-v-vector-op ;
: emit-simd-vand ( node -- )
{
[ ^^and-vector ]
} emit-vv-vector-op ;
: emit-simd-vandn ( node -- )
{
[ ^^andn-vector ]
} emit-vv-vector-op ;
: emit-simd-vor ( node -- )
{
[ ^^or-vector ]
} emit-vv-vector-op ;
: emit-simd-vxor ( node -- )
{
[ ^^xor-vector ]
} emit-vv-vector-op ;
: emit-simd-vnot ( node -- )
{
[ ^not-vector ]
} emit-v-vector-op ;
: emit-simd-vlshift ( node -- )
{
[ ^^shl-vector ]
} {
[ ^^shl-vector-imm ]
} [ integer? ] emit-vv-or-vl-vector-op ;
: emit-simd-vrshift ( node -- )
{
[ ^^shr-vector ]
} {
[ ^^shr-vector-imm ]
} [ integer? ] emit-vv-or-vl-vector-op ;
: emit-simd-hlshift ( node -- )
{
[ ^^horizontal-shl-vector-imm ]
} [ integer? ] emit-vl-vector-op ;
: emit-simd-hrshift ( node -- )
{
[ ^^horizontal-shr-vector-imm ]
} [ integer? ] emit-vl-vector-op ;
: emit-simd-vshuffle-elements ( node -- )
{
[ ^shuffle-vector-imm ]
} [ shuffle? ] emit-vl-vector-op ;
: emit-simd-vshuffle-bytes ( node -- )
{
[ ^^shuffle-vector ]
} emit-vv-vector-op ;
: emit-simd-vmerge-head ( node -- )
{
[ ^^merge-vector-head ]
} emit-vv-vector-op ;
: emit-simd-vmerge-tail ( node -- )
{
[ ^^merge-vector-tail ]
} emit-vv-vector-op ;
: emit-simd-v<= ( node -- )
{
[ cc<= ^compare-vector ]
} emit-vv-vector-op ;
: emit-simd-v< ( node -- )
{
[ cc< ^compare-vector ]
} emit-vv-vector-op ;
: emit-simd-v= ( node -- )
{
[ cc= ^compare-vector ]
} emit-vv-vector-op ;
: emit-simd-v> ( node -- )
{
[ cc> ^compare-vector ]
} emit-vv-vector-op ;
: emit-simd-v>= ( node -- )
{
[ cc>= ^compare-vector ]
} emit-vv-vector-op ;
: emit-simd-vunordered? ( node -- )
{
[ cc/<>= ^compare-vector ]
} emit-vv-vector-op ;
: emit-simd-vany? ( node -- )
{
[ vcc-any ^^test-vector ]
} emit-v-vector-op ;
: emit-simd-vall? ( node -- )
{
[ vcc-all ^^test-vector ]
} emit-v-vector-op ;
: emit-simd-vnone? ( node -- )
{
[ vcc-none ^^test-vector ]
} emit-v-vector-op ;
: emit-simd-v>float ( node -- )
{
{ float-vector-rep [ drop ] }
{ int-vector-rep [ ^^integer>float-vector ] }
} emit-v-vector-op ;
: emit-simd-v>integer ( node -- )
{
{ float-vector-rep [ ^^float>integer-vector ] }
{ int-vector-rep [ drop ] }
} emit-v-vector-op ;
: emit-simd-vpack-signed ( node -- )
{
[ ^^signed-pack-vector ]
} emit-vv-vector-op ;
: emit-simd-vpack-unsigned ( node -- )
{
[ ^^unsigned-pack-vector ]
} emit-vv-vector-op ;
: emit-simd-vunpack-head ( node -- )
{
[ ^unpack-vector-head ]
} emit-v-vector-op ;
: emit-simd-vunpack-tail ( node -- )
{
[ ^unpack-vector-tail ]
} emit-v-vector-op ;
: emit-simd-with ( node -- )
{
[ ^with-vector ]
} emit-v-vector-op ;
: emit-simd-gather-2 ( node -- )
{
[ ^^gather-vector-2 ]
} emit-vv-vector-op ;
: emit-simd-gather-4 ( node -- )
{
[ ^^gather-vector-4 ]
} emit-vvvv-vector-op ;
: emit-simd-select ( node -- )
{
[ ^select-vector ]
} [ integer? ] emit-vl-vector-op ;
: emit-alien-vector ( node -- )
dup [
'[
ds-drop prepare-alien-getter
_ ^^alien-vector ds-push
]
} cond ;
[ inline-alien-getter? ] inline-alien
] with { [ %alien-vector-reps member? ] } if-literals-match ;
: generate-min-vector ( src1 src2 rep -- dst )
dup %min-vector-reps member?
[ ^^min-vector ] [
[ cc< generate-compare-vector ]
[ generate-blend-vector ] 3bi
] if ;
: emit-set-alien-vector ( node -- )
dup [
'[
ds-drop prepare-alien-setter ds-pop
_ ##set-alien-vector
]
[ byte-array inline-alien-setter? ]
inline-alien
] with { [ %alien-vector-reps member? ] } if-literals-match ;
: generate-max-vector ( src1 src2 rep -- dst )
dup %max-vector-reps member?
[ ^^max-vector ] [
[ cc> generate-compare-vector ]
[ generate-blend-vector ] 3bi
] if ;
: enable-simd ( -- )
{
{ (simd-v+) [ emit-simd-v+ ] }
{ (simd-v-) [ emit-simd-v- ] }
{ (simd-vneg) [ emit-simd-vneg ] }
{ (simd-v+-) [ emit-simd-v+- ] }
{ (simd-vs+) [ emit-simd-vs+ ] }
{ (simd-vs-) [ emit-simd-vs- ] }
{ (simd-vs*) [ emit-simd-vs* ] }
{ (simd-v*) [ emit-simd-v* ] }
{ (simd-v/) [ emit-simd-v/ ] }
{ (simd-vmin) [ emit-simd-vmin ] }
{ (simd-vmax) [ emit-simd-vmax ] }
{ (simd-v.) [ emit-simd-v. ] }
{ (simd-vsqrt) [ emit-simd-vsqrt ] }
{ (simd-sum) [ emit-simd-sum ] }
{ (simd-vabs) [ emit-simd-vabs ] }
{ (simd-vbitand) [ emit-simd-vand ] }
{ (simd-vbitandn) [ emit-simd-vandn ] }
{ (simd-vbitor) [ emit-simd-vor ] }
{ (simd-vbitxor) [ emit-simd-vxor ] }
{ (simd-vbitnot) [ emit-simd-vnot ] }
{ (simd-vand) [ emit-simd-vand ] }
{ (simd-vandn) [ emit-simd-vandn ] }
{ (simd-vor) [ emit-simd-vor ] }
{ (simd-vxor) [ emit-simd-vxor ] }
{ (simd-vnot) [ emit-simd-vnot ] }
{ (simd-vlshift) [ emit-simd-vlshift ] }
{ (simd-vrshift) [ emit-simd-vrshift ] }
{ (simd-hlshift) [ emit-simd-hlshift ] }
{ (simd-hrshift) [ emit-simd-hrshift ] }
{ (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] }
{ (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] }
{ (simd-vmerge-head) [ emit-simd-vmerge-head ] }
{ (simd-vmerge-tail) [ emit-simd-vmerge-tail ] }
{ (simd-v<=) [ emit-simd-v<= ] }
{ (simd-v<) [ emit-simd-v< ] }
{ (simd-v=) [ emit-simd-v= ] }
{ (simd-v>) [ emit-simd-v> ] }
{ (simd-v>=) [ emit-simd-v>= ] }
{ (simd-vunordered?) [ emit-simd-vunordered? ] }
{ (simd-vany?) [ emit-simd-vany? ] }
{ (simd-vall?) [ emit-simd-vall? ] }
{ (simd-vnone?) [ emit-simd-vnone? ] }
{ (simd-v>float) [ emit-simd-v>float ] }
{ (simd-v>integer) [ emit-simd-v>integer ] }
{ (simd-vpack-signed) [ emit-simd-vpack-signed ] }
{ (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] }
{ (simd-vunpack-head) [ emit-simd-vunpack-head ] }
{ (simd-vunpack-tail) [ emit-simd-vunpack-tail ] }
{ (simd-with) [ emit-simd-with ] }
{ (simd-gather-2) [ emit-simd-gather-2 ] }
{ (simd-gather-4) [ emit-simd-gather-4 ] }
{ (simd-select) [ emit-simd-select ] }
{ alien-vector [ emit-alien-vector ] }
{ set-alien-vector [ emit-set-alien-vector ] }
} enable-intrinsics ;
enable-simd

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license.
USING: accessors combinators combinators.short-circuit arrays
fry kernel layouts math namespaces sequences cpu.architecture
math.bitwise math.order math.vectors.simd.intrinsics classes
math.bitwise math.order classes
vectors locals make alien.c-types io.binary grouping
compiler.cfg
compiler.cfg.registers
@ -42,6 +42,14 @@ M: insn rewrite drop f ;
] [ drop f ] if ; inline
: general-compare-expr? ( insn -- ? )
{
[ compare-expr? ]
[ compare-imm-expr? ]
[ compare-float-unordered-expr? ]
[ compare-float-ordered-expr? ]
} 1|| ;
: general-or-vector-compare-expr? ( insn -- ? )
{
[ compare-expr? ]
[ compare-imm-expr? ]
@ -52,7 +60,7 @@ M: insn rewrite drop f ;
: rewrite-boolean-comparison? ( insn -- ? )
dup ##branch-t? [
src1>> vreg>expr general-compare-expr?
src1>> vreg>expr general-or-vector-compare-expr?
] [ drop f ] if ; inline
: >compare-expr< ( expr -- in1 in2 cc )
@ -463,100 +471,9 @@ M: ##alien-signed-2 rewrite rewrite-alien-addressing ;
M: ##alien-signed-4 rewrite rewrite-alien-addressing ;
M: ##alien-float rewrite rewrite-alien-addressing ;
M: ##alien-double rewrite rewrite-alien-addressing ;
M: ##alien-vector rewrite rewrite-alien-addressing ;
M: ##set-alien-integer-1 rewrite rewrite-alien-addressing ;
M: ##set-alien-integer-2 rewrite rewrite-alien-addressing ;
M: ##set-alien-integer-4 rewrite rewrite-alien-addressing ;
M: ##set-alien-float rewrite rewrite-alien-addressing ;
M: ##set-alien-double rewrite rewrite-alien-addressing ;
M: ##set-alien-vector rewrite rewrite-alien-addressing ;
! Some lame constant folding for SIMD intrinsics. Eventually this
! should be redone completely.
: rewrite-shuffle-vector-imm ( insn expr -- insn' )
2dup [ rep>> ] bi@ eq? [
[ [ dst>> ] [ src>> vn>vreg ] bi* ]
[ [ shuffle>> ] bi@ nths ]
[ drop rep>> ]
2tri \ ##shuffle-vector-imm new-insn
] [ 2drop f ] if ;
: (fold-shuffle-vector-imm) ( shuffle bytes -- bytes' )
2dup length swap length /i group nths concat ;
: fold-shuffle-vector-imm ( insn expr -- insn' )
[ [ dst>> ] [ shuffle>> ] bi ] dip value>>
(fold-shuffle-vector-imm) \ ##load-constant new-insn ;
M: ##shuffle-vector-imm rewrite
dup src>> vreg>expr {
{ [ dup shuffle-vector-imm-expr? ] [ rewrite-shuffle-vector-imm ] }
{ [ dup reference-expr? ] [ fold-shuffle-vector-imm ] }
{ [ dup constant-expr? ] [ fold-shuffle-vector-imm ] }
[ 2drop f ]
} cond ;
: (fold-scalar>vector) ( insn bytes -- insn' )
[ [ dst>> ] [ rep>> rep-components ] bi ] dip <repetition> concat
\ ##load-constant new-insn ;
: fold-scalar>vector ( insn expr -- insn' )
value>> over rep>> {
{ float-4-rep [ float>bits 4 >le (fold-scalar>vector) ] }
{ double-2-rep [ double>bits 8 >le (fold-scalar>vector) ] }
[ [ untag-fixnum ] dip rep-component-type heap-size >le (fold-scalar>vector) ]
} case ;
M: ##scalar>vector rewrite
dup src>> vreg>expr dup constant-expr?
[ fold-scalar>vector ] [ 2drop f ] if ;
M: ##xor-vector rewrite
dup [ src1>> vreg>vn ] [ src2>> vreg>vn ] bi eq?
[ [ dst>> ] [ rep>> ] bi \ ##zero-vector new-insn ] [ drop f ] if ;
: vector-not? ( expr -- ? )
{
[ not-vector-expr? ]
[ {
[ xor-vector-expr? ]
[ [ src1>> ] [ src2>> ] bi [ vn>expr fill-vector-expr? ] either? ]
} 1&& ]
} 1|| ;
GENERIC: vector-not-src ( expr -- vreg )
M: not-vector-expr vector-not-src src>> vn>vreg ;
M: xor-vector-expr vector-not-src
dup src1>> vn>expr fill-vector-expr? [ src2>> ] [ src1>> ] if vn>vreg ;
M: ##and-vector rewrite
{
{ [ dup src1>> vreg>expr vector-not? ] [
{
[ dst>> ]
[ src1>> vreg>expr vector-not-src ]
[ src2>> ]
[ rep>> ]
} cleave \ ##andn-vector new-insn
] }
{ [ dup src2>> vreg>expr vector-not? ] [
{
[ dst>> ]
[ src2>> vreg>expr vector-not-src ]
[ src1>> ]
[ rep>> ]
} cleave \ ##andn-vector new-insn
] }
[ drop f ]
} cond ;
M: ##andn-vector rewrite
dup src1>> vreg>expr vector-not? [
{
[ dst>> ]
[ src1>> vreg>expr vector-not-src ]
[ src2>> ]
[ rep>> ]
} cleave \ ##and-vector new-insn
] [ drop f ] if ;

View File

@ -0,0 +1,120 @@
! Copyright (C) 2008, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors combinators combinators.short-circuit arrays
fry kernel layouts math namespaces sequences cpu.architecture
math.bitwise math.order classes
vectors locals make alien.c-types io.binary grouping
math.vectors.simd.intrinsics
compiler.cfg
compiler.cfg.registers
compiler.cfg.comparisons
compiler.cfg.instructions
compiler.cfg.value-numbering.expressions
compiler.cfg.value-numbering.graph
compiler.cfg.value-numbering.rewrite
compiler.cfg.value-numbering.simplify ;
IN: compiler.cfg.value-numbering.simd
M: ##alien-vector rewrite rewrite-alien-addressing ;
M: ##set-alien-vector rewrite rewrite-alien-addressing ;
! Some lame constant folding for SIMD intrinsics. Eventually this
! should be redone completely.
: rewrite-shuffle-vector-imm ( insn expr -- insn' )
2dup [ rep>> ] bi@ eq? [
[ [ dst>> ] [ src>> vn>vreg ] bi* ]
[ [ shuffle>> ] bi@ nths ]
[ drop rep>> ]
2tri \ ##shuffle-vector-imm new-insn
] [ 2drop f ] if ;
: (fold-shuffle-vector-imm) ( shuffle bytes -- bytes' )
2dup length swap length /i group nths concat ;
: fold-shuffle-vector-imm ( insn expr -- insn' )
[ [ dst>> ] [ shuffle>> ] bi ] dip value>>
(fold-shuffle-vector-imm) \ ##load-constant new-insn ;
M: ##shuffle-vector-imm rewrite
dup src>> vreg>expr {
{ [ dup shuffle-vector-imm-expr? ] [ rewrite-shuffle-vector-imm ] }
{ [ dup reference-expr? ] [ fold-shuffle-vector-imm ] }
{ [ dup constant-expr? ] [ fold-shuffle-vector-imm ] }
[ 2drop f ]
} cond ;
: (fold-scalar>vector) ( insn bytes -- insn' )
[ [ dst>> ] [ rep>> rep-length ] bi ] dip <repetition> concat
\ ##load-constant new-insn ;
: fold-scalar>vector ( insn expr -- insn' )
value>> over rep>> {
{ float-4-rep [ float>bits 4 >le (fold-scalar>vector) ] }
{ double-2-rep [ double>bits 8 >le (fold-scalar>vector) ] }
[ [ untag-fixnum ] dip rep-component-type heap-size >le (fold-scalar>vector) ]
} case ;
M: ##scalar>vector rewrite
dup src>> vreg>expr dup constant-expr?
[ fold-scalar>vector ] [ 2drop f ] if ;
M: ##xor-vector rewrite
dup [ src1>> vreg>vn ] [ src2>> vreg>vn ] bi eq?
[ [ dst>> ] [ rep>> ] bi \ ##zero-vector new-insn ] [ drop f ] if ;
: vector-not? ( expr -- ? )
{
[ not-vector-expr? ]
[ {
[ xor-vector-expr? ]
[ [ src1>> ] [ src2>> ] bi [ vn>expr fill-vector-expr? ] either? ]
} 1&& ]
} 1|| ;
GENERIC: vector-not-src ( expr -- vreg )
M: not-vector-expr vector-not-src src>> vn>vreg ;
M: xor-vector-expr vector-not-src
dup src1>> vn>expr fill-vector-expr? [ src2>> ] [ src1>> ] if vn>vreg ;
M: ##and-vector rewrite
{
{ [ dup src1>> vreg>expr vector-not? ] [
{
[ dst>> ]
[ src1>> vreg>expr vector-not-src ]
[ src2>> ]
[ rep>> ]
} cleave \ ##andn-vector new-insn
] }
{ [ dup src2>> vreg>expr vector-not? ] [
{
[ dst>> ]
[ src2>> vreg>expr vector-not-src ]
[ src1>> ]
[ rep>> ]
} cleave \ ##andn-vector new-insn
] }
[ drop f ]
} cond ;
M: ##andn-vector rewrite
dup src1>> vreg>expr vector-not? [
{
[ dst>> ]
[ src1>> vreg>expr vector-not-src ]
[ src2>> ]
[ rep>> ]
} cleave \ ##and-vector new-insn
] [ drop f ] if ;
M: scalar>vector-expr simplify*
src>> vn>expr {
{ [ dup vector>scalar-expr? ] [ src>> ] }
[ drop f ]
} cond ;
M: shuffle-vector-imm-expr simplify*
[ src>> ] [ shuffle>> ] [ rep>> rep-length iota ] tri
sequence= [ drop f ] unless ;

View File

@ -1,7 +1,7 @@
! Copyright (C) 2008, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel accessors combinators classes math layouts
sequences math.vectors.simd.intrinsics
sequences
compiler.cfg.instructions
compiler.cfg.value-numbering.graph
compiler.cfg.value-numbering.expressions ;
@ -130,16 +130,6 @@ M: box-displaced-alien-expr simplify*
[ 2drop f ]
} cond ;
M: scalar>vector-expr simplify*
src>> vn>expr {
{ [ dup vector>scalar-expr? ] [ src>> ] }
[ drop f ]
} cond ;
M: shuffle-vector-imm-expr simplify*
[ src>> ] [ shuffle>> ] [ rep>> rep-components iota ] tri
sequence= [ drop f ] unless ;
M: expr simplify* drop f ;
: simplify ( expr -- vn )

View File

@ -4,7 +4,7 @@ cpu.architecture tools.test kernel math combinators.short-circuit
accessors sequences compiler.cfg.predecessors locals compiler.cfg.dce
compiler.cfg.ssa.destruction compiler.cfg.loop-detection
compiler.cfg.representations compiler.cfg assocs vectors arrays
layouts literals namespaces alien ;
layouts literals namespaces alien compiler.cfg.value-numbering.simd ;
IN: compiler.cfg.value-numbering.tests
: trim-temps ( insns -- insns )

View File

@ -16,8 +16,7 @@ compiler.tree.propagation.slots
compiler.tree.propagation.simple
compiler.tree.propagation.constraints
compiler.tree.propagation.call-effect
compiler.tree.propagation.transforms
compiler.tree.propagation.simd ;
compiler.tree.propagation.transforms ;
FROM: alien.c-types => (signed-interval) (unsigned-interval) ;
IN: compiler.tree.propagation.known-words

View File

@ -1,57 +1,77 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors byte-arrays combinators fry sequences
compiler.tree.propagation.info cpu.architecture kernel words math
math.intervals math.vectors.simd.intrinsics ;
USING: accessors assocs byte-arrays combinators compiler.cfg.builder
continuations fry sequences compiler.tree.propagation.info
cpu.architecture kernel words make math math.intervals
math.vectors.simd.intrinsics namespaces ;
IN: compiler.tree.propagation.simd
{
(simd-v+)
(simd-v-)
(simd-vneg)
(simd-vabs)
(simd-v+-)
(simd-v*)
(simd-v/)
(simd-vmin)
(simd-vmax)
(simd-sum)
(simd-vsqrt)
(simd-vbitand)
(simd-vbitandn)
(simd-vbitor)
(simd-vbitxor)
(simd-vbitnot)
(simd-vand)
(simd-vandn)
(simd-vor)
(simd-vxor)
(simd-vnot)
(simd-vlshift)
(simd-vrshift)
(simd-hlshift)
(simd-hrshift)
(simd-vshuffle-bytes)
(simd-vshuffle-elements)
(simd-(vmerge-head))
(simd-(vmerge-tail))
(simd-(v>float))
(simd-(v>integer))
(simd-(vpack-signed))
(simd-(vpack-unsigned))
(simd-(vunpack-head))
(simd-(vunpack-tail))
(simd-v<=)
(simd-v<)
(simd-v=)
(simd-v>)
(simd-v>=)
(simd-vunordered?)
(simd-with)
(simd-gather-2)
(simd-gather-4)
alien-vector
} [ { byte-array } "default-output-classes" set-word-prop ] each
CONSTANT: vector>vector-intrinsics
{
(simd-v+)
(simd-v-)
(simd-vneg)
(simd-v+-)
(simd-vs+)
(simd-vs-)
(simd-vs*)
(simd-v*)
(simd-v/)
(simd-vmin)
(simd-vmax)
(simd-vsqrt)
(simd-vabs)
(simd-vbitand)
(simd-vbitandn)
(simd-vbitor)
(simd-vbitxor)
(simd-vbitnot)
(simd-vand)
(simd-vandn)
(simd-vor)
(simd-vxor)
(simd-vnot)
(simd-vlshift)
(simd-vrshift)
(simd-hlshift)
(simd-hrshift)
(simd-vshuffle-elements)
(simd-vshuffle-bytes)
(simd-vmerge-head)
(simd-vmerge-tail)
(simd-v<=)
(simd-v<)
(simd-v=)
(simd-v>)
(simd-v>=)
(simd-vunordered?)
(simd-v>float)
(simd-v>integer)
(simd-vpack-signed)
(simd-vpack-unsigned)
(simd-vunpack-head)
(simd-vunpack-tail)
(simd-with)
(simd-gather-2)
(simd-gather-4)
alien-vector
}
CONSTANT: vector-other-intrinsics
{
(simd-v.)
(simd-sum)
(simd-vany?)
(simd-vall?)
(simd-vnone?)
(simd-select)
set-alien-vector
}
: vector-intrinsics ( -- x )
vector>vector-intrinsics vector-other-intrinsics append ;
vector>vector-intrinsics [ { byte-array } "default-output-classes" set-word-prop ] each
: scalar-output-class ( rep -- class )
dup literal?>> [
@ -79,12 +99,24 @@ IN: compiler.tree.propagation.simd
real [0,inf] <class/interval-info> value-info-intersect
] "outputs" set-word-prop
! If SIMD is not available, inline alien-vector and set-alien-vector
! to get a speedup
: clone-with-value-infos ( node -- node' )
clone dup in-d>> [ dup value-info ] H{ } map>assoc >>info ;
: try-intrinsic ( node intrinsic-quot -- ? )
'[
_ clone-with-value-infos
_ with-dummy-cfg-builder
t
] [ drop f ] recover ;
: inline-unless-intrinsic ( word -- )
dup '[ drop _ dup "intrinsic" word-prop [ drop f ] [ def>> ] if ]
dup '[
_ swap over "intrinsic" word-prop
"always-inline-simd-intrinsics" get not swap and
! word node intrinsic
[ try-intrinsic [ drop f ] [ def>> ] if ]
[ drop def>> ] if*
]
"custom-inlining" set-word-prop ;
\ alien-vector inline-unless-intrinsic
\ set-alien-vector inline-unless-intrinsic
vector-intrinsics [ inline-unless-intrinsic ] each

View File

@ -1,5 +0,0 @@
USING: cords strings tools.test kernel sequences ;
IN: cords.tests
[ "hello world" ] [ "hello" " world" cord-append dup like ] unit-test
[ "hello world" ] [ { "he" "llo" " world" } cord-concat dup like ] unit-test

View File

@ -1,72 +0,0 @@
! Copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors assocs sequences sorting binary-search math
math.order arrays combinators kernel ;
IN: cords
<PRIVATE
TUPLE: simple-cord
{ first read-only } { second read-only } ;
M: simple-cord length
[ first>> length ] [ second>> length ] bi + ; inline
M: simple-cord virtual-exemplar first>> ; inline
M: simple-cord virtual@
2dup first>> length <
[ first>> ] [ [ first>> length - ] [ second>> ] bi ] if ; inline
TUPLE: multi-cord
{ count read-only } { seqs read-only } ;
M: multi-cord length count>> ; inline
M: multi-cord virtual@
dupd
seqs>> [ first <=> ] with search nip
[ first - ] [ second ] bi ; inline
M: multi-cord virtual-exemplar
seqs>> [ f ] [ first second ] if-empty ; inline
: <cord> ( seqs -- cord )
dup length 2 = [
first2 simple-cord boa
] [
[ 0 [ length + ] accumulate ] keep zip multi-cord boa
] if ; inline
PRIVATE>
UNION: cord simple-cord multi-cord ;
INSTANCE: cord virtual-sequence
INSTANCE: multi-cord virtual-sequence
: cord-append ( seq1 seq2 -- cord )
{
{ [ over empty? ] [ nip ] }
{ [ dup empty? ] [ drop ] }
{ [ 2dup [ cord? ] both? ] [ [ seqs>> values ] bi@ append <cord> ] }
{ [ over cord? ] [ [ seqs>> values ] dip suffix <cord> ] }
{ [ dup cord? ] [ seqs>> values swap prefix <cord> ] }
[ 2array <cord> ]
} cond ; inline
: cord-concat ( seqs -- cord )
{
{ [ dup empty? ] [ drop f ] }
{ [ dup length 1 = ] [ first ] }
[
[
{
{ [ dup cord? ] [ seqs>> values ] }
{ [ dup empty? ] [ drop { } ] }
[ 1array ]
} cond
] map concat <cord>
]
} cond ; inline

View File

@ -95,7 +95,7 @@ double-rep
vector-rep
scalar-rep ;
: unsign-rep ( rep -- rep' )
: signed-rep ( rep -- rep' )
{
{ uint-4-rep int-4-rep }
{ ulonglong-2-rep longlong-2-rep }
@ -105,7 +105,7 @@ scalar-rep ;
{ ushort-scalar-rep short-scalar-rep }
{ uint-scalar-rep int-scalar-rep }
{ ulonglong-scalar-rep longlong-scalar-rep }
} ?at drop ;
} ?at drop ; foldable
: widen-vector-rep ( rep -- rep' )
{
@ -115,7 +115,19 @@ scalar-rep ;
{ uchar-16-rep ushort-8-rep }
{ ushort-8-rep uint-4-rep }
{ uint-4-rep ulonglong-2-rep }
} at ;
{ float-4-rep double-2-rep }
} at ; foldable
: narrow-vector-rep ( rep -- rep' )
{
{ short-8-rep char-16-rep }
{ int-4-rep short-8-rep }
{ longlong-2-rep int-4-rep }
{ ushort-8-rep uchar-16-rep }
{ uint-4-rep ushort-8-rep }
{ ulonglong-2-rep uint-4-rep }
{ double-2-rep float-4-rep }
} at ; foldable
! Register classes
SINGLETONS: int-regs float-regs ;
@ -277,8 +289,8 @@ HOOK: %min-vector cpu ( dst src1 src2 rep -- )
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
HOOK: %dot-vector cpu ( dst src1 src2 rep -- )
HOOK: %sqrt-vector cpu ( dst src rep -- )
HOOK: %horizontal-add-vector cpu ( dst src rep -- )
HOOK: %horizontal-sub-vector cpu ( dst src rep -- )
HOOK: %horizontal-add-vector cpu ( dst src1 src2 rep -- )
HOOK: %horizontal-sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %abs-vector cpu ( dst src rep -- )
HOOK: %and-vector cpu ( dst src1 src2 rep -- )
HOOK: %andn-vector cpu ( dst src1 src2 rep -- )
@ -385,6 +397,10 @@ M: object %shr-vector-imm-reps { } ;
M: object %horizontal-shl-vector-imm-reps { } ;
M: object %horizontal-shr-vector-imm-reps { } ;
ALIAS: %merge-vector-head-reps %merge-vector-reps
ALIAS: %merge-vector-tail-reps %merge-vector-reps
ALIAS: %tail>head-vector-reps %unpack-vector-head-reps
HOOK: %unbox-alien cpu ( dst src -- )
HOOK: %unbox-any-c-ptr cpu ( dst src -- )
HOOK: %box-alien cpu ( dst src temp -- )

View File

@ -650,7 +650,7 @@ M: x86 %fill-vector-reps
} available-reps ;
! M:: x86 %broadcast-vector ( dst src rep -- )
! rep unsign-rep {
! rep signed-rep {
! { float-4-rep [
! dst src float-4-rep %copy
! dst dst { 0 0 0 0 } SHUFPS
@ -687,7 +687,7 @@ M: x86 %fill-vector-reps
! } available-reps ;
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
rep unsign-rep {
rep signed-rep {
{ float-4-rep [
dst src1 float-4-rep %copy
dst src2 UNPCKLPS
@ -710,7 +710,7 @@ M: x86 %gather-vector-4-reps
} available-reps ;
M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
rep unsign-rep {
rep signed-rep {
{ double-2-rep [
dst src1 double-2-rep %copy
dst src2 MOVLHPS
@ -763,7 +763,7 @@ M: x86 %gather-vector-2-reps
M:: x86 %shuffle-vector-imm ( dst src shuffle rep -- )
dst src rep %copy
dst shuffle rep unsign-rep {
dst shuffle rep signed-rep {
{ double-2-rep [ >float-4-shuffle float-4-shuffle ] }
{ float-4-rep [ float-4-shuffle ] }
{ int-4-rep [ int-4-shuffle ] }
@ -786,7 +786,7 @@ M: x86 %shuffle-vector-reps
M: x86 %merge-vector-head
[ two-operand ] keep
unsign-rep {
signed-rep {
{ double-2-rep [ MOVLHPS ] }
{ float-4-rep [ UNPCKLPS ] }
{ longlong-2-rep [ PUNPCKLQDQ ] }
@ -797,7 +797,7 @@ M: x86 %merge-vector-head
M: x86 %merge-vector-tail
[ two-operand ] keep
unsign-rep {
signed-rep {
{ double-2-rep [ UNPCKHPD ] }
{ float-4-rep [ UNPCKHPS ] }
{ longlong-2-rep [ PUNPCKHQDQ ] }
@ -826,7 +826,7 @@ M: x86 %signed-pack-vector-reps
M: x86 %unsigned-pack-vector
[ two-operand ] keep
unsign-rep {
signed-rep {
{ int-4-rep [ PACKUSDW ] }
{ short-8-rep [ PACKUSWB ] }
} case ;
@ -896,7 +896,7 @@ M: x86 %float>integer-vector-reps
} case ;
:: (%compare-int-vector) ( dst src rep int64 int32 int16 int8 -- )
rep unsign-rep :> rep'
rep signed-rep :> rep'
dst src rep' {
{ longlong-2-rep [ int64 call ] }
{ int-4-rep [ int32 call ] }
@ -1162,34 +1162,28 @@ M: x86 %max-vector-reps
M: x86 %dot-vector
[ two-operand ] keep
{
{ float-4-rep [
sse4.1?
[ HEX: ff DPPS ]
[ [ MULPS ] [ drop dup float-4-rep %horizontal-add-vector ] 2bi ]
if
] }
{ double-2-rep [
sse4.1?
[ HEX: ff DPPD ]
[ [ MULPD ] [ drop dup double-2-rep %horizontal-add-vector ] 2bi ]
if
] }
{ float-4-rep [ HEX: ff DPPS ] }
{ double-2-rep [ HEX: ff DPPD ] }
} case ;
M: x86 %dot-vector-reps
{
{ sse3? { float-4-rep double-2-rep } }
{ sse4.1? { float-4-rep double-2-rep } }
} available-reps ;
M: x86 %horizontal-add-vector ( dst src rep -- )
{
{ float-4-rep [ [ float-4-rep %copy ] [ HADDPS ] [ HADDPS ] 2tri ] }
{ double-2-rep [ [ double-2-rep %copy ] [ HADDPD ] 2bi ] }
M: x86 %horizontal-add-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
signed-rep {
{ float-4-rep [ HADDPS ] }
{ double-2-rep [ HADDPD ] }
{ int-4-rep [ PHADDD ] }
{ short-8-rep [ PHADDW ] }
} case ;
M: x86 %horizontal-add-vector-reps
{
{ sse3? { float-4-rep double-2-rep } }
{ ssse3? { int-4-rep uint-4-rep short-8-rep ushort-8-rep } }
} available-reps ;
M: x86 %horizontal-shl-vector-imm ( dst src1 src2 rep -- )
@ -1197,7 +1191,7 @@ M: x86 %horizontal-shl-vector-imm ( dst src1 src2 rep -- )
M: x86 %horizontal-shl-vector-imm-reps
{
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep float-4-rep double-2-rep } }
} available-reps ;
M: x86 %horizontal-shr-vector-imm ( dst src1 src2 rep -- )
@ -1205,7 +1199,7 @@ M: x86 %horizontal-shr-vector-imm ( dst src1 src2 rep -- )
M: x86 %horizontal-shr-vector-imm-reps
{
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep float-4-rep double-2-rep } }
} available-reps ;
M: x86 %abs-vector ( dst src rep -- )
@ -1329,8 +1323,15 @@ M: x86 %shr-vector-imm-reps %shr-vector-reps ;
M: x86 %integer>scalar drop MOVD ;
! XXX the longlong versions won't work on x86.32
M:: x86 %scalar>integer ( dst src rep -- )
rep {
{ longlong-scalar-rep [
dst src MOVD
] }
{ ulonglong-scalar-rep [
dst src MOVD
] }
{ int-scalar-rep [
dst 32-bit-version-of src MOVD
dst dst 32-bit-version-of
@ -1401,7 +1402,6 @@ M: x86 immediate-bitwise? ( n -- ? )
#! set up by the caller.
stack-frame get total-size>> + stack@ ;
enable-simd
enable-min/max
enable-fixnum-log2

View File

@ -5,6 +5,8 @@ IN: fry.tests
SYMBOLS: a b c d e f g h ;
[ [ ] ] [ '[ ] ] unit-test
[ [ + ] ] [ '[ + ] ] unit-test
[ [ 1 ] ] [ 1 '[ _ ] ] unit-test
[ [ 1 ] ] [ [ 1 ] '[ @ ] ] unit-test
[ [ 1 2 ] ] [ [ 1 ] [ 2 ] '[ @ @ ] ] unit-test

View File

@ -136,10 +136,12 @@ TUPLE: dredge-fry-state
PRIVATE>
M: callable fry ( quot -- quot' )
0 swap <dredge-fry>
[ dredge-fry ] [
[ prequot>> >quotation ]
[ quot>> >quotation shallow-fry ] bi append
] bi ;
[ [ [ ] ] ] [
0 swap <dredge-fry>
[ dredge-fry ] [
[ prequot>> >quotation ]
[ quot>> >quotation shallow-fry ] bi append
] bi
] if-empty ;
SYNTAX: '[ parse-quotation fry append! ;

View File

@ -87,7 +87,6 @@ ARTICLE: "io.mmap.examples" "Memory-mapped file examples"
"Normalize a file containing packed quadrupes of floats:"
{ $code
"USING: kernel io.mmap math.vectors math.vectors.simd" "sequences specialized-arrays ;"
"SIMD: float"
"SPECIALIZED-ARRAY: float-4"
""
"\"mydata.dat\" float-4 ["

View File

@ -41,7 +41,6 @@ CONSTANT: b 2
[ 0 ] [ BIN: 0 bit-count ] unit-test
[ 1 ] [ BIN: 1 bit-count ] unit-test
SIMD: uint
SPECIALIZED-ARRAY: uint
SPECIALIZED-ARRAY: uint-4

View File

@ -1,21 +0,0 @@
! (c)Joe Groff bsd license
USING: accessors alien.c-types arrays assocs classes combinators
cords fry kernel math math.vectors sequences ;
IN: math.vectors.conversion.backend
: saturate-map-as ( v quot result -- w )
[ element-type '[ @ _ c-type-clamp ] ] keep map-as ; inline
: (v>float) ( i to-type -- f )
[ >float ] swap new map-as ;
: (v>integer) ( f to-type -- i )
[ >integer ] swap new map-as ;
: (vpack-signed) ( a b to-type -- ab )
[ cord-append [ ] ] dip new saturate-map-as ;
: (vpack-unsigned) ( a b to-type -- ab )
[ cord-append [ ] ] dip new saturate-map-as ;
: (vunpack-head) ( ab to-type -- a )
[ dup length 2 /i head-slice ] dip new like ;
: (vunpack-tail) ( ab to-type -- b )
[ dup length 2 /i tail-slice ] dip new like ;

View File

@ -22,7 +22,7 @@ HELP: vconvert
}
{ $description "Converts SIMD vectors of " { $snippet "from-type" } " to " { $snippet "to-type" } ". The number of inputs and outputs depends on the relationship of the two types:"
{ $list
{ "If " { $snippet "to-type" } " is a floating-point vector type with the same byte length and element count as the integer vector type " { $snippet "from-type" } " (for example, from " { $snippet "int-8" } " to " { $snippet "float-8" } " or from " { $snippet "longlong-2" } " to " { $snippet "double-2" } "), " { $snippet "vconvert" } " takes one vector of " { $snippet "from-type" } " and converts its elements to floating-point, outputting one vector of " { $snippet "to-type" } "." }
{ "If " { $snippet "to-type" } " is a floating-point vector type with the same byte length and element count as the integer vector type " { $snippet "from-type" } " (for example, from " { $snippet "int-4" } " to " { $snippet "float-4" } " or from " { $snippet "longlong-2" } " to " { $snippet "double-2" } "), " { $snippet "vconvert" } " takes one vector of " { $snippet "from-type" } " and converts its elements to floating-point, outputting one vector of " { $snippet "to-type" } "." }
{ "Likewise, if " { $snippet "to-type" } " is an integer vector type with the same byte length and element count as the floating-point vector type " { $snippet "from-type" } ", " { $snippet "vconvert" } " takes one vector of " { $snippet "from-type" } " and truncates its elements to integers, outputting one vector of " { $snippet "to-type" } "." }
{ "If " { $snippet "to-type" } " is a vector type with the same byte length as and twice the element count of the vector type " { $snippet "from-type" } " (for example, from " { $snippet "int-4" } " to " { $snippet "ushort-8" } ", from " { $snippet "double-2" } " to " { $snippet "float-4" } ", or from " { $snippet "short-8" } " to " { $snippet "char-16" } "), " { $snippet "vconvert" } " takes two vectors of " { $snippet "from-type" } " and packs them into one vector of " { $snippet "to-type" } ", saturating values too large or small to be representable as elements of " { $snippet "to-type" } "." }
{ "If " { $snippet "to-type" } " is a vector type with the same byte length as and half the element count of the vector type " { $snippet "from-type" } " (for example, from " { $snippet "ushort-8" } " to " { $snippet "int-4" } ", from " { $snippet "float-4" } " to " { $snippet "double-2" } ", or from " { $snippet "char-16" } " to " { $snippet "short-8" } "), " { $snippet "vconvert" } " takes one vector of " { $snippet "from-type" } " and unpacks it into two vectors of " { $snippet "to-type" } "." }
@ -39,26 +39,23 @@ HELP: vconvert
"Conversion between integer and float vectors:"
{ $example """USING: alien.c-types math.vectors.conversion math.vectors.simd
prettyprint ;
SIMDS: int float longlong double ;
int-8{ 0 1 2 3 4 5 6 7 } int-8 float-8 vconvert .
int-4{ 0 1 2 3 } int-4 float-4 vconvert .
double-2{ 1.25 3.75 } double-2 longlong-2 vconvert ."""
"""float-8{ 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 }
"""float-4{ 0.0 1.0 2.0 3.0 }
longlong-2{ 1 3 }""" }
"Packing conversions:"
{ $example """USING: alien.c-types math.vectors.conversion math.vectors.simd
prettyprint ;
SIMDS: ushort int float double ;
int-4{ -8 70000 6000 50 } int-4{ 4 3 2 -1 } int-4 ushort-8 vconvert .
double-4{ 0.0 1.5 1.0e100 2.0 }
double-4{ -1.0e100 0.0 1.0 2.0 } double-4 float-8 vconvert ."""
double-2{ 0.0 1.0e100 }
double-2{ -1.0e100 0.0 } double-2 float-4 vconvert ."""
"""ushort-8{ 0 65535 6000 50 4 3 2 0 }
float-8{ 0.0 1.5 1/0. 2.0 -1/0. 0.0 1.0 2.0 }""" }
float-4{ 0.0 1/0. -1/0. 0.0 }""" }
"Unpacking conversions:"
{ $example """USING: alien.c-types kernel math.vectors.conversion
math.vectors.simd prettyprint ;
SIMDS: uchar short ;
uchar-16{ 8 70 60 50 4 30 200 1 9 10 110 102 133 143 115 0 }
uchar-16 short-8 vconvert [ . ] bi@"""

View File

@ -3,16 +3,6 @@ USING: accessors arrays compiler continuations generalizations
kernel kernel.private locals math.vectors.conversion math.vectors.simd
sequences stack-checker tools.test ;
FROM: alien.c-types => char uchar short ushort int uint longlong ulonglong float double ;
SIMD: uchar
SIMD: char
SIMD: ushort
SIMD: short
SIMD: uint
SIMD: int
SIMD: ulonglong
SIMD: longlong
SIMD: float
SIMD: double
IN: math.vectors.conversion.tests
ERROR: optimized-vconvert-inconsistent
@ -59,12 +49,12 @@ MACRO:: test-vconvert ( from-type to-type -- )
[ double-2{ -5.0 1.0 } ]
[ longlong-2{ -5 1 } longlong-2 double-2 test-vconvert ] unit-test
[ longlong-4{ -5 1 2 6 } ]
[ double-4{ -5.0 1.0 2.3 6.7 } double-4 longlong-4 test-vconvert ] unit-test
[ longlong-2{ -5 1 } ]
[ double-2{ -5.0 1.0 } double-2 longlong-2 test-vconvert ] unit-test
! TODO we should be able to do double->int pack
! [ int-8{ -5 1 2 6 12 34 -56 78 } ]
[ double-4{ -5.0 1.0 2.0 6.0 } double-4{ 12.0 34.0 -56.0 78.0 } double-4 int-8 test-vconvert ]
! [ int-4{ -5 1 12 34 } ]
[ double-2{ -5.0 1.0 } double-2{ 12.0 34.0 } double-2 int-4 test-vconvert ]
[ error>> bad-vconvert? ] must-fail-with
[ float-4{ -1.25 2.0 3.0 -4.0 } ]
@ -76,10 +66,10 @@ MACRO:: test-vconvert ( from-type to-type -- )
[ short-8{ -1 2 3 -32768 5 32767 -7 32767 } ]
[ int-4{ -1 2 3 -40000 } int-4{ 5 60000 -7 80000 } int-4 short-8 test-vconvert ] unit-test
[ short-16{ -1 2 3 -32768 3 2 1 0 5 32767 -7 32767 7 6 5 4 } ]
[ short-8{ -1 2 3 -32768 5 32767 -7 32767 } ]
[
int-8{ -1 2 3 -40000 3 2 1 0 }
int-8{ 5 60000 -7 80000 7 6 5 4 } int-8 short-16 test-vconvert
int-4{ -1 2 3 -40000 }
int-4{ 5 60000 -7 80000 } int-4 short-8 test-vconvert
] unit-test
[ ushort-8{ 0 2 3 0 5 60000 0 65535 } ]
@ -97,15 +87,6 @@ MACRO:: test-vconvert ( from-type to-type -- )
uchar-16 ushort-8 test-vconvert
] unit-test
! TODO we should be able to do 256->128 pack
! [ float-4{ -1.25 2.0 3.0 -4.0 } ]
[ double-4{ -1.25 2.0 3.0 -4.0 } double-4 float-4 test-vconvert ]
[ error>> bad-vconvert? ] must-fail-with
! [ int-4{ -1 2 3 -4 } ]
[ longlong-4{ -1 2 3 -4 } longlong-4 int-4 test-vconvert ]
[ error>> bad-vconvert? ] must-fail-with
[ double-2{ -1.25 2.0 } double-2{ 3.0 -4.0 } ]
[ float-4{ -1.25 2.0 3.0 -4.0 } float-4 double-2 test-vconvert ] unit-test
@ -121,8 +102,8 @@ MACRO:: test-vconvert ( from-type to-type -- )
[ ulonglong-2{ 1 2 } ulonglong-2{ 3 4 } ]
[ uint-4{ 1 2 3 4 } uint-4 ulonglong-2 test-vconvert ] unit-test
[ longlong-4{ 1 2 3 4 } longlong-4{ 3 4 5 6 } ]
[ uint-8{ 1 2 3 4 3 4 5 6 } uint-8 longlong-4 test-vconvert ] unit-test
[ longlong-2{ 1 2 } longlong-2{ 3 4 } ]
[ uint-4{ 1 2 3 4 } uint-4 longlong-2 test-vconvert ] unit-test
[ int-4{ 1 2 -3 -4 } int-4{ 5 -6 7 -8 } ]
[ short-8{ 1 2 -3 -4 5 -6 7 -8 } short-8 int-4 test-vconvert ] unit-test
@ -130,13 +111,8 @@ MACRO:: test-vconvert ( from-type to-type -- )
[ uint-4{ 1 2 3 4 } uint-4{ 5 6 7 8 } ]
[ ushort-8{ 1 2 3 4 5 6 7 8 } ushort-8 uint-4 test-vconvert ] unit-test
[ longlong-4{ 1 2 3 4 } longlong-4{ 3 4 5 6 } ]
[ uint-8{ 1 2 3 4 3 4 5 6 } uint-8 longlong-4 test-vconvert ] unit-test
! TODO we should be able to do 128->256 unpack
! [ longlong-4{ 1 2 3 4 } ]
[ uint-4{ 1 2 3 4 } uint-4 longlong-4 test-vconvert ]
[ error>> bad-vconvert? ] must-fail-with
[ longlong-2{ 1 2 } longlong-2{ 3 4 } ]
[ uint-4{ 1 2 3 4 } uint-4 longlong-2 test-vconvert ] unit-test
! TODO we should be able to do multi-tier pack/unpack
! [ longlong-2{ 1 2 } longlong-2{ 3 4 } longlong-2{ 5 6 } longlong-2{ 7 8 } ]

View File

@ -1,8 +1,10 @@
! (c)Joe Groff bsd license
USING: accessors alien.c-types arrays assocs classes combinators
combinators.short-circuit cords fry kernel locals math
math.vectors math.vectors.conversion.backend sequences ;
FROM: alien.c-types => char uchar short ushort int uint longlong ulonglong float double ;
combinators.short-circuit fry kernel locals math
math.vectors math.vectors.simd math.vectors.simd.intrinsics sequences ;
FROM: alien.c-types =>
char uchar short ushort int uint longlong ulonglong
float double ;
IN: math.vectors.conversion
ERROR: bad-vconvert from-type to-type ;
@ -30,11 +32,11 @@ ERROR: bad-vconvert-input value expected-type ;
}
{
[ from-element float-type? ]
[ [ to-type (v>integer) ] ]
[ from-type new simd-rep '[ underlying>> _ (simd-v>integer) to-type boa ] ]
}
{
[ to-element float-type? ]
[ [ to-type (v>float) ] ]
[ from-type new simd-rep '[ underlying>> _ (simd-v>float) to-type boa ] ]
}
} cond
[ from-type check-vconvert-type ] prepose ;
@ -47,10 +49,18 @@ ERROR: bad-vconvert-input value expected-type ;
} 0|| [ from-type to-type bad-vconvert ] when ;
:: [[vpack-unsigned]] ( from-type to-type -- quot )
[ [ from-type check-vconvert-type ] bi@ to-type (vpack-unsigned) ] ;
from-type new simd-rep
'[
[ from-type check-vconvert-type underlying>> ] bi@
_ (simd-vpack-unsigned) to-type boa
] ;
:: [[vpack-signed]] ( from-type to-type -- quot )
[ [ from-type check-vconvert-type ] bi@ to-type (vpack-signed) ] ;
from-type new simd-rep
'[
[ from-type check-vconvert-type underlying>> ] bi@
_ (simd-vpack-signed) to-type boa
] ;
:: [vpack] ( from-element to-element from-size to-size from-type to-type -- quot )
from-size to-size /i log2 :> steps
@ -68,9 +78,11 @@ ERROR: bad-vconvert-input value expected-type ;
} 0|| [ from-type to-type bad-vconvert ] when ;
:: [[vunpack]] ( from-type to-type -- quot )
[
from-type check-vconvert-type
[ to-type (vunpack-head) ] [ to-type (vunpack-tail) ] bi
from-type new simd-rep
'[
from-type check-vconvert-type underlying>> _
[ (simd-vunpack-head) to-type boa ]
[ (simd-vunpack-tail) to-type boa ] 2bi
] ;
:: [vunpack] ( from-element to-element from-size to-size from-type to-type -- quot )
@ -81,8 +93,8 @@ ERROR: bad-vconvert-input value expected-type ;
PRIVATE>
MACRO:: vconvert ( from-type to-type -- )
from-type new [ element-type ] [ byte-length ] bi :> ( from-element from-length )
to-type new [ element-type ] [ byte-length ] bi :> ( to-element to-length )
from-type new [ simd-element-type ] [ byte-length ] bi :> ( from-element from-length )
to-type new [ simd-element-type ] [ byte-length ] bi :> ( to-element to-length )
from-element heap-size :> from-size
to-element heap-size :> to-size

View File

@ -0,0 +1,87 @@
USING: accessors alien.c-types arrays byte-arrays
cpu.architecture effects functors generalizations kernel lexer
math math.vectors.simd math.vectors.simd.intrinsics parser
prettyprint.custom quotations sequences sequences.cords words ;
IN: math.vectors.simd.cords
<<
<PRIVATE
FUNCTOR: (define-simd-128-cord) ( A/2 A -- )
A-rep IS ${A/2}-rep
>A/2 IS >${A/2}
A/2-boa IS ${A/2}-boa
A/2-with IS ${A/2}-with
A/2-cast IS ${A/2}-cast
>A DEFINES >${A}
A-boa DEFINES ${A}-boa
A-with DEFINES ${A}-with
A-cast DEFINES ${A}-cast
A{ DEFINES ${A}{
N [ A-rep rep-length ]
BOA-EFFECT [ N 2 * "n" <repetition> >array { "v" } <effect> ]
WHERE
: >A ( seq -- A )
[ N head >A/2 ]
[ N tail >A/2 ] bi cord-append ;
\ A-boa
{ N ndip A/2-boa cord-append } { A/2-boa } >quotation prefix >quotation
BOA-EFFECT define-inline
: A-with ( n -- v )
[ A/2-with ] [ A/2-with ] bi cord-append ;
: A-cast ( v -- v' )
[ A/2-cast ] cord-map ;
M: A >pprint-sequence ;
M: A pprint* pprint-object ;
M: A pprint-delims drop \ A{ \ } ;
SYNTAX: A{ \ } [ >A ] parse-literal ;
<c-type>
byte-array >>class
A >>boxed-class
[
[ A-rep alien-vector A/2 boa ]
[ 16 + A-rep alien-vector A/2 boa ] 2bi cord-append
] >>getter
[
[ [ head>> underlying>> ] 2dip A-rep set-alien-vector ]
[ [ tail>> underlying>> ] 2dip 16 + A-rep set-alien-vector ] 3bi
] >>setter
32 >>size
16 >>align
A-rep >>rep
\ A typedef
;FUNCTOR
: define-simd-128-cord ( A/2 T -- )
[ define-specialized-cord ]
[ create-in (define-simd-128-cord) ] 2bi ;
SYNTAX: SIMD-128-CORD:
scan-word scan define-simd-128-cord ;
PRIVATE>
>>
SIMD-128-CORD: char-16 char-32
SIMD-128-CORD: uchar-16 uchar-32
SIMD-128-CORD: short-8 short-16
SIMD-128-CORD: ushort-8 ushort-16
SIMD-128-CORD: int-4 int-8
SIMD-128-CORD: uint-4 uint-8
SIMD-128-CORD: longlong-2 longlong-4
SIMD-128-CORD: ulonglong-2 ulonglong-4
SIMD-128-CORD: float-4 float-8
SIMD-128-CORD: double-2 double-4

View File

@ -1 +0,0 @@
Slava Pestov

View File

@ -1,524 +0,0 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors assocs byte-arrays classes classes.algebra effects fry
functors generalizations kernel literals locals math math.functions
math.vectors math.vectors.private math.vectors.simd.intrinsics
math.vectors.conversion.backend
math.vectors.specialization parser prettyprint.custom sequences
sequences.private strings words definitions macros cpu.architecture
namespaces arrays quotations combinators combinators.short-circuit sets
layouts ;
QUALIFIED-WITH: alien.c-types c
QUALIFIED: math.private
IN: math.vectors.simd.functor
ERROR: bad-length got expected ;
: vector-true-value ( class -- value )
{
{ [ dup integer class<= ] [ drop -1 ] }
{ [ dup float class<= ] [ drop -1 bits>double ] }
} cond ; foldable
: vector-false-value ( class -- value )
{
{ [ dup integer class<= ] [ drop 0 ] }
{ [ dup float class<= ] [ drop 0.0 ] }
} cond ; foldable
: boolean>element ( bool/elt class -- elt )
swap {
{ t [ vector-true-value ] }
{ f [ vector-false-value ] }
[ nip ]
} case ; inline
MACRO: simd-boa ( rep class -- simd-array )
[ rep-components ] [ new ] bi* '[ _ _ nsequence ] ;
: can-be-unboxed? ( type -- ? )
{
{ c:float [ \ math.private:float+ "intrinsic" word-prop ] }
{ c:double [ \ math.private:float+ "intrinsic" word-prop ] }
[ c:heap-size cell < ]
} case ;
: simd-boa-fast? ( rep -- ? )
[ dup rep-gather-word supported-simd-op? ]
[ rep-component-type can-be-unboxed? ]
bi and ;
:: define-boa-custom-inlining ( word rep class -- )
word [
drop
rep simd-boa-fast? [
[ rep (simd-boa) class boa ]
] [ word def>> ] if
] "custom-inlining" set-word-prop ;
: simd-with ( rep class x -- simd-array )
[ rep-components ] [ new ] [ '[ _ ] ] tri* swap replicate-as ; inline
: simd-with/nth-fast? ( rep -- ? )
[ \ (simd-vshuffle-elements) supported-simd-op? ]
[ rep-component-type can-be-unboxed? ]
bi and ;
:: define-with-custom-inlining ( word rep class -- )
word [
drop
rep simd-with/nth-fast? [
[ rep rep-coerce rep (simd-with) class boa ]
] [ word def>> ] if
] "custom-inlining" set-word-prop ;
: simd-nth-fast ( rep -- quot )
[ rep-components ] keep
'[ swap _ '[ _ _ (simd-select) ] 2array ] map-index
'[ swap >fixnum _ case ] ;
: simd-nth-slow ( rep -- quot )
rep-component-type dup c:c-type-getter-boxer c:array-accessor ;
MACRO: simd-nth ( rep -- x )
dup simd-with/nth-fast? [ simd-nth-fast ] [ simd-nth-slow ] if ;
: boa-effect ( rep n -- effect )
[ rep-components ] dip *
[ CHAR: a + 1string ] map
{ "simd-vector" } <effect> ;
: supported-simd-ops ( assoc rep -- assoc' )
[ simd-ops get ] dip
'[ nip _ swap supported-simd-op? ] assoc-filter
'[ drop _ key? ] assoc-filter ;
ERROR: bad-schema op schema ;
:: op-wrapper ( op specials schemas -- wrapper )
op {
[ specials at ]
[ word-schema schemas at ]
[ dup word-schema bad-schema ]
} 1|| ;
: low-level-ops ( simd-ops specials schemas -- alist )
'[ 1quotation over _ _ op-wrapper [ ] 2sequence ] assoc-map ;
:: high-level-ops ( ctor elt-class -- assoc )
! Some SIMD operations are defined in terms of others.
{
{ vbroadcast [ swap nth ctor execute ] }
{ n+v [ [ ctor execute ] dip v+ ] }
{ v+n [ ctor execute v+ ] }
{ n-v [ [ ctor execute ] dip v- ] }
{ v-n [ ctor execute v- ] }
{ n*v [ [ ctor execute ] dip v* ] }
{ v*n [ ctor execute v* ] }
{ n/v [ [ ctor execute ] dip v/ ] }
{ v/n [ ctor execute v/ ] }
{ norm-sq [ dup v. assert-positive ] }
{ norm [ norm-sq sqrt ] }
{ normalize [ dup norm v/n ] }
}
! To compute dot product and distance with integer vectors, we
! have to do things less efficiently, with integer overflow checks,
! in the general case.
elt-class float = [ { distance [ v- norm ] } suffix ] when ;
TUPLE: simd class elt-class ops special-wrappers schema-wrappers ctor rep ;
: define-simd ( simd -- )
dup rep>> rep-component-type c:c-type-boxed-class >>elt-class
{
[ class>> ]
[ elt-class>> ]
[ [ ops>> ] [ special-wrappers>> ] [ schema-wrappers>> ] tri low-level-ops ]
[ rep>> supported-simd-ops ]
[ [ ctor>> ] [ elt-class>> ] bi high-level-ops assoc-union ]
} cleave
specialize-vector-words ;
:: define-simd-128-type ( class rep -- )
c:<c-type>
byte-array >>class
class >>boxed-class
[ rep alien-vector class boa ] >>getter
[ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
16 >>size
16 >>align
16 >>align-first
rep >>rep
class c:typedef ;
: (define-simd-128) ( simd -- )
simd-ops get >>ops
[ define-simd ]
[ [ class>> ] [ rep>> ] bi define-simd-128-type ] bi ;
FUNCTOR: define-simd-128 ( T -- )
N [ 16 T c:heap-size /i ]
A DEFINES-CLASS ${T}-${N}
A-boa DEFINES ${A}-boa
A-with DEFINES ${A}-with
A-cast DEFINES ${A}-cast
>A DEFINES >${A}
A{ DEFINES ${A}{
SET-NTH [ T dup c:c-setter c:array-accessor ]
A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op
A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
A-v-conversion-op DEFINES-PRIVATE ${A}-v-conversion-op
A-vv-conversion-op DEFINES-PRIVATE ${A}-vv-conversion-op
A-element-class [ A-rep rep-component-type c:c-type-boxed-class ]
WHERE
TUPLE: A
{ underlying byte-array read-only initial: $[ 16 <byte-array> ] } ;
INSTANCE: A simd-128
M: A clone underlying>> clone \ A boa ; inline
M: A length drop N ; inline
M: A equal?
over \ A instance? [ v= vall? ] [ 2drop f ] if ;
M: A nth-unsafe underlying>> A-rep simd-nth ; inline
M: A set-nth-unsafe
[ A-element-class boolean>element ] 2dip
underlying>> SET-NTH call ; inline
: >A ( seq -- simd-array ) \ A new clone-like ;
M: A like drop dup \ A instance? [ >A ] unless ; inline
M: A new-underlying drop \ A boa ; inline
M: A new-sequence
drop dup N =
[ drop 16 <byte-array> \ A boa ]
[ N bad-length ]
if ; inline
M: A c:byte-length underlying>> length ; inline
M: A element-type drop A-rep rep-component-type ;
M: A pprint-delims drop \ A{ \ } ;
M: A >pprint-sequence ;
M: A pprint* pprint-object ;
SYNTAX: A{ \ } [ >A ] parse-literal ;
: A-with ( x -- simd-array ) [ A-rep A ] dip simd-with ;
\ A-with \ A-rep \ A define-with-custom-inlining
\ A-boa [ \ A-rep \ A simd-boa ] \ A-rep 1 boa-effect define-declared
\ A-rep rep-gather-word [
\ A-boa \ A-rep \ A define-boa-custom-inlining
] when
: A-cast ( simd-array -- simd-array' )
underlying>> \ A boa ; inline
INSTANCE: A sequence
<PRIVATE
: A-vv->v-op ( v1 v2 quot -- v3 )
[ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline
: A-vn->v-op ( v1 v2 quot -- v3 )
[ [ underlying>> ] dip A-rep ] dip call \ A boa ; inline
: A-vv->n-op ( v1 v2 quot -- n )
[ [ underlying>> ] bi@ A-rep ] dip call ; inline
: A-v->v-op ( v1 quot -- v2 )
[ underlying>> A-rep ] dip call \ A boa ; inline
: A-v->n-op ( v quot -- n )
[ underlying>> A-rep ] dip call ; inline
: A-v-conversion-op ( v1 to-type quot -- v2 )
swap [ underlying>> A-rep ] [ call ] [ '[ _ boa ] call( u -- v ) ] tri* ; inline
: A-vv-conversion-op ( v1 v2 to-type quot -- v2 )
swap {
[ underlying>> ]
[ underlying>> A-rep ]
[ call ]
[ '[ _ boa ] call( u -- v ) ]
} spread ; inline
simd new
\ A >>class
\ A-with >>ctor
\ A-rep >>rep
{
{ (v>float) A-v-conversion-op }
{ (v>integer) A-v-conversion-op }
{ (vpack-signed) A-vv-conversion-op }
{ (vpack-unsigned) A-vv-conversion-op }
{ (vunpack-head) A-v-conversion-op }
{ (vunpack-tail) A-v-conversion-op }
} >>special-wrappers
{
{ { +vector+ +vector+ -> +vector+ } A-vv->v-op }
{ { +vector+ +any-vector+ -> +vector+ } A-vv->v-op }
{ { +vector+ +scalar+ -> +vector+ } A-vn->v-op }
{ { +vector+ +literal+ -> +vector+ } A-vn->v-op }
{ { +vector+ +vector+ -> +scalar+ } A-vv->n-op }
{ { +vector+ +vector+ -> +boolean+ } A-vv->n-op }
{ { +vector+ -> +vector+ } A-v->v-op }
{ { +vector+ -> +scalar+ } A-v->n-op }
{ { +vector+ -> +boolean+ } A-v->n-op }
{ { +vector+ -> +nonnegative+ } A-v->n-op }
} >>schema-wrappers
(define-simd-128)
PRIVATE>
;FUNCTOR
! Synthesize 256-bit vectors from a pair of 128-bit vectors
SLOT: underlying1
SLOT: underlying2
:: define-simd-256-type ( class rep -- )
c:<c-type>
class >>class
class >>boxed-class
[
[ rep alien-vector ]
[ 16 + >fixnum rep alien-vector ] 2bi
class boa
] >>getter
[
[ [ underlying1>> ] 2dip rep set-alien-vector ]
[ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ]
3bi
] >>setter
32 >>size
16 >>align
16 >>align-first
rep >>rep
class c:typedef ;
: (define-simd-256) ( simd -- )
simd-ops get { vshuffle-elements vshuffle-bytes hlshift hrshift } unique assoc-diff >>ops
[ define-simd ]
[ [ class>> ] [ rep>> ] bi define-simd-256-type ] bi ;
FUNCTOR: define-simd-256 ( T -- )
N [ 32 T c:heap-size /i ]
N/2 [ N 2 /i ]
A/2 IS ${T}-${N/2}
A/2-boa IS ${A/2}-boa
A/2-with IS ${A/2}-with
A DEFINES-CLASS ${T}-${N}
A-boa DEFINES ${A}-boa
A-with DEFINES ${A}-with
A-cast DEFINES ${A}-cast
>A DEFINES >${A}
A{ DEFINES ${A}{
A-deref DEFINES-PRIVATE ${A}-deref
A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
A-v.-op DEFINES-PRIVATE ${A}-v.-op
(A-v->n-op) DEFINES-PRIVATE (${A}-v->v-op)
A-sum-op DEFINES-PRIVATE ${A}-sum-op
A-vany-op DEFINES-PRIVATE ${A}-vany-op
A-vall-op DEFINES-PRIVATE ${A}-vall-op
A-vmerge-head-op DEFINES-PRIVATE ${A}-vmerge-head-op
A-vmerge-tail-op DEFINES-PRIVATE ${A}-vmerge-tail-op
A-v-conversion-op DEFINES-PRIVATE ${A}-v-conversion-op
A-vpack-op DEFINES-PRIVATE ${A}-vpack-op
A-vunpack-head-op DEFINES-PRIVATE ${A}-vunpack-head-op
A-vunpack-tail-op DEFINES-PRIVATE ${A}-vunpack-tail-op
WHERE
SLOT: underlying1
SLOT: underlying2
TUPLE: A
{ underlying1 byte-array initial: $[ 16 <byte-array> ] read-only }
{ underlying2 byte-array initial: $[ 16 <byte-array> ] read-only } ;
INSTANCE: A simd-256
M: A clone
[ underlying1>> clone ] [ underlying2>> clone ] bi
\ A boa ; inline
M: A length drop N ; inline
M: A equal?
over \ A instance? [ v= vall? ] [ 2drop f ] if ;
: A-deref ( n seq -- n' seq' )
over N/2 < [ underlying1>> ] [ [ N/2 - ] dip underlying2>> ] if \ A/2 boa ; inline
M: A nth-unsafe A-deref nth-unsafe ; inline
M: A set-nth-unsafe A-deref set-nth-unsafe ; inline
: >A ( seq -- simd-array ) \ A new clone-like ;
M: A like drop dup \ A instance? [ >A ] unless ; inline
M: A new-sequence
drop dup N =
[ drop 16 <byte-array> 16 <byte-array> \ A boa ]
[ N bad-length ]
if ; inline
M: A c:byte-length drop 32 ; inline
M: A element-type drop A-rep rep-component-type ;
SYNTAX: A{ \ } [ >A ] parse-literal ;
M: A pprint-delims drop \ A{ \ } ;
M: A >pprint-sequence ;
M: A pprint* pprint-object ;
: A-with ( x -- simd-array )
[ A/2-with ] [ A/2-with ] bi [ underlying>> ] bi@
\ A boa ; inline
: A-boa ( ... -- simd-array )
[ A/2-boa ] N/2 ndip A/2-boa [ underlying>> ] bi@
\ A boa ; inline
\ A-rep 2 boa-effect \ A-boa set-stack-effect
: A-cast ( simd-array -- simd-array' )
[ underlying1>> ] [ underlying2>> ] bi \ A boa ; inline
INSTANCE: A sequence
: A-vv->v-op ( v1 v2 quot -- v3 )
[ [ [ underlying1>> ] bi@ A-rep ] dip call ]
[ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
\ A boa ; inline
: A-vn->v-op ( v1 v2 quot -- v3 )
[ [ [ underlying1>> ] dip A-rep ] dip call ]
[ [ [ underlying2>> ] dip A-rep ] dip call ] 3bi
\ A boa ; inline
: A-v->v-op ( v1 combine-quot -- v2 )
[ [ underlying1>> A-rep ] dip call ]
[ [ underlying2>> A-rep ] dip call ] 2bi
\ A boa ; inline
: A-v.-op ( v1 v2 quot -- n )
[ [ [ underlying1>> ] bi@ A-rep ] dip call ]
[ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
+ ; inline
: (A-v->n-op) ( v1 quot reduce-quot -- n )
'[ [ underlying1>> ] [ underlying2>> ] bi A-rep @ A-rep ] dip call ; inline
: A-sum-op ( v1 quot -- n )
[ (simd-v+) ] (A-v->n-op) ; inline
: A-vany-op ( v1 quot -- n )
[ (simd-vbitor) ] (A-v->n-op) ; inline
: A-vall-op ( v1 quot -- n )
[ (simd-vbitand) ] (A-v->n-op) ; inline
: A-vmerge-head-op ( v1 v2 quot -- v )
drop
[ underlying1>> ] bi@
[ A-rep (simd-(vmerge-head)) ]
[ A-rep (simd-(vmerge-tail)) ] 2bi
\ A boa ; inline
: A-vmerge-tail-op ( v1 v2 quot -- v )
drop
[ underlying2>> ] bi@
[ A-rep (simd-(vmerge-head)) ]
[ A-rep (simd-(vmerge-tail)) ] 2bi
\ A boa ; inline
: A-v-conversion-op ( v1 to-type quot -- v )
swap [
[ [ underlying1>> A-rep ] dip call ]
[ [ underlying2>> A-rep ] dip call ] 2bi
] dip '[ _ boa ] call( u1 u2 -- v ) ; inline
: A-vpack-op ( v1 v2 to-type quot -- v )
swap [
'[ [ underlying1>> ] [ underlying2>> ] bi A-rep @ ] bi*
] dip '[ _ boa ] call( u1 u2 -- v ) ; inline
: A-vunpack-head-op ( v1 to-type quot -- v )
'[
underlying1>>
[ A-rep @ ]
[ A-rep (simd-(vunpack-tail)) ] bi
] dip '[ _ boa ] call( u1 u2 -- v ) ; inline
: A-vunpack-tail-op ( v1 to-type quot -- v )
'[
underlying2>>
[ A-rep (simd-(vunpack-head)) ]
[ A-rep @ ] bi
] dip '[ _ boa ] call( u1 u2 -- v ) ; inline
simd new
\ A >>class
\ A-with >>ctor
\ A-rep >>rep
{
{ v. A-v.-op }
{ sum A-sum-op }
{ vnone? A-vany-op }
{ vany? A-vany-op }
{ vall? A-vall-op }
{ (vmerge-head) A-vmerge-head-op }
{ (vmerge-tail) A-vmerge-tail-op }
{ (v>integer) A-v-conversion-op }
{ (v>float) A-v-conversion-op }
{ (vpack-signed) A-vpack-op }
{ (vpack-unsigned) A-vpack-op }
{ (vunpack-head) A-vunpack-head-op }
{ (vunpack-tail) A-vunpack-tail-op }
} >>special-wrappers
{
{ { +vector+ +vector+ -> +vector+ } A-vv->v-op }
{ { +vector+ +scalar+ -> +vector+ } A-vn->v-op }
{ { +vector+ +literal+ -> +vector+ } A-vn->v-op }
{ { +vector+ -> +vector+ } A-v->v-op }
} >>schema-wrappers
(define-simd-256)
;FUNCTOR

View File

@ -1 +0,0 @@
Slava Pestov

View File

@ -1,18 +0,0 @@
IN: math.vectors.simd.intrinsics.tests
USING: math.vectors.simd.intrinsics cpu.architecture tools.test ;
[ 16 ] [ uchar-16-rep rep-components ] unit-test
[ 16 ] [ char-16-rep rep-components ] unit-test
[ 8 ] [ ushort-8-rep rep-components ] unit-test
[ 8 ] [ short-8-rep rep-components ] unit-test
[ 4 ] [ uint-4-rep rep-components ] unit-test
[ 4 ] [ int-4-rep rep-components ] unit-test
[ 4 ] [ float-4-rep rep-components ] unit-test
[ 2 ] [ double-2-rep rep-components ] unit-test
{ 4 1 } [ uint-4-rep (simd-boa) ] must-infer-as
{ 4 1 } [ int-4-rep (simd-boa) ] must-infer-as
{ 4 1 } [ float-4-rep (simd-boa) ] must-infer-as
{ 2 1 } [ double-2-rep (simd-boa) ] must-infer-as

View File

@ -1,207 +1,236 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: alien alien.c-types alien.data assocs combinators
cpu.architecture compiler.cfg.comparisons fry generalizations
kernel libc macros math
math.vectors.conversion.backend
sequences sets effects accessors namespaces
lexer parser vocabs.parser words arrays math.vectors ;
! (c)2009 Slava Pestov, Joe Groff bsd license
USING: accessors alien alien.c-types alien.data combinators
sequences.cords cpu.architecture fry generalizations kernel
libc locals math math.libm math.order math.ranges math.vectors
sequences sequences.private specialized-arrays vocabs.loader ;
QUALIFIED-WITH: alien.c-types c
SPECIALIZED-ARRAYS:
c:char c:short c:int c:longlong
c:uchar c:ushort c:uint c:ulonglong
c:float c:double ;
IN: math.vectors.simd.intrinsics
ERROR: bad-simd-call word ;
<<
: simd-effect ( word -- effect )
stack-effect [ in>> "rep" suffix ] [ out>> ] bi <effect> ;
: simd-conversion-effect ( word -- effect )
stack-effect [ in>> but-last "rep" suffix ] [ out>> ] bi <effect> ;
SYMBOL: simd-ops
V{ } clone simd-ops set-global
: (SIMD-OP:) ( accum quot -- accum )
[
scan-word dup name>> "(simd-" ")" surround create-in
[ nip dup '[ _ bad-simd-call ] define ]
] dip
'[ _ dip set-stack-effect ]
[ 2array simd-ops get push ]
2tri ; inline
SYNTAX: SIMD-OP:
[ simd-effect ] (SIMD-OP:) ;
SYNTAX: SIMD-CONVERSION-OP:
[ simd-conversion-effect ] (SIMD-OP:) ;
>>
SIMD-OP: v+
SIMD-OP: v-
SIMD-OP: vneg
SIMD-OP: v+-
SIMD-OP: vs+
SIMD-OP: vs-
SIMD-OP: vs*
SIMD-OP: v*
SIMD-OP: v/
SIMD-OP: vmin
SIMD-OP: vmax
SIMD-OP: v.
SIMD-OP: vsqrt
SIMD-OP: sum
SIMD-OP: vabs
SIMD-OP: vbitand
SIMD-OP: vbitandn
SIMD-OP: vbitor
SIMD-OP: vbitxor
SIMD-OP: vbitnot
SIMD-OP: vand
SIMD-OP: vandn
SIMD-OP: vor
SIMD-OP: vxor
SIMD-OP: vnot
SIMD-OP: vlshift
SIMD-OP: vrshift
SIMD-OP: hlshift
SIMD-OP: hrshift
SIMD-OP: vshuffle-elements
SIMD-OP: vshuffle-bytes
SIMD-OP: (vmerge-head)
SIMD-OP: (vmerge-tail)
SIMD-OP: v<=
SIMD-OP: v<
SIMD-OP: v=
SIMD-OP: v>
SIMD-OP: v>=
SIMD-OP: vunordered?
SIMD-OP: vany?
SIMD-OP: vall?
SIMD-OP: vnone?
SIMD-CONVERSION-OP: (v>float)
SIMD-CONVERSION-OP: (v>integer)
SIMD-CONVERSION-OP: (vpack-signed)
SIMD-CONVERSION-OP: (vpack-unsigned)
SIMD-CONVERSION-OP: (vunpack-head)
SIMD-CONVERSION-OP: (vunpack-tail)
: (simd-with) ( x rep -- v ) bad-simd-call ;
: (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
: (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ;
: (simd-select) ( v n rep -- x ) bad-simd-call ;
: assert-positive ( x -- y ) ;
: alien-vector ( c-ptr n rep -- value )
! Inefficient version for when intrinsics are missing
[ swap <displaced-alien> ] dip rep-size memory>byte-array ;
<PRIVATE
: set-alien-vector ( value c-ptr n rep -- )
! Inefficient version for when intrinsics are missing
: >bitwise-vector-rep ( rep -- rep' )
{
{ float-4-rep [ uint-4-rep ] }
{ double-2-rep [ ulonglong-2-rep ] }
[ ]
} case ; foldable
: >uint-vector-rep ( rep -- rep' )
{
{ longlong-2-rep [ ulonglong-2-rep ] }
{ int-4-rep [ uint-4-rep ] }
{ short-8-rep [ ushort-8-rep ] }
{ char-16-rep [ uchar-16-rep ] }
[ ]
} case ; foldable
: >int-vector-rep ( rep -- rep' )
{
{ float-4-rep [ int-4-rep ] }
{ double-2-rep [ longlong-2-rep ] }
} case ; foldable
: >float-vector-rep ( rep -- rep' )
{
{ int-4-rep [ float-4-rep ] }
{ longlong-2-rep [ double-2-rep ] }
} case ; foldable
: [byte>rep-array] ( rep -- class )
{
{ char-16-rep [ [ byte-array>char-array ] ] }
{ uchar-16-rep [ [ byte-array>uchar-array ] ] }
{ short-8-rep [ [ byte-array>short-array ] ] }
{ ushort-8-rep [ [ byte-array>ushort-array ] ] }
{ int-4-rep [ [ byte-array>int-array ] ] }
{ uint-4-rep [ [ byte-array>uint-array ] ] }
{ longlong-2-rep [ [ byte-array>longlong-array ] ] }
{ ulonglong-2-rep [ [ byte-array>ulonglong-array ] ] }
{ float-4-rep [ [ byte-array>float-array ] ] }
{ double-2-rep [ [ byte-array>double-array ] ] }
} case ; foldable
: [>rep-array] ( rep -- class )
{
{ char-16-rep [ [ >char-array ] ] }
{ uchar-16-rep [ [ >uchar-array ] ] }
{ short-8-rep [ [ >short-array ] ] }
{ ushort-8-rep [ [ >ushort-array ] ] }
{ int-4-rep [ [ >int-array ] ] }
{ uint-4-rep [ [ >uint-array ] ] }
{ longlong-2-rep [ [ >longlong-array ] ] }
{ ulonglong-2-rep [ [ >ulonglong-array ] ] }
{ float-4-rep [ [ >float-array ] ] }
{ double-2-rep [ [ >double-array ] ] }
} case ; foldable
: [<rep-array>] ( rep -- class )
{
{ char-16-rep [ [ 16 (char-array) ] ] }
{ uchar-16-rep [ [ 16 (uchar-array) ] ] }
{ short-8-rep [ [ 8 (short-array) ] ] }
{ ushort-8-rep [ [ 8 (ushort-array) ] ] }
{ int-4-rep [ [ 4 (int-array) ] ] }
{ uint-4-rep [ [ 4 (uint-array) ] ] }
{ longlong-2-rep [ [ 2 (longlong-array) ] ] }
{ ulonglong-2-rep [ [ 2 (ulonglong-array) ] ] }
{ float-4-rep [ [ 4 (float-array) ] ] }
{ double-2-rep [ [ 2 (double-array) ] ] }
} case ; foldable
: rep-tf-values ( rep -- t f )
float-vector-rep? [ -1 bits>double 0.0 ] [ -1 0 ] if ;
: >rep-array ( a rep -- a' )
[byte>rep-array] call( a -- a' ) ; inline
: 2>rep-array ( a b rep -- a' b' )
[byte>rep-array] '[ _ call( a -- a' ) ] bi@ ; inline
: <rep-array> ( rep -- a' )
[<rep-array>] call( -- a' ) ; inline
: components-map ( a rep quot -- c )
[ >rep-array ] dip map underlying>> ; inline
: components-2map ( a b rep quot -- c )
[ 2>rep-array ] dip 2map underlying>> ; inline
: components-reduce ( a rep quot -- x )
[ >rep-array [ ] ] dip map-reduce ; inline
: bitwise-components-map ( a rep quot -- c )
[ >bitwise-vector-rep >rep-array ] dip map underlying>> ; inline
: bitwise-components-2map ( a b rep quot -- c )
[ >bitwise-vector-rep 2>rep-array ] dip 2map underlying>> ; inline
: bitwise-components-reduce ( a rep quot -- x )
[ >bitwise-vector-rep >rep-array [ ] ] dip map-reduce ; inline
:: (vshuffle) ( a elts rep -- c )
a rep >rep-array :> a'
rep <rep-array> :> c'
elts [| from to |
from rep rep-length 1 - bitand
a' nth-unsafe
to c' set-nth-unsafe
] each-index
c' underlying>> ; inline
PRIVATE>
: (simd-v+) ( a b rep -- c ) [ + ] components-2map ;
: (simd-v-) ( a b rep -- c ) [ - ] components-2map ;
: (simd-vneg) ( a rep -- c ) [ neg ] components-map ;
:: (simd-v+-) ( a b rep -- c )
a b rep 2>rep-array :> ( a' b' )
rep <rep-array> :> c'
0 rep rep-length 1 - 2 <range> [| n |
n a' nth-unsafe n b' nth-unsafe -
n c' set-nth-unsafe
n 1 + a' nth-unsafe n 1 + b' nth-unsafe +
n 1 + c' set-nth-unsafe
] each
c' underlying>> ;
: (simd-vs+) ( a b rep -- c )
dup rep-component-type '[ + _ c-type-clamp ] components-2map ;
: (simd-vs-) ( a b rep -- c )
dup rep-component-type '[ - _ c-type-clamp ] components-2map ;
: (simd-vs*) ( a b rep -- c )
dup rep-component-type '[ * _ c-type-clamp ] components-2map ;
: (simd-v*) ( a b rep -- c ) [ * ] components-2map ;
: (simd-v/) ( a b rep -- c ) [ / ] components-2map ;
: (simd-vmin) ( a b rep -- c ) [ min ] components-2map ;
: (simd-vmax) ( a b rep -- c ) [ max ] components-2map ;
: (simd-v.) ( a b rep -- n )
[ 2>rep-array [ [ first ] bi@ * ] 2keep ] keep
1 swap rep-length [a,b) [ '[ _ swap nth-unsafe ] bi@ * + ] with with each ;
: (simd-vsqrt) ( a rep -- c ) [ fsqrt ] components-map ;
: (simd-sum) ( a rep -- n ) [ + ] components-reduce ;
: (simd-vabs) ( a rep -- c ) [ abs ] components-map ;
: (simd-vbitand) ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
: (simd-vbitandn) ( a b rep -- c ) [ [ bitnot ] dip bitand ] bitwise-components-2map ;
: (simd-vbitor) ( a b rep -- c ) [ bitor ] bitwise-components-2map ;
: (simd-vbitxor) ( a b rep -- c ) [ bitxor ] bitwise-components-2map ;
: (simd-vbitnot) ( a rep -- c ) [ bitnot ] bitwise-components-map ;
: (simd-vand) ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
: (simd-vandn) ( a b rep -- c ) [ [ bitnot ] dip bitand ] bitwise-components-2map ;
: (simd-vor) ( a b rep -- c ) [ bitor ] bitwise-components-2map ;
: (simd-vxor) ( a b rep -- c ) [ bitxor ] bitwise-components-2map ;
: (simd-vnot) ( a rep -- c ) [ bitnot ] bitwise-components-map ;
: (simd-vlshift) ( a n rep -- c ) swap '[ _ shift ] bitwise-components-map ;
: (simd-vrshift) ( a n rep -- c ) swap '[ _ neg shift ] bitwise-components-map ;
: (simd-hlshift) ( a n rep -- c )
drop head-slice* 16 0 pad-head ;
: (simd-hrshift) ( a n rep -- c )
drop tail-slice 16 0 pad-tail ;
: (simd-vshuffle-elements) ( a n rep -- c ) [ rep-length 0 pad-tail ] keep (vshuffle) ;
: (simd-vshuffle-bytes) ( a b rep -- c ) drop uchar-16-rep (vshuffle) ;
:: (simd-vmerge-head) ( a b rep -- c )
a b rep 2>rep-array :> ( a' b' )
rep <rep-array> :> c'
rep rep-length 2 /i iota [| n |
n a' nth-unsafe n 2 * c' set-nth-unsafe
n b' nth-unsafe n 2 * 1 + c' set-nth-unsafe
] each
c' underlying>> ;
:: (simd-vmerge-tail) ( a b rep -- c )
a b rep 2>rep-array :> ( a' b' )
rep <rep-array> :> c'
rep rep-length 2 /i :> len
len iota [| n |
n len + a' nth-unsafe n 2 * c' set-nth-unsafe
n len + b' nth-unsafe n 2 * 1 + c' set-nth-unsafe
] each
c' underlying>> ;
: (simd-v<=) ( a b rep -- c )
dup rep-tf-values '[ <= _ _ ? ] components-2map ;
: (simd-v<) ( a b rep -- c )
dup rep-tf-values '[ < _ _ ? ] components-2map ;
: (simd-v=) ( a b rep -- c )
dup rep-tf-values '[ = _ _ ? ] components-2map ;
: (simd-v>) ( a b rep -- c )
dup rep-tf-values '[ > _ _ ? ] components-2map ;
: (simd-v>=) ( a b rep -- c )
dup rep-tf-values '[ >= _ _ ? ] components-2map ;
: (simd-vunordered?) ( a b rep -- c )
dup rep-tf-values '[ unordered? _ _ ? ] components-2map ;
: (simd-vany?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? not ;
: (simd-vall?) ( a rep -- ? ) [ bitand ] bitwise-components-reduce zero? not ;
: (simd-vnone?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? ;
: (simd-v>float) ( a rep -- c )
[ >rep-array [ >float ] ] [ >float-vector-rep <rep-array> ] bi map-as underlying>> ;
: (simd-v>integer) ( a rep -- c )
[ >rep-array [ >integer ] ] [ >int-vector-rep <rep-array> ] bi map-as underlying>> ;
: (simd-vpack-signed) ( a b rep -- c )
[ 2>rep-array cord-append ]
[ narrow-vector-rep [ <rep-array> ] [ rep-component-type ] bi ] bi
'[ _ c-type-clamp ] swap map-as underlying>> ;
: (simd-vpack-unsigned) ( a b rep -- c )
[ 2>rep-array cord-append ]
[ narrow-vector-rep >uint-vector-rep [ <rep-array> ] [ rep-component-type ] bi ] bi
'[ _ c-type-clamp ] swap map-as underlying>> ;
: (simd-vunpack-head) ( a rep -- c )
[ >rep-array ] [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ] bi
[ head-slice ] dip call( a' -- c' ) underlying>> ;
: (simd-vunpack-tail) ( a rep -- c )
[ >rep-array ] [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ] bi
[ tail-slice ] dip call( a' -- c' ) underlying>> ;
: (simd-with) ( n rep -- v )
[ rep-length iota swap '[ _ ] ] [ <rep-array> ] bi replicate-as
underlying>> ;
: (simd-gather-2) ( m n rep -- v ) <rep-array> [ 2 set-firstn ] keep underlying>> ;
: (simd-gather-4) ( m n o p rep -- v ) <rep-array> [ 4 set-firstn ] keep underlying>> ;
: (simd-select) ( a n rep -- x ) [ swap ] dip >rep-array nth-unsafe ;
: alien-vector ( c-ptr n rep -- value )
[ swap <displaced-alien> ] dip rep-size memory>byte-array ;
: set-alien-vector ( value c-ptr n rep -- )
[ swap <displaced-alien> swap ] dip rep-size memcpy ;
<<
"compiler.cfg.intrinsics.simd" require
"compiler.tree.propagation.simd" require
"compiler.cfg.value-numbering.simd" require
: rep-components ( rep -- n )
16 swap rep-component-type heap-size /i ; foldable
: rep-coercer ( rep -- quot )
{
{ [ dup int-vector-rep? ] [ [ >fixnum ] ] }
{ [ dup float-vector-rep? ] [ [ >float ] ] }
} cond nip ; foldable
: rep-coerce ( value rep -- value' )
rep-coercer call( value -- value' ) ; inline
CONSTANT: rep-gather-words
{
{ 2 (simd-gather-2) }
{ 4 (simd-gather-4) }
}
: rep-gather-word ( rep -- word )
rep-components rep-gather-words at ;
>>
MACRO: (simd-boa) ( rep -- quot )
{
[ rep-coercer ]
[ rep-components ]
[ ]
[ rep-gather-word ]
} cleave
'[ _ _ napply _ _ execute ] ;
GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
: (%unpack-reps) ( -- reps )
%merge-vector-reps [ int-vector-rep? ] filter
%unpack-vector-head-reps union ;
: (%abs-reps) ( -- reps )
cc> %compare-vector-reps [ int-vector-rep? ] filter
%xor-vector-reps [ float-vector-rep? ] filter
union
[ { } ] [ { uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep } union ] if-empty ;
: (%shuffle-imm-reps) ( -- reps )
%shuffle-vector-reps %shuffle-vector-imm-reps union ;
M: vector-rep supported-simd-op?
{
{ \ (simd-v+) [ %add-vector-reps ] }
{ \ (simd-vs+) [ %saturated-add-vector-reps ] }
{ \ (simd-v+-) [ %add-sub-vector-reps ] }
{ \ (simd-v-) [ %sub-vector-reps ] }
{ \ (simd-vs-) [ %saturated-sub-vector-reps ] }
{ \ (simd-vneg) [ %sub-vector-reps ] }
{ \ (simd-v*) [ %mul-vector-reps ] }
{ \ (simd-vs*) [ %saturated-mul-vector-reps ] }
{ \ (simd-v/) [ %div-vector-reps ] }
{ \ (simd-vmin) [ %min-vector-reps cc< %compare-vector-reps union ] }
{ \ (simd-vmax) [ %max-vector-reps cc> %compare-vector-reps union ] }
{ \ (simd-v.) [ %dot-vector-reps ] }
{ \ (simd-vsqrt) [ %sqrt-vector-reps ] }
{ \ (simd-sum) [ %horizontal-add-vector-reps ] }
{ \ (simd-vabs) [ (%abs-reps) ] }
{ \ (simd-vbitand) [ %and-vector-reps ] }
{ \ (simd-vbitandn) [ %andn-vector-reps ] }
{ \ (simd-vbitor) [ %or-vector-reps ] }
{ \ (simd-vbitxor) [ %xor-vector-reps ] }
{ \ (simd-vbitnot) [ %xor-vector-reps ] }
{ \ (simd-vand) [ %and-vector-reps ] }
{ \ (simd-vandn) [ %andn-vector-reps ] }
{ \ (simd-vor) [ %or-vector-reps ] }
{ \ (simd-vxor) [ %xor-vector-reps ] }
{ \ (simd-vnot) [ %xor-vector-reps ] }
{ \ (simd-vlshift) [ %shl-vector-reps ] }
{ \ (simd-vrshift) [ %shr-vector-reps ] }
{ \ (simd-hlshift) [ %horizontal-shl-vector-imm-reps ] }
{ \ (simd-hrshift) [ %horizontal-shr-vector-imm-reps ] }
{ \ (simd-vshuffle-elements) [ (%shuffle-imm-reps) ] }
{ \ (simd-vshuffle-bytes) [ %shuffle-vector-reps ] }
{ \ (simd-(vmerge-head)) [ %merge-vector-reps ] }
{ \ (simd-(vmerge-tail)) [ %merge-vector-reps ] }
{ \ (simd-(v>float)) [ %integer>float-vector-reps ] }
{ \ (simd-(v>integer)) [ %float>integer-vector-reps ] }
{ \ (simd-(vpack-signed)) [ %signed-pack-vector-reps ] }
{ \ (simd-(vpack-unsigned)) [ %unsigned-pack-vector-reps ] }
{ \ (simd-(vunpack-head)) [ (%unpack-reps) ] }
{ \ (simd-(vunpack-tail)) [ (%unpack-reps) ] }
{ \ (simd-v<=) [ unsign-rep cc<= %compare-vector-reps ] }
{ \ (simd-v<) [ unsign-rep cc< %compare-vector-reps ] }
{ \ (simd-v=) [ unsign-rep cc= %compare-vector-reps ] }
{ \ (simd-v>) [ unsign-rep cc> %compare-vector-reps ] }
{ \ (simd-v>=) [ unsign-rep cc>= %compare-vector-reps ] }
{ \ (simd-vunordered?) [ unsign-rep cc/<>= %compare-vector-reps ] }
{ \ (simd-gather-2) [ %gather-vector-2-reps ] }
{ \ (simd-gather-4) [ %gather-vector-4-reps ] }
{ \ (simd-vany?) [ %test-vector-reps ] }
{ \ (simd-vall?) [ %test-vector-reps ] }
{ \ (simd-vnone?) [ %test-vector-reps ] }
} case member? ;

View File

@ -0,0 +1,3 @@
USING: math.vectors.simd mirrors ;
IN: math.vectors.simd.mirrors
INSTANCE: simd-128 enumerated-sequence

View File

@ -1,6 +1,6 @@
USING: classes.tuple.private cpu.architecture help.markup
help.syntax kernel.private math math.vectors
math.vectors.simd.intrinsics sequences ;
help.syntax kernel.private math math.vectors math.vectors.simd.intrinsics
sequences ;
IN: math.vectors.simd
ARTICLE: "math.vectors.simd.intro" "Introduction to SIMD support"
@ -19,11 +19,11 @@ $nl
ARTICLE: "math.vectors.simd.support" "Supported SIMD instruction sets and operations"
"At present, the SIMD support makes use of a subset of SSE up to SSE4.1. The subset used depends on the current CPU type."
$nl
"SSE1 only supports single-precision SIMD (" { $snippet "float-4" } " and " { $snippet "float-8" } ")."
"SSE1 only supports single-precision SIMD (" { $snippet "float-4" } ")."
$nl
"SSE2 introduces double-precision SIMD (" { $snippet "double-2" } " and " { $snippet "double-4" } ") and integer SIMD (all types). Integer SIMD is missing a few features, in particular the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "."
"SSE2 introduces double-precision SIMD (" { $snippet "double-2" } ") and integer SIMD (all types). Integer SIMD is missing a few features; in particular, the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "."
$nl
"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "."
"SSE3 introduces horizontal adds (summing all components of a single vector register), which are useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "."
$nl
"SSSE3 introduces " { $link vabs } " for " { $snippet "char-16" } ", " { $snippet "short-8" } " and " { $snippet "int-4" } "."
$nl
@ -36,47 +36,18 @@ $nl
ARTICLE: "math.vectors.simd.types" "SIMD vector types"
"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type and " { $snippet "count" } " is a vector dimension."
$nl
"To use a SIMD vector type, a parsing word is used to generate the relevant code and bring it into the vocabulary search path; this is the same idea as with " { $link "specialized-arrays" } ":"
{ $subsections
POSTPONE: SIMD:
POSTPONE: SIMDS:
}
"The following scalar types are supported:"
{ $code
"char"
"uchar"
"short"
"ushort"
"int"
"uint"
"longlong"
"ulonglong"
"float"
"double"
}
"The following vector types are generated from the above scalar types:"
"The following vector types are available:"
{ $code
"char-16"
"uchar-16"
"char-32"
"uchar-32"
"short-8"
"ushort-8"
"short-16"
"ushort-16"
"int-4"
"uint-4"
"int-8"
"uint-8"
"longlong-2"
"ulonglong-2"
"longlong-4"
"ulonglong-4"
"float-4"
"float-8"
"double-2"
"double-4"
} ;
ARTICLE: "math.vectors.simd.words" "SIMD vector words"
@ -103,19 +74,17 @@ $nl
{ $code
"""USING: compiler.tree.debugger math.vectors
math.vectors.simd ;
SIMD: double
SYMBOLS: x y ;
[
double-4{ 1.5 2.0 3.7 0.4 } x set
double-4{ 1.5 2.0 3.7 0.4 } y set
float-4{ 1.5 2.0 3.7 0.4 } x set
float-4{ 1.5 2.0 3.7 0.4 } y set
x get y get v+
] optimizer-report.""" }
"The following word benefits from SIMD optimization, because it begins with an unsafe declaration:"
{ $code
"""USING: compiler.tree.debugger kernel.private
math.vectors math.vectors.simd ;
SIMD: float
IN: simd-demo
: interpolate ( v a b -- w )
@ -129,7 +98,6 @@ $nl
{ $code
"""USING: compiler.tree.debugger hints
math.vectors math.vectors.simd ;
SIMD: float
IN: simd-demo
: interpolate ( v a b -- w )
@ -145,7 +113,6 @@ $nl
"In the " { $snippet "interpolate" } " word, there is still a call to the " { $link <tuple-boa> } " primitive, because the return value at the end is being boxed on the heap. In the next example, no memory allocation occurs at all because the SIMD vectors are stored inside a struct class (see " { $link "classes.struct" } "); also note the use of inlining:"
{ $code
"""USING: compiler.tree.debugger math.vectors math.vectors.simd ;
SIMD: float
IN: simd-demo
STRUCT: actor
@ -182,7 +149,6 @@ ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives"
{ $list
"They operate on raw byte arrays, with a separate “representation” parameter passed in to determine the type of the operands and result."
"They are unsafe; passing values which are not byte arrays, or byte arrays with the wrong size, will dereference invalid memory and possibly crash Factor."
{ "They do not have software fallbacks; if the current CPU does not have SIMD support, a " { $link bad-simd-call } " error will be thrown." }
}
"The compiler converts " { $link "math-vectors" } " into SIMD primitives automatically in cases where it is safe; this means that the input types are known to be SIMD vectors, and the CPU supports SIMD."
$nl
@ -203,7 +169,7 @@ $nl
ARTICLE: "math.vectors.simd.accuracy" "Numerical accuracy of SIMD primitives"
"No guarantees are made that " { $vocab-link "math.vectors.simd" } " words will give identical results on different SSE versions, or between the hardware intrinsics and the software fallbacks."
$nl
"In particular, horizontal operations on " { $snippet "float-4" } " and " { $snippet "float-8" } " are affected by this. They are computed with lower precision in intrinsics than the software fallback. Horizontal operations include anything involving adding together the components of a vector, such as " { $link sum } " or " { $link normalize } "." ;
"In particular, horizontal operations on " { $snippet "float-4" } " vectors are affected by this. They are computed with lower precision in intrinsics than the software fallback. Horizontal operations include anything involving adding together the components of a vector, such as " { $link sum } " or " { $link normalize } "." ;
ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
"The " { $vocab-link "math.vectors.simd" } " vocabulary extends the " { $vocab-link "math.vectors" } " vocabulary to support efficient vector arithmetic on small, fixed-size vectors."
@ -218,16 +184,4 @@ ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
"math.vectors.simd.intrinsics"
} ;
HELP: SIMD:
{ $syntax "SIMD: type" }
{ $values { "type" "a scalar C type" } }
{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of " { $snippet "type" } " into the vocabulary search path. The allowed scalar types, and the auto-generated type/length vector combinations that result, are listed in " { $link "math.vectors.simd.types" } ". Generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
HELP: SIMDS:
{ $syntax "SIMDS: type type type ... ;" }
{ $values { "type" "a scalar C type" } }
{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of each " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
{ POSTPONE: SIMD: POSTPONE: SIMDS: } related-words
ABOUT: "math.vectors.simd"

View File

@ -3,22 +3,14 @@ effects fry io kernel kernel.private math math.functions
math.private math.vectors math.vectors.simd
math.vectors.simd.private prettyprint random sequences system
tools.test vocabs assocs compiler.cfg.debugger words
locals math.vectors.specialization combinators cpu.architecture
math.vectors.conversion.backend
math.vectors.simd.intrinsics namespaces byte-arrays alien
locals combinators cpu.architecture namespaces byte-arrays alien
specialized-arrays classes.struct eval classes.algebra sets
quotations math.constants compiler.units ;
quotations math.constants compiler.units splitting ;
FROM: math.vectors.simd.intrinsics => alien-vector set-alien-vector ;
QUALIFIED-WITH: alien.c-types c
SPECIALIZED-ARRAY: c:float
SIMD: c:char
SIMDS: c:uchar c:short c:ushort c:int c:uint c:longlong c:ulonglong c:float c:double ;
IN: math.vectors.simd.tests
! Make sure the functor doesn't generate bogus vocabularies
2 [ [ "USE: math.vectors.simd SIMD: rubinius" eval( -- ) ] must-fail ] times
[ f ] [ "math.vectors.simd.instances.rubinius" vocab ] unit-test
! Test type propagation
[ V{ float } ] [ [ { float-4 } declare norm-sq ] final-classes ] unit-test
@ -38,10 +30,6 @@ IN: math.vectors.simd.tests
[ V{ integer } ] [ [ { longlong-2 } declare second ] final-classes ] unit-test
[ V{ int-8 } ] [ [ { int-8 int-8 } declare v+ ] final-classes ] unit-test
[ t ] [ [ { int-8 } declare second ] final-classes first integer class<= ] unit-test
! Test puns; only on x86
cpu x86? [
[ double-2{ 4 1024 } ] [
@ -55,26 +43,76 @@ CONSTANT: simd-classes
{
char-16
uchar-16
char-32
uchar-32
short-8
ushort-8
short-16
ushort-16
int-4
uint-4
int-8
uint-8
longlong-2
ulonglong-2
longlong-4
ulonglong-4
float-4
float-8
double-2
double-4
}
SYMBOLS: -> +vector+ +any-vector+ +scalar+ +boolean+ +nonnegative+ +literal+ ;
CONSTANT: vector-words
H{
{ [v-] { +vector+ +vector+ -> +vector+ } }
{ distance { +vector+ +vector+ -> +nonnegative+ } }
{ n*v { +scalar+ +vector+ -> +vector+ } }
{ n+v { +scalar+ +vector+ -> +vector+ } }
{ n-v { +scalar+ +vector+ -> +vector+ } }
{ n/v { +scalar+ +vector+ -> +vector+ } }
{ norm { +vector+ -> +nonnegative+ } }
{ norm-sq { +vector+ -> +nonnegative+ } }
{ normalize { +vector+ -> +vector+ } }
{ v* { +vector+ +vector+ -> +vector+ } }
{ vs* { +vector+ +vector+ -> +vector+ } }
{ v*n { +vector+ +scalar+ -> +vector+ } }
{ v+ { +vector+ +vector+ -> +vector+ } }
{ vs+ { +vector+ +vector+ -> +vector+ } }
{ v+- { +vector+ +vector+ -> +vector+ } }
{ v+n { +vector+ +scalar+ -> +vector+ } }
{ v- { +vector+ +vector+ -> +vector+ } }
{ vneg { +vector+ -> +vector+ } }
{ vs- { +vector+ +vector+ -> +vector+ } }
{ v-n { +vector+ +scalar+ -> +vector+ } }
{ v. { +vector+ +vector+ -> +scalar+ } }
{ v/ { +vector+ +vector+ -> +vector+ } }
{ v/n { +vector+ +scalar+ -> +vector+ } }
{ vceiling { +vector+ -> +vector+ } }
{ vfloor { +vector+ -> +vector+ } }
{ vmax { +vector+ +vector+ -> +vector+ } }
{ vmin { +vector+ +vector+ -> +vector+ } }
{ vneg { +vector+ -> +vector+ } }
{ vtruncate { +vector+ -> +vector+ } }
{ sum { +vector+ -> +scalar+ } }
{ vabs { +vector+ -> +vector+ } }
{ vsqrt { +vector+ -> +vector+ } }
{ vbitand { +vector+ +vector+ -> +vector+ } }
{ vbitandn { +vector+ +vector+ -> +vector+ } }
{ vbitor { +vector+ +vector+ -> +vector+ } }
{ vbitxor { +vector+ +vector+ -> +vector+ } }
{ vbitnot { +vector+ -> +vector+ } }
{ vand { +vector+ +vector+ -> +vector+ } }
{ vandn { +vector+ +vector+ -> +vector+ } }
{ vor { +vector+ +vector+ -> +vector+ } }
{ vxor { +vector+ +vector+ -> +vector+ } }
{ vnot { +vector+ -> +vector+ } }
{ vlshift { +vector+ +scalar+ -> +vector+ } }
{ vrshift { +vector+ +scalar+ -> +vector+ } }
{ (vmerge-head) { +vector+ +vector+ -> +vector+ } }
{ (vmerge-tail) { +vector+ +vector+ -> +vector+ } }
{ v<= { +vector+ +vector+ -> +vector+ } }
{ v< { +vector+ +vector+ -> +vector+ } }
{ v= { +vector+ +vector+ -> +vector+ } }
{ v> { +vector+ +vector+ -> +vector+ } }
{ v>= { +vector+ +vector+ -> +vector+ } }
{ vunordered? { +vector+ +vector+ -> +vector+ } }
}
: vector-word-inputs ( schema -- seq ) { -> } split first ;
: with-ctors ( -- seq )
simd-classes [ [ name>> "-with" append ] [ vocabulary>> ] bi lookup ] map ;
@ -82,7 +120,7 @@ CONSTANT: simd-classes
simd-classes [ [ name>> "-boa" append ] [ vocabulary>> ] bi lookup ] map ;
: check-optimizer ( seq quot eq-quot -- failures )
'[
dup '[
@
[ dup [ class ] { } map-as ] dip '[ _ declare @ ]
{
@ -90,8 +128,9 @@ CONSTANT: simd-classes
[ "print-checks" get [ [ . ] bi@ ] [ 2drop ] if ]
[ [ [ call ] dip call ] call( quot quot -- result ) ]
[ [ [ call ] dip compile-call ] call( quot quot -- result ) ]
[ [ t "always-inline-simd-intrinsics" [ [ call ] dip compile-call ] with-variable ] call( quot quot -- result ) ]
} 2cleave
@ not
[ drop @ ] [ nip @ ] 3bi and not
] filter ; inline
"== Checking -new constructors" print
@ -166,26 +205,15 @@ CONSTANT: simd-classes
: remove-boolean-words ( alist -- alist' )
boolean-ops unique assoc-diff ;
: remove-special-words ( alist -- alist' )
! These have their own tests later
{
hlshift hrshift vshuffle-bytes vshuffle-elements vbroadcast
vany? vall? vnone?
(v>float) (v>integer)
(vpack-signed) (vpack-unsigned)
(vunpack-head) (vunpack-tail)
} unique assoc-diff ;
: ops-to-check ( elt-class -- alist )
[ vector-words >alist ] dip
float = [ remove-integer-words ] [ remove-float-words ] if
remove-boolean-words
remove-special-words ;
remove-boolean-words ;
: check-vector-ops ( class elt-class compare-quot -- )
[
[ nip ops-to-check ] 2keep
'[ first2 inputs _ _ check-vector-op ]
'[ first2 vector-word-inputs _ _ check-vector-op ]
] dip check-optimizer ; inline
: (approx=) ( x y -- ? )
@ -235,8 +263,8 @@ simd-classes&reps [
: check-boolean-ops ( class elt-class compare-quot -- seq )
[
[ boolean-ops [ dup word-schema ] { } map>assoc ] 2dip
'[ first2 inputs _ _ check-boolean-op ]
[ boolean-ops [ dup vector-words at ] { } map>assoc ] 2dip
'[ first2 vector-word-inputs _ _ check-boolean-op ]
] dip check-optimizer ; inline
simd-classes&reps [
@ -427,27 +455,6 @@ TUPLE: inconsistent-vector-test bool branch ;
[ t f f ]
[ int-4{ f f f f } { int-4 } test-vector-tests ] unit-test
[ f t t ]
[ float-8{ t t t t t t t t } { float-8 } test-vector-tests ] unit-test
[ f t f ]
[ float-8{ f t t t t f t t } { float-8 } test-vector-tests ] unit-test
[ t f f ]
[ float-8{ f f f f f f f f } { float-8 } test-vector-tests ] unit-test
[ f t t ]
[ double-4{ t t t t } { double-4 } test-vector-tests ] unit-test
[ f t f ]
[ double-4{ f t t f } { double-4 } test-vector-tests ] unit-test
[ t f f ]
[ double-4{ f f f f } { double-4 } test-vector-tests ] unit-test
[ f t t ]
[ int-8{ t t t t t t t t } { int-8 } test-vector-tests ] unit-test
[ f t f ]
[ int-8{ f t t t t f f f } { int-8 } test-vector-tests ] unit-test
[ t f f ]
[ int-8{ f f f f f f f f } { int-8 } test-vector-tests ] unit-test
"== Checking element access" print
! Test element access -- it should box bignums for int-4 on x86
@ -467,14 +474,6 @@ TUPLE: inconsistent-vector-test bool branch ;
[ { } ] [ longlong-2{ 1 2 } test-accesses ] unit-test
[ { } ] [ ulonglong-2{ 1 2 } test-accesses ] unit-test
[ { } ] [ float-8{ 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 } test-accesses ] unit-test
[ { } ] [ int-8{ 1 2 3 4 5 6 7 8 } test-accesses ] unit-test
[ { } ] [ uint-8{ 1 2 3 4 5 6 7 8 } test-accesses ] unit-test
[ { } ] [ double-4{ 1.0 2.0 3.0 4.0 } test-accesses ] unit-test
[ { } ] [ longlong-4{ 1 2 3 4 } test-accesses ] unit-test
[ { } ] [ ulonglong-4{ 1 2 3 4 } test-accesses ] unit-test
"== Checking broadcast" print
: test-broadcast ( seq -- failures )
[ length >array ] keep
@ -488,14 +487,6 @@ TUPLE: inconsistent-vector-test bool branch ;
[ { } ] [ longlong-2{ 1 2 } test-broadcast ] unit-test
[ { } ] [ ulonglong-2{ 1 2 } test-broadcast ] unit-test
[ { } ] [ float-8{ 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 } test-broadcast ] unit-test
[ { } ] [ int-8{ 1 2 3 4 5 6 7 8 } test-broadcast ] unit-test
[ { } ] [ uint-8{ 1 2 3 4 5 6 7 8 } test-broadcast ] unit-test
[ { } ] [ double-4{ 1.0 2.0 3.0 4.0 } test-broadcast ] unit-test
[ { } ] [ longlong-4{ 1 2 3 4 } test-broadcast ] unit-test
[ { } ] [ ulonglong-4{ 1 2 3 4 } test-broadcast ] unit-test
! Make sure we use the fallback in the correct situations
[ int-4{ 3 3 3 3 } ] [ int-4{ 12 34 3 17 } 2 [ { int-4 fixnum } declare vbroadcast ] compile-call ] unit-test
@ -529,37 +520,37 @@ TUPLE: inconsistent-vector-test bool branch ;
STRUCT: simd-struct
{ x float-4 }
{ y longlong-2 }
{ z double-4 }
{ w int-8 } ;
{ z double-2 }
{ w int-4 } ;
[ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
[
float-4{ 1 2 3 4 }
longlong-2{ 2 1 }
double-4{ 4 3 2 1 }
int-8{ 1 2 3 4 5 6 7 8 }
double-2{ 4 3 }
int-4{ 1 2 3 4 }
] [
simd-struct <struct>
float-4{ 1 2 3 4 } >>x
longlong-2{ 2 1 } >>y
double-4{ 4 3 2 1 } >>z
int-8{ 1 2 3 4 5 6 7 8 } >>w
double-2{ 4 3 } >>z
int-4{ 1 2 3 4 } >>w
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
] unit-test
[
float-4{ 1 2 3 4 }
longlong-2{ 2 1 }
double-4{ 4 3 2 1 }
int-8{ 1 2 3 4 5 6 7 8 }
double-2{ 4 3 }
int-4{ 1 2 3 4 }
] [
[
simd-struct <struct>
float-4{ 1 2 3 4 } >>x
longlong-2{ 2 1 } >>y
double-4{ 4 3 2 1 } >>z
int-8{ 1 2 3 4 5 6 7 8 } >>w
double-2{ 4 3 } >>z
int-4{ 1 2 3 4 } >>w
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
] compile-call
] unit-test
@ -569,9 +560,9 @@ STRUCT: simd-struct
[ ] [ char-16 new 1array stack. ] unit-test
! CSSA bug
[ 8000000 ] [
int-8{ 1000 1000 1000 1000 1000 1000 1000 1000 }
[ { int-8 } declare dup [ * ] [ + ] 2map-reduce ] compile-call
[ 4000000 ] [
int-4{ 1000 1000 1000 1000 }
[ { int-4 } declare dup [ * ] [ + ] 2map-reduce ] compile-call
] unit-test
! Coalescing was too aggressive

View File

@ -1,42 +1,274 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: alien.c-types combinators fry kernel parser math math.parser
math.vectors.simd.functor sequences splitting vocabs.generated
vocabs.loader vocabs.parser words accessors vocabs compiler.units
definitions ;
USING: accessors alien.c-types arrays byte-arrays classes combinators
cpu.architecture effects fry functors generalizations generic
generic.parser kernel lexer literals macros math math.functions
math.vectors math.vectors.private math.vectors.simd.intrinsics namespaces parser
prettyprint.custom quotations sequences sequences.private vocabs
vocabs.loader words ;
QUALIFIED-WITH: alien.c-types c
IN: math.vectors.simd
ERROR: bad-base-type type ;
ERROR: bad-simd-length got expected ;
<<
<PRIVATE
! Primitive SIMD constructors
GENERIC: new-underlying ( underlying seq -- seq' )
: make-underlying ( seq quot -- seq' )
dip new-underlying ; inline
: change-underlying ( seq quot -- seq' )
'[ underlying>> @ ] keep new-underlying ; inline
PRIVATE>
>>
<PRIVATE
: simd-vocab ( base-type -- vocab )
name>> "math.vectors.simd.instances." prepend ;
! Helper for boolean vector literals
: parse-base-type ( c-type -- c-type )
dup { c:char c:uchar c:short c:ushort c:int c:uint c:longlong c:ulonglong c:float c:double } member-eq?
[ bad-base-type ] unless ;
: vector-true-value ( class -- value )
{ c:float c:double } member? [ -1 bits>double ] [ -1 ] if ; foldable
: forget-instances ( -- )
[
"math.vectors.simd.instances" child-vocabs
[ forget-vocab ] each
] with-compilation-unit ;
: vector-false-value ( type -- value )
{ c:float c:double } member? [ 0.0 ] [ 0 ] if ; foldable
: boolean>element ( bool/elt type -- elt )
swap {
{ t [ vector-true-value ] }
{ f [ vector-false-value ] }
[ nip ]
} case ; inline
PRIVATE>
: define-simd-vocab ( type -- vocab )
parse-base-type
[ simd-vocab ] keep '[
_
[ define-simd-128 ]
[ define-simd-256 ] bi
] generate-vocab ;
! SIMD base type
SYNTAX: SIMD:
scan-word define-simd-vocab use-vocab ;
TUPLE: simd-128
{ underlying byte-array read-only initial: $[ 16 <byte-array> ] } ;
SYNTAX: SIMDS:
\ ; parse-until [ define-simd-vocab use-vocab ] each ;
GENERIC: simd-element-type ( obj -- c-type )
GENERIC: simd-rep ( simd -- rep )
M: object simd-element-type drop f ;
M: object simd-rep drop f ;
<<
<PRIVATE
DEFER: simd-construct-op
! Unboxers for SIMD operations
: if-both-vectors ( a b rep t f -- )
[ 2over [ simd-128? ] both? ] 2dip if ; inline
: if-both-vectors-match ( a b rep t f -- )
[ 3dup [ drop [ simd-128? ] both? ] [ '[ simd-rep _ eq? ] both? ] 3bi and ]
2dip if ; inline
: simd-unbox ( a -- a (a) )
[ ] [ underlying>> ] bi ; inline
: v->v-op ( a rep quot: ( (a) rep -- (c) ) fallback-quot -- c )
drop [ simd-unbox ] 2dip 2curry make-underlying ; inline
: vn->v-op ( a n rep quot: ( (a) n rep -- (c) ) fallback-quot -- c )
drop [ simd-unbox ] 3dip 3curry make-underlying ; inline
: vn->n-op ( a n rep quot: ( (a) n rep -- n ) fallback-quot -- n )
drop [ underlying>> ] 3dip call ; inline
: v->n-op ( a rep quot: ( (a) rep -- n ) fallback-quot -- n )
drop [ underlying>> ] 2dip call ; inline
: (vv->v-op) ( a b rep quot: ( (a) (b) rep -- (c) ) -- c )
[ [ simd-unbox ] [ underlying>> ] bi* ] 2dip 3curry make-underlying ; inline
: (vv->n-op) ( a b rep quot: ( (a) (b) rep -- n ) -- n )
[ [ underlying>> ] bi@ ] 2dip 3curry call ; inline
: vv->v-op ( a b rep quot: ( (a) (b) rep -- (c) ) fallback-quot -- c )
[ '[ _ (vv->v-op) ] ] [ '[ drop @ ] ] bi* if-both-vectors-match ; inline
: vv'->v-op ( a b rep quot: ( (a) (b) rep -- (c) ) fallback-quot -- c )
[ '[ _ (vv->v-op) ] ] [ '[ drop @ ] ] bi* if-both-vectors ; inline
: vv->n-op ( a b rep quot: ( (a) (b) rep -- n ) fallback-quot -- n )
[ '[ _ (vv->n-op) ] ] [ '[ drop @ ] ] bi* if-both-vectors-match ; inline
PRIVATE>
>>
<<
<PRIVATE
! SIMD concrete type functor
FUNCTOR: define-simd-128 ( T -- )
A DEFINES-CLASS ${T}
A-rep IS ${T}-rep
>A DEFINES >${T}
A-boa DEFINES ${T}-boa
A-with DEFINES ${T}-with
A-cast DEFINES ${T}-cast
A{ DEFINES ${T}{
ELT [ A-rep rep-component-type ]
N [ A-rep rep-length ]
COERCER [ ELT c-type-class "coercer" word-prop [ ] or ]
SET-NTH [ ELT dup c:c-setter c:array-accessor ]
BOA-EFFECT [ N "n" <repetition> >array { "v" } <effect> ]
WHERE
TUPLE: A < simd-128 ;
M: A new-underlying drop \ A boa ; inline
M: A simd-rep drop A-rep ; inline
M: A simd-element-type drop ELT ; inline
M: A set-nth-unsafe
[ ELT boolean>element ] 2dip
underlying>> SET-NTH call ; inline
: >A ( seq -- simd ) \ A new clone-like ; inline
M: A like drop dup \ A instance? [ >A ] unless ; inline
: A-with ( n -- v ) COERCER call \ A-rep (simd-with) \ A boa ; inline
: A-cast ( v -- v' ) underlying>> \ A boa ; inline
! SIMD vectors as sequences
M: A hashcode* underlying>> hashcode* ; inline
M: A clone [ clone ] change-underlying ; inline
M: A length drop N ; inline
M: A nth-unsafe
swap \ A-rep [ (simd-select) ] [ call-next-method ] vn->n-op ; inline
M: A c:byte-length drop 16 ; inline
M: A new-sequence
2dup length =
[ nip [ 16 (byte-array) ] make-underlying ]
[ length bad-simd-length ] if ; inline
M: A equal?
\ A-rep [ drop v= vall? ] [ 3drop f ] if-both-vectors-match ; inline
! SIMD primitive operations
M: A v+ \ A-rep [ (simd-v+) ] [ call-next-method ] vv->v-op ; inline
M: A v- \ A-rep [ (simd-v-) ] [ call-next-method ] vv->v-op ; inline
M: A vneg \ A-rep [ (simd-vneg) ] [ call-next-method ] v->v-op ; inline
M: A v+- \ A-rep [ (simd-v+-) ] [ call-next-method ] vv->v-op ; inline
M: A vs+ \ A-rep [ (simd-vs+) ] [ call-next-method ] vv->v-op ; inline
M: A vs- \ A-rep [ (simd-vs-) ] [ call-next-method ] vv->v-op ; inline
M: A vs* \ A-rep [ (simd-vs*) ] [ call-next-method ] vv->v-op ; inline
M: A v* \ A-rep [ (simd-v*) ] [ call-next-method ] vv->v-op ; inline
M: A v/ \ A-rep [ (simd-v/) ] [ call-next-method ] vv->v-op ; inline
M: A vmin \ A-rep [ (simd-vmin) ] [ call-next-method ] vv->v-op ; inline
M: A vmax \ A-rep [ (simd-vmax) ] [ call-next-method ] vv->v-op ; inline
M: A v. \ A-rep [ (simd-v.) ] [ call-next-method ] vv->n-op ; inline
M: A vsqrt \ A-rep [ (simd-vsqrt) ] [ call-next-method ] v->v-op ; inline
M: A sum \ A-rep [ (simd-sum) ] [ call-next-method ] v->n-op ; inline
M: A vabs \ A-rep [ (simd-vabs) ] [ call-next-method ] v->v-op ; inline
M: A vbitand \ A-rep [ (simd-vbitand) ] [ call-next-method ] vv->v-op ; inline
M: A vbitandn \ A-rep [ (simd-vbitandn) ] [ call-next-method ] vv->v-op ; inline
M: A vbitor \ A-rep [ (simd-vbitor) ] [ call-next-method ] vv->v-op ; inline
M: A vbitxor \ A-rep [ (simd-vbitxor) ] [ call-next-method ] vv->v-op ; inline
M: A vbitnot \ A-rep [ (simd-vbitnot) ] [ call-next-method ] v->v-op ; inline
M: A vand \ A-rep [ (simd-vand) ] [ call-next-method ] vv->v-op ; inline
M: A vandn \ A-rep [ (simd-vandn) ] [ call-next-method ] vv->v-op ; inline
M: A vor \ A-rep [ (simd-vor) ] [ call-next-method ] vv->v-op ; inline
M: A vxor \ A-rep [ (simd-vxor) ] [ call-next-method ] vv->v-op ; inline
M: A vnot \ A-rep [ (simd-vnot) ] [ call-next-method ] v->v-op ; inline
M: A vlshift \ A-rep [ (simd-vlshift) ] [ call-next-method ] vn->v-op ; inline
M: A vrshift \ A-rep [ (simd-vrshift) ] [ call-next-method ] vn->v-op ; inline
M: A hlshift \ A-rep [ (simd-hlshift) ] [ call-next-method ] vn->v-op ; inline
M: A hrshift \ A-rep [ (simd-hrshift) ] [ call-next-method ] vn->v-op ; inline
M: A vshuffle-elements \ A-rep [ (simd-vshuffle-elements) ] [ call-next-method ] vn->v-op ; inline
M: A vshuffle-bytes \ A-rep [ (simd-vshuffle-bytes) ] [ call-next-method ] vv'->v-op ; inline
M: A (vmerge-head) \ A-rep [ (simd-vmerge-head) ] [ call-next-method ] vv->v-op ; inline
M: A (vmerge-tail) \ A-rep [ (simd-vmerge-tail) ] [ call-next-method ] vv->v-op ; inline
M: A v<= \ A-rep [ (simd-v<=) ] [ call-next-method ] vv->v-op ; inline
M: A v< \ A-rep [ (simd-v<) ] [ call-next-method ] vv->v-op ; inline
M: A v= \ A-rep [ (simd-v=) ] [ call-next-method ] vv->v-op ; inline
M: A v> \ A-rep [ (simd-v>) ] [ call-next-method ] vv->v-op ; inline
M: A v>= \ A-rep [ (simd-v>=) ] [ call-next-method ] vv->v-op ; inline
M: A vunordered? \ A-rep [ (simd-vunordered?) ] [ call-next-method ] vv->v-op ; inline
M: A vany? \ A-rep [ (simd-vany?) ] [ call-next-method ] v->n-op ; inline
M: A vall? \ A-rep [ (simd-vall?) ] [ call-next-method ] v->n-op ; inline
M: A vnone? \ A-rep [ (simd-vnone?) ] [ call-next-method ] v->n-op ; inline
! SIMD high-level specializations
M: A vbroadcast swap nth A-with ; inline
M: A n+v [ A-with ] dip v+ ; inline
M: A n-v [ A-with ] dip v- ; inline
M: A n*v [ A-with ] dip v* ; inline
M: A n/v [ A-with ] dip v/ ; inline
M: A v+n A-with v+ ; inline
M: A v-n A-with v- ; inline
M: A v*n A-with v* ; inline
M: A v/n A-with v/ ; inline
M: A norm-sq dup v. assert-positive ; inline
M: A distance v- norm ; inline
M: A >pprint-sequence ;
M: A pprint* pprint-object ;
\ A-boa
[ COERCER N napply ] N {
{ 2 [ [ A-rep (simd-gather-2) A boa ] ] }
{ 4 [ [ A-rep (simd-gather-4) A boa ] ] }
[ \ A new '[ _ _ nsequence ] ]
} case compose
BOA-EFFECT define-inline
M: A pprint-delims drop \ A{ \ } ;
SYNTAX: A{ \ } [ >A ] parse-literal ;
c:<c-type>
byte-array >>class
A >>boxed-class
{ A-rep alien-vector A boa } >quotation >>getter
{ [ underlying>> ] 2dip A-rep set-alien-vector } >quotation >>setter
16 >>size
16 >>align
A-rep >>rep
\ A c:typedef
;FUNCTOR
SYNTAX: SIMD-128:
scan define-simd-128 ;
PRIVATE>
>>
INSTANCE: simd-128 sequence
! SIMD instances
SIMD-128: char-16
SIMD-128: uchar-16
SIMD-128: short-8
SIMD-128: ushort-8
SIMD-128: int-4
SIMD-128: uint-4
SIMD-128: longlong-2
SIMD-128: ulonglong-2
SIMD-128: float-4
SIMD-128: double-2
! misc
M: simd-128 vshuffle ( u perm -- v )
vshuffle-bytes ; inline
"mirrors" vocab [
"math.vectors.simd.mirrors" require
] when

View File

@ -1,28 +0,0 @@
IN: math.vectors.specialization.tests
USING: compiler.tree.debugger math.vectors tools.test kernel
kernel.private math specialized-arrays ;
QUALIFIED-WITH: alien.c-types c
QUALIFIED-WITH: alien.complex c
SPECIALIZED-ARRAY: c:double
SPECIALIZED-ARRAY: c:complex-float
SPECIALIZED-ARRAY: c:float
[ V{ t } ] [
[ { double-array double-array } declare distance 0.0 < not ] final-literals
] unit-test
[ V{ float } ] [
[ { float-array float } declare v*n norm ] final-classes
] unit-test
[ V{ complex } ] [
[ { complex-float-array complex-float-array } declare v. ] final-classes
] unit-test
[ V{ float } ] [
[ { float-array float } declare v*n norm ] final-classes
] unit-test
[ V{ float } ] [
[ { complex-float-array complex } declare v*n norm ] final-classes
] unit-test

View File

@ -1,207 +0,0 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: words kernel make sequences effects sets kernel.private
accessors combinators math math.intervals math.vectors
math.vectors.conversion.backend namespaces assocs fry splitting
classes.algebra generalizations locals
compiler.tree.propagation.info ;
IN: math.vectors.specialization
SYMBOLS: -> +vector+ +any-vector+ +scalar+ +boolean+ +nonnegative+ +literal+ ;
: parent-vector-class ( type -- type' )
{
{ [ dup simd-128 class<= ] [ drop simd-128 ] }
{ [ dup simd-256 class<= ] [ drop simd-256 ] }
[ "Not a vector class" throw ]
} cond ;
: signature-for-schema ( array-type elt-type schema -- signature )
[
{
{ +vector+ [ drop ] }
{ +any-vector+ [ drop parent-vector-class ] }
{ +scalar+ [ nip ] }
{ +boolean+ [ 2drop boolean ] }
{ +nonnegative+ [ nip ] }
{ +literal+ [ 2drop f ] }
} case
] with with map ;
: (specialize-vector-word) ( word array-type elt-type schema -- word' )
signature-for-schema
[ [ name>> ] [ [ name>> ] map "," join ] bi* "=>" glue f <word> ]
[ [ , \ declare , def>> % ] [ ] make ]
[ drop stack-effect ]
2tri
[ define-declared ] [ 2drop ] 3bi ;
: output-infos ( array-type elt-type schema -- value-infos )
[
{
{ +vector+ [ drop <class-info> ] }
{ +any-vector+ [ drop parent-vector-class <class-info> ] }
{ +scalar+ [ nip <class-info> ] }
{ +boolean+ [ 2drop boolean <class-info> ] }
{
+nonnegative+
[
nip
dup complex class<= [ drop float ] when
[0,inf] <class/interval-info>
]
}
} case
] with with map ;
: record-output-signature ( word array-type elt-type schema -- word )
output-infos
[ drop ]
[ drop ]
[ [ stack-effect in>> length '[ _ ndrop ] ] dip append ] 2tri
"outputs" set-word-prop ;
CONSTANT: vector-words
H{
{ [v-] { +vector+ +vector+ -> +vector+ } }
{ distance { +vector+ +vector+ -> +nonnegative+ } }
{ n*v { +scalar+ +vector+ -> +vector+ } }
{ n+v { +scalar+ +vector+ -> +vector+ } }
{ n-v { +scalar+ +vector+ -> +vector+ } }
{ n/v { +scalar+ +vector+ -> +vector+ } }
{ norm { +vector+ -> +nonnegative+ } }
{ norm-sq { +vector+ -> +nonnegative+ } }
{ normalize { +vector+ -> +vector+ } }
{ v* { +vector+ +vector+ -> +vector+ } }
{ vs* { +vector+ +vector+ -> +vector+ } }
{ v*n { +vector+ +scalar+ -> +vector+ } }
{ v+ { +vector+ +vector+ -> +vector+ } }
{ vs+ { +vector+ +vector+ -> +vector+ } }
{ v+- { +vector+ +vector+ -> +vector+ } }
{ v+n { +vector+ +scalar+ -> +vector+ } }
{ v- { +vector+ +vector+ -> +vector+ } }
{ vneg { +vector+ -> +vector+ } }
{ vs- { +vector+ +vector+ -> +vector+ } }
{ v-n { +vector+ +scalar+ -> +vector+ } }
{ v. { +vector+ +vector+ -> +scalar+ } }
{ v/ { +vector+ +vector+ -> +vector+ } }
{ v/n { +vector+ +scalar+ -> +vector+ } }
{ vceiling { +vector+ -> +vector+ } }
{ vfloor { +vector+ -> +vector+ } }
{ vmax { +vector+ +vector+ -> +vector+ } }
{ vmin { +vector+ +vector+ -> +vector+ } }
{ vneg { +vector+ -> +vector+ } }
{ vtruncate { +vector+ -> +vector+ } }
{ sum { +vector+ -> +scalar+ } }
{ vabs { +vector+ -> +vector+ } }
{ vsqrt { +vector+ -> +vector+ } }
{ vbitand { +vector+ +vector+ -> +vector+ } }
{ vbitandn { +vector+ +vector+ -> +vector+ } }
{ vbitor { +vector+ +vector+ -> +vector+ } }
{ vbitxor { +vector+ +vector+ -> +vector+ } }
{ vbitnot { +vector+ -> +vector+ } }
{ vand { +vector+ +vector+ -> +vector+ } }
{ vandn { +vector+ +vector+ -> +vector+ } }
{ vor { +vector+ +vector+ -> +vector+ } }
{ vxor { +vector+ +vector+ -> +vector+ } }
{ vnot { +vector+ -> +vector+ } }
{ vlshift { +vector+ +scalar+ -> +vector+ } }
{ vrshift { +vector+ +scalar+ -> +vector+ } }
{ hlshift { +vector+ +literal+ -> +vector+ } }
{ hrshift { +vector+ +literal+ -> +vector+ } }
{ vshuffle-elements { +vector+ +literal+ -> +vector+ } }
{ vshuffle-bytes { +vector+ +any-vector+ -> +vector+ } }
{ vbroadcast { +vector+ +literal+ -> +vector+ } }
{ (vmerge-head) { +vector+ +vector+ -> +vector+ } }
{ (vmerge-tail) { +vector+ +vector+ -> +vector+ } }
{ (v>float) { +vector+ +literal+ -> +vector+ } }
{ (v>integer) { +vector+ +literal+ -> +vector+ } }
{ (vpack-signed) { +vector+ +vector+ +literal+ -> +vector+ } }
{ (vpack-unsigned) { +vector+ +vector+ +literal+ -> +vector+ } }
{ (vunpack-head) { +vector+ +literal+ -> +vector+ } }
{ (vunpack-tail) { +vector+ +literal+ -> +vector+ } }
{ v<= { +vector+ +vector+ -> +vector+ } }
{ v< { +vector+ +vector+ -> +vector+ } }
{ v= { +vector+ +vector+ -> +vector+ } }
{ v> { +vector+ +vector+ -> +vector+ } }
{ v>= { +vector+ +vector+ -> +vector+ } }
{ vunordered? { +vector+ +vector+ -> +vector+ } }
{ vany? { +vector+ -> +boolean+ } }
{ vall? { +vector+ -> +boolean+ } }
{ vnone? { +vector+ -> +boolean+ } }
}
PREDICATE: vector-word < word vector-words key? ;
: specializations ( word -- assoc )
dup "specializations" word-prop
[ ] [ V{ } clone [ "specializations" set-word-prop ] keep ] ?if ;
M: vector-word subwords specializations values [ word? ] filter ;
: add-specialization ( new-word signature word -- )
specializations set-at ;
ERROR: bad-vector-word word ;
: word-schema ( word -- schema )
vector-words ?at [ bad-vector-word ] unless ;
: inputs ( schema -- seq ) { -> } split first ;
: outputs ( schema -- seq ) { -> } split second ;
: loop-vector-op ( word array-type elt-type -- word' )
pick word-schema
[ inputs (specialize-vector-word) ]
[ outputs record-output-signature ] 3bi ;
:: specialize-vector-word ( word array-type elt-type simd -- word/quot' )
word simd key? [ word simd at ] [ word array-type elt-type loop-vector-op ] if ;
:: input-signature ( word array-type elt-type -- signature )
array-type elt-type word word-schema inputs signature-for-schema ;
: vector-words-for-type ( elt-type -- words )
{
! Can't do shifts on floats
{ [ dup float class<= ] [ vector-words keys { vlshift vrshift } diff ] }
! Can't divide integers
{ [ dup integer class<= ] [ vector-words keys { vsqrt n/v v/n v/ normalize } diff ] }
! Can't compute square root of complex numbers (vsqrt uses fsqrt not sqrt)
{ [ dup complex class<= ] [ vector-words keys { vsqrt } diff ] }
[ { } ]
} cond
! Don't specialize horizontal shifts, shuffles, and conversions at all, they're only for SIMD
{
hlshift hrshift vshuffle-elements vshuffle-bytes vbroadcast
(v>integer) (v>float)
(vpack-signed) (vpack-unsigned)
(vunpack-head) (vunpack-tail)
} diff
nip ;
:: specialize-vector-words ( array-type elt-type simd -- )
elt-type vector-words-for-type simd keys union [
[ array-type elt-type simd specialize-vector-word ]
[ array-type elt-type input-signature ]
[ ]
tri add-specialization
] each ;
: specialization-matches? ( value-infos signature -- ? )
[ [ [ class>> ] dip class<= ] [ literal?>> ] if* ] 2all? ;
: find-specialization ( classes word -- word/f )
specializations
[ first specialization-matches? ] with find
swap [ second ] when ;
: vector-word-custom-inlining ( #call -- word/f )
[ in-d>> [ value-info ] map ] [ word>> ] bi
find-specialization ;
vector-words keys [
[ vector-word-custom-inlining ]
"custom-inlining" set-word-prop
] each

View File

@ -436,7 +436,6 @@ HELP: vshuffle
{ $example
"USING: alien.c-types combinators math.vectors math.vectors.simd"
"namespaces prettyprint prettyprint.config ;"
"SIMDS: int uchar ;"
"IN: scratchpad"
""
": endian-swap ( size -- vector )"

View File

@ -6,29 +6,47 @@ byte-arrays accessors locals ;
QUALIFIED-WITH: alien.c-types c
IN: math.vectors
MIXIN: simd-128
MIXIN: simd-256
GENERIC: vneg ( u -- v )
M: object vneg [ neg ] map ;
GENERIC: element-type ( obj -- c-type )
M: object element-type drop f ; inline
GENERIC# v+n 1 ( u n -- v )
M: object v+n [ + ] curry map ;
: vneg ( u -- v ) [ neg ] map ;
GENERIC: n+v ( n v -- w )
M: object n+v [ + ] with map ;
: v+n ( u n -- v ) [ + ] curry map ;
: n+v ( n u -- v ) [ + ] with map ;
: v-n ( u n -- v ) [ - ] curry map ;
: n-v ( n u -- v ) [ - ] with map ;
GENERIC# v-n 1 ( u n -- w )
M: object v-n [ - ] curry map ;
: v*n ( u n -- v ) [ * ] curry map ;
: n*v ( n u -- v ) [ * ] with map ;
: v/n ( u n -- v ) [ / ] curry map ;
: n/v ( n u -- v ) [ / ] with map ;
GENERIC: n-v ( n v -- w )
M: object n-v [ - ] with map ;
: v+ ( u v -- w ) [ + ] 2map ;
: v- ( u v -- w ) [ - ] 2map ;
: [v-] ( u v -- w ) [ [-] ] 2map ;
: v* ( u v -- w ) [ * ] 2map ;
: v/ ( u v -- w ) [ / ] 2map ;
GENERIC# v*n 1 ( u n -- v )
M: object v*n [ * ] curry map ;
GENERIC: n*v ( n v -- w )
M: object n*v [ * ] with map ;
GENERIC# v/n 1 ( u n -- v )
M: object v/n [ / ] curry map ;
GENERIC: n/v ( n v -- w )
M: object n/v [ / ] with map ;
GENERIC: v+ ( u v -- w )
M: object v+ [ + ] 2map ;
GENERIC: v- ( u v -- w )
M: object v- [ - ] 2map ;
GENERIC: [v-] ( u v -- w )
M: object [v-] [ [-] ] 2map ;
GENERIC: v* ( u v -- w )
M: object v* [ * ] 2map ;
GENERIC: v/ ( u v -- w )
M: object v/ [ / ] 2map ;
<PRIVATE
@ -37,113 +55,128 @@ M: object element-type drop f ; inline
PRIVATE>
: vmax ( u v -- w ) [ [ float-max ] [ max ] if-both-floats ] 2map ;
: vmin ( u v -- w ) [ [ float-min ] [ min ] if-both-floats ] 2map ;
GENERIC: vmax ( u v -- w )
M: object vmax [ [ float-max ] [ max ] if-both-floats ] 2map ;
: v+- ( u v -- w )
GENERIC: vmin ( u v -- w )
M: object vmin [ [ float-min ] [ min ] if-both-floats ] 2map ;
GENERIC: v+- ( u v -- w )
M: object v+-
[ t ] 2dip
[ [ not ] 2dip pick [ + ] [ - ] if ] 2map
nip ;
<PRIVATE
GENERIC: vs+ ( u v -- w )
M: object vs+ [ + ] 2map ;
: 2saturate-map ( u v quot -- w )
pick element-type '[ @ _ c-type-clamp ] 2map ; inline
GENERIC: vs- ( u v -- w )
M: object vs- [ - ] 2map ;
PRIVATE>
GENERIC: vs* ( u v -- w )
M: object vs* [ * ] 2map ;
: vs+ ( u v -- w ) [ + ] 2saturate-map ;
: vs- ( u v -- w ) [ - ] 2saturate-map ;
: vs* ( u v -- w ) [ * ] 2saturate-map ;
GENERIC: vabs ( u -- v )
M: object vabs [ abs ] map ;
: vabs ( u -- v ) [ abs ] map ;
: vsqrt ( u -- v ) [ >float fsqrt ] map ;
GENERIC: vsqrt ( u -- v )
M: object vsqrt [ >float fsqrt ] map ;
<PRIVATE
: fp-bitwise-op ( x y seq quot -- z )
swap element-type {
{ c:double [ [ [ double>bits ] bi@ ] dip call bits>double ] }
{ c:float [ [ [ float>bits ] bi@ ] dip call bits>float ] }
[ drop call ]
} case ; inline
: fp-bitwise-unary ( x seq quot -- z )
swap element-type {
{ c:double [ [ double>bits ] dip call bits>double ] }
{ c:float [ [ float>bits ] dip call bits>float ] }
[ drop call ]
} case ; inline
: element>bool ( x seq -- ? )
element-type [ [ f ] when-zero ] when ; inline
: bitandn ( x y -- z ) [ bitnot ] dip bitand ; inline
GENERIC: new-underlying ( underlying seq -- seq' )
: change-underlying ( seq quot -- seq' )
'[ underlying>> @ ] keep new-underlying ; inline
PRIVATE>
: vbitand ( u v -- w ) over '[ _ [ bitand ] fp-bitwise-op ] 2map ;
: vbitandn ( u v -- w ) over '[ _ [ bitandn ] fp-bitwise-op ] 2map ;
: vbitor ( u v -- w ) over '[ _ [ bitor ] fp-bitwise-op ] 2map ;
: vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ;
: vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ;
GENERIC: vbitand ( u v -- w )
M: object vbitand [ bitand ] 2map ;
GENERIC: vbitandn ( u v -- w )
M: object vbitandn [ bitandn ] 2map ;
GENERIC: vbitor ( u v -- w )
M: object vbitor [ bitor ] 2map ;
GENERIC: vbitxor ( u v -- w )
M: object vbitxor [ bitxor ] 2map ;
GENERIC: vbitnot ( u -- w )
M: object vbitnot [ bitnot ] map ;
:: vbroadcast ( u n -- v ) u length n u nth <repetition> u like ;
GENERIC# vbroadcast 1 ( u n -- v )
M:: object vbroadcast ( u n -- v ) u length n u nth <repetition> u like ;
: vshuffle-elements ( u perm -- v )
GENERIC# vshuffle-elements 1 ( u perm -- v )
M: object vshuffle-elements
over length 0 pad-tail
swap [ '[ _ nth ] ] keep map-as ;
: vshuffle-bytes ( u perm -- v )
underlying>> [
swap [ '[ 15 bitand _ nth ] ] keep map-as
] curry change-underlying ;
GENERIC# vshuffle-bytes 1 ( u perm -- v )
GENERIC: vshuffle ( u perm -- v )
M: array vshuffle ( u perm -- v )
vshuffle-elements ; inline
M: simd-128 vshuffle ( u perm -- v )
vshuffle-bytes ; inline
: vlshift ( u n -- w ) '[ _ shift ] map ;
: vrshift ( u n -- w ) neg '[ _ shift ] map ;
GENERIC# vlshift 1 ( u n -- w )
M: object vlshift '[ _ shift ] map ;
GENERIC# vrshift 1 ( u n -- w )
M: object vrshift neg '[ _ shift ] map ;
: hlshift ( u n -- w ) '[ _ <byte-array> prepend 16 head ] change-underlying ;
: hrshift ( u n -- w ) '[ _ <byte-array> append 16 tail* ] change-underlying ;
GENERIC# hlshift 1 ( u n -- w )
GENERIC# hrshift 1 ( u n -- w )
: (vmerge-head) ( u v -- h )
over length 2 /i '[ _ head-slice ] bi@ [ zip ] keep concat-as ;
: (vmerge-tail) ( u v -- t )
over length 2 /i '[ _ tail-slice ] bi@ [ zip ] keep concat-as ;
GENERIC: (vmerge-head) ( u v -- h )
M: object (vmerge-head) over length 2 /i '[ _ head-slice ] bi@ [ zip ] keep concat-as ;
GENERIC: (vmerge-tail) ( u v -- t )
M: object (vmerge-tail) over length 2 /i '[ _ tail-slice ] bi@ [ zip ] keep concat-as ;
: (vmerge) ( u v -- h t )
GENERIC: (vmerge) ( u v -- h t )
M: object (vmerge)
[ (vmerge-head) ] [ (vmerge-tail) ] 2bi ; inline
: vmerge ( u v -- w ) [ zip ] keep concat-as ;
GENERIC: vmerge ( u v -- w )
M: object vmerge [ zip ] keep concat-as ;
: vand ( u v -- w ) over '[ [ _ element>bool ] bi@ and ] 2map ;
: vandn ( u v -- w ) over '[ [ _ element>bool ] bi@ [ not ] dip and ] 2map ;
: vor ( u v -- w ) over '[ [ _ element>bool ] bi@ or ] 2map ;
: vxor ( u v -- w ) over '[ [ _ element>bool ] bi@ xor ] 2map ;
: vnot ( u -- w ) dup '[ _ element>bool not ] map ;
GENERIC: vand ( u v -- w )
M: object vand [ and ] 2map ;
: vall? ( v -- ? ) dup '[ _ element>bool ] all? ;
: vany? ( v -- ? ) dup '[ _ element>bool ] any? ;
: vnone? ( v -- ? ) dup '[ _ element>bool not ] all? ;
GENERIC: vandn ( u v -- w )
M: object vandn [ [ not ] dip and ] 2map ;
: v< ( u v -- w ) [ < ] 2map ;
: v<= ( u v -- w ) [ <= ] 2map ;
: v>= ( u v -- w ) [ >= ] 2map ;
: v> ( u v -- w ) [ > ] 2map ;
: vunordered? ( u v -- w ) [ unordered? ] 2map ;
: v= ( u v -- w ) [ = ] 2map ;
GENERIC: vor ( u v -- w )
M: object vor [ or ] 2map ;
: v? ( mask true false -- result )
GENERIC: vxor ( u v -- w )
M: object vxor [ xor ] 2map ;
GENERIC: vnot ( u -- w )
M: object vnot [ not ] map ;
GENERIC: vall? ( v -- ? )
M: object vall? [ ] all? ;
GENERIC: vany? ( v -- ? )
M: object vany? [ ] any? ;
GENERIC: vnone? ( v -- ? )
M: object vnone? [ not ] all? ;
GENERIC: v< ( u v -- w )
M: object v< [ < ] 2map ;
GENERIC: v<= ( u v -- w )
M: object v<= [ <= ] 2map ;
GENERIC: v>= ( u v -- w )
M: object v>= [ >= ] 2map ;
GENERIC: v> ( u v -- w )
M: object v> [ > ] 2map ;
GENERIC: vunordered? ( u v -- w )
M: object vunordered? [ unordered? ] 2map ;
GENERIC: v= ( u v -- w )
M: object v= [ = ] 2map ;
GENERIC: v? ( mask true false -- result )
M: object v?
[ vand ] [ vandn ] bi-curry* bi vor ; inline
:: vif ( mask true-quot false-quot -- result )
@ -157,15 +190,21 @@ M: simd-128 vshuffle ( u perm -- v )
: vceiling ( u -- v ) [ ceiling ] map ;
: vtruncate ( u -- v ) [ truncate ] map ;
: vsupremum ( seq -- vmax ) [ ] [ vmax ] map-reduce ;
: vinfimum ( seq -- vmin ) [ ] [ vmin ] map-reduce ;
: vsupremum ( seq -- vmax ) [ ] [ vmax ] map-reduce ; inline
: vinfimum ( seq -- vmin ) [ ] [ vmin ] map-reduce ; inline
: v. ( u v -- x ) [ conjugate * ] [ + ] 2map-reduce ;
: norm-sq ( v -- x ) [ absq ] [ + ] map-reduce ;
: norm ( v -- x ) norm-sq sqrt ;
: normalize ( u -- v ) dup norm v/n ;
GENERIC: v. ( u v -- x )
M: object v. [ conjugate * ] [ + ] 2map-reduce ;
: distance ( u v -- x ) [ - absq ] [ + ] 2map-reduce sqrt ;
GENERIC: norm-sq ( v -- x )
M: object norm-sq [ absq ] [ + ] map-reduce ;
: norm ( v -- x ) norm-sq sqrt ; inline
: normalize ( u -- v ) dup norm v/n ; inline
GENERIC: distance ( u v -- x )
M: object distance [ - absq ] [ + ] 2map-reduce sqrt ;
: set-axis ( u v axis -- w )
[ [ zero? 2over ? ] dip swap nth ] map-index 2nip ;
@ -198,27 +237,27 @@ PRIVATE>
: v~ ( a b epsilon -- ? )
[ ~ ] curry 2all? ; inline
HINTS: vneg { array } ;
HINTS: norm-sq { array } ;
HINTS: M\ object vneg { array } ;
HINTS: M\ object norm-sq { array } ;
HINTS: norm { array } ;
HINTS: normalize { array } ;
HINTS: distance { array array } ;
HINTS: M\ object distance { array array } ;
HINTS: n*v { object array } ;
HINTS: v*n { array object } ;
HINTS: n/v { array } ;
HINTS: v/n { array object } ;
HINTS: M\ object n*v { object array } ;
HINTS: M\ object v*n { array object } ;
HINTS: M\ object n/v { object array } ;
HINTS: M\ object v/n { array object } ;
HINTS: v+ { array array } ;
HINTS: v- { array array } ;
HINTS: v* { array array } ;
HINTS: v/ { array array } ;
HINTS: vmax { array array } ;
HINTS: vmin { array array } ;
HINTS: v. { array array } ;
HINTS: M\ object v+ { array array } ;
HINTS: M\ object v- { array array } ;
HINTS: M\ object v* { array array } ;
HINTS: M\ object v/ { array array } ;
HINTS: M\ object vmax { array array } ;
HINTS: M\ object vmin { array array } ;
HINTS: M\ object v. { array array } ;
HINTS: vlerp { array array array } ;
HINTS: vnlerp { array array object } ;
HINTS: bilerp { object object object object array } ;
HINTS: trilerp { object object object object object object object object array } ;

View File

@ -4,7 +4,6 @@ USING: accessors alien.c-types kernel locals math math.ranges
math.bitwise math.vectors math.vectors.simd random
sequences specialized-arrays sequences.private classes.struct
combinators.short-circuit fry ;
SIMDS: uchar uint ;
SPECIALIZED-ARRAY: uint
SPECIALIZED-ARRAY: uint-4
IN: random.sfmt

View File

@ -0,0 +1,4 @@
USING: sequences.cords strings tools.test kernel sequences ;
IN: sequences.cords.tests
[ "hello world" ] [ "hello" " world" cord-append dup like ] unit-test

View File

@ -0,0 +1,112 @@
! Copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors assocs sequences sorting binary-search fry math
math.order arrays classes combinators kernel functors math.functions
math.vectors ;
IN: sequences.cords
MIXIN: cord
TUPLE: generic-cord
{ head read-only } { tail read-only } ;
INSTANCE: generic-cord cord
M: cord length
[ head>> length ] [ tail>> length ] bi + ; inline
M: cord virtual-exemplar head>> ; inline
M: cord virtual@
2dup head>> length <
[ head>> ] [ [ head>> length - ] [ tail>> ] bi ] if ; inline
INSTANCE: cord virtual-sequence
GENERIC: cord-append ( seq1 seq2 -- cord )
M: object cord-append
generic-cord boa ; inline
FUNCTOR: define-specialized-cord ( T C -- )
T-cord DEFINES-CLASS ${C}
WHERE
TUPLE: T-cord
{ head T read-only } { tail T read-only } ;
INSTANCE: T-cord cord
M: T cord-append
2dup [ T instance? ] both?
[ T-cord boa ] [ generic-cord boa ] if ; inline
;FUNCTOR
: cord-map ( cord quot -- cord' )
[ [ head>> ] dip call ]
[ [ tail>> ] dip call ] 2bi cord-append ; inline
: cord-2map ( cord cord quot -- cord' )
[ [ [ head>> ] bi@ ] dip call ]
[ [ [ tail>> ] bi@ ] dip call ] 3bi cord-append ; inline
: cord-both ( cord quot -- h t )
[ [ head>> ] [ tail>> ] bi ] dip bi@ ; inline
: cord-2both ( cord cord quot -- h t )
[ [ [ head>> ] bi@ ] dip call ]
[ [ [ tail>> ] bi@ ] dip call ] 3bi ; inline
M: cord v+ [ v+ ] cord-2map ; inline
M: cord v- [ v- ] cord-2map ; inline
M: cord vneg [ vneg ] cord-map ; inline
M: cord v+- [ v+- ] cord-2map ; inline
M: cord vs+ [ vs+ ] cord-2map ; inline
M: cord vs- [ vs- ] cord-2map ; inline
M: cord vs* [ vs* ] cord-2map ; inline
M: cord v* [ v* ] cord-2map ; inline
M: cord v/ [ v/ ] cord-2map ; inline
M: cord vmin [ vmin ] cord-2map ; inline
M: cord vmax [ vmax ] cord-2map ; inline
M: cord v. [ v. ] cord-2both + ; inline
M: cord vsqrt [ vsqrt ] cord-map ; inline
M: cord sum [ sum ] cord-both + ; inline
M: cord vabs [ vabs ] cord-map ; inline
M: cord vbitand [ vbitand ] cord-2map ; inline
M: cord vbitandn [ vbitandn ] cord-2map ; inline
M: cord vbitor [ vbitor ] cord-2map ; inline
M: cord vbitxor [ vbitxor ] cord-2map ; inline
M: cord vbitnot [ vbitnot ] cord-map ; inline
M: cord vand [ vand ] cord-2map ; inline
M: cord vandn [ vandn ] cord-2map ; inline
M: cord vor [ vor ] cord-2map ; inline
M: cord vxor [ vxor ] cord-2map ; inline
M: cord vnot [ vnot ] cord-map ; inline
M: cord vlshift '[ _ vlshift ] cord-map ; inline
M: cord vrshift '[ _ vrshift ] cord-map ; inline
M: cord (vmerge-head) [ head>> ] bi@ (vmerge) cord-append ; inline
M: cord (vmerge-tail) [ tail>> ] bi@ (vmerge) cord-append ; inline
M: cord v<= [ v<= ] cord-2map ; inline
M: cord v< [ v< ] cord-2map ; inline
M: cord v= [ v= ] cord-2map ; inline
M: cord v> [ v> ] cord-2map ; inline
M: cord v>= [ v>= ] cord-2map ; inline
M: cord vunordered? [ vunordered? ] cord-2map ; inline
M: cord vany? [ vany? ] cord-both or ; inline
M: cord vall? [ vall? ] cord-both and ; inline
M: cord vnone? [ vnone? ] cord-both and ; inline
M: cord n+v [ n+v ] with cord-map ; inline
M: cord n-v [ n-v ] with cord-map ; inline
M: cord n*v [ n*v ] with cord-map ; inline
M: cord n/v [ n/v ] with cord-map ; inline
M: cord v+n '[ _ v+n ] cord-map ; inline
M: cord v-n '[ _ v-n ] cord-map ; inline
M: cord v*n '[ _ v*n ] cord-map ; inline
M: cord v/n '[ _ v/n ] cord-map ; inline
M: cord norm-sq [ norm-sq ] cord-both + ; inline
M: cord distance v- norm ; inline

View File

@ -4,5 +4,3 @@ USING: mirrors specialized-arrays math.vectors ;
IN: specialized-arrays.mirrors
INSTANCE: specialized-array enumerated-sequence
INSTANCE: simd-128 enumerated-sequence
INSTANCE: simd-256 enumerated-sequence

View File

@ -2,8 +2,7 @@
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien alien.c-types alien.data alien.parser
assocs byte-arrays classes compiler.units functors kernel lexer
libc math math.vectors math.vectors.private
math.vectors.specialization namespaces
libc math math.vectors math.vectors.private namespaces
parser prettyprint.custom sequences sequences.private strings
summary vocabs vocabs.loader vocabs.parser vocabs.generated
words fry combinators make ;
@ -69,8 +68,6 @@ TUPLE: A
[ drop \ T bad-byte-array-length ] unless
<direct-A> ; inline
M: A new-underlying drop byte-array>A ;
M: A clone [ underlying>> clone ] [ length>> ] bi <direct-A> ; inline
M: A length length>> ; inline
@ -96,8 +93,6 @@ M: A resize
M: A byte-length length \ T heap-size * ; inline
M: A element-type drop \ T ; inline
M: A direct-array-syntax drop \ A@ ;
M: A pprint-delims drop \ A{ \ } ;
@ -109,8 +104,6 @@ SYNTAX: A@ scan-object scan-object <direct-A> suffix! ;
INSTANCE: A specialized-array
A T c-type-boxed-class f specialize-vector-words
;FUNCTOR
GENERIC: (underlying-type) ( c-type -- c-type' )

View File

@ -3,7 +3,8 @@ USING: accessors urls io.encodings.ascii io.files math.parser
http.client kernel ;
: deploy-test-5 ( -- )
URL" http://apple.com"
URL" http://localhost/foo.html" clone
"resource:port-number" ascii file-contents string>number >>port
http-get 2drop ;
MAIN: deploy-test-5

View File

@ -13,6 +13,9 @@ ERROR: not-in-a-method-error ;
: create-method-in ( class generic -- method )
create-method dup set-word dup save-location ;
: define-inline-method ( class generic quot -- )
[ create-method-in ] dip [ define ] [ drop make-inline ] 2bi ;
: CREATE-METHOD ( -- method )
scan-word bootstrap-word scan-word create-method-in ;

View File

@ -929,7 +929,8 @@ PRIVATE>
: trim ( seq quot -- newseq )
[ trim-slice ] [ drop ] 2bi like ; inline
: sum ( seq -- n ) 0 [ + ] binary-reduce ;
GENERIC: sum ( seq -- n )
M: object sum 0 [ + ] binary-reduce ; inline
: product ( seq -- n ) 1 [ * ] binary-reduce ;

View File

@ -3,7 +3,6 @@ USING: alien.data.map fry generalizations kernel locals math.vectors
math.vectors.conversion math math.vectors.simd sequences
specialized-arrays tools.test ;
FROM: alien.c-types => uchar short int float ;
SIMDS: float int short uchar ;
SPECIALIZED-ARRAYS: int float float-4 uchar-16 ;
IN: alien.data.map.tests

View File

@ -1,7 +1,6 @@
USING: kernel locals math math.matrices.simd math.order math.vectors
math.vectors.simd prettyprint sequences typed ;
QUALIFIED-WITH: alien.c-types c
SIMD: c:float
IN: benchmark.3d-matrix-vector
: v2min ( xy -- xx )

View File

@ -2,9 +2,8 @@
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien.c-types fry kernel locals math
math.constants math.functions math.vectors math.vectors.simd
prettyprint combinators.smart sequences hints classes.struct
specialized-arrays ;
SIMD: double
math.vectors.simd.cords prettyprint combinators.smart sequences
hints classes.struct specialized-arrays ;
IN: benchmark.nbody-simd
: solar-mass ( -- x ) 4 pi sq * ; inline

View File

@ -3,10 +3,9 @@
USING: arrays accessors io io.files io.files.temp
io.encodings.binary kernel math math.constants math.functions
math.vectors math.vectors.simd math.parser make sequences
sequences.private words hints classes.struct ;
math.vectors math.vectors.simd math.vectors.simd.cords math.parser
make sequences sequences.private words hints classes.struct ;
QUALIFIED-WITH: alien.c-types c
SIMD: c:double
IN: benchmark.raytracer-simd
! parameters

View File

@ -3,7 +3,6 @@
USING: kernel io math math.functions math.parser math.vectors
math.vectors.simd sequences specialized-arrays ;
QUALIFIED-WITH: alien.c-types c
SIMD: c:float
SPECIALIZED-ARRAY: float-4
IN: benchmark.simd-1

View File

@ -1,7 +1,6 @@
! (c)Joe Groff bsd license
USING: io kernel math.vectors.simd terrain.generation threads ;
FROM: alien.c-types => float ;
SIMD: float
IN: benchmark.terrain-generation
: terrain-generation-benchmark ( -- )

View File

@ -11,7 +11,6 @@ specialized-vectors ;
FROM: alien.c-types => float ;
SPECIALIZED-ARRAY: float
SPECIALIZED-VECTOR: uint
SIMD: float
IN: gpu.demos.bunny
GLSL-SHADER-FILE: bunny-vertex-shader vertex-shader "bunny.v.glsl"

View File

@ -3,7 +3,6 @@ USING: accessors alien.data.map arrays destructors fry grouping
kernel math math.ranges math.vectors.simd opengl opengl.gl sequences
sequences.product specialized-arrays ;
FROM: alien.c-types => float ;
SIMD: float
SPECIALIZED-ARRAY: float-4
IN: grid-meshes

View File

@ -3,7 +3,6 @@ USING: classes.struct math.matrices.simd math.vectors.simd math
literals math.constants math.functions specialized-arrays tools.test ;
QUALIFIED-WITH: alien.c-types c
FROM: math.matrices => m~ ;
SIMD: c:float
SPECIALIZED-ARRAY: float-4
IN: math.matrices.simd.tests

View File

@ -4,7 +4,6 @@ math math.combinatorics math.functions math.matrices.simd math.vectors
math.vectors.simd sequences sequences.private specialized-arrays
typed ;
QUALIFIED-WITH: alien.c-types c
SIMD: c:float
SPECIALIZED-ARRAY: float-4
IN: math.matrices.simd

View File

@ -4,7 +4,6 @@ math.libm math.matrices.simd math.vectors math.vectors.conversion math.vectors.s
memoize random random.mersenne-twister sequences sequences.private specialized-arrays
typed ;
QUALIFIED-WITH: alien.c-types c
SIMDS: c:float c:int c:short c:ushort c:uchar ;
SPECIALIZED-ARRAYS: c:float c:uchar float-4 uchar-16 ;
IN: noise

View File

@ -3,7 +3,6 @@ combinators.smart fry grouping images kernel math
math.matrices.simd math.order math.vectors noise random
sequences math.vectors.simd typed ;
FROM: alien.c-types => float uchar ;
SIMDS: float uchar ;
IN: terrain.generation
CONSTANT: terrain-segment-size { 512 512 }

View File

@ -11,7 +11,6 @@ math.matrices.simd noise ui.gestures combinators.short-circuit
destructors grid-meshes math.vectors.simd ;
QUALIFIED-WITH: alien.c-types c
SPECIALIZED-ARRAY: c:float
SIMD: c:float
IN: terrain
CONSTANT: FOV $[ 2.0 sqrt 1 + ]

View File

@ -86,6 +86,15 @@ cell factor_vm::frame_executing(stack_frame *frame)
return frame_code(frame)->owner;
}
cell factor_vm::frame_executing_quot(stack_frame *frame)
{
tagged<object> executing(frame_executing(frame));
code_block *compiled = frame_code(frame);
if(!compiled->optimized_p() && executing->type() == WORD_TYPE)
executing = executing.as<word>()->def;
return executing.value();
}
stack_frame *factor_vm::frame_successor(stack_frame *frame)
{
check_frame(frame);
@ -133,7 +142,7 @@ struct stack_frame_accumulator {
void operator()(stack_frame *frame)
{
data_root<object> executing(parent->frame_executing(frame),parent);
data_root<object> executing(parent->frame_executing_quot(frame),parent);
data_root<object> scan(parent->frame_scan(frame),parent);
frames.add(executing.value());
@ -166,23 +175,18 @@ stack_frame *factor_vm::innermost_stack_frame(callstack *stack)
return frame;
}
stack_frame *factor_vm::innermost_stack_frame_quot(callstack *callstack)
{
stack_frame *inner = innermost_stack_frame(callstack);
tagged<quotation>(frame_executing(inner)).untag_check(this);
return inner;
}
/* Some primitives implementing a limited form of callstack mutation.
Used by the single stepper. */
void factor_vm::primitive_innermost_stack_frame_executing()
{
dpush(frame_executing(innermost_stack_frame(untag_check<callstack>(dpop()))));
stack_frame *frame = innermost_stack_frame(untag_check<callstack>(dpop()));
dpush(frame_executing_quot(frame));
}
void factor_vm::primitive_innermost_stack_frame_scan()
{
dpush(frame_scan(innermost_stack_frame_quot(untag_check<callstack>(dpop()))));
stack_frame *frame = innermost_stack_frame(untag_check<callstack>(dpop()));
dpush(frame_scan(frame));
}
void factor_vm::primitive_set_innermost_stack_frame_quot()
@ -195,7 +199,7 @@ void factor_vm::primitive_set_innermost_stack_frame_quot()
jit_compile_quot(quot.value(),true);
stack_frame *inner = innermost_stack_frame_quot(callstack.untagged());
stack_frame *inner = innermost_stack_frame(callstack.untagged());
cell offset = (char *)FRAME_RETURN_ADDRESS(inner,this) - (char *)inner->xt;
inner->xt = quot->xt;
FRAME_RETURN_ADDRESS(inner,this) = (char *)quot->xt + offset;

View File

@ -20,7 +20,7 @@ fixnum instruction_operand::load_value_masked(cell mask, cell bits, cell shift)
{
fixnum *ptr = (fixnum *)pointer;
return (((*ptr & mask) << bits) >> bits) << shift;
return (((*ptr & (fixnum)mask) << bits) >> bits) << shift;
}
fixnum instruction_operand::load_value(cell relative_to)
@ -36,7 +36,7 @@ fixnum instruction_operand::load_value(cell relative_to)
case RC_ABSOLUTE_PPC_2_2:
return load_value_2_2();
case RC_ABSOLUTE_PPC_2:
return load_value_masked(rel_absolute_ppc_2_mask,0,0);
return load_value_masked(rel_absolute_ppc_2_mask,16,0);
case RC_RELATIVE_PPC_2:
return load_value_masked(rel_relative_ppc_2_mask,16,0) + relative_to;
case RC_RELATIVE_PPC_3:
@ -80,12 +80,6 @@ void instruction_operand::store_value_2_2(fixnum value)
void instruction_operand::store_value_masked(fixnum value, cell mask, cell shift)
{
cell *ptr = (cell *)pointer;
/* This is unaccurate but good enough */
fixnum test = (fixnum)mask >> 1;
if(value <= -test || value >= test)
critical_error("Value does not fit inside relocation",0);
*ptr = ((*ptr & ~mask) | ((value >> shift) & mask));
}

View File

@ -568,11 +568,11 @@ struct factor_vm
code_block *frame_code(stack_frame *frame);
code_block_type frame_type(stack_frame *frame);
cell frame_executing(stack_frame *frame);
cell frame_executing_quot(stack_frame *frame);
stack_frame *frame_successor(stack_frame *frame);
cell frame_scan(stack_frame *frame);
void primitive_callstack_to_array();
stack_frame *innermost_stack_frame(callstack *stack);
stack_frame *innermost_stack_frame_quot(callstack *callstack);
void primitive_innermost_stack_frame_executing();
void primitive_innermost_stack_frame_scan();
void primitive_set_innermost_stack_frame_quot();