backend fixups

db4
Joe Groff 2009-11-14 20:59:03 -06:00
parent e323071c44
commit 8a8699ac98
5 changed files with 199 additions and 188 deletions

View File

@ -1,5 +1,10 @@
! (c)2009 Joe Groff bsd license ! (c)2009 Joe Groff bsd license
USING: accessors fry generalizations kernel locals math sequences USING: accessors arrays classes combinators
combinators.short-circuit compiler.cfg.builder.blocks
compiler.cfg.registers compiler.cfg.stacks
compiler.cfg.stacks.local compiler.tree.propagation.info
cpu.architecture effects fry generalizations help.lint.checks
kernel locals macros math namespaces quotations sequences
splitting words ; splitting words ;
IN: compiler.cfg.intrinsics.simd.backend IN: compiler.cfg.intrinsics.simd.backend
@ -8,53 +13,49 @@ IN: compiler.cfg.intrinsics.simd.backend
: can-has? ( quot -- ? ) : can-has? ( quot -- ? )
[ t \ can-has? ] dip '[ @ drop \ can-has? get ] with-variable ; inline [ t \ can-has? ] dip '[ @ drop \ can-has? get ] with-variable ; inline
GENERIC: create-can-has-word ( word -- word' ) GENERIC: create-can-has ( word -- word' )
PREDICATE: vector-op-word PREDICATE: vector-op-word < word
{ {
[ name>> { [ { [ "^" head? ] [ "##" head? ] } 1|| ] [ "-vector" swap subseq? ] } 1&& ] [ name>> { [ { [ "^" head? ] [ "##" head? ] } 1|| ] [ "-vector" swap subseq? ] } 1&& ]
[ vocabulary>> { "compiler.cfg.intrinsics.simd" "cpu.architecture" } member? ] [ vocabulary>> { "compiler.cfg.intrinsics.simd" "compiler.cfg.hats" } member? ]
} 1&& ; } 1&& ;
: reps-word ( word -- word' ) : reps-word ( word -- word' )
name>> "^^" ?head drop "##" ?head drop name>> "^^" ?head drop "##" ?head drop
"%" "-reps" surround "cpu.architecture" lookup ; "%" "-reps" surround "cpu.architecture" lookup ;
:: can-has-^^-quot ( word def effect -- def' ) :: can-has-^^-quot ( word def effect -- quot )
effect in>> { "rep" } split1 [ length ] bi@ 1 + effect in>> { "rep" } split1 [ length ] bi@ 1 +
word reps-word word reps-word
effect out>> length f <array> >quotation effect out>> length f <array> >quotation
'[ [ _ ndrop ] _ ndip _ execute member? \ can-has? [ and ] change @ ] ; '[ [ _ ndrop ] _ ndip _ execute member? \ can-has? [ and ] change @ ] ;
:: can-has-^-quot ( word def effect -- def' ) :: can-has-^-quot ( word def effect -- quot )
def create-can-has ; def create-can-has ;
M: object create-can-has ; M: object create-can-has 1quotation ;
M: sequence create-can-has M: array create-can-has
[ create-can-has-word ] map ; [ create-can-has ] map concat ;
M: callable create-can-has
[ create-can-has ] map concat ;
: (create-can-has-word) ( word -- word' created? ) : (can-has-word) ( word -- word' )
name>> "can-has-" prepend "compiler.cfg.intrinsics.simd.backend" name>> "can-has-" prepend "compiler.cfg.intrinsics.simd.backend" lookup ;
2dup lookup
[ 2nip f ] [ create t ] if* ;
: (create-can-has-quot) ( word -- def effect ) : (can-has-quot) ( word -- quot )
[ ] [ def>> ] [ stack-effect ] tri [ [ ] [ def>> ] [ stack-effect ] tri {
{ { [ pick name>> "^^" head? ] [ can-has-^^-quot ] }
{ [ pick "^^" head? ] [ can-has-^^-quot ] } { [ pick name>> "##" head? ] [ can-has-^^-quot ] }
{ [ pick "##" head? ] [ can-has-^^-quot ] } { [ pick name>> "^" head? ] [ can-has-^-quot ] }
{ [ pick "^" head? ] [ can-has-^-quot ] } } cond ;
} cond
] keep ;
M: vector-op-word create-can-has M: vector-op-word create-can-has
[ (create-can-has-word) ] keep dup (can-has-word) [ 1quotation ] [ (can-has-quot) ] ?if ;
'[ _ (create-can-has-quot) define-declared ]
[ nip ] if ;
GENERIC# >can-has-cond 2 ( quot #pick #dup -- quotpair ) GENERIC# >can-has-cond 2 ( quot #pick #dup -- quotpair )
M:: callable >can-has-cond M:: callable >can-has-cond ( quot #pick #dup -- quotpair )
#dup quot create-can-has '[ _ ndup _ can-has? ] quot 2array ; #dup quot create-can-has '[ _ ndup _ can-has? ] quot 2array ;
M:: pair >can-has-cond ( pair #pick #dup -- quotpair ) M:: pair >can-has-cond ( pair #pick #dup -- quotpair )
@ -113,7 +114,7 @@ CONSTANT: [quaternary]
-4 inc-d -4 inc-d
] ]
:: [emit-vector-op] ( trials params-quot op-quot literal-preds -- quot ) ; :: [emit-vector-op] ( trials params-quot op-quot literal-preds -- quot )
params-quot trials op-quot literal-preds params-quot trials op-quot literal-preds
'[ [ _ dip _ @ ds-push ] _ if-literals-match ] ; '[ [ _ dip _ @ ds-push ] _ if-literals-match ] ;
@ -126,10 +127,11 @@ MACRO: emit-vv-vector-op ( trials -- )
MACRO: emit-vvvv-vector-op ( trials -- ) MACRO: emit-vvvv-vector-op ( trials -- )
[quaternary] [ vvvv-vector-op ] { [ representation? ] } [emit-vector-op] ; [quaternary] [ vvvv-vector-op ] { [ representation? ] } [emit-vector-op] ;
MACRO:: emit-vv-or-vl-vector-op ( trials literal-pred -- ) MACRO:: emit-vv-or-vl-vector-op ( var-trials imm-trials literal-pred -- )
literal-pred trials literal-pred trials literal-pred imm-trials literal-pred var-trials
'[ '[
dup node-input-infos 2 tail-slice* first literal>> @ dup node-input-infos 2 tail-slice* first literal>> @
[ _ _ emit-vl-vector-op ] [ _ _ emit-vl-vector-op ]
[ _ emit-vv-vector-op ] if [ _ emit-vv-vector-op ] if
] ; ] ;

View File

@ -1,18 +1,20 @@
! Copyright (C) 2009 Slava Pestov, Joe Groff. ! Copyright (C) 2009 Slava Pestov, Joe Groff.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien byte-arrays fry classes.algebra USING: accessors alien alien.c-types byte-arrays fry
cpu.architecture kernel math sequences math.vectors classes.algebra cpu.architecture kernel math sequences
math.vectors.simd macros generalizations combinators math.vectors math.vectors.simd math.vectors.simd.private
combinators.short-circuit arrays locals macros generalizations combinators combinators.short-circuit
compiler.tree.propagation.info compiler.cfg.builder.blocks arrays locals compiler.tree.propagation.info
compiler.cfg.builder.blocks
compiler.cfg.comparisons compiler.cfg.comparisons
compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
compiler.cfg.instructions compiler.cfg.registers compiler.cfg.instructions compiler.cfg.registers
compiler.cfg.intrinsics
compiler.cfg.intrinsics.alien compiler.cfg.intrinsics.alien
compiler.cfg.intrinsics.simd.backend compiler.cfg.intrinsics.simd.backend
specialized-arrays ; specialized-arrays ;
FROM: alien.c-types => heap-size char short int longlong float double ; FROM: alien.c-types => heap-size char short int longlong float double ;
SPECIALIZED-ARRAYS: char short int longlong float double ; SPECIALIZED-ARRAYS: char uchar short ushort int uint longlong ulonglong float double ;
IN: compiler.cfg.intrinsics.simd IN: compiler.cfg.intrinsics.simd
! compound vector ops ! compound vector ops
@ -69,8 +71,14 @@ IN: compiler.cfg.intrinsics.simd
mask false rep ^^andn-vector mask false rep ^^andn-vector
rep ^^or-vector ; rep ^^or-vector ;
: ^minmax-compare-vector ( src1 src2 rep cc -- dst ) : ^not-vector ( src rep -- dst )
order-cc { {
[ ^^not-vector ]
[ [ ^^fill-vector ] [ ^^xor-vector ] bi ]
} v-vector-op ;
:: ^minmax-compare-vector ( src1 src2 rep cc -- dst )
cc order-cc {
{ cc< [ src1 src2 rep ^^max-vector src1 rep cc/= ^^compare-vector ] } { cc< [ src1 src2 rep ^^max-vector src1 rep cc/= ^^compare-vector ] }
{ cc<= [ src1 src2 rep ^^min-vector src1 rep cc= ^^compare-vector ] } { cc<= [ src1 src2 rep ^^min-vector src1 rep cc= ^^compare-vector ] }
{ cc> [ src1 src2 rep ^^min-vector src1 rep cc/= ^^compare-vector ] } { cc> [ src1 src2 rep ^^min-vector src1 rep cc/= ^^compare-vector ] }
@ -96,7 +104,7 @@ IN: compiler.cfg.intrinsics.simd
[ [ src1 src2 rep ] dip ^((compare-vector)) rep ^^or-vector ] [ [ src1 src2 rep ] dip ^((compare-vector)) rep ^^or-vector ]
reduce reduce
not? [ rep generate-not-vector ] when not? [ rep ^not-vector ] when
] if ; ] if ;
: ^compare-vector ( src1 src2 rep cc -- dst ) : ^compare-vector ( src1 src2 rep cc -- dst )
@ -118,7 +126,7 @@ IN: compiler.cfg.intrinsics.simd
{ signed-int-vector-rep [| src rep | { signed-int-vector-rep [| src rep |
src src rep ^^merge-vector-head :> merged src src rep ^^merge-vector-head :> merged
rep rep-component-type heap-size 8 * :> bits rep rep-component-type heap-size 8 * :> bits
merged bits rep widen-rep ^shr-vector-imm merged bits rep widen-vector-rep ^^shr-vector-imm
] } ] }
{ signed-int-vector-rep [| src rep | { signed-int-vector-rep [| src rep |
rep ^^zero-vector :> zero rep ^^zero-vector :> zero
@ -135,7 +143,7 @@ IN: compiler.cfg.intrinsics.simd
{ signed-int-vector-rep [| src rep | { signed-int-vector-rep [| src rep |
src src rep ^^merge-vector-tail :> merged src src rep ^^merge-vector-tail :> merged
rep rep-component-type heap-size 8 * :> bits rep rep-component-type heap-size 8 * :> bits
merged bits rep ^widened-shr-vector-imm merged bits rep widen-vector-rep ^^shr-vector-imm
] } ] }
{ signed-int-vector-rep [| src rep | { signed-int-vector-rep [| src rep |
rep ^^zero-vector :> zero rep ^^zero-vector :> zero
@ -144,7 +152,7 @@ IN: compiler.cfg.intrinsics.simd
] } ] }
} v-vector-op ; } v-vector-op ;
: ^(sum-2) ( src rep -- dst ) : ^(sum-vector-2) ( src rep -- dst )
{ {
[ dupd ^^horizontal-add-vector ] [ dupd ^^horizontal-add-vector ]
[| src rep | [| src rep |
@ -154,7 +162,7 @@ IN: compiler.cfg.intrinsics.simd
] ]
} v-vector-op ; } v-vector-op ;
: ^(sum-4) ( src rep -- dst ) : ^(sum-vector-4) ( src rep -- dst )
{ {
[ [
[ dupd ^^horizontal-add-vector ] [ dupd ^^horizontal-add-vector ]
@ -165,14 +173,14 @@ IN: compiler.cfg.intrinsics.simd
src src rep ^^merge-vector-tail :> tail src src rep ^^merge-vector-tail :> tail
head tail rep ^^add-vector :> src' head tail rep ^^add-vector :> src'
rep widen-rep :> rep' rep widen-vector-rep :> rep'
src' src' rep' ^^merge-vector-head :> head' src' src' rep' ^^merge-vector-head :> head'
src' src' rep' ^^merge-vector-tail :> tail' src' src' rep' ^^merge-vector-tail :> tail'
head' tail' rep ^^add-vector head' tail' rep ^^add-vector
] ]
} v-vector-op ; } v-vector-op ;
: ^(sum-8) ( src rep -- dst ) : ^(sum-vector-8) ( src rep -- dst )
{ {
[ [
[ dupd ^^horizontal-add-vector ] [ dupd ^^horizontal-add-vector ]
@ -184,19 +192,19 @@ IN: compiler.cfg.intrinsics.simd
src src rep ^^merge-vector-tail :> tail src src rep ^^merge-vector-tail :> tail
head tail rep ^^add-vector :> src' head tail rep ^^add-vector :> src'
rep widen-rep :> rep' rep widen-vector-rep :> rep'
src' src' rep' ^^merge-vector-head :> head' src' src' rep' ^^merge-vector-head :> head'
src' src' rep' ^^merge-vector-tail :> tail' src' src' rep' ^^merge-vector-tail :> tail'
head' tail' rep ^^add-vector :> src'' head' tail' rep ^^add-vector :> src''
rep' widen-rep :> rep'' rep' widen-vector-rep :> rep''
src'' src'' rep'' ^^merge-vector-head :> head'' src'' src'' rep'' ^^merge-vector-head :> head''
src'' src'' rep'' ^^merge-vector-tail :> tail'' src'' src'' rep'' ^^merge-vector-tail :> tail''
head'' tail'' rep ^^add-vector head'' tail'' rep ^^add-vector
] ]
} v-vector-op ; } v-vector-op ;
: ^(sum-16) ( src rep -- dst ) : ^(sum-vector-16) ( src rep -- dst )
{ {
[ [
{ {
@ -211,17 +219,17 @@ IN: compiler.cfg.intrinsics.simd
src src rep ^^merge-vector-tail :> tail src src rep ^^merge-vector-tail :> tail
head tail rep ^^add-vector :> src' head tail rep ^^add-vector :> src'
rep widen-rep :> rep' rep widen-vector-rep :> rep'
src' src' rep' ^^merge-vector-head :> head' src' src' rep' ^^merge-vector-head :> head'
src' src' rep' ^^merge-vector-tail :> tail' src' src' rep' ^^merge-vector-tail :> tail'
head' tail' rep ^^add-vector :> src'' head' tail' rep ^^add-vector :> src''
rep' widen-rep :> rep'' rep' widen-vector-rep :> rep''
src'' src'' rep'' ^^merge-vector-head :> head'' src'' src'' rep'' ^^merge-vector-head :> head''
src'' src'' rep'' ^^merge-vector-tail :> tail'' src'' src'' rep'' ^^merge-vector-tail :> tail''
head'' tail'' rep ^^add-vector :> src''' head'' tail'' rep ^^add-vector :> src'''
rep'' widen-rep :> rep''' rep'' widen-vector-rep :> rep'''
src''' src''' rep''' ^^merge-vector-head :> head''' src''' src''' rep''' ^^merge-vector-head :> head'''
src''' src''' rep''' ^^merge-vector-tail :> tail''' src''' src''' rep''' ^^merge-vector-tail :> tail'''
head''' tail''' rep ^^add-vector head''' tail''' rep ^^add-vector
@ -230,11 +238,11 @@ IN: compiler.cfg.intrinsics.simd
: ^(sum-vector) ( src rep -- dst ) : ^(sum-vector) ( src rep -- dst )
[ [
rep-length { dup rep-length {
{ 2 [ ^(sum-2) ] } { 2 [ ^(sum-vector-2) ] }
{ 4 [ ^(sum-4) ] } { 4 [ ^(sum-vector-4) ] }
{ 8 [ ^(sum-8) ] } { 8 [ ^(sum-vector-8) ] }
{ 16 [ ^(sum-16) ] } { 16 [ ^(sum-vector-16) ] }
} case } case
] [ ^^vector>scalar ] bi ; ] [ ^^vector>scalar ] bi ;
@ -244,11 +252,29 @@ IN: compiler.cfg.intrinsics.simd
{ int-vector-rep [| src rep | { int-vector-rep [| src rep |
src rep ^unpack-vector-head :> head src rep ^unpack-vector-head :> head
src rep ^unpack-vector-tail :> tail src rep ^unpack-vector-tail :> tail
rep widen-rep :> wide-rep rep widen-vector-rep :> wide-rep
head tail wide-rep ^^add-vector wide-rep ^(sum-vector) head tail wide-rep ^^add-vector wide-rep ^(sum-vector)
] } ] }
} v-vector-op ; } v-vector-op ;
: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
: ^shuffle-vector-imm ( src1 src2 rep -- dst )
{
[ ^^shuffle-vector-imm ]
[ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] bi ]
} vl-vector-op ;
: ^broadcast-vector ( src n rep -- dst )
[ rep-length swap <array> ] keep
^shuffle-vector-imm ;
: ^with-vector ( src rep -- dst )
[ ^^scalar>vector ] keep [ 0 ] dip ^broadcast-vector ;
: ^select-vector ( src n rep -- dst )
[ ^broadcast-vector ] keep ^^vector>scalar ;
! intrinsic emitters ! intrinsic emitters
: emit-simd-v+ ( node -- ) : emit-simd-v+ ( node -- )
@ -380,8 +406,7 @@ IN: compiler.cfg.intrinsics.simd
: emit-simd-vnot ( node -- ) : emit-simd-vnot ( node -- )
{ {
[ ^^not-vector ] [ ^not-vector ]
[ [ ^^fill-vector ] [ ^^xor-vector ] bi ]
} emit-v-vector-op ; } emit-v-vector-op ;
: emit-simd-vlshift ( node -- ) : emit-simd-vlshift ( node -- )
@ -408,12 +433,9 @@ IN: compiler.cfg.intrinsics.simd
[ ^^horizontal-shr-vector-imm ] [ ^^horizontal-shr-vector-imm ]
} [ integer? ] emit-vl-vector-op ; } [ integer? ] emit-vl-vector-op ;
: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
: emit-simd-vshuffle-elements ( node -- ) : emit-simd-vshuffle-elements ( node -- )
{ {
[ ^^shuffle-vector-imm ] [ ^shuffle-vector-imm ]
[ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] ]
} [ shuffle? ] emit-vl-vector-op ; } [ shuffle? ] emit-vl-vector-op ;
: emit-simd-vshuffle-bytes ( node -- ) : emit-simd-vshuffle-bytes ( node -- )
@ -458,28 +480,28 @@ IN: compiler.cfg.intrinsics.simd
: emit-simd-vany? ( node -- ) : emit-simd-vany? ( node -- )
{ {
[ vcc-any ^test-vector ] [ vcc-any ^^test-vector ]
} emit-vv-vector-op ; } emit-vv-vector-op ;
: emit-simd-vall? ( node -- ) : emit-simd-vall? ( node -- )
{ {
[ vcc-all ^test-vector ] [ vcc-all ^^test-vector ]
} emit-vv-vector-op ; } emit-vv-vector-op ;
: emit-simd-vnone? ( node -- ) : emit-simd-vnone? ( node -- )
{ {
[ vcc-none ^test-vector ] [ vcc-none ^^test-vector ]
} emit-vv-vector-op ; } emit-vv-vector-op ;
: emit-simd-v>float ( node -- ) : emit-simd-v>float ( node -- )
{ {
{ float-vector-rep [ drop ] } { float-vector-rep [ drop ] }
{ int-vector-rep [ ^^integer>float-vector ] } { int-vector-rep [ ^^integer>float-vector ] }
} emit-vv-vector-op ; } emit-v-vector-op ;
: emit-simd-v>integer ( node -- ) : emit-simd-v>integer ( node -- )
{ {
{ float-vector-rep [ ^^float>integer-vector ] } { float-vector-rep [ ^^float>integer-vector ] }
{ int-vector-rep [ dup ] } { int-vector-rep [ dup ] }
} emit-vv-vector-op ; } emit-v-vector-op ;
: emit-simd-vpack-signed ( node -- ) : emit-simd-vpack-signed ( node -- )
{ {
@ -503,7 +525,7 @@ IN: compiler.cfg.intrinsics.simd
: emit-simd-with ( node -- ) : emit-simd-with ( node -- )
{ {
[ ^^with-vector ] [ ^with-vector ]
} emit-v-vector-op ; } emit-v-vector-op ;
: emit-simd-gather-2 ( node -- ) : emit-simd-gather-2 ( node -- )
@ -518,7 +540,7 @@ IN: compiler.cfg.intrinsics.simd
: emit-simd-select ( node -- ) : emit-simd-select ( node -- )
{ {
[ ^^select-vector ] [ ^select-vector ]
} [ integer? ] emit-vl-vector-op ; } [ integer? ] emit-vl-vector-op ;
: emit-alien-vector ( node -- ) : emit-alien-vector ( node -- )
@ -540,62 +562,62 @@ IN: compiler.cfg.intrinsics.simd
inline-alien inline-alien
] with { [ %alien-vector-reps member? ] } if-literals-match ; ] with { [ %alien-vector-reps member? ] } if-literals-match ;
: enable-simd ( -- ) ! : enable-simd ( -- )
{ ! {
{ (simd-v+) [ emit-simd-v+ ] } ! { (simd-v+) [ emit-simd-v+ ] }
{ (simd-v-) [ emit-simd-v- ] } ! { (simd-v-) [ emit-simd-v- ] }
{ (simd-vneg) [ emit-simd-vneg ] } ! { (simd-vneg) [ emit-simd-vneg ] }
{ (simd-v+-) [ emit-simd-v+- ] } ! { (simd-v+-) [ emit-simd-v+- ] }
{ (simd-vs+) [ emit-simd-vs+ ] } ! { (simd-vs+) [ emit-simd-vs+ ] }
{ (simd-vs-) [ emit-simd-vs- ] } ! { (simd-vs-) [ emit-simd-vs- ] }
{ (simd-vs*) [ emit-simd-vs* ] } ! { (simd-vs*) [ emit-simd-vs* ] }
{ (simd-v*) [ emit-simd-v* ] } ! { (simd-v*) [ emit-simd-v* ] }
{ (simd-v/) [ emit-simd-v/ ] } ! { (simd-v/) [ emit-simd-v/ ] }
{ (simd-vmin) [ emit-simd-vmin ] } ! { (simd-vmin) [ emit-simd-vmin ] }
{ (simd-vmax) [ emit-simd-vmax ] } ! { (simd-vmax) [ emit-simd-vmax ] }
{ (simd-v.) [ emit-simd-v. ] } ! { (simd-v.) [ emit-simd-v. ] }
{ (simd-vsqrt) [ emit-simd-vsqrt ] } ! { (simd-vsqrt) [ emit-simd-vsqrt ] }
{ (simd-sum) [ emit-simd-sum ] } ! { (simd-sum) [ emit-simd-sum ] }
{ (simd-vabs) [ emit-simd-vabs ] } ! { (simd-vabs) [ emit-simd-vabs ] }
{ (simd-vbitand) [ emit-simd-vand ] } ! { (simd-vbitand) [ emit-simd-vand ] }
{ (simd-vbitandn) [ emit-simd-vandn ] } ! { (simd-vbitandn) [ emit-simd-vandn ] }
{ (simd-vbitor) [ emit-simd-vor ] } ! { (simd-vbitor) [ emit-simd-vor ] }
{ (simd-vbitxor) [ emit-simd-vxor ] } ! { (simd-vbitxor) [ emit-simd-vxor ] }
{ (simd-vbitnot) [ emit-simd-vnot ] } ! { (simd-vbitnot) [ emit-simd-vnot ] }
{ (simd-vand) [ emit-simd-vand ] } ! { (simd-vand) [ emit-simd-vand ] }
{ (simd-vandn) [ emit-simd-vandn ] } ! { (simd-vandn) [ emit-simd-vandn ] }
{ (simd-vor) [ emit-simd-vor ] } ! { (simd-vor) [ emit-simd-vor ] }
{ (simd-vxor) [ emit-simd-vxor ] } ! { (simd-vxor) [ emit-simd-vxor ] }
{ (simd-vnot) [ emit-simd-vnot ] } ! { (simd-vnot) [ emit-simd-vnot ] }
{ (simd-vlshift) [ emit-simd-vlshift ] } ! { (simd-vlshift) [ emit-simd-vlshift ] }
{ (simd-vrshift) [ emit-simd-vrshift ] } ! { (simd-vrshift) [ emit-simd-vrshift ] }
{ (simd-hlshift) [ emit-simd-hlshift ] } ! { (simd-hlshift) [ emit-simd-hlshift ] }
{ (simd-hrshift) [ emit-simd-hrshift ] } ! { (simd-hrshift) [ emit-simd-hrshift ] }
{ (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] } ! { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] }
{ (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] } ! { (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] }
{ (simd-vmerge-head) [ emit-simd-vmerge-head ] } ! { (simd-vmerge-head) [ emit-simd-vmerge-head ] }
{ (simd-vmerge-tail) [ emit-simd-vmerge-tail ] } ! { (simd-vmerge-tail) [ emit-simd-vmerge-tail ] }
{ (simd-v<=) [ emit-simd-v<= ] } ! { (simd-v<=) [ emit-simd-v<= ] }
{ (simd-v<) [ emit-simd-v< ] } ! { (simd-v<) [ emit-simd-v< ] }
{ (simd-v=) [ emit-simd-v= ] } ! { (simd-v=) [ emit-simd-v= ] }
{ (simd-v>) [ emit-simd-v> ] } ! { (simd-v>) [ emit-simd-v> ] }
{ (simd-v>=) [ emit-simd-v>= ] } ! { (simd-v>=) [ emit-simd-v>= ] }
{ (simd-vunordered?) [ emit-simd-vunordered? ] } ! { (simd-vunordered?) [ emit-simd-vunordered? ] }
{ (simd-vany?) [ emit-simd-vany? ] } ! { (simd-vany?) [ emit-simd-vany? ] }
{ (simd-vall?) [ emit-simd-vall? ] } ! { (simd-vall?) [ emit-simd-vall? ] }
{ (simd-vnone?) [ emit-simd-vnone? ] } ! { (simd-vnone?) [ emit-simd-vnone? ] }
{ (simd-v>float) [ emit-simd-v>float ] } ! { (simd-v>float) [ emit-simd-v>float ] }
{ (simd-v>integer) [ emit-simd-v>integer ] } ! { (simd-v>integer) [ emit-simd-v>integer ] }
{ (simd-vpack-signed) [ emit-simd-vpack-signed ] } ! { (simd-vpack-signed) [ emit-simd-vpack-signed ] }
{ (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] } ! { (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] }
{ (simd-vunpack-head) [ emit-simd-vunpack-head ] } ! { (simd-vunpack-head) [ emit-simd-vunpack-head ] }
{ (simd-vunpack-tail) [ emit-simd-vunpack-tail ] } ! { (simd-vunpack-tail) [ emit-simd-vunpack-tail ] }
{ (simd-with) [ emit-simd-with ] } ! { (simd-with) [ emit-simd-with ] }
{ (simd-gather-2) [ emit-simd-gather-2 ] } ! { (simd-gather-2) [ emit-simd-gather-2 ] }
{ (simd-gather-4) [ emit-simd-gather-4 ] } ! { (simd-gather-4) [ emit-simd-gather-4 ] }
{ (simd-select) [ emit-simd-select ] } ! { (simd-select) [ emit-simd-select ] }
{ alien-vector [ emit-alien-vector ] } ! { alien-vector [ emit-alien-vector ] }
{ set-alien-vector [ emit-set-alien-vector ] } ! { set-alien-vector [ emit-set-alien-vector ] }
} enable-intrinsics ; ! } enable-intrinsics ;
!
enable-simd ! enable-simd

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors byte-arrays combinators fry sequences USING: accessors byte-arrays combinators fry sequences
compiler.tree.propagation.info cpu.architecture kernel words math compiler.tree.propagation.info cpu.architecture kernel words math
math.intervals math.vectors.simd ; math.intervals math.vectors.simd math.vectors.simd.private ;
IN: compiler.tree.propagation.simd IN: compiler.tree.propagation.simd
{ {
@ -51,7 +51,6 @@ IN: compiler.tree.propagation.simd
(simd-gather-2) (simd-gather-2)
(simd-gather-4) (simd-gather-4)
alien-vector alien-vector
alien-vector-aligned
} [ { byte-array } "default-output-classes" set-word-prop ] each } [ { byte-array } "default-output-classes" set-word-prop ] each
: scalar-output-class ( rep -- class ) : scalar-output-class ( rep -- class )

View File

@ -1,6 +1,6 @@
USING: classes.tuple.private cpu.architecture help.markup USING: classes.tuple.private cpu.architecture help.markup
help.syntax kernel.private math math.vectors help.syntax kernel.private math math.vectors
math.vectors.simd.intrinsics sequences ; sequences ;
IN: math.vectors.simd IN: math.vectors.simd
ARTICLE: "math.vectors.simd.intro" "Introduction to SIMD support" ARTICLE: "math.vectors.simd.intro" "Introduction to SIMD support"
@ -23,7 +23,7 @@ $nl
$nl $nl
"SSE2 introduces double-precision SIMD (" { $snippet "double-2" } " and " { $snippet "double-4" } ") and integer SIMD (all types). Integer SIMD is missing a few features, in particular the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "." "SSE2 introduces double-precision SIMD (" { $snippet "double-2" } " and " { $snippet "double-4" } ") and integer SIMD (all types). Integer SIMD is missing a few features, in particular the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "."
$nl $nl
"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "." "SSE3 introduces horizontal adds (summing all components of a single vector register), which are useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "."
$nl $nl
"SSSE3 introduces " { $link vabs } " for " { $snippet "char-16" } ", " { $snippet "short-8" } " and " { $snippet "int-4" } "." "SSSE3 introduces " { $link vabs } " for " { $snippet "char-16" } ", " { $snippet "short-8" } " and " { $snippet "int-4" } "."
$nl $nl
@ -36,26 +36,7 @@ $nl
ARTICLE: "math.vectors.simd.types" "SIMD vector types" ARTICLE: "math.vectors.simd.types" "SIMD vector types"
"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type and " { $snippet "count" } " is a vector dimension." "Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type and " { $snippet "count" } " is a vector dimension."
$nl $nl
"To use a SIMD vector type, a parsing word is used to generate the relevant code and bring it into the vocabulary search path; this is the same idea as with " { $link "specialized-arrays" } ":" "The following vector types are available:"
{ $subsections
POSTPONE: SIMD:
POSTPONE: SIMDS:
}
"The following scalar types are supported:"
{ $code
"char"
"uchar"
"short"
"ushort"
"int"
"uint"
"longlong"
"ulonglong"
"float"
"double"
}
"The following vector types are generated from the above scalar types:"
{ $code { $code
"char-16" "char-16"
"uchar-16" "uchar-16"
@ -218,16 +199,4 @@ ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
"math.vectors.simd.intrinsics" "math.vectors.simd.intrinsics"
} ; } ;
HELP: SIMD:
{ $syntax "SIMD: type" }
{ $values { "type" "a scalar C type" } }
{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of " { $snippet "type" } " into the vocabulary search path. The allowed scalar types, and the auto-generated type/length vector combinations that result, are listed in " { $link "math.vectors.simd.types" } ". Generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
HELP: SIMDS:
{ $syntax "SIMDS: type type type ... ;" }
{ $values { "type" "a scalar C type" } }
{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of each " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
{ POSTPONE: SIMD: POSTPONE: SIMDS: } related-words
ABOUT: "math.vectors.simd" ABOUT: "math.vectors.simd"

View File

@ -1,5 +1,9 @@
! (c)2009 Slava Pestov, Joe Groff bsd license USING: accessors alien.c-types byte-arrays classes combinators
USING: math.vectors math.vectors.private ; cpu.architecture fry functors generalizations generic
generic.parser kernel lexer literals macros math math.functions
math.vectors math.vectors.private namespaces parser
prettyprint.custom quotations sequences sequences.private vocabs
vocabs.loader ;
QUALIFIED-WITH: alien.c-types c QUALIFIED-WITH: alien.c-types c
IN: math.vectors.simd IN: math.vectors.simd
@ -8,8 +12,11 @@ DEFER: simd-with
DEFER: simd-boa DEFER: simd-boa
DEFER: simd-cast DEFER: simd-cast
<PRIVATE ERROR: bad-simd-call word ;
ERROR: bad-simd-length got expected ;
<<
<PRIVATE
! Primitive SIMD constructors ! Primitive SIMD constructors
GENERIC: new-underlying ( underlying seq -- seq' ) GENERIC: new-underlying ( underlying seq -- seq' )
@ -18,6 +25,10 @@ GENERIC: new-underlying ( underlying seq -- seq' )
dip new-underlying ; inline dip new-underlying ; inline
: change-underlying ( seq quot -- seq' ) : change-underlying ( seq quot -- seq' )
'[ underlying>> @ ] keep new-underlying ; inline '[ underlying>> @ ] keep new-underlying ; inline
PRIVATE>
>>
<PRIVATE
! SIMD intrinsics ! SIMD intrinsics
@ -34,18 +45,18 @@ GENERIC: new-underlying ( underlying seq -- seq' )
: (simd-vmax) ( a b rep -- c ) \ vmax bad-simd-call ; : (simd-vmax) ( a b rep -- c ) \ vmax bad-simd-call ;
: (simd-v.) ( a b rep -- n ) \ v. bad-simd-call ; : (simd-v.) ( a b rep -- n ) \ v. bad-simd-call ;
: (simd-vsqrt) ( a rep -- c ) \ vsqrt bad-simd-call ; : (simd-vsqrt) ( a rep -- c ) \ vsqrt bad-simd-call ;
: (simd-sum) ( a b rep -- n ) \ sum bad-simd-call ; : (simd-sum) ( a rep -- n ) \ sum bad-simd-call ;
: (simd-vabs) ( a rep -- c ) \ vabs bad-simd-call ; : (simd-vabs) ( a rep -- c ) \ vabs bad-simd-call ;
: (simd-vbitand) ( a b rep -- c ) \ vbitand bad-simd-call ; : (simd-vbitand) ( a b rep -- c ) \ vbitand bad-simd-call ;
: (simd-vbitandn) ( a b rep -- c ) \ vbitandn bad-simd-call ; : (simd-vbitandn) ( a b rep -- c ) \ vbitandn bad-simd-call ;
: (simd-vbitor) ( a b rep -- c ) \ vbitor bad-simd-call ; : (simd-vbitor) ( a b rep -- c ) \ vbitor bad-simd-call ;
: (simd-vbitxor) ( a b rep -- c ) \ vbitxor bad-simd-call ; : (simd-vbitxor) ( a b rep -- c ) \ vbitxor bad-simd-call ;
: (simd-vbitnot) ( a b rep -- c ) \ vbitnot bad-simd-call ; : (simd-vbitnot) ( a rep -- c ) \ vbitnot bad-simd-call ;
: (simd-vand) ( a b rep -- c ) \ vand bad-simd-call ; : (simd-vand) ( a b rep -- c ) \ vand bad-simd-call ;
: (simd-vandn) ( a b rep -- c ) \ vandn bad-simd-call ; : (simd-vandn) ( a b rep -- c ) \ vandn bad-simd-call ;
: (simd-vor) ( a b rep -- c ) \ vor bad-simd-call ; : (simd-vor) ( a b rep -- c ) \ vor bad-simd-call ;
: (simd-vxor) ( a b rep -- c ) \ vxor bad-simd-call ; : (simd-vxor) ( a b rep -- c ) \ vxor bad-simd-call ;
: (simd-vnot) ( a b rep -- c ) \ vnot bad-simd-call ; : (simd-vnot) ( a rep -- c ) \ vnot bad-simd-call ;
: (simd-vlshift) ( a n rep -- c ) \ vlshift bad-simd-call ; : (simd-vlshift) ( a n rep -- c ) \ vlshift bad-simd-call ;
: (simd-vrshift) ( a n rep -- c ) \ vrshift bad-simd-call ; : (simd-vrshift) ( a n rep -- c ) \ vrshift bad-simd-call ;
: (simd-hlshift) ( a n rep -- c ) \ hlshift bad-simd-call ; : (simd-hlshift) ( a n rep -- c ) \ hlshift bad-simd-call ;
@ -74,9 +85,13 @@ GENERIC: new-underlying ( underlying seq -- seq' )
: (simd-gather-4) ( m n o p rep -- v ) \ simd-boa bad-simd-call ; : (simd-gather-4) ( m n o p rep -- v ) \ simd-boa bad-simd-call ;
: (simd-select) ( a n rep -- n ) \ nth bad-simd-call ; : (simd-select) ( a n rep -- n ) \ nth bad-simd-call ;
PRIVATE>
: alien-vector ( c-ptr n rep -- value ) \ alien-vector bad-simd-call ; : alien-vector ( c-ptr n rep -- value ) \ alien-vector bad-simd-call ;
: set-alien-vector ( c-ptr n rep -- value ) \ set-alien-vector bad-simd-call ; : set-alien-vector ( c-ptr n rep -- value ) \ set-alien-vector bad-simd-call ;
<PRIVATE
! Helper for boolean vector literals ! Helper for boolean vector literals
: vector-true-value ( class -- value ) : vector-true-value ( class -- value )
@ -102,10 +117,11 @@ TUPLE: simd-128
GENERIC: simd-element-type ( obj -- c-type ) GENERIC: simd-element-type ( obj -- c-type )
GENERIC: simd-rep ( simd -- rep ) GENERIC: simd-rep ( simd -- rep )
<<
: rep-length ( rep -- n ) : rep-length ( rep -- n )
16 swap rep-component-type heap-size /i ; foldable 16 swap rep-component-type heap-size /i ; foldable
<< <PRIVATE <PRIVATE
! SIMD concrete type functor ! SIMD concrete type functor
@ -161,9 +177,11 @@ c:<c-type>
;FUNCTOR ;FUNCTOR
SYNTAX: SIMD-128: SYNTAX: SIMD-128:
scan scan-word define-simd-128 ; scan define-simd-128 ;
PRIVATE> >> PRIVATE>
>>
SIMD-128: char-16 SIMD-128: char-16
SIMD-128: uchar-16 SIMD-128: uchar-16
@ -176,16 +194,14 @@ SIMD-128: ulonglong-2
SIMD-128: float-4 SIMD-128: float-4
SIMD-128: double-2 SIMD-128: double-2
ERROR: bad-simd-call word ;
ERROR: bad-simd-length got expected ;
: assert-positive ( x -- y ) ; : assert-positive ( x -- y ) ;
! SIMD vectors as sequences ! SIMD vectors as sequences
M: simd-128 hashcode* underlying>> hashcode* ; inline
M: simd-128 clone [ clone ] change-underlying ; inline M: simd-128 clone [ clone ] change-underlying ; inline
M: simd-128 length simd-rep rep-length ; inline M: simd-128 length simd-rep rep-length ; inline
M: simd-128 nth-unsafe tuck simd-rep (simd-select) ; inline M: simd-128 nth-unsafe [ nip ] 2keep simd-rep (simd-select) ; inline
M: simd-128 c:byte-length drop 16 ; inline M: simd-128 c:byte-length drop 16 ; inline
M: simd-128 new-sequence M: simd-128 new-sequence
@ -193,16 +209,13 @@ M: simd-128 new-sequence
[ nip [ 16 (byte-array) ] make-underlying ] [ nip [ 16 (byte-array) ] make-underlying ]
[ length bad-simd-length ] if ; inline [ length bad-simd-length ] if ; inline
M: simd-128 equal?
[ v= vall? ] [ 2drop f ] if-vectors-match ; inline
M: simd-128 >pprint-sequence ; M: simd-128 >pprint-sequence ;
M: simd-128 pprint* pprint-object ; M: simd-128 pprint* pprint-object ;
INSTANCE: simd-128 sequence INSTANCE: simd-128 sequence
! Unboxers for SIMD operations ! Unboxers for SIMD operations
<<
<PRIVATE <PRIVATE
: if-both-vectors ( a b t f -- ) : if-both-vectors ( a b t f -- )
@ -221,6 +234,9 @@ INSTANCE: simd-128 sequence
: simd-v->v-op ( a quot: ( (a) rep -- (c) ) -- c ) : simd-v->v-op ( a quot: ( (a) rep -- (c) ) -- c )
[ simd-unbox ] dip 2curry make-underlying ; inline [ simd-unbox ] dip 2curry make-underlying ; inline
: simd-vn->v-op ( a n quot: ( (a) n rep -- (c) ) -- c )
[ simd-unbox ] [ swap ] [ 3curry ] tri* make-underlying ; inline
: simd-v->n-op ( a quot: ( (a) rep -- n ) -- n ) : simd-v->n-op ( a quot: ( (a) rep -- n ) -- n )
[ [ underlying>> ] [ simd-rep ] bi ] dip call ; inline [ [ underlying>> ] [ simd-rep ] bi ] dip call ; inline
@ -241,7 +257,7 @@ INSTANCE: simd-128 sequence
[ '[ _ ((simd-vv->n-op)) ] ] dip if-both-vectors-match ; inline [ '[ _ ((simd-vv->n-op)) ] ] dip if-both-vectors-match ; inline
: (simd-method-fallback) ( accum word -- accum ) : (simd-method-fallback) ( accum word -- accum )
[ current-method get \ (call-next-method) [ ] 2sequence suffix! ] [ current-method get literalize \ (call-next-method) [ ] 2sequence suffix! ]
dip suffix! ; dip suffix! ;
SYNTAX: simd-vv->v-op SYNTAX: simd-vv->v-op
@ -252,6 +268,10 @@ SYNTAX: simd-vv->n-op
\ (simd-vv->n-op) (simd-method-fallback) ; \ (simd-vv->n-op) (simd-method-fallback) ;
PRIVATE> PRIVATE>
>>
M: simd-128 equal?
[ v= vall? ] [ 2drop f ] if-both-vectors-match ; inline
! SIMD constructors ! SIMD constructors
@ -283,26 +303,26 @@ M: simd-128 vmin [ (simd-vmin) ] simd-vv->v-op ; inl
M: simd-128 vmax [ (simd-vmax) ] simd-vv->v-op ; inline M: simd-128 vmax [ (simd-vmax) ] simd-vv->v-op ; inline
M: simd-128 v. [ (simd-v.) ] simd-vv->n-op ; inline M: simd-128 v. [ (simd-v.) ] simd-vv->n-op ; inline
M: simd-128 vsqrt [ (simd-vsqrt) ] simd-v->v-op ; inline M: simd-128 vsqrt [ (simd-vsqrt) ] simd-v->v-op ; inline
M: simd-128 sum [ (simd-sum) ] simd-vv->n-op ; inline M: simd-128 sum [ (simd-sum) ] simd-v->n-op ; inline
M: simd-128 vabs [ (simd-vabs) ] simd-v->v-op ; inline M: simd-128 vabs [ (simd-vabs) ] simd-v->v-op ; inline
M: simd-128 vbitand [ (simd-vbitand) ] simd-vv->v-op ; inline M: simd-128 vbitand [ (simd-vbitand) ] simd-vv->v-op ; inline
M: simd-128 vbitandn [ (simd-vbitandn) ] simd-vv->v-op ; inline M: simd-128 vbitandn [ (simd-vbitandn) ] simd-vv->v-op ; inline
M: simd-128 vbitor [ (simd-vbitor) ] simd-vv->v-op ; inline M: simd-128 vbitor [ (simd-vbitor) ] simd-vv->v-op ; inline
M: simd-128 vbitxor [ (simd-vbitxor) ] simd-vv->v-op ; inline M: simd-128 vbitxor [ (simd-vbitxor) ] simd-vv->v-op ; inline
M: simd-128 vbitnot [ (simd-vbitnot) ] simd-vv->v-op ; inline M: simd-128 vbitnot [ (simd-vbitnot) ] simd-v->v-op ; inline
M: simd-128 vand [ (simd-vand) ] simd-vv->v-op ; inline M: simd-128 vand [ (simd-vand) ] simd-vv->v-op ; inline
M: simd-128 vandn [ (simd-vandn) ] simd-vv->v-op ; inline M: simd-128 vandn [ (simd-vandn) ] simd-vv->v-op ; inline
M: simd-128 vor [ (simd-vor) ] simd-vv->v-op ; inline M: simd-128 vor [ (simd-vor) ] simd-vv->v-op ; inline
M: simd-128 vxor [ (simd-vxor) ] simd-vv->v-op ; inline M: simd-128 vxor [ (simd-vxor) ] simd-vv->v-op ; inline
M: simd-128 vnot [ (simd-vnot) ] simd-vv->v-op ; inline M: simd-128 vnot [ (simd-vnot) ] simd-v->v-op ; inline
M: simd-128 vlshift [ (simd-vlshift) ] simd-vn->v-op ; inline M: simd-128 vlshift [ (simd-vlshift) ] simd-vn->v-op ; inline
M: simd-128 vrshift [ (simd-vrshift) ] simd-vn->v-op ; inline M: simd-128 vrshift [ (simd-vrshift) ] simd-vn->v-op ; inline
M: simd-128 hlshift [ (simd-hlshift) ] simd-vn->v-op ; inline M: simd-128 hlshift [ (simd-hlshift) ] simd-vn->v-op ; inline
M: simd-128 hrshift [ (simd-hrshift) ] simd-vn->v-op ; inline M: simd-128 hrshift [ (simd-hrshift) ] simd-vn->v-op ; inline
M: simd-128 vshuffle-elements [ (simd-vshuffle-elements) ] simd-vn->v-op ; inline M: simd-128 vshuffle-elements [ (simd-vshuffle-elements) ] simd-vn->v-op ; inline
M: simd-128 vshuffle-bytes [ (simd-vshuffle-bytes) ] simd-vv->v-op ; inline M: simd-128 vshuffle-bytes [ (simd-vshuffle-bytes) ] simd-vv->v-op ; inline
M: simd-128 vmerge-head [ (simd-vmerge-head) ] simd-vv->v-op ; inline M: simd-128 (vmerge-head) [ (simd-vmerge-head) ] simd-vv->v-op ; inline
M: simd-128 vmerge-tail [ (simd-vmerge-tail) ] simd-vv->v-op ; inline M: simd-128 (vmerge-tail) [ (simd-vmerge-tail) ] simd-vv->v-op ; inline
M: simd-128 v<= [ (simd-v<=) ] simd-vv->v-op ; inline M: simd-128 v<= [ (simd-v<=) ] simd-vv->v-op ; inline
M: simd-128 v< [ (simd-v<) ] simd-vv->v-op ; inline M: simd-128 v< [ (simd-v<) ] simd-vv->v-op ; inline
M: simd-128 v= [ (simd-v=) ] simd-vv->v-op ; inline M: simd-128 v= [ (simd-v=) ] simd-vv->v-op ; inline
@ -326,7 +346,6 @@ M: simd-128 v*n over simd-with v* ; inline
M: simd-128 v/n over simd-with v/ ; inline M: simd-128 v/n over simd-with v/ ; inline
M: simd-128 norm-sq dup v. assert-positive ; inline M: simd-128 norm-sq dup v. assert-positive ; inline
M: simd-128 norm norm-sq sqrt ; inline M: simd-128 norm norm-sq sqrt ; inline
M: simd-128 normalize dup norm v/n ; inline
M: simd-128 distance v- norm ; inline M: simd-128 distance v- norm ; inline
! misc ! misc