From 8a8699ac984aed542b748ffe2f7fa1791d8a38f1 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sat, 14 Nov 2009 20:59:03 -0600 Subject: [PATCH] backend fixups --- .../intrinsics/simd/backend/backend.factor | 60 ++--- .../compiler/cfg/intrinsics/simd/simd.factor | 220 ++++++++++-------- .../tree/propagation/simd/simd.factor | 3 +- basis/math/vectors/simd/simd-docs.factor | 37 +-- basis/math/vectors/simd/simd.factor | 67 ++++-- 5 files changed, 199 insertions(+), 188 deletions(-) diff --git a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor index 4fe9774282..90514c6cc9 100644 --- a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor +++ b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor @@ -1,5 +1,10 @@ ! (c)2009 Joe Groff bsd license -USING: accessors fry generalizations kernel locals math sequences +USING: accessors arrays classes combinators +combinators.short-circuit compiler.cfg.builder.blocks +compiler.cfg.registers compiler.cfg.stacks +compiler.cfg.stacks.local compiler.tree.propagation.info +cpu.architecture effects fry generalizations help.lint.checks +kernel locals macros math namespaces quotations sequences splitting words ; IN: compiler.cfg.intrinsics.simd.backend @@ -8,55 +13,51 @@ IN: compiler.cfg.intrinsics.simd.backend : can-has? ( quot -- ? ) [ t \ can-has? ] dip '[ @ drop \ can-has? get ] with-variable ; inline -GENERIC: create-can-has-word ( word -- word' ) +GENERIC: create-can-has ( word -- word' ) -PREDICATE: vector-op-word +PREDICATE: vector-op-word < word { [ name>> { [ { [ "^" head? ] [ "##" head? ] } 1|| ] [ "-vector" swap subseq? ] } 1&& ] - [ vocabulary>> { "compiler.cfg.intrinsics.simd" "cpu.architecture" } member? ] + [ vocabulary>> { "compiler.cfg.intrinsics.simd" "compiler.cfg.hats" } member? ] } 1&& ; : reps-word ( word -- word' ) name>> "^^" ?head drop "##" ?head drop "%" "-reps" surround "cpu.architecture" lookup ; -:: can-has-^^-quot ( word def effect -- def' ) +:: can-has-^^-quot ( word def effect -- quot ) effect in>> { "rep" } split1 [ length ] bi@ 1 + word reps-word effect out>> length f >quotation '[ [ _ ndrop ] _ ndip _ execute member? \ can-has? [ and ] change @ ] ; -:: can-has-^-quot ( word def effect -- def' ) +:: can-has-^-quot ( word def effect -- quot ) def create-can-has ; -M: object create-can-has ; +M: object create-can-has 1quotation ; -M: sequence create-can-has - [ create-can-has-word ] map ; +M: array create-can-has + [ create-can-has ] map concat ; +M: callable create-can-has + [ create-can-has ] map concat ; -: (create-can-has-word) ( word -- word' created? ) - name>> "can-has-" prepend "compiler.cfg.intrinsics.simd.backend" - 2dup lookup - [ 2nip f ] [ create t ] if* ; +: (can-has-word) ( word -- word' ) + name>> "can-has-" prepend "compiler.cfg.intrinsics.simd.backend" lookup ; -: (create-can-has-quot) ( word -- def effect ) - [ ] [ def>> ] [ stack-effect ] tri [ - { - { [ pick "^^" head? ] [ can-has-^^-quot ] } - { [ pick "##" head? ] [ can-has-^^-quot ] } - { [ pick "^" head? ] [ can-has-^-quot ] } - } cond - ] keep ; +: (can-has-quot) ( word -- quot ) + [ ] [ def>> ] [ stack-effect ] tri { + { [ pick name>> "^^" head? ] [ can-has-^^-quot ] } + { [ pick name>> "##" head? ] [ can-has-^^-quot ] } + { [ pick name>> "^" head? ] [ can-has-^-quot ] } + } cond ; M: vector-op-word create-can-has - [ (create-can-has-word) ] keep - '[ _ (create-can-has-quot) define-declared ] - [ nip ] if ; + dup (can-has-word) [ 1quotation ] [ (can-has-quot) ] ?if ; GENERIC# >can-has-cond 2 ( quot #pick #dup -- quotpair ) -M:: callable >can-has-cond +M:: callable >can-has-cond ( quot #pick #dup -- quotpair ) #dup quot create-can-has '[ _ ndup _ can-has? ] quot 2array ; - + M:: pair >can-has-cond ( pair #pick #dup -- quotpair ) pair first2 :> ( class quot ) #pick class #dup quot create-can-has @@ -113,7 +114,7 @@ CONSTANT: [quaternary] -4 inc-d ] -:: [emit-vector-op] ( trials params-quot op-quot literal-preds -- quot ) ; +:: [emit-vector-op] ( trials params-quot op-quot literal-preds -- quot ) params-quot trials op-quot literal-preds '[ [ _ dip _ @ ds-push ] _ if-literals-match ] ; @@ -126,10 +127,11 @@ MACRO: emit-vv-vector-op ( trials -- ) MACRO: emit-vvvv-vector-op ( trials -- ) [quaternary] [ vvvv-vector-op ] { [ representation? ] } [emit-vector-op] ; -MACRO:: emit-vv-or-vl-vector-op ( trials literal-pred -- ) - literal-pred trials literal-pred trials +MACRO:: emit-vv-or-vl-vector-op ( var-trials imm-trials literal-pred -- ) + literal-pred imm-trials literal-pred var-trials '[ dup node-input-infos 2 tail-slice* first literal>> @ [ _ _ emit-vl-vector-op ] [ _ emit-vv-vector-op ] if ] ; + diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index 1cf076af1d..512df6c129 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -1,18 +1,20 @@ ! Copyright (C) 2009 Slava Pestov, Joe Groff. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors alien byte-arrays fry classes.algebra -cpu.architecture kernel math sequences math.vectors -math.vectors.simd macros generalizations combinators -combinators.short-circuit arrays locals -compiler.tree.propagation.info compiler.cfg.builder.blocks +USING: accessors alien alien.c-types byte-arrays fry +classes.algebra cpu.architecture kernel math sequences +math.vectors math.vectors.simd math.vectors.simd.private +macros generalizations combinators combinators.short-circuit +arrays locals compiler.tree.propagation.info +compiler.cfg.builder.blocks compiler.cfg.comparisons compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats compiler.cfg.instructions compiler.cfg.registers +compiler.cfg.intrinsics compiler.cfg.intrinsics.alien compiler.cfg.intrinsics.simd.backend specialized-arrays ; FROM: alien.c-types => heap-size char short int longlong float double ; -SPECIALIZED-ARRAYS: char short int longlong float double ; +SPECIALIZED-ARRAYS: char uchar short ushort int uint longlong ulonglong float double ; IN: compiler.cfg.intrinsics.simd ! compound vector ops @@ -69,8 +71,14 @@ IN: compiler.cfg.intrinsics.simd mask false rep ^^andn-vector rep ^^or-vector ; -: ^minmax-compare-vector ( src1 src2 rep cc -- dst ) - order-cc { +: ^not-vector ( src rep -- dst ) + { + [ ^^not-vector ] + [ [ ^^fill-vector ] [ ^^xor-vector ] bi ] + } v-vector-op ; + +:: ^minmax-compare-vector ( src1 src2 rep cc -- dst ) + cc order-cc { { cc< [ src1 src2 rep ^^max-vector src1 rep cc/= ^^compare-vector ] } { cc<= [ src1 src2 rep ^^min-vector src1 rep cc= ^^compare-vector ] } { cc> [ src1 src2 rep ^^min-vector src1 rep cc/= ^^compare-vector ] } @@ -96,7 +104,7 @@ IN: compiler.cfg.intrinsics.simd [ [ src1 src2 rep ] dip ^((compare-vector)) rep ^^or-vector ] reduce - not? [ rep generate-not-vector ] when + not? [ rep ^not-vector ] when ] if ; : ^compare-vector ( src1 src2 rep cc -- dst ) @@ -118,7 +126,7 @@ IN: compiler.cfg.intrinsics.simd { signed-int-vector-rep [| src rep | src src rep ^^merge-vector-head :> merged rep rep-component-type heap-size 8 * :> bits - merged bits rep widen-rep ^shr-vector-imm + merged bits rep widen-vector-rep ^^shr-vector-imm ] } { signed-int-vector-rep [| src rep | rep ^^zero-vector :> zero @@ -135,7 +143,7 @@ IN: compiler.cfg.intrinsics.simd { signed-int-vector-rep [| src rep | src src rep ^^merge-vector-tail :> merged rep rep-component-type heap-size 8 * :> bits - merged bits rep ^widened-shr-vector-imm + merged bits rep widen-vector-rep ^^shr-vector-imm ] } { signed-int-vector-rep [| src rep | rep ^^zero-vector :> zero @@ -144,7 +152,7 @@ IN: compiler.cfg.intrinsics.simd ] } } v-vector-op ; -: ^(sum-2) ( src rep -- dst ) +: ^(sum-vector-2) ( src rep -- dst ) { [ dupd ^^horizontal-add-vector ] [| src rep | @@ -154,7 +162,7 @@ IN: compiler.cfg.intrinsics.simd ] } v-vector-op ; -: ^(sum-4) ( src rep -- dst ) +: ^(sum-vector-4) ( src rep -- dst ) { [ [ dupd ^^horizontal-add-vector ] @@ -165,14 +173,14 @@ IN: compiler.cfg.intrinsics.simd src src rep ^^merge-vector-tail :> tail head tail rep ^^add-vector :> src' - rep widen-rep :> rep' + rep widen-vector-rep :> rep' src' src' rep' ^^merge-vector-head :> head' src' src' rep' ^^merge-vector-tail :> tail' head' tail' rep ^^add-vector ] } v-vector-op ; -: ^(sum-8) ( src rep -- dst ) +: ^(sum-vector-8) ( src rep -- dst ) { [ [ dupd ^^horizontal-add-vector ] @@ -184,19 +192,19 @@ IN: compiler.cfg.intrinsics.simd src src rep ^^merge-vector-tail :> tail head tail rep ^^add-vector :> src' - rep widen-rep :> rep' + rep widen-vector-rep :> rep' src' src' rep' ^^merge-vector-head :> head' src' src' rep' ^^merge-vector-tail :> tail' head' tail' rep ^^add-vector :> src'' - rep' widen-rep :> rep'' + rep' widen-vector-rep :> rep'' src'' src'' rep'' ^^merge-vector-head :> head'' src'' src'' rep'' ^^merge-vector-tail :> tail'' head'' tail'' rep ^^add-vector ] } v-vector-op ; -: ^(sum-16) ( src rep -- dst ) +: ^(sum-vector-16) ( src rep -- dst ) { [ { @@ -211,17 +219,17 @@ IN: compiler.cfg.intrinsics.simd src src rep ^^merge-vector-tail :> tail head tail rep ^^add-vector :> src' - rep widen-rep :> rep' + rep widen-vector-rep :> rep' src' src' rep' ^^merge-vector-head :> head' src' src' rep' ^^merge-vector-tail :> tail' head' tail' rep ^^add-vector :> src'' - rep' widen-rep :> rep'' + rep' widen-vector-rep :> rep'' src'' src'' rep'' ^^merge-vector-head :> head'' src'' src'' rep'' ^^merge-vector-tail :> tail'' head'' tail'' rep ^^add-vector :> src''' - rep'' widen-rep :> rep''' + rep'' widen-vector-rep :> rep''' src''' src''' rep''' ^^merge-vector-head :> head''' src''' src''' rep''' ^^merge-vector-tail :> tail''' head''' tail''' rep ^^add-vector @@ -230,11 +238,11 @@ IN: compiler.cfg.intrinsics.simd : ^(sum-vector) ( src rep -- dst ) [ - rep-length { - { 2 [ ^(sum-2) ] } - { 4 [ ^(sum-4) ] } - { 8 [ ^(sum-8) ] } - { 16 [ ^(sum-16) ] } + dup rep-length { + { 2 [ ^(sum-vector-2) ] } + { 4 [ ^(sum-vector-4) ] } + { 8 [ ^(sum-vector-8) ] } + { 16 [ ^(sum-vector-16) ] } } case ] [ ^^vector>scalar ] bi ; @@ -244,11 +252,29 @@ IN: compiler.cfg.intrinsics.simd { int-vector-rep [| src rep | src rep ^unpack-vector-head :> head src rep ^unpack-vector-tail :> tail - rep widen-rep :> wide-rep + rep widen-vector-rep :> wide-rep head tail wide-rep ^^add-vector wide-rep ^(sum-vector) ] } } v-vector-op ; +: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ; + +: ^shuffle-vector-imm ( src1 src2 rep -- dst ) + { + [ ^^shuffle-vector-imm ] + [ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] bi ] + } vl-vector-op ; + +: ^broadcast-vector ( src n rep -- dst ) + [ rep-length swap ] keep + ^shuffle-vector-imm ; + +: ^with-vector ( src rep -- dst ) + [ ^^scalar>vector ] keep [ 0 ] dip ^broadcast-vector ; + +: ^select-vector ( src n rep -- dst ) + [ ^broadcast-vector ] keep ^^vector>scalar ; + ! intrinsic emitters : emit-simd-v+ ( node -- ) @@ -380,8 +406,7 @@ IN: compiler.cfg.intrinsics.simd : emit-simd-vnot ( node -- ) { - [ ^^not-vector ] - [ [ ^^fill-vector ] [ ^^xor-vector ] bi ] + [ ^not-vector ] } emit-v-vector-op ; : emit-simd-vlshift ( node -- ) @@ -408,12 +433,9 @@ IN: compiler.cfg.intrinsics.simd [ ^^horizontal-shr-vector-imm ] } [ integer? ] emit-vl-vector-op ; -: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ; - : emit-simd-vshuffle-elements ( node -- ) { - [ ^^shuffle-vector-imm ] - [ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] ] + [ ^shuffle-vector-imm ] } [ shuffle? ] emit-vl-vector-op ; : emit-simd-vshuffle-bytes ( node -- ) @@ -458,28 +480,28 @@ IN: compiler.cfg.intrinsics.simd : emit-simd-vany? ( node -- ) { - [ vcc-any ^test-vector ] + [ vcc-any ^^test-vector ] } emit-vv-vector-op ; : emit-simd-vall? ( node -- ) { - [ vcc-all ^test-vector ] + [ vcc-all ^^test-vector ] } emit-vv-vector-op ; : emit-simd-vnone? ( node -- ) { - [ vcc-none ^test-vector ] + [ vcc-none ^^test-vector ] } emit-vv-vector-op ; : emit-simd-v>float ( node -- ) { { float-vector-rep [ drop ] } { int-vector-rep [ ^^integer>float-vector ] } - } emit-vv-vector-op ; + } emit-v-vector-op ; : emit-simd-v>integer ( node -- ) { { float-vector-rep [ ^^float>integer-vector ] } { int-vector-rep [ dup ] } - } emit-vv-vector-op ; + } emit-v-vector-op ; : emit-simd-vpack-signed ( node -- ) { @@ -503,7 +525,7 @@ IN: compiler.cfg.intrinsics.simd : emit-simd-with ( node -- ) { - [ ^^with-vector ] + [ ^with-vector ] } emit-v-vector-op ; : emit-simd-gather-2 ( node -- ) @@ -518,7 +540,7 @@ IN: compiler.cfg.intrinsics.simd : emit-simd-select ( node -- ) { - [ ^^select-vector ] + [ ^select-vector ] } [ integer? ] emit-vl-vector-op ; : emit-alien-vector ( node -- ) @@ -540,62 +562,62 @@ IN: compiler.cfg.intrinsics.simd inline-alien ] with { [ %alien-vector-reps member? ] } if-literals-match ; -: enable-simd ( -- ) - { - { (simd-v+) [ emit-simd-v+ ] } - { (simd-v-) [ emit-simd-v- ] } - { (simd-vneg) [ emit-simd-vneg ] } - { (simd-v+-) [ emit-simd-v+- ] } - { (simd-vs+) [ emit-simd-vs+ ] } - { (simd-vs-) [ emit-simd-vs- ] } - { (simd-vs*) [ emit-simd-vs* ] } - { (simd-v*) [ emit-simd-v* ] } - { (simd-v/) [ emit-simd-v/ ] } - { (simd-vmin) [ emit-simd-vmin ] } - { (simd-vmax) [ emit-simd-vmax ] } - { (simd-v.) [ emit-simd-v. ] } - { (simd-vsqrt) [ emit-simd-vsqrt ] } - { (simd-sum) [ emit-simd-sum ] } - { (simd-vabs) [ emit-simd-vabs ] } - { (simd-vbitand) [ emit-simd-vand ] } - { (simd-vbitandn) [ emit-simd-vandn ] } - { (simd-vbitor) [ emit-simd-vor ] } - { (simd-vbitxor) [ emit-simd-vxor ] } - { (simd-vbitnot) [ emit-simd-vnot ] } - { (simd-vand) [ emit-simd-vand ] } - { (simd-vandn) [ emit-simd-vandn ] } - { (simd-vor) [ emit-simd-vor ] } - { (simd-vxor) [ emit-simd-vxor ] } - { (simd-vnot) [ emit-simd-vnot ] } - { (simd-vlshift) [ emit-simd-vlshift ] } - { (simd-vrshift) [ emit-simd-vrshift ] } - { (simd-hlshift) [ emit-simd-hlshift ] } - { (simd-hrshift) [ emit-simd-hrshift ] } - { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] } - { (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] } - { (simd-vmerge-head) [ emit-simd-vmerge-head ] } - { (simd-vmerge-tail) [ emit-simd-vmerge-tail ] } - { (simd-v<=) [ emit-simd-v<= ] } - { (simd-v<) [ emit-simd-v< ] } - { (simd-v=) [ emit-simd-v= ] } - { (simd-v>) [ emit-simd-v> ] } - { (simd-v>=) [ emit-simd-v>= ] } - { (simd-vunordered?) [ emit-simd-vunordered? ] } - { (simd-vany?) [ emit-simd-vany? ] } - { (simd-vall?) [ emit-simd-vall? ] } - { (simd-vnone?) [ emit-simd-vnone? ] } - { (simd-v>float) [ emit-simd-v>float ] } - { (simd-v>integer) [ emit-simd-v>integer ] } - { (simd-vpack-signed) [ emit-simd-vpack-signed ] } - { (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] } - { (simd-vunpack-head) [ emit-simd-vunpack-head ] } - { (simd-vunpack-tail) [ emit-simd-vunpack-tail ] } - { (simd-with) [ emit-simd-with ] } - { (simd-gather-2) [ emit-simd-gather-2 ] } - { (simd-gather-4) [ emit-simd-gather-4 ] } - { (simd-select) [ emit-simd-select ] } - { alien-vector [ emit-alien-vector ] } - { set-alien-vector [ emit-set-alien-vector ] } - } enable-intrinsics ; - -enable-simd +! : enable-simd ( -- ) +! { +! { (simd-v+) [ emit-simd-v+ ] } +! { (simd-v-) [ emit-simd-v- ] } +! { (simd-vneg) [ emit-simd-vneg ] } +! { (simd-v+-) [ emit-simd-v+- ] } +! { (simd-vs+) [ emit-simd-vs+ ] } +! { (simd-vs-) [ emit-simd-vs- ] } +! { (simd-vs*) [ emit-simd-vs* ] } +! { (simd-v*) [ emit-simd-v* ] } +! { (simd-v/) [ emit-simd-v/ ] } +! { (simd-vmin) [ emit-simd-vmin ] } +! { (simd-vmax) [ emit-simd-vmax ] } +! { (simd-v.) [ emit-simd-v. ] } +! { (simd-vsqrt) [ emit-simd-vsqrt ] } +! { (simd-sum) [ emit-simd-sum ] } +! { (simd-vabs) [ emit-simd-vabs ] } +! { (simd-vbitand) [ emit-simd-vand ] } +! { (simd-vbitandn) [ emit-simd-vandn ] } +! { (simd-vbitor) [ emit-simd-vor ] } +! { (simd-vbitxor) [ emit-simd-vxor ] } +! { (simd-vbitnot) [ emit-simd-vnot ] } +! { (simd-vand) [ emit-simd-vand ] } +! { (simd-vandn) [ emit-simd-vandn ] } +! { (simd-vor) [ emit-simd-vor ] } +! { (simd-vxor) [ emit-simd-vxor ] } +! { (simd-vnot) [ emit-simd-vnot ] } +! { (simd-vlshift) [ emit-simd-vlshift ] } +! { (simd-vrshift) [ emit-simd-vrshift ] } +! { (simd-hlshift) [ emit-simd-hlshift ] } +! { (simd-hrshift) [ emit-simd-hrshift ] } +! { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] } +! { (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] } +! { (simd-vmerge-head) [ emit-simd-vmerge-head ] } +! { (simd-vmerge-tail) [ emit-simd-vmerge-tail ] } +! { (simd-v<=) [ emit-simd-v<= ] } +! { (simd-v<) [ emit-simd-v< ] } +! { (simd-v=) [ emit-simd-v= ] } +! { (simd-v>) [ emit-simd-v> ] } +! { (simd-v>=) [ emit-simd-v>= ] } +! { (simd-vunordered?) [ emit-simd-vunordered? ] } +! { (simd-vany?) [ emit-simd-vany? ] } +! { (simd-vall?) [ emit-simd-vall? ] } +! { (simd-vnone?) [ emit-simd-vnone? ] } +! { (simd-v>float) [ emit-simd-v>float ] } +! { (simd-v>integer) [ emit-simd-v>integer ] } +! { (simd-vpack-signed) [ emit-simd-vpack-signed ] } +! { (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] } +! { (simd-vunpack-head) [ emit-simd-vunpack-head ] } +! { (simd-vunpack-tail) [ emit-simd-vunpack-tail ] } +! { (simd-with) [ emit-simd-with ] } +! { (simd-gather-2) [ emit-simd-gather-2 ] } +! { (simd-gather-4) [ emit-simd-gather-4 ] } +! { (simd-select) [ emit-simd-select ] } +! { alien-vector [ emit-alien-vector ] } +! { set-alien-vector [ emit-set-alien-vector ] } +! } enable-intrinsics ; +! +! enable-simd diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index 1eac88598b..b71a34e938 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors byte-arrays combinators fry sequences compiler.tree.propagation.info cpu.architecture kernel words math -math.intervals math.vectors.simd ; +math.intervals math.vectors.simd math.vectors.simd.private ; IN: compiler.tree.propagation.simd { @@ -51,7 +51,6 @@ IN: compiler.tree.propagation.simd (simd-gather-2) (simd-gather-4) alien-vector - alien-vector-aligned } [ { byte-array } "default-output-classes" set-word-prop ] each : scalar-output-class ( rep -- class ) diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor index 2fbe823965..d600b0bc24 100644 --- a/basis/math/vectors/simd/simd-docs.factor +++ b/basis/math/vectors/simd/simd-docs.factor @@ -1,6 +1,6 @@ USING: classes.tuple.private cpu.architecture help.markup help.syntax kernel.private math math.vectors -math.vectors.simd.intrinsics sequences ; +sequences ; IN: math.vectors.simd ARTICLE: "math.vectors.simd.intro" "Introduction to SIMD support" @@ -23,7 +23,7 @@ $nl $nl "SSE2 introduces double-precision SIMD (" { $snippet "double-2" } " and " { $snippet "double-4" } ") and integer SIMD (all types). Integer SIMD is missing a few features, in particular the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "." $nl -"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "." +"SSE3 introduces horizontal adds (summing all components of a single vector register), which are useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "." $nl "SSSE3 introduces " { $link vabs } " for " { $snippet "char-16" } ", " { $snippet "short-8" } " and " { $snippet "int-4" } "." $nl @@ -36,26 +36,7 @@ $nl ARTICLE: "math.vectors.simd.types" "SIMD vector types" "Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type and " { $snippet "count" } " is a vector dimension." $nl -"To use a SIMD vector type, a parsing word is used to generate the relevant code and bring it into the vocabulary search path; this is the same idea as with " { $link "specialized-arrays" } ":" -{ $subsections - POSTPONE: SIMD: - POSTPONE: SIMDS: -} -"The following scalar types are supported:" -{ $code - "char" - "uchar" - "short" - "ushort" - "int" - "uint" - "longlong" - "ulonglong" - "float" - "double" -} - -"The following vector types are generated from the above scalar types:" +"The following vector types are available:" { $code "char-16" "uchar-16" @@ -218,16 +199,4 @@ ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)" "math.vectors.simd.intrinsics" } ; -HELP: SIMD: -{ $syntax "SIMD: type" } -{ $values { "type" "a scalar C type" } } -{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of " { $snippet "type" } " into the vocabulary search path. The allowed scalar types, and the auto-generated type/length vector combinations that result, are listed in " { $link "math.vectors.simd.types" } ". Generated words are documented in " { $link "math.vectors.simd.words" } "." } ; - -HELP: SIMDS: -{ $syntax "SIMDS: type type type ... ;" } -{ $values { "type" "a scalar C type" } } -{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of each " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ; - -{ POSTPONE: SIMD: POSTPONE: SIMDS: } related-words - ABOUT: "math.vectors.simd" diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index 4953abb4ea..10305c673a 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -1,5 +1,9 @@ -! (c)2009 Slava Pestov, Joe Groff bsd license -USING: math.vectors math.vectors.private ; +USING: accessors alien.c-types byte-arrays classes combinators +cpu.architecture fry functors generalizations generic +generic.parser kernel lexer literals macros math math.functions +math.vectors math.vectors.private namespaces parser +prettyprint.custom quotations sequences sequences.private vocabs +vocabs.loader ; QUALIFIED-WITH: alien.c-types c IN: math.vectors.simd @@ -8,8 +12,11 @@ DEFER: simd-with DEFER: simd-boa DEFER: simd-cast -> @ ] keep new-underlying ; inline +PRIVATE> +>> + + + : alien-vector ( c-ptr n rep -- value ) \ alien-vector bad-simd-call ; : set-alien-vector ( c-ptr n rep -- value ) \ set-alien-vector bad-simd-call ; + ;FUNCTOR SYNTAX: SIMD-128: - scan scan-word define-simd-128 ; + scan define-simd-128 ; -PRIVATE> >> +PRIVATE> + +>> SIMD-128: char-16 SIMD-128: uchar-16 @@ -176,16 +194,14 @@ SIMD-128: ulonglong-2 SIMD-128: float-4 SIMD-128: double-2 -ERROR: bad-simd-call word ; -ERROR: bad-simd-length got expected ; - : assert-positive ( x -- y ) ; ! SIMD vectors as sequences +M: simd-128 hashcode* underlying>> hashcode* ; inline M: simd-128 clone [ clone ] change-underlying ; inline M: simd-128 length simd-rep rep-length ; inline -M: simd-128 nth-unsafe tuck simd-rep (simd-select) ; inline +M: simd-128 nth-unsafe [ nip ] 2keep simd-rep (simd-select) ; inline M: simd-128 c:byte-length drop 16 ; inline M: simd-128 new-sequence @@ -193,16 +209,13 @@ M: simd-128 new-sequence [ nip [ 16 (byte-array) ] make-underlying ] [ length bad-simd-length ] if ; inline -M: simd-128 equal? - [ v= vall? ] [ 2drop f ] if-vectors-match ; inline - M: simd-128 >pprint-sequence ; M: simd-128 pprint* pprint-object ; INSTANCE: simd-128 sequence ! Unboxers for SIMD operations - +<< v-op ( a quot: ( (a) rep -- (c) ) -- c ) [ simd-unbox ] dip 2curry make-underlying ; inline +: simd-vn->v-op ( a n quot: ( (a) n rep -- (c) ) -- c ) + [ simd-unbox ] [ swap ] [ 3curry ] tri* make-underlying ; inline + : simd-v->n-op ( a quot: ( (a) rep -- n ) -- n ) [ [ underlying>> ] [ simd-rep ] bi ] dip call ; inline @@ -241,7 +257,7 @@ INSTANCE: simd-128 sequence [ '[ _ ((simd-vv->n-op)) ] ] dip if-both-vectors-match ; inline : (simd-method-fallback) ( accum word -- accum ) - [ current-method get \ (call-next-method) [ ] 2sequence suffix! ] + [ current-method get literalize \ (call-next-method) [ ] 2sequence suffix! ] dip suffix! ; SYNTAX: simd-vv->v-op @@ -252,6 +268,10 @@ SYNTAX: simd-vv->n-op \ (simd-vv->n-op) (simd-method-fallback) ; PRIVATE> +>> + +M: simd-128 equal? + [ v= vall? ] [ 2drop f ] if-both-vectors-match ; inline ! SIMD constructors @@ -283,26 +303,26 @@ M: simd-128 vmin [ (simd-vmin) ] simd-vv->v-op ; inl M: simd-128 vmax [ (simd-vmax) ] simd-vv->v-op ; inline M: simd-128 v. [ (simd-v.) ] simd-vv->n-op ; inline M: simd-128 vsqrt [ (simd-vsqrt) ] simd-v->v-op ; inline -M: simd-128 sum [ (simd-sum) ] simd-vv->n-op ; inline +M: simd-128 sum [ (simd-sum) ] simd-v->n-op ; inline M: simd-128 vabs [ (simd-vabs) ] simd-v->v-op ; inline M: simd-128 vbitand [ (simd-vbitand) ] simd-vv->v-op ; inline M: simd-128 vbitandn [ (simd-vbitandn) ] simd-vv->v-op ; inline M: simd-128 vbitor [ (simd-vbitor) ] simd-vv->v-op ; inline M: simd-128 vbitxor [ (simd-vbitxor) ] simd-vv->v-op ; inline -M: simd-128 vbitnot [ (simd-vbitnot) ] simd-vv->v-op ; inline +M: simd-128 vbitnot [ (simd-vbitnot) ] simd-v->v-op ; inline M: simd-128 vand [ (simd-vand) ] simd-vv->v-op ; inline M: simd-128 vandn [ (simd-vandn) ] simd-vv->v-op ; inline M: simd-128 vor [ (simd-vor) ] simd-vv->v-op ; inline M: simd-128 vxor [ (simd-vxor) ] simd-vv->v-op ; inline -M: simd-128 vnot [ (simd-vnot) ] simd-vv->v-op ; inline +M: simd-128 vnot [ (simd-vnot) ] simd-v->v-op ; inline M: simd-128 vlshift [ (simd-vlshift) ] simd-vn->v-op ; inline M: simd-128 vrshift [ (simd-vrshift) ] simd-vn->v-op ; inline M: simd-128 hlshift [ (simd-hlshift) ] simd-vn->v-op ; inline M: simd-128 hrshift [ (simd-hrshift) ] simd-vn->v-op ; inline M: simd-128 vshuffle-elements [ (simd-vshuffle-elements) ] simd-vn->v-op ; inline M: simd-128 vshuffle-bytes [ (simd-vshuffle-bytes) ] simd-vv->v-op ; inline -M: simd-128 vmerge-head [ (simd-vmerge-head) ] simd-vv->v-op ; inline -M: simd-128 vmerge-tail [ (simd-vmerge-tail) ] simd-vv->v-op ; inline +M: simd-128 (vmerge-head) [ (simd-vmerge-head) ] simd-vv->v-op ; inline +M: simd-128 (vmerge-tail) [ (simd-vmerge-tail) ] simd-vv->v-op ; inline M: simd-128 v<= [ (simd-v<=) ] simd-vv->v-op ; inline M: simd-128 v< [ (simd-v<) ] simd-vv->v-op ; inline M: simd-128 v= [ (simd-v=) ] simd-vv->v-op ; inline @@ -326,7 +346,6 @@ M: simd-128 v*n over simd-with v* ; inline M: simd-128 v/n over simd-with v/ ; inline M: simd-128 norm-sq dup v. assert-positive ; inline M: simd-128 norm norm-sq sqrt ; inline -M: simd-128 normalize dup norm v/n ; inline M: simd-128 distance v- norm ; inline ! misc