From 9184254f2cb29add8f4c68a6b3833cf9f8267efc Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sun, 1 Nov 2009 22:10:28 -0600 Subject: [PATCH 01/46] move define-inline-method from classes.struct.private to generic.parser --- basis/classes/struct/struct.factor | 3 --- core/generic/parser/parser.factor | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/basis/classes/struct/struct.factor b/basis/classes/struct/struct.factor index d5e5fdc6c3..f86f3c9d97 100755 --- a/basis/classes/struct/struct.factor +++ b/basis/classes/struct/struct.factor @@ -189,9 +189,6 @@ M: struct-c-type c-struct? drop t ; \ cleave [ ] 2sequence \ output>array [ ] 2sequence ; -: define-inline-method ( class generic quot -- ) - [ create-method-in ] dip [ define ] [ drop make-inline ] 2bi ; - : (define-struct-slot-values-method) ( class -- ) [ \ struct-slot-values ] [ struct-slot-values-quot ] bi define-inline-method ; diff --git a/core/generic/parser/parser.factor b/core/generic/parser/parser.factor index ce048c41da..11fb2b5b42 100644 --- a/core/generic/parser/parser.factor +++ b/core/generic/parser/parser.factor @@ -13,6 +13,9 @@ ERROR: not-in-a-method-error ; : create-method-in ( class generic -- method ) create-method dup set-word dup save-location ; +: define-inline-method ( class generic quot -- ) + [ create-method-in ] dip [ define ] [ drop make-inline ] 2bi ; + : CREATE-METHOD ( -- method ) scan-word bootstrap-word scan-word create-method-in ; From e0ba0c5539809c661625d29b2492e2c3c0feeb59 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Mon, 2 Nov 2009 14:21:19 -0600 Subject: [PATCH 02/46] genericize vector ops --- basis/math/vectors/vectors.factor | 265 +++++++++++++++++------------- core/sequences/sequences.factor | 3 +- 2 files changed, 157 insertions(+), 111 deletions(-) diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index 63564f064d..f3dfcda18a 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -6,29 +6,47 @@ byte-arrays accessors locals ; QUALIFIED-WITH: alien.c-types c IN: math.vectors -MIXIN: simd-128 -MIXIN: simd-256 +GENERIC: vneg ( u -- v ) +M: object vneg [ neg ] map ; -GENERIC: element-type ( obj -- c-type ) -M: object element-type drop f ; inline +GENERIC# v+n 1 ( u n -- v ) +M: object v+n [ + ] curry map ; -: vneg ( u -- v ) [ neg ] map ; +GENERIC: n+v ( n v -- w ) +M: object n+v [ + ] with map ; -: v+n ( u n -- v ) [ + ] curry map ; -: n+v ( n u -- v ) [ + ] with map ; -: v-n ( u n -- v ) [ - ] curry map ; -: n-v ( n u -- v ) [ - ] with map ; +GENERIC# v-n 1 ( u n -- w ) +M: object v-n [ - ] curry map ; -: v*n ( u n -- v ) [ * ] curry map ; -: n*v ( n u -- v ) [ * ] with map ; -: v/n ( u n -- v ) [ / ] curry map ; -: n/v ( n u -- v ) [ / ] with map ; +GENERIC: n-v ( n v -- w ) +M: object n-v [ - ] with map ; -: v+ ( u v -- w ) [ + ] 2map ; -: v- ( u v -- w ) [ - ] 2map ; -: [v-] ( u v -- w ) [ [-] ] 2map ; -: v* ( u v -- w ) [ * ] 2map ; -: v/ ( u v -- w ) [ / ] 2map ; +GENERIC# v*n 1 ( u n -- v ) +M: object v*n [ * ] curry map ; + +GENERIC: n*v ( n v -- w ) +M: object n*v [ * ] with map ; + +GENERIC# v/n 1 ( u n -- v ) +M: object v/n [ / ] curry map ; + +GENERIC: n/v ( n v -- w ) +M: object n/v [ / ] with map ; + +GENERIC: v+ ( u v -- w ) +M: object v+ [ + ] 2map ; + +GENERIC: v- ( u v -- w ) +M: object v- [ - ] 2map ; + +GENERIC: [v-] ( u v -- w ) +M: object [v-] [ [-] ] 2map ; + +GENERIC: v* ( u v -- w ) +M: object v* [ * ] 2map ; + +GENERIC: v/ ( u v -- w ) +M: object v/ [ / ] 2map ; -: vmax ( u v -- w ) [ [ float-max ] [ max ] if-both-floats ] 2map ; -: vmin ( u v -- w ) [ [ float-min ] [ min ] if-both-floats ] 2map ; +GENERIC: vmax ( u v -- w ) +M: object vmax [ [ float-max ] [ max ] if-both-floats ] 2map ; -: v+- ( u v -- w ) +GENERIC: vmin ( u v -- w ) +M: object vmin [ [ float-min ] [ min ] if-both-floats ] 2map ; + +GENERIC: v+- ( u v -- w ) +M: object v+- [ t ] 2dip [ [ not ] 2dip pick [ + ] [ - ] if ] 2map nip ; - +GENERIC: vs* ( u v -- w ) +M: object vs* [ * ] 2map ; -: vs+ ( u v -- w ) [ + ] 2saturate-map ; -: vs- ( u v -- w ) [ - ] 2saturate-map ; -: vs* ( u v -- w ) [ * ] 2saturate-map ; +GENERIC: vabs ( u -- v ) +M: object vabs [ abs ] map ; -: vabs ( u -- v ) [ abs ] map ; -: vsqrt ( u -- v ) [ >float fsqrt ] map ; +GENERIC: vsqrt ( u -- v ) +M: object vsqrt [ >float fsqrt ] map ; bits ] bi@ ] dip call bits>double ] } - { c:float [ [ [ float>bits ] bi@ ] dip call bits>float ] } - [ drop call ] - } case ; inline - -: fp-bitwise-unary ( x seq quot -- z ) - swap element-type { - { c:double [ [ double>bits ] dip call bits>double ] } - { c:float [ [ float>bits ] dip call bits>float ] } - [ drop call ] - } case ; inline - -: element>bool ( x seq -- ? ) - element-type [ [ f ] when-zero ] when ; inline - : bitandn ( x y -- z ) [ bitnot ] dip bitand ; inline -GENERIC: new-underlying ( underlying seq -- seq' ) - -: change-underlying ( seq quot -- seq' ) - '[ underlying>> @ ] keep new-underlying ; inline - PRIVATE> -: vbitand ( u v -- w ) over '[ _ [ bitand ] fp-bitwise-op ] 2map ; -: vbitandn ( u v -- w ) over '[ _ [ bitandn ] fp-bitwise-op ] 2map ; -: vbitor ( u v -- w ) over '[ _ [ bitor ] fp-bitwise-op ] 2map ; -: vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ; -: vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ; +GENERIC: vbitand ( u v -- w ) +M: object vbitand [ bitand ] 2map ; +GENERIC: vbitandn ( u v -- w ) +M: object vbitandn [ bitandn ] 2map ; +GENERIC: vbitor ( u v -- w ) +M: object vbitor [ bitor ] 2map ; +GENERIC: vbitxor ( u v -- w ) +M: object vbitxor [ bitxor ] 2map ; +GENERIC: vbitnot ( u -- w ) +M: object vbitnot [ bitnot ] 2map ; -:: vbroadcast ( u n -- v ) u length n u nth u like ; +GENERIC# vbroadcast 1 ( u n -- v ) +M:: object vbroadcast ( u n -- v ) u length n u nth u like ; -: vshuffle-elements ( u perm -- v ) +GENERIC# vshuffle-elements 1 ( u perm -- v ) +M: object vshuffle-elements over length 0 pad-tail swap [ '[ _ nth ] ] keep map-as ; -: vshuffle-bytes ( u perm -- v ) +GENERIC# vshuffle-bytes 1 ( u perm -- v ) +M: object vshuffle-bytes underlying>> [ swap [ '[ 15 bitand _ nth ] ] keep map-as ] curry change-underlying ; @@ -107,43 +116,72 @@ PRIVATE> GENERIC: vshuffle ( u perm -- v ) M: array vshuffle ( u perm -- v ) vshuffle-elements ; inline -M: simd-128 vshuffle ( u perm -- v ) - vshuffle-bytes ; inline -: vlshift ( u n -- w ) '[ _ shift ] map ; -: vrshift ( u n -- w ) neg '[ _ shift ] map ; +GENERIC# vlshift 1 ( u n -- w ) +M: object vlshift '[ _ shift ] map ; +GENERIC# vrshift 1 ( u n -- w ) +M: object vrshift neg '[ _ shift ] map ; -: hlshift ( u n -- w ) '[ _ prepend 16 head ] change-underlying ; -: hrshift ( u n -- w ) '[ _ append 16 tail* ] change-underlying ; +GENERIC# hlshift 1 ( u n -- w ) +M: object hlshift '[ _ prepend 16 head ] change-underlying ; +GENERIC# hrshift 1 ( u n -- w ) +M: object hrshift '[ _ append 16 tail* ] change-underlying ; -: (vmerge-head) ( u v -- h ) - over length 2 /i '[ _ head-slice ] bi@ [ zip ] keep concat-as ; -: (vmerge-tail) ( u v -- t ) - over length 2 /i '[ _ tail-slice ] bi@ [ zip ] keep concat-as ; +GENERIC: (vmerge-head) ( u v -- h ) +M: object (vmerge-head) over length 2 /i '[ _ head-slice ] bi@ [ zip ] keep concat-as ; +GENERIC: (vmerge-tail) ( u v -- t ) +M: object (vmerge-tail) over length 2 /i '[ _ tail-slice ] bi@ [ zip ] keep concat-as ; -: (vmerge) ( u v -- h t ) +GENERIC: (vmerge) ( u v -- h t ) [ (vmerge-head) ] [ (vmerge-tail) ] 2bi ; inline -: vmerge ( u v -- w ) [ zip ] keep concat-as ; +GENERIC: vmerge ( u v -- w ) +M: object vmerge [ zip ] keep concat-as ; -: vand ( u v -- w ) over '[ [ _ element>bool ] bi@ and ] 2map ; -: vandn ( u v -- w ) over '[ [ _ element>bool ] bi@ [ not ] dip and ] 2map ; -: vor ( u v -- w ) over '[ [ _ element>bool ] bi@ or ] 2map ; -: vxor ( u v -- w ) over '[ [ _ element>bool ] bi@ xor ] 2map ; -: vnot ( u -- w ) dup '[ _ element>bool not ] map ; +GENERIC: vand ( u v -- w ) +M: object vand [ and ] 2map ; -: vall? ( v -- ? ) dup '[ _ element>bool ] all? ; -: vany? ( v -- ? ) dup '[ _ element>bool ] any? ; -: vnone? ( v -- ? ) dup '[ _ element>bool not ] all? ; +GENERIC: vandn ( u v -- w ) +M: object vandn [ [ not ] dip and ] 2map ; -: v< ( u v -- w ) [ < ] 2map ; -: v<= ( u v -- w ) [ <= ] 2map ; -: v>= ( u v -- w ) [ >= ] 2map ; -: v> ( u v -- w ) [ > ] 2map ; -: vunordered? ( u v -- w ) [ unordered? ] 2map ; -: v= ( u v -- w ) [ = ] 2map ; +GENERIC: vor ( u v -- w ) +M: object vor [ or ] 2map ; -: v? ( mask true false -- result ) +GENERIC: vxor ( u v -- w ) +M: object vxor [ xor ] 2map ; + +GENERIC: vnot ( u -- w ) +M: object vnot [ not ] map ; + +GENERIC: vall? ( v -- ? ) +M: object vall? [ ] all? ; + +GENERIC: vany? ( v -- ? ) +M: object vany? [ ] any? ; + +GENERIC: vnone? ( v -- ? ) +M: object vnone? [ not ] all? ; + +GENERIC: v< ( u v -- w ) +M: object v< [ < ] 2map ; + +GENERIC: v<= ( u v -- w ) +M: object v<= [ <= ] 2map ; + +GENERIC: v>= ( u v -- w ) +M: object v>= [ >= ] 2map ; + +GENERIC: v> ( u v -- w ) +M: object v> [ > ] 2map ; + +GENERIC: vunordered? ( u v -- w ) +M: object vunordered? [ unordered? ] 2map ; + +GENERIC: v= ( u v -- w ) +M: object v= [ = ] 2map ; + +GENERIC: v? ( mask true false -- result ) +M: object v? [ vand ] [ vandn ] bi-curry* bi vor ; inline :: vif ( mask true-quot false-quot -- result ) @@ -157,15 +195,22 @@ M: simd-128 vshuffle ( u perm -- v ) : vceiling ( u -- v ) [ ceiling ] map ; : vtruncate ( u -- v ) [ truncate ] map ; -: vsupremum ( seq -- vmax ) [ ] [ vmax ] map-reduce ; -: vinfimum ( seq -- vmin ) [ ] [ vmin ] map-reduce ; +: vsupremum ( seq -- vmax ) [ ] [ vmax ] map-reduce ; inline +: vinfimum ( seq -- vmin ) [ ] [ vmin ] map-reduce ; inline -: v. ( u v -- x ) [ conjugate * ] [ + ] 2map-reduce ; -: norm-sq ( v -- x ) [ absq ] [ + ] map-reduce ; -: norm ( v -- x ) norm-sq sqrt ; -: normalize ( u -- v ) dup norm v/n ; +GENERIC: v. ( u v -- x ) +M: object v. [ conjugate * ] [ + ] 2map-reduce ; -: distance ( u v -- x ) [ - absq ] [ + ] 2map-reduce sqrt ; +GENERIC: norm-sq ( v -- x ) +M: object norm-sq [ absq ] [ + ] map-reduce ; + +GENERIC: norm ( v -- x ) +M: object norm norm-sq sqrt ; + +: normalize ( u -- v ) dup norm v/n ; inline + +GENERIC: distance ( u v -- x ) +M: object distance [ - absq ] [ + ] 2map-reduce sqrt ; : set-axis ( u v axis -- w ) [ [ zero? 2over ? ] dip swap nth ] map-index 2nip ; @@ -198,27 +243,27 @@ PRIVATE> : v~ ( a b epsilon -- ? ) [ ~ ] curry 2all? ; inline -HINTS: vneg { array } ; -HINTS: norm-sq { array } ; -HINTS: norm { array } ; -HINTS: normalize { array } ; -HINTS: distance { array array } ; +HINTS: M\ object vneg { array } ; +HINTS: M\ object norm-sq { array } ; +HINTS: M\ object norm { array } ; +HINTS: M\ object distance { array array } ; -HINTS: n*v { object array } ; -HINTS: v*n { array object } ; -HINTS: n/v { array } ; -HINTS: v/n { array object } ; +HINTS: M\ object n*v { object array } ; +HINTS: M\ object v*n { array object } ; +HINTS: M\ object n/v { object array } ; +HINTS: M\ object v/n { array object } ; -HINTS: v+ { array array } ; -HINTS: v- { array array } ; -HINTS: v* { array array } ; -HINTS: v/ { array array } ; -HINTS: vmax { array array } ; -HINTS: vmin { array array } ; -HINTS: v. { array array } ; +HINTS: M\ object v+ { array array } ; +HINTS: M\ object v- { array array } ; +HINTS: M\ object v* { array array } ; +HINTS: M\ object v/ { array array } ; +HINTS: M\ object vmax { array array } ; +HINTS: M\ object vmin { array array } ; +HINTS: M\ object v. { array array } ; HINTS: vlerp { array array array } ; HINTS: vnlerp { array array object } ; HINTS: bilerp { object object object object array } ; HINTS: trilerp { object object object object object object object object array } ; + diff --git a/core/sequences/sequences.factor b/core/sequences/sequences.factor index 1bcedb1d15..e8c24a3b96 100755 --- a/core/sequences/sequences.factor +++ b/core/sequences/sequences.factor @@ -929,7 +929,8 @@ PRIVATE> : trim ( seq quot -- newseq ) [ trim-slice ] [ drop ] 2bi like ; inline -: sum ( seq -- n ) 0 [ + ] binary-reduce ; +GENERIC: sum ( seq -- n ) +M: object sum 0 [ + ] binary-reduce ; inline : product ( seq -- n ) 1 [ * ] binary-reduce ; From 9cf3ab3da1afb5699919d3f5511df6bca17fbf19 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Mon, 2 Nov 2009 14:24:29 -0600 Subject: [PATCH 03/46] redo math.vectors.simd to use generics for specialization --- basis/math/vectors/simd/simd.factor | 353 +++++++++++++++++++++++++--- 1 file changed, 325 insertions(+), 28 deletions(-) diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index 388fed5f31..139060333c 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -1,42 +1,339 @@ -! Copyright (C) 2009 Slava Pestov. -! See http://factorcode.org/license.txt for BSD license. -USING: alien.c-types combinators fry kernel parser math math.parser -math.vectors.simd.functor sequences splitting vocabs.generated -vocabs.loader vocabs.parser words accessors vocabs compiler.units -definitions ; +! (c)2009 Slava Pestov, Joe Groff bsd license +USING: math.vectors math.vectors.private ; QUALIFIED-WITH: alien.c-types c IN: math.vectors.simd -ERROR: bad-base-type type ; +DEFER: vconvert +DEFER: simd-with +DEFER: simd-boa +DEFER: simd-cast > "math.vectors.simd.instances." prepend ; +! Primitive SIMD constructors -: parse-base-type ( c-type -- c-type ) - dup { c:char c:uchar c:short c:ushort c:int c:uint c:longlong c:ulonglong c:float c:double } member-eq? - [ bad-base-type ] unless ; +GENERIC: new-underlying ( underlying seq -- seq' ) -: forget-instances ( -- ) - [ - "math.vectors.simd.instances" child-vocabs - [ forget-vocab ] each - ] with-compilation-unit ; +: make-underlying ( seq quot -- seq' ) + dip new-underlying ; inline +: change-underlying ( seq quot -- seq' ) + '[ underlying>> @ ] keep new-underlying ; inline + +! SIMD intrinsics + +: (simd-v+) ( a b rep -- c ) \ v+ bad-simd-call ; +: (simd-v-) ( a b rep -- c ) \ v- bad-simd-call ; +: (simd-vneg) ( a rep -- c ) \ vneg bad-simd-call ; +: (simd-v+-) ( a b rep -- c ) \ v+- bad-simd-call ; +: (simd-vs+) ( a b rep -- c ) \ vs+ bad-simd-call ; +: (simd-vs-) ( a b rep -- c ) \ vs- bad-simd-call ; +: (simd-vs*) ( a b rep -- c ) \ vs* bad-simd-call ; +: (simd-v*) ( a b rep -- c ) \ v* bad-simd-call ; +: (simd-v/) ( a b rep -- c ) \ v/ bad-simd-call ; +: (simd-vmin) ( a b rep -- c ) \ vmin bad-simd-call ; +: (simd-vmax) ( a b rep -- c ) \ vmax bad-simd-call ; +: (simd-v.) ( a b rep -- n ) \ v. bad-simd-call ; +: (simd-vsqrt) ( a rep -- c ) \ vsqrt bad-simd-call ; +: (simd-sum) ( a b rep -- n ) \ sum bad-simd-call ; +: (simd-vabs) ( a rep -- c ) \ vabs bad-simd-call ; +: (simd-vbitand) ( a b rep -- c ) \ vbitand bad-simd-call ; +: (simd-vbitandn) ( a b rep -- c ) \ vbitandn bad-simd-call ; +: (simd-vbitor) ( a b rep -- c ) \ vbitor bad-simd-call ; +: (simd-vbitxor) ( a b rep -- c ) \ vbitxor bad-simd-call ; +: (simd-vbitnot) ( a b rep -- c ) \ vbitnot bad-simd-call ; +: (simd-vand) ( a b rep -- c ) \ vand bad-simd-call ; +: (simd-vandn) ( a b rep -- c ) \ vandn bad-simd-call ; +: (simd-vor) ( a b rep -- c ) \ vor bad-simd-call ; +: (simd-vxor) ( a b rep -- c ) \ vxor bad-simd-call ; +: (simd-vnot) ( a b rep -- c ) \ vnot bad-simd-call ; +: (simd-vlshift) ( a n rep -- c ) \ vlshift bad-simd-call ; +: (simd-vrshift) ( a n rep -- c ) \ vrshift bad-simd-call ; +: (simd-hlshift) ( a n rep -- c ) \ hlshift bad-simd-call ; +: (simd-hrshift) ( a n rep -- c ) \ hrshift bad-simd-call ; +: (simd-vshuffle-elements) ( a n rep -- c ) \ vshuffle-elements bad-simd-call ; +: (simd-vshuffle-bytes) ( a b rep -- c ) \ vshuffle-bytes bad-simd-call ; +: (simd-vmerge-head) ( a b rep -- c ) \ (vmerge-head) bad-simd-call ; +: (simd-vmerge-tail) ( a b rep -- c ) \ (vmerge-tail) bad-simd-call ; +: (simd-v<=) ( a b rep -- c ) \ v<= bad-simd-call ; +: (simd-v<) ( a b rep -- c ) \ v< bad-simd-call ; +: (simd-v=) ( a b rep -- c ) \ v= bad-simd-call ; +: (simd-v>) ( a b rep -- c ) \ v> bad-simd-call ; +: (simd-v>=) ( a b rep -- c ) \ v>= bad-simd-call ; +: (simd-vunordered?) ( a b rep -- c ) \ vunordered? bad-simd-call ; +: (simd-vany?) ( a rep -- ? ) \ vany? bad-simd-call ; +: (simd-vall?) ( a rep -- ? ) \ vall? bad-simd-call ; +: (simd-vnone?) ( a rep -- ? ) \ vnone? bad-simd-call ; +: (simd-v>float) ( a rep -- c ) \ vconvert bad-simd-call ; +: (simd-v>integer) ( a rep -- c ) \ vconvert bad-simd-call ; +: (simd-vpack-signed) ( a rep -- c ) \ vconvert bad-simd-call ; +: (simd-vpack-unsigned) ( a rep -- c ) \ vconvert bad-simd-call ; +: (simd-vunpack-head) ( a rep -- c ) \ vconvert bad-simd-call ; +: (simd-vunpack-tail) ( a rep -- c ) \ vconvert bad-simd-call ; +: (simd-with) ( n rep -- v ) \ simd-with bad-simd-call ; +: (simd-gather-2) ( m n rep -- v ) \ simd-boa bad-simd-call ; +: (simd-gather-4) ( m n o p rep -- v ) \ simd-boa bad-simd-call ; +: (simd-select) ( a n rep -- n ) \ nth bad-simd-call ; + +: alien-vector ( c-ptr n rep -- value ) \ alien-vector bad-simd-call ; +: set-alien-vector ( c-ptr n rep -- value ) \ set-alien-vector bad-simd-call ; + +: alien-vector-aligned ( c-ptr n rep -- value ) \ alien-vector-aligned bad-simd-call ; +: set-alien-vector-aligned ( c-ptr n rep -- value ) \ set-alien-vector-aligned bad-simd-call ; + +! Helper for boolean vector literals + +: vector-true-value ( class -- value ) + { c:float c:double } member? [ -1 bits>double ] [ -1 ] if ; foldable + +: vector-false-value ( type -- value ) + { c:float c:double } member? [ 0.0 ] [ 0 ] if ; foldable + +: boolean>element ( bool/elt type -- elt ) + swap { + { t [ vector-true-value ] } + { f [ vector-false-value ] } + [ nip ] + } case ; inline PRIVATE> -: define-simd-vocab ( type -- vocab ) - parse-base-type - [ simd-vocab ] keep '[ - _ - [ define-simd-128 ] - [ define-simd-256 ] bi - ] generate-vocab ; +! SIMD base type -SYNTAX: SIMD: - scan-word define-simd-vocab use-vocab ; +TUPLE: simd-128 + { underlying byte-array read-only initial: $[ 16 ] } ; -SYNTAX: SIMDS: - \ ; parse-until [ define-simd-vocab use-vocab ] each ; +GENERIC: simd-element-type ( obj -- c-type ) +GENERIC: simd-rep ( simd -- rep ) + +: rep-length ( rep -- n ) + 16 swap rep-component-type heap-size /i ; foldable + +<< A DEFINES >${T} +A-boa DEFINES ${T}-boa +A-with DEFINES ${T}-with +A-cast DEFINES ${T}-cast +A{ DEFINES ${T}{ + +ELT [ A-rep rep-component-type ] +N [ A-rep rep-length ] + +SET-NTH [ ELT dup c:c-setter c:array-accessor ] + +WHERE + +TUPLE: A < simd-128 ; + +M: A new-underlying drop \ A boa ; inline +M: A simd-rep drop A-rep ; inline +M: A simd-element-type drop ELT ; inline +M: A length drop N ; inline + +M: A set-nth-unsafe + [ ELT boolean>element ] 2dip + underlying>> SET-NTH call ; inline + +: >A ( seq -- simd ) \ A new clone-like ; inline + +M: A like drop dup \ A instance? [ >A ] unless ; inline + +: A-with ( n -- v ) \ A new simd-with ; inline +: A-cast ( v -- v' ) \ A new simd-cast ; inline +: A-boa ( ...n -- v ) \ A new simd-boa ; inline + +M: A pprint-delims drop \ A{ \ } ; +SYNTAX: A{ \ } [ >A ] parse-literal ; + +c: + byte-array >>class + A >>boxed-class + [ A-rep alien-vector \ A boa ] >>getter + [ [ underlying>> ] 2dip A-rep set-alien-vector ] >>setter + 16 >>size + 16 >>align + A-rep >>rep +\ A c:typedef + +;FUNCTOR + +SYNTAX: SIMD-128: + scan scan-word define-simd-128 ; + +PRIVATE> >> + +SIMD-128: char-16 +SIMD-128: uchar-16 +SIMD-128: short-8 +SIMD-128: ushort-8 +SIMD-128: int-4 +SIMD-128: uint-4 +SIMD-128: longlong-2 +SIMD-128: ulonglong-2 +SIMD-128: float-4 +SIMD-128: double-2 + +ERROR: bad-simd-call word ; +ERROR: bad-simd-length got expected ; + +: assert-positive ( x -- y ) ; + +! SIMD vectors as sequences + +M: simd-128 clone [ clone ] change-underlying ; inline +M: simd-128 length simd-rep rep-length ; inline +M: simd-128 nth-unsafe tuck simd-rep (simd-select) ; inline +M: simd-128 c:byte-length drop 16 ; inline + +M: simd-128 new-sequence + 2dup length = + [ nip [ 16 (byte-array) ] make-underlying ] + [ length bad-simd-length ] if ; inline + +M: simd-128 equal? + [ v= vall? ] [ 2drop f ] if-vectors-match ; inline + +M: simd-128 >pprint-sequence ; +M: simd-128 pprint* pprint-object ; + +INSTANCE: simd-128 sequence + +! Unboxers for SIMD operations + +> ] [ simd-rep ] tri ; inline + +: simd-v->v-op ( a quot: ( (a) rep -- (c) ) -- c ) + [ simd-unbox ] dip 2curry make-underlying ; inline + +: simd-v->n-op ( a quot: ( (a) rep -- n ) -- n ) + [ [ underlying>> ] [ simd-rep ] bi ] dip call ; inline + +: ((simd-vv->v-op)) ( a b quot: ( (a) (b) rep -- (c) ) -- c ) + [ simd-unbox ] [ underlying>> swap ] [ 3curry ] tri* make-underlying ; inline + +: ((simd-vv->n-op)) ( a b quot: ( (a) (b) rep -- n ) -- n ) + [ [ underlying>> ] [ simd-rep ] bi ] + [ underlying>> swap ] [ ] tri* call ; inline + +: (simd-vv->v-op) ( a b quot: ( (a) (b) rep -- (c) ) fallback-quot -- c ) + [ '[ _ ((simd-vv->v-op)) ] ] dip if-both-vectors-match ; inline + +: (simd-vv'->v-op) ( a b quot: ( (a) (b) rep -- (c) ) fallback-quot -- c ) + [ '[ _ ((simd-vv->v-op)) ] ] dip if-both-vectors ; inline + +: (simd-vv->n-op) ( a b quot: ( (a) (b) rep -- n ) fallback-quot -- n ) + [ '[ _ ((simd-vv->n-op)) ] ] dip if-both-vectors-match ; inline + +: (simd-method-fallback) ( accum word -- accum ) + [ current-method get \ (call-next-method) [ ] 2sequence suffix! ] + dip suffix! ; + +SYNTAX: simd-vv->v-op + \ (simd-vv->v-op) (simd-method-fallback) ; +SYNTAX: simd-vv'->v-op + \ (simd-vv'->v-op) (simd-method-fallback) ; +SYNTAX: simd-vv->n-op + \ (simd-vv->n-op) (simd-method-fallback) ; + +PRIVATE> + +! SIMD constructors + +: simd-with ( n seq -- v ) + [ (simd-with) ] simd-construct-op ; inline + +MACRO: simd-boa ( seq -- ) + dup length { + { 2 [ '[ _ dup [ (simd-gather-2) ] simd-construct-op ] ] } + { 4 [ '[ _ dup [ (simd-gather-4) ] simd-construct-op ] ] } + [ '[ _ _ nsequence ] ] + } case ; + +: simd-cast ( v seq -- v' ) + [ underlying>> ] dip new-underlying ; inline + +! SIMD primitive operations + +M: simd-128 v+ [ (simd-v+) ] simd-vv->v-op ; inline +M: simd-128 v- [ (simd-v-) ] simd-vv->v-op ; inline +M: simd-128 vneg [ (simd-vneg) ] simd-v->v-op ; inline +M: simd-128 v+- [ (simd-v+-) ] simd-vv->v-op ; inline +M: simd-128 vs+ [ (simd-vs+) ] simd-vv->v-op ; inline +M: simd-128 vs- [ (simd-vs-) ] simd-vv->v-op ; inline +M: simd-128 vs* [ (simd-vs*) ] simd-vv->v-op ; inline +M: simd-128 v* [ (simd-v*) ] simd-vv->v-op ; inline +M: simd-128 v/ [ (simd-v/) ] simd-vv->v-op ; inline +M: simd-128 vmin [ (simd-vmin) ] simd-vv->v-op ; inline +M: simd-128 vmax [ (simd-vmax) ] simd-vv->v-op ; inline +M: simd-128 v. [ (simd-v.) ] simd-vv->n-op ; inline +M: simd-128 vsqrt [ (simd-vsqrt) ] simd-v->v-op ; inline +M: simd-128 sum [ (simd-sum) ] simd-vv->n-op ; inline +M: simd-128 vabs [ (simd-vabs) ] simd-v->v-op ; inline +M: simd-128 vbitand [ (simd-vbitand) ] simd-vv->v-op ; inline +M: simd-128 vbitandn [ (simd-vbitandn) ] simd-vv->v-op ; inline +M: simd-128 vbitor [ (simd-vbitor) ] simd-vv->v-op ; inline +M: simd-128 vbitxor [ (simd-vbitxor) ] simd-vv->v-op ; inline +M: simd-128 vbitnot [ (simd-vbitnot) ] simd-vv->v-op ; inline +M: simd-128 vand [ (simd-vand) ] simd-vv->v-op ; inline +M: simd-128 vandn [ (simd-vandn) ] simd-vv->v-op ; inline +M: simd-128 vor [ (simd-vor) ] simd-vv->v-op ; inline +M: simd-128 vxor [ (simd-vxor) ] simd-vv->v-op ; inline +M: simd-128 vnot [ (simd-vnot) ] simd-vv->v-op ; inline +M: simd-128 vlshift [ (simd-vlshift) ] simd-vn->v-op ; inline +M: simd-128 vrshift [ (simd-vrshift) ] simd-vn->v-op ; inline +M: simd-128 hlshift [ (simd-hlshift) ] simd-vn->v-op ; inline +M: simd-128 hrshift [ (simd-hrshift) ] simd-vn->v-op ; inline +M: simd-128 vshuffle-elements [ (simd-vshuffle-elements) ] simd-vn->v-op ; inline +M: simd-128 vshuffle-bytes [ (simd-vshuffle-bytes) ] simd-vv->v-op ; inline +M: simd-128 vmerge-head [ (simd-vmerge-head) ] simd-vv->v-op ; inline +M: simd-128 vmerge-tail [ (simd-vmerge-tail) ] simd-vv->v-op ; inline +M: simd-128 v<= [ (simd-v<=) ] simd-vv->v-op ; inline +M: simd-128 v< [ (simd-v<) ] simd-vv->v-op ; inline +M: simd-128 v= [ (simd-v=) ] simd-vv->v-op ; inline +M: simd-128 v> [ (simd-v>) ] simd-vv->v-op ; inline +M: simd-128 v>= [ (simd-v>=) ] simd-vv->v-op ; inline +M: simd-128 vunordered? [ (simd-vunordered?) ] simd-vv->v-op ; inline +M: simd-128 vany? [ (simd-vany?) ] simd-v->n-op ; inline +M: simd-128 vall? [ (simd-vall?) ] simd-v->n-op ; inline +M: simd-128 vnone? [ (simd-vnone?) ] simd-v->n-op ; inline + +! SIMD high-level specializations + +M: simd-128 vbroadcast [ swap nth ] keep simd-with ; inline +M: simd-128 n+v [ simd-with ] keep v+ ; inline +M: simd-128 n-v [ simd-with ] keep v- ; inline +M: simd-128 n*v [ simd-with ] keep v* ; inline +M: simd-128 n/v [ simd-with ] keep v/ ; inline +M: simd-128 v+n over simd-with v+ +M: simd-128 v-n over simd-with v- ; inline +M: simd-128 v*n over simd-with v* ; inline +M: simd-128 v/n over simd-with v/ ; inline +M: simd-128 norm-sq dup v. assert-positive ; inline +M: simd-128 norm norm-sq sqrt ; inline +M: simd-128 normalize dup norm v/n ; inline +M: simd-128 distance v- norm ; inline + +! misc + +M: simd-128 vshuffle ( u perm -- v ) + vshuffle-bytes ; inline From 73d2a756440f41a0913781afd15673937b739fae Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Mon, 2 Nov 2009 15:00:39 -0600 Subject: [PATCH 04/46] remove math.vectors .specialization, .simd.functor, .simd.intrinsics --- basis/math/vectors/simd/functor/authors.txt | 1 - .../math/vectors/simd/functor/functor.factor | 522 ------------------ .../math/vectors/simd/intrinsics/authors.txt | 1 - .../simd/intrinsics/intrinsics-tests.factor | 18 - .../vectors/simd/intrinsics/intrinsics.factor | 207 ------- .../specialization-tests.factor | 28 - .../specialization/specialization.factor | 207 ------- 7 files changed, 984 deletions(-) delete mode 100644 basis/math/vectors/simd/functor/authors.txt delete mode 100644 basis/math/vectors/simd/functor/functor.factor delete mode 100644 basis/math/vectors/simd/intrinsics/authors.txt delete mode 100644 basis/math/vectors/simd/intrinsics/intrinsics-tests.factor delete mode 100644 basis/math/vectors/simd/intrinsics/intrinsics.factor delete mode 100644 basis/math/vectors/specialization/specialization-tests.factor delete mode 100644 basis/math/vectors/specialization/specialization.factor diff --git a/basis/math/vectors/simd/functor/authors.txt b/basis/math/vectors/simd/functor/authors.txt deleted file mode 100644 index d4f5d6b3ae..0000000000 --- a/basis/math/vectors/simd/functor/authors.txt +++ /dev/null @@ -1 +0,0 @@ -Slava Pestov \ No newline at end of file diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor deleted file mode 100644 index 480981d165..0000000000 --- a/basis/math/vectors/simd/functor/functor.factor +++ /dev/null @@ -1,522 +0,0 @@ -! Copyright (C) 2009 Slava Pestov. -! See http://factorcode.org/license.txt for BSD license. -USING: accessors assocs byte-arrays classes classes.algebra effects fry -functors generalizations kernel literals locals math math.functions -math.vectors math.vectors.private math.vectors.simd.intrinsics -math.vectors.conversion.backend -math.vectors.specialization parser prettyprint.custom sequences -sequences.private strings words definitions macros cpu.architecture -namespaces arrays quotations combinators combinators.short-circuit sets -layouts ; -QUALIFIED-WITH: alien.c-types c -QUALIFIED: math.private -IN: math.vectors.simd.functor - -ERROR: bad-length got expected ; - -: vector-true-value ( class -- value ) - { - { [ dup integer class<= ] [ drop -1 ] } - { [ dup float class<= ] [ drop -1 bits>double ] } - } cond ; foldable - -: vector-false-value ( class -- value ) - { - { [ dup integer class<= ] [ drop 0 ] } - { [ dup float class<= ] [ drop 0.0 ] } - } cond ; foldable - -: boolean>element ( bool/elt class -- elt ) - swap { - { t [ vector-true-value ] } - { f [ vector-false-value ] } - [ nip ] - } case ; inline - -MACRO: simd-boa ( rep class -- simd-array ) - [ rep-components ] [ new ] bi* '[ _ _ nsequence ] ; - -: can-be-unboxed? ( type -- ? ) - { - { c:float [ \ math.private:float+ "intrinsic" word-prop ] } - { c:double [ \ math.private:float+ "intrinsic" word-prop ] } - [ c:heap-size cell < ] - } case ; - -: simd-boa-fast? ( rep -- ? ) - [ dup rep-gather-word supported-simd-op? ] - [ rep-component-type can-be-unboxed? ] - bi and ; - -:: define-boa-custom-inlining ( word rep class -- ) - word [ - drop - rep simd-boa-fast? [ - [ rep (simd-boa) class boa ] - ] [ word def>> ] if - ] "custom-inlining" set-word-prop ; - -: simd-with ( rep class x -- simd-array ) - [ rep-components ] [ new ] [ '[ _ ] ] tri* swap replicate-as ; inline - -: simd-with/nth-fast? ( rep -- ? ) - [ \ (simd-vshuffle-elements) supported-simd-op? ] - [ rep-component-type can-be-unboxed? ] - bi and ; - -:: define-with-custom-inlining ( word rep class -- ) - word [ - drop - rep simd-with/nth-fast? [ - [ rep rep-coerce rep (simd-with) class boa ] - ] [ word def>> ] if - ] "custom-inlining" set-word-prop ; - -: simd-nth-fast ( rep -- quot ) - [ rep-components ] keep - '[ swap _ '[ _ _ (simd-select) ] 2array ] map-index - '[ swap >fixnum _ case ] ; - -: simd-nth-slow ( rep -- quot ) - rep-component-type dup c:c-type-getter-boxer c:array-accessor ; - -MACRO: simd-nth ( rep -- x ) - dup simd-with/nth-fast? [ simd-nth-fast ] [ simd-nth-slow ] if ; - -: boa-effect ( rep n -- effect ) - [ rep-components ] dip * - [ CHAR: a + 1string ] map - { "simd-vector" } ; - -: supported-simd-ops ( assoc rep -- assoc' ) - [ simd-ops get ] dip - '[ nip _ swap supported-simd-op? ] assoc-filter - '[ drop _ key? ] assoc-filter ; - -ERROR: bad-schema op schema ; - -:: op-wrapper ( op specials schemas -- wrapper ) - op { - [ specials at ] - [ word-schema schemas at ] - [ dup word-schema bad-schema ] - } 1|| ; - -: low-level-ops ( simd-ops specials schemas -- alist ) - '[ 1quotation over _ _ op-wrapper [ ] 2sequence ] assoc-map ; - -:: high-level-ops ( ctor elt-class -- assoc ) - ! Some SIMD operations are defined in terms of others. - { - { vbroadcast [ swap nth ctor execute ] } - { n+v [ [ ctor execute ] dip v+ ] } - { v+n [ ctor execute v+ ] } - { n-v [ [ ctor execute ] dip v- ] } - { v-n [ ctor execute v- ] } - { n*v [ [ ctor execute ] dip v* ] } - { v*n [ ctor execute v* ] } - { n/v [ [ ctor execute ] dip v/ ] } - { v/n [ ctor execute v/ ] } - { norm-sq [ dup v. assert-positive ] } - { norm [ norm-sq sqrt ] } - { normalize [ dup norm v/n ] } - } - ! To compute dot product and distance with integer vectors, we - ! have to do things less efficiently, with integer overflow checks, - ! in the general case. - elt-class float = [ { distance [ v- norm ] } suffix ] when ; - -TUPLE: simd class elt-class ops special-wrappers schema-wrappers ctor rep ; - -: define-simd ( simd -- ) - dup rep>> rep-component-type c:c-type-boxed-class >>elt-class - { - [ class>> ] - [ elt-class>> ] - [ [ ops>> ] [ special-wrappers>> ] [ schema-wrappers>> ] tri low-level-ops ] - [ rep>> supported-simd-ops ] - [ [ ctor>> ] [ elt-class>> ] bi high-level-ops assoc-union ] - } cleave - specialize-vector-words ; - -:: define-simd-128-type ( class rep -- ) - c: - byte-array >>class - class >>boxed-class - [ rep alien-vector class boa ] >>getter - [ [ underlying>> ] 2dip rep set-alien-vector ] >>setter - 16 >>size - 8 >>align - rep >>rep - class c:typedef ; - -: (define-simd-128) ( simd -- ) - simd-ops get >>ops - [ define-simd ] - [ [ class>> ] [ rep>> ] bi define-simd-128-type ] bi ; - -FUNCTOR: define-simd-128 ( T -- ) - -N [ 16 T c:heap-size /i ] - -A DEFINES-CLASS ${T}-${N} -A-boa DEFINES ${A}-boa -A-with DEFINES ${A}-with -A-cast DEFINES ${A}-cast ->A DEFINES >${A} -A{ DEFINES ${A}{ - -SET-NTH [ T dup c:c-setter c:array-accessor ] - -A-rep [ A name>> "-rep" append "cpu.architecture" lookup ] -A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op -A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op -A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op -A-v->v-op DEFINES-PRIVATE ${A}-v->v-op -A-v->n-op DEFINES-PRIVATE ${A}-v->n-op -A-v-conversion-op DEFINES-PRIVATE ${A}-v-conversion-op -A-vv-conversion-op DEFINES-PRIVATE ${A}-vv-conversion-op - -A-element-class [ A-rep rep-component-type c:c-type-boxed-class ] - -WHERE - -TUPLE: A -{ underlying byte-array read-only initial: $[ 16 ] } ; - -INSTANCE: A simd-128 - -M: A clone underlying>> clone \ A boa ; inline - -M: A length drop N ; inline - -M: A equal? - over \ A instance? [ v= vall? ] [ 2drop f ] if ; - -M: A nth-unsafe underlying>> A-rep simd-nth ; inline - -M: A set-nth-unsafe - [ A-element-class boolean>element ] 2dip - underlying>> SET-NTH call ; inline - -: >A ( seq -- simd-array ) \ A new clone-like ; - -M: A like drop dup \ A instance? [ >A ] unless ; inline - -M: A new-underlying drop \ A boa ; inline - -M: A new-sequence - drop dup N = - [ drop 16 \ A boa ] - [ N bad-length ] - if ; inline - -M: A c:byte-length underlying>> length ; inline - -M: A element-type drop A-rep rep-component-type ; - -M: A pprint-delims drop \ A{ \ } ; - -M: A >pprint-sequence ; - -M: A pprint* pprint-object ; - -SYNTAX: A{ \ } [ >A ] parse-literal ; - -: A-with ( x -- simd-array ) [ A-rep A ] dip simd-with ; - -\ A-with \ A-rep \ A define-with-custom-inlining - -\ A-boa [ \ A-rep \ A simd-boa ] \ A-rep 1 boa-effect define-declared - -\ A-rep rep-gather-word [ - \ A-boa \ A-rep \ A define-boa-custom-inlining -] when - -: A-cast ( simd-array -- simd-array' ) - underlying>> \ A boa ; inline - -INSTANCE: A sequence - -v-op ( v1 v2 quot -- v3 ) - [ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline - -: A-vn->v-op ( v1 v2 quot -- v3 ) - [ [ underlying>> ] dip A-rep ] dip call \ A boa ; inline - -: A-vv->n-op ( v1 v2 quot -- n ) - [ [ underlying>> ] bi@ A-rep ] dip call ; inline - -: A-v->v-op ( v1 quot -- v2 ) - [ underlying>> A-rep ] dip call \ A boa ; inline - -: A-v->n-op ( v quot -- n ) - [ underlying>> A-rep ] dip call ; inline - -: A-v-conversion-op ( v1 to-type quot -- v2 ) - swap [ underlying>> A-rep ] [ call ] [ '[ _ boa ] call( u -- v ) ] tri* ; inline - -: A-vv-conversion-op ( v1 v2 to-type quot -- v2 ) - swap { - [ underlying>> ] - [ underlying>> A-rep ] - [ call ] - [ '[ _ boa ] call( u -- v ) ] - } spread ; inline - -simd new - \ A >>class - \ A-with >>ctor - \ A-rep >>rep - { - { (v>float) A-v-conversion-op } - { (v>integer) A-v-conversion-op } - { (vpack-signed) A-vv-conversion-op } - { (vpack-unsigned) A-vv-conversion-op } - { (vunpack-head) A-v-conversion-op } - { (vunpack-tail) A-v-conversion-op } - } >>special-wrappers - { - { { +vector+ +vector+ -> +vector+ } A-vv->v-op } - { { +vector+ +any-vector+ -> +vector+ } A-vv->v-op } - { { +vector+ +scalar+ -> +vector+ } A-vn->v-op } - { { +vector+ +literal+ -> +vector+ } A-vn->v-op } - { { +vector+ +vector+ -> +scalar+ } A-vv->n-op } - { { +vector+ +vector+ -> +boolean+ } A-vv->n-op } - { { +vector+ -> +vector+ } A-v->v-op } - { { +vector+ -> +scalar+ } A-v->n-op } - { { +vector+ -> +boolean+ } A-v->n-op } - { { +vector+ -> +nonnegative+ } A-v->n-op } - } >>schema-wrappers -(define-simd-128) - -PRIVATE> - -;FUNCTOR - -! Synthesize 256-bit vectors from a pair of 128-bit vectors -SLOT: underlying1 -SLOT: underlying2 - -:: define-simd-256-type ( class rep -- ) - c: - class >>class - class >>boxed-class - [ - [ rep alien-vector ] - [ 16 + >fixnum rep alien-vector ] 2bi - class boa - ] >>getter - [ - [ [ underlying1>> ] 2dip rep set-alien-vector ] - [ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ] - 3bi - ] >>setter - 32 >>size - 8 >>align - rep >>rep - class c:typedef ; - -: (define-simd-256) ( simd -- ) - simd-ops get { vshuffle-elements vshuffle-bytes hlshift hrshift } unique assoc-diff >>ops - [ define-simd ] - [ [ class>> ] [ rep>> ] bi define-simd-256-type ] bi ; - -FUNCTOR: define-simd-256 ( T -- ) - -N [ 32 T c:heap-size /i ] - -N/2 [ N 2 /i ] -A/2 IS ${T}-${N/2} -A/2-boa IS ${A/2}-boa -A/2-with IS ${A/2}-with - -A DEFINES-CLASS ${T}-${N} -A-boa DEFINES ${A}-boa -A-with DEFINES ${A}-with -A-cast DEFINES ${A}-cast ->A DEFINES >${A} -A{ DEFINES ${A}{ - -A-deref DEFINES-PRIVATE ${A}-deref - -A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ] -A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op -A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op -A-v->v-op DEFINES-PRIVATE ${A}-v->v-op -A-v.-op DEFINES-PRIVATE ${A}-v.-op -(A-v->n-op) DEFINES-PRIVATE (${A}-v->v-op) -A-sum-op DEFINES-PRIVATE ${A}-sum-op -A-vany-op DEFINES-PRIVATE ${A}-vany-op -A-vall-op DEFINES-PRIVATE ${A}-vall-op -A-vmerge-head-op DEFINES-PRIVATE ${A}-vmerge-head-op -A-vmerge-tail-op DEFINES-PRIVATE ${A}-vmerge-tail-op -A-v-conversion-op DEFINES-PRIVATE ${A}-v-conversion-op -A-vpack-op DEFINES-PRIVATE ${A}-vpack-op -A-vunpack-head-op DEFINES-PRIVATE ${A}-vunpack-head-op -A-vunpack-tail-op DEFINES-PRIVATE ${A}-vunpack-tail-op - -WHERE - -SLOT: underlying1 -SLOT: underlying2 - -TUPLE: A -{ underlying1 byte-array initial: $[ 16 ] read-only } -{ underlying2 byte-array initial: $[ 16 ] read-only } ; - -INSTANCE: A simd-256 - -M: A clone - [ underlying1>> clone ] [ underlying2>> clone ] bi - \ A boa ; inline - -M: A length drop N ; inline - -M: A equal? - over \ A instance? [ v= vall? ] [ 2drop f ] if ; - -: A-deref ( n seq -- n' seq' ) - over N/2 < [ underlying1>> ] [ [ N/2 - ] dip underlying2>> ] if \ A/2 boa ; inline - -M: A nth-unsafe A-deref nth-unsafe ; inline - -M: A set-nth-unsafe A-deref set-nth-unsafe ; inline - -: >A ( seq -- simd-array ) \ A new clone-like ; - -M: A like drop dup \ A instance? [ >A ] unless ; inline - -M: A new-sequence - drop dup N = - [ drop 16 16 \ A boa ] - [ N bad-length ] - if ; inline - -M: A c:byte-length drop 32 ; inline - -M: A element-type drop A-rep rep-component-type ; - -SYNTAX: A{ \ } [ >A ] parse-literal ; - -M: A pprint-delims drop \ A{ \ } ; - -M: A >pprint-sequence ; - -M: A pprint* pprint-object ; - -: A-with ( x -- simd-array ) - [ A/2-with ] [ A/2-with ] bi [ underlying>> ] bi@ - \ A boa ; inline - -: A-boa ( ... -- simd-array ) - [ A/2-boa ] N/2 ndip A/2-boa [ underlying>> ] bi@ - \ A boa ; inline - -\ A-rep 2 boa-effect \ A-boa set-stack-effect - -: A-cast ( simd-array -- simd-array' ) - [ underlying1>> ] [ underlying2>> ] bi \ A boa ; inline - -INSTANCE: A sequence - -: A-vv->v-op ( v1 v2 quot -- v3 ) - [ [ [ underlying1>> ] bi@ A-rep ] dip call ] - [ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi - \ A boa ; inline - -: A-vn->v-op ( v1 v2 quot -- v3 ) - [ [ [ underlying1>> ] dip A-rep ] dip call ] - [ [ [ underlying2>> ] dip A-rep ] dip call ] 3bi - \ A boa ; inline - -: A-v->v-op ( v1 combine-quot -- v2 ) - [ [ underlying1>> A-rep ] dip call ] - [ [ underlying2>> A-rep ] dip call ] 2bi - \ A boa ; inline - -: A-v.-op ( v1 v2 quot -- n ) - [ [ [ underlying1>> ] bi@ A-rep ] dip call ] - [ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi - + ; inline - -: (A-v->n-op) ( v1 quot reduce-quot -- n ) - '[ [ underlying1>> ] [ underlying2>> ] bi A-rep @ A-rep ] dip call ; inline - -: A-sum-op ( v1 quot -- n ) - [ (simd-v+) ] (A-v->n-op) ; inline - -: A-vany-op ( v1 quot -- n ) - [ (simd-vbitor) ] (A-v->n-op) ; inline -: A-vall-op ( v1 quot -- n ) - [ (simd-vbitand) ] (A-v->n-op) ; inline - -: A-vmerge-head-op ( v1 v2 quot -- v ) - drop - [ underlying1>> ] bi@ - [ A-rep (simd-(vmerge-head)) ] - [ A-rep (simd-(vmerge-tail)) ] 2bi - \ A boa ; inline - -: A-vmerge-tail-op ( v1 v2 quot -- v ) - drop - [ underlying2>> ] bi@ - [ A-rep (simd-(vmerge-head)) ] - [ A-rep (simd-(vmerge-tail)) ] 2bi - \ A boa ; inline - -: A-v-conversion-op ( v1 to-type quot -- v ) - swap [ - [ [ underlying1>> A-rep ] dip call ] - [ [ underlying2>> A-rep ] dip call ] 2bi - ] dip '[ _ boa ] call( u1 u2 -- v ) ; inline - -: A-vpack-op ( v1 v2 to-type quot -- v ) - swap [ - '[ [ underlying1>> ] [ underlying2>> ] bi A-rep @ ] bi* - ] dip '[ _ boa ] call( u1 u2 -- v ) ; inline - -: A-vunpack-head-op ( v1 to-type quot -- v ) - '[ - underlying1>> - [ A-rep @ ] - [ A-rep (simd-(vunpack-tail)) ] bi - ] dip '[ _ boa ] call( u1 u2 -- v ) ; inline - -: A-vunpack-tail-op ( v1 to-type quot -- v ) - '[ - underlying2>> - [ A-rep (simd-(vunpack-head)) ] - [ A-rep @ ] bi - ] dip '[ _ boa ] call( u1 u2 -- v ) ; inline - -simd new - \ A >>class - \ A-with >>ctor - \ A-rep >>rep - { - { v. A-v.-op } - { sum A-sum-op } - { vnone? A-vany-op } - { vany? A-vany-op } - { vall? A-vall-op } - { (vmerge-head) A-vmerge-head-op } - { (vmerge-tail) A-vmerge-tail-op } - { (v>integer) A-v-conversion-op } - { (v>float) A-v-conversion-op } - { (vpack-signed) A-vpack-op } - { (vpack-unsigned) A-vpack-op } - { (vunpack-head) A-vunpack-head-op } - { (vunpack-tail) A-vunpack-tail-op } - } >>special-wrappers - { - { { +vector+ +vector+ -> +vector+ } A-vv->v-op } - { { +vector+ +scalar+ -> +vector+ } A-vn->v-op } - { { +vector+ +literal+ -> +vector+ } A-vn->v-op } - { { +vector+ -> +vector+ } A-v->v-op } - } >>schema-wrappers -(define-simd-256) - -;FUNCTOR diff --git a/basis/math/vectors/simd/intrinsics/authors.txt b/basis/math/vectors/simd/intrinsics/authors.txt deleted file mode 100644 index d4f5d6b3ae..0000000000 --- a/basis/math/vectors/simd/intrinsics/authors.txt +++ /dev/null @@ -1 +0,0 @@ -Slava Pestov \ No newline at end of file diff --git a/basis/math/vectors/simd/intrinsics/intrinsics-tests.factor b/basis/math/vectors/simd/intrinsics/intrinsics-tests.factor deleted file mode 100644 index 84eee935a0..0000000000 --- a/basis/math/vectors/simd/intrinsics/intrinsics-tests.factor +++ /dev/null @@ -1,18 +0,0 @@ -IN: math.vectors.simd.intrinsics.tests -USING: math.vectors.simd.intrinsics cpu.architecture tools.test ; - -[ 16 ] [ uchar-16-rep rep-components ] unit-test -[ 16 ] [ char-16-rep rep-components ] unit-test -[ 8 ] [ ushort-8-rep rep-components ] unit-test -[ 8 ] [ short-8-rep rep-components ] unit-test -[ 4 ] [ uint-4-rep rep-components ] unit-test -[ 4 ] [ int-4-rep rep-components ] unit-test -[ 4 ] [ float-4-rep rep-components ] unit-test -[ 2 ] [ double-2-rep rep-components ] unit-test - -{ 4 1 } [ uint-4-rep (simd-boa) ] must-infer-as -{ 4 1 } [ int-4-rep (simd-boa) ] must-infer-as -{ 4 1 } [ float-4-rep (simd-boa) ] must-infer-as -{ 2 1 } [ double-2-rep (simd-boa) ] must-infer-as - - diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor deleted file mode 100644 index 003b42fe83..0000000000 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ /dev/null @@ -1,207 +0,0 @@ -! Copyright (C) 2009 Slava Pestov. -! See http://factorcode.org/license.txt for BSD license. -USING: alien alien.c-types alien.data assocs combinators -cpu.architecture compiler.cfg.comparisons fry generalizations -kernel libc macros math -math.vectors.conversion.backend -sequences sets effects accessors namespaces -lexer parser vocabs.parser words arrays math.vectors ; -IN: math.vectors.simd.intrinsics - -ERROR: bad-simd-call word ; - -<< - -: simd-effect ( word -- effect ) - stack-effect [ in>> "rep" suffix ] [ out>> ] bi ; -: simd-conversion-effect ( word -- effect ) - stack-effect [ in>> but-last "rep" suffix ] [ out>> ] bi ; - -SYMBOL: simd-ops - -V{ } clone simd-ops set-global - -: (SIMD-OP:) ( accum quot -- accum ) - [ - scan-word dup name>> "(simd-" ")" surround create-in - [ nip dup '[ _ bad-simd-call ] define ] - ] dip - '[ _ dip set-stack-effect ] - [ 2array simd-ops get push ] - 2tri ; inline - -SYNTAX: SIMD-OP: - [ simd-effect ] (SIMD-OP:) ; - -SYNTAX: SIMD-CONVERSION-OP: - [ simd-conversion-effect ] (SIMD-OP:) ; - ->> - -SIMD-OP: v+ -SIMD-OP: v- -SIMD-OP: vneg -SIMD-OP: v+- -SIMD-OP: vs+ -SIMD-OP: vs- -SIMD-OP: vs* -SIMD-OP: v* -SIMD-OP: v/ -SIMD-OP: vmin -SIMD-OP: vmax -SIMD-OP: v. -SIMD-OP: vsqrt -SIMD-OP: sum -SIMD-OP: vabs -SIMD-OP: vbitand -SIMD-OP: vbitandn -SIMD-OP: vbitor -SIMD-OP: vbitxor -SIMD-OP: vbitnot -SIMD-OP: vand -SIMD-OP: vandn -SIMD-OP: vor -SIMD-OP: vxor -SIMD-OP: vnot -SIMD-OP: vlshift -SIMD-OP: vrshift -SIMD-OP: hlshift -SIMD-OP: hrshift -SIMD-OP: vshuffle-elements -SIMD-OP: vshuffle-bytes -SIMD-OP: (vmerge-head) -SIMD-OP: (vmerge-tail) -SIMD-OP: v<= -SIMD-OP: v< -SIMD-OP: v= -SIMD-OP: v> -SIMD-OP: v>= -SIMD-OP: vunordered? -SIMD-OP: vany? -SIMD-OP: vall? -SIMD-OP: vnone? - -SIMD-CONVERSION-OP: (v>float) -SIMD-CONVERSION-OP: (v>integer) -SIMD-CONVERSION-OP: (vpack-signed) -SIMD-CONVERSION-OP: (vpack-unsigned) -SIMD-CONVERSION-OP: (vunpack-head) -SIMD-CONVERSION-OP: (vunpack-tail) - -: (simd-with) ( x rep -- v ) bad-simd-call ; -: (simd-gather-2) ( a b rep -- v ) bad-simd-call ; -: (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ; -: (simd-select) ( v n rep -- x ) bad-simd-call ; - -: assert-positive ( x -- y ) ; - -: alien-vector ( c-ptr n rep -- value ) - ! Inefficient version for when intrinsics are missing - [ swap ] dip rep-size memory>byte-array ; - -: set-alien-vector ( value c-ptr n rep -- ) - ! Inefficient version for when intrinsics are missing - [ swap swap ] dip rep-size memcpy ; - -<< - -: rep-components ( rep -- n ) - 16 swap rep-component-type heap-size /i ; foldable - -: rep-coercer ( rep -- quot ) - { - { [ dup int-vector-rep? ] [ [ >fixnum ] ] } - { [ dup float-vector-rep? ] [ [ >float ] ] } - } cond nip ; foldable - -: rep-coerce ( value rep -- value' ) - rep-coercer call( value -- value' ) ; inline - -CONSTANT: rep-gather-words - { - { 2 (simd-gather-2) } - { 4 (simd-gather-4) } - } - -: rep-gather-word ( rep -- word ) - rep-components rep-gather-words at ; - ->> - -MACRO: (simd-boa) ( rep -- quot ) - { - [ rep-coercer ] - [ rep-components ] - [ ] - [ rep-gather-word ] - } cleave - '[ _ _ napply _ _ execute ] ; - -GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? ) - -: (%unpack-reps) ( -- reps ) - %merge-vector-reps [ int-vector-rep? ] filter - %unpack-vector-head-reps union ; - -: (%abs-reps) ( -- reps ) - cc> %compare-vector-reps [ int-vector-rep? ] filter - %xor-vector-reps [ float-vector-rep? ] filter - union - [ { } ] [ { uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep } union ] if-empty ; - -: (%shuffle-imm-reps) ( -- reps ) - %shuffle-vector-reps %shuffle-vector-imm-reps union ; - -M: vector-rep supported-simd-op? - { - { \ (simd-v+) [ %add-vector-reps ] } - { \ (simd-vs+) [ %saturated-add-vector-reps ] } - { \ (simd-v+-) [ %add-sub-vector-reps ] } - { \ (simd-v-) [ %sub-vector-reps ] } - { \ (simd-vs-) [ %saturated-sub-vector-reps ] } - { \ (simd-vneg) [ %sub-vector-reps ] } - { \ (simd-v*) [ %mul-vector-reps ] } - { \ (simd-vs*) [ %saturated-mul-vector-reps ] } - { \ (simd-v/) [ %div-vector-reps ] } - { \ (simd-vmin) [ %min-vector-reps cc< %compare-vector-reps union ] } - { \ (simd-vmax) [ %max-vector-reps cc> %compare-vector-reps union ] } - { \ (simd-v.) [ %dot-vector-reps ] } - { \ (simd-vsqrt) [ %sqrt-vector-reps ] } - { \ (simd-sum) [ %horizontal-add-vector-reps ] } - { \ (simd-vabs) [ (%abs-reps) ] } - { \ (simd-vbitand) [ %and-vector-reps ] } - { \ (simd-vbitandn) [ %andn-vector-reps ] } - { \ (simd-vbitor) [ %or-vector-reps ] } - { \ (simd-vbitxor) [ %xor-vector-reps ] } - { \ (simd-vbitnot) [ %xor-vector-reps ] } - { \ (simd-vand) [ %and-vector-reps ] } - { \ (simd-vandn) [ %andn-vector-reps ] } - { \ (simd-vor) [ %or-vector-reps ] } - { \ (simd-vxor) [ %xor-vector-reps ] } - { \ (simd-vnot) [ %xor-vector-reps ] } - { \ (simd-vlshift) [ %shl-vector-reps ] } - { \ (simd-vrshift) [ %shr-vector-reps ] } - { \ (simd-hlshift) [ %horizontal-shl-vector-imm-reps ] } - { \ (simd-hrshift) [ %horizontal-shr-vector-imm-reps ] } - { \ (simd-vshuffle-elements) [ (%shuffle-imm-reps) ] } - { \ (simd-vshuffle-bytes) [ %shuffle-vector-reps ] } - { \ (simd-(vmerge-head)) [ %merge-vector-reps ] } - { \ (simd-(vmerge-tail)) [ %merge-vector-reps ] } - { \ (simd-(v>float)) [ %integer>float-vector-reps ] } - { \ (simd-(v>integer)) [ %float>integer-vector-reps ] } - { \ (simd-(vpack-signed)) [ %signed-pack-vector-reps ] } - { \ (simd-(vpack-unsigned)) [ %unsigned-pack-vector-reps ] } - { \ (simd-(vunpack-head)) [ (%unpack-reps) ] } - { \ (simd-(vunpack-tail)) [ (%unpack-reps) ] } - { \ (simd-v<=) [ unsign-rep cc<= %compare-vector-reps ] } - { \ (simd-v<) [ unsign-rep cc< %compare-vector-reps ] } - { \ (simd-v=) [ unsign-rep cc= %compare-vector-reps ] } - { \ (simd-v>) [ unsign-rep cc> %compare-vector-reps ] } - { \ (simd-v>=) [ unsign-rep cc>= %compare-vector-reps ] } - { \ (simd-vunordered?) [ unsign-rep cc/<>= %compare-vector-reps ] } - { \ (simd-gather-2) [ %gather-vector-2-reps ] } - { \ (simd-gather-4) [ %gather-vector-4-reps ] } - { \ (simd-vany?) [ %test-vector-reps ] } - { \ (simd-vall?) [ %test-vector-reps ] } - { \ (simd-vnone?) [ %test-vector-reps ] } - } case member? ; diff --git a/basis/math/vectors/specialization/specialization-tests.factor b/basis/math/vectors/specialization/specialization-tests.factor deleted file mode 100644 index f4d4fd93e8..0000000000 --- a/basis/math/vectors/specialization/specialization-tests.factor +++ /dev/null @@ -1,28 +0,0 @@ -IN: math.vectors.specialization.tests -USING: compiler.tree.debugger math.vectors tools.test kernel -kernel.private math specialized-arrays ; -QUALIFIED-WITH: alien.c-types c -QUALIFIED-WITH: alien.complex c -SPECIALIZED-ARRAY: c:double -SPECIALIZED-ARRAY: c:complex-float -SPECIALIZED-ARRAY: c:float - -[ V{ t } ] [ - [ { double-array double-array } declare distance 0.0 < not ] final-literals -] unit-test - -[ V{ float } ] [ - [ { float-array float } declare v*n norm ] final-classes -] unit-test - -[ V{ complex } ] [ - [ { complex-float-array complex-float-array } declare v. ] final-classes -] unit-test - -[ V{ float } ] [ - [ { float-array float } declare v*n norm ] final-classes -] unit-test - -[ V{ float } ] [ - [ { complex-float-array complex } declare v*n norm ] final-classes -] unit-test \ No newline at end of file diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor deleted file mode 100644 index 602fd9802c..0000000000 --- a/basis/math/vectors/specialization/specialization.factor +++ /dev/null @@ -1,207 +0,0 @@ -! Copyright (C) 2009 Slava Pestov. -! See http://factorcode.org/license.txt for BSD license. -USING: words kernel make sequences effects sets kernel.private -accessors combinators math math.intervals math.vectors -math.vectors.conversion.backend namespaces assocs fry splitting -classes.algebra generalizations locals -compiler.tree.propagation.info ; -IN: math.vectors.specialization - -SYMBOLS: -> +vector+ +any-vector+ +scalar+ +boolean+ +nonnegative+ +literal+ ; - -: parent-vector-class ( type -- type' ) - { - { [ dup simd-128 class<= ] [ drop simd-128 ] } - { [ dup simd-256 class<= ] [ drop simd-256 ] } - [ "Not a vector class" throw ] - } cond ; - -: signature-for-schema ( array-type elt-type schema -- signature ) - [ - { - { +vector+ [ drop ] } - { +any-vector+ [ drop parent-vector-class ] } - { +scalar+ [ nip ] } - { +boolean+ [ 2drop boolean ] } - { +nonnegative+ [ nip ] } - { +literal+ [ 2drop f ] } - } case - ] with with map ; - -: (specialize-vector-word) ( word array-type elt-type schema -- word' ) - signature-for-schema - [ [ name>> ] [ [ name>> ] map "," join ] bi* "=>" glue f ] - [ [ , \ declare , def>> % ] [ ] make ] - [ drop stack-effect ] - 2tri - [ define-declared ] [ 2drop ] 3bi ; - -: output-infos ( array-type elt-type schema -- value-infos ) - [ - { - { +vector+ [ drop ] } - { +any-vector+ [ drop parent-vector-class ] } - { +scalar+ [ nip ] } - { +boolean+ [ 2drop boolean ] } - { - +nonnegative+ - [ - nip - dup complex class<= [ drop float ] when - [0,inf] - ] - } - } case - ] with with map ; - -: record-output-signature ( word array-type elt-type schema -- word ) - output-infos - [ drop ] - [ drop ] - [ [ stack-effect in>> length '[ _ ndrop ] ] dip append ] 2tri - "outputs" set-word-prop ; - -CONSTANT: vector-words -H{ - { [v-] { +vector+ +vector+ -> +vector+ } } - { distance { +vector+ +vector+ -> +nonnegative+ } } - { n*v { +scalar+ +vector+ -> +vector+ } } - { n+v { +scalar+ +vector+ -> +vector+ } } - { n-v { +scalar+ +vector+ -> +vector+ } } - { n/v { +scalar+ +vector+ -> +vector+ } } - { norm { +vector+ -> +nonnegative+ } } - { norm-sq { +vector+ -> +nonnegative+ } } - { normalize { +vector+ -> +vector+ } } - { v* { +vector+ +vector+ -> +vector+ } } - { vs* { +vector+ +vector+ -> +vector+ } } - { v*n { +vector+ +scalar+ -> +vector+ } } - { v+ { +vector+ +vector+ -> +vector+ } } - { vs+ { +vector+ +vector+ -> +vector+ } } - { v+- { +vector+ +vector+ -> +vector+ } } - { v+n { +vector+ +scalar+ -> +vector+ } } - { v- { +vector+ +vector+ -> +vector+ } } - { vneg { +vector+ -> +vector+ } } - { vs- { +vector+ +vector+ -> +vector+ } } - { v-n { +vector+ +scalar+ -> +vector+ } } - { v. { +vector+ +vector+ -> +scalar+ } } - { v/ { +vector+ +vector+ -> +vector+ } } - { v/n { +vector+ +scalar+ -> +vector+ } } - { vceiling { +vector+ -> +vector+ } } - { vfloor { +vector+ -> +vector+ } } - { vmax { +vector+ +vector+ -> +vector+ } } - { vmin { +vector+ +vector+ -> +vector+ } } - { vneg { +vector+ -> +vector+ } } - { vtruncate { +vector+ -> +vector+ } } - { sum { +vector+ -> +scalar+ } } - { vabs { +vector+ -> +vector+ } } - { vsqrt { +vector+ -> +vector+ } } - { vbitand { +vector+ +vector+ -> +vector+ } } - { vbitandn { +vector+ +vector+ -> +vector+ } } - { vbitor { +vector+ +vector+ -> +vector+ } } - { vbitxor { +vector+ +vector+ -> +vector+ } } - { vbitnot { +vector+ -> +vector+ } } - { vand { +vector+ +vector+ -> +vector+ } } - { vandn { +vector+ +vector+ -> +vector+ } } - { vor { +vector+ +vector+ -> +vector+ } } - { vxor { +vector+ +vector+ -> +vector+ } } - { vnot { +vector+ -> +vector+ } } - { vlshift { +vector+ +scalar+ -> +vector+ } } - { vrshift { +vector+ +scalar+ -> +vector+ } } - { hlshift { +vector+ +literal+ -> +vector+ } } - { hrshift { +vector+ +literal+ -> +vector+ } } - { vshuffle-elements { +vector+ +literal+ -> +vector+ } } - { vshuffle-bytes { +vector+ +any-vector+ -> +vector+ } } - { vbroadcast { +vector+ +literal+ -> +vector+ } } - { (vmerge-head) { +vector+ +vector+ -> +vector+ } } - { (vmerge-tail) { +vector+ +vector+ -> +vector+ } } - { (v>float) { +vector+ +literal+ -> +vector+ } } - { (v>integer) { +vector+ +literal+ -> +vector+ } } - { (vpack-signed) { +vector+ +vector+ +literal+ -> +vector+ } } - { (vpack-unsigned) { +vector+ +vector+ +literal+ -> +vector+ } } - { (vunpack-head) { +vector+ +literal+ -> +vector+ } } - { (vunpack-tail) { +vector+ +literal+ -> +vector+ } } - { v<= { +vector+ +vector+ -> +vector+ } } - { v< { +vector+ +vector+ -> +vector+ } } - { v= { +vector+ +vector+ -> +vector+ } } - { v> { +vector+ +vector+ -> +vector+ } } - { v>= { +vector+ +vector+ -> +vector+ } } - { vunordered? { +vector+ +vector+ -> +vector+ } } - { vany? { +vector+ -> +boolean+ } } - { vall? { +vector+ -> +boolean+ } } - { vnone? { +vector+ -> +boolean+ } } -} - -PREDICATE: vector-word < word vector-words key? ; - -: specializations ( word -- assoc ) - dup "specializations" word-prop - [ ] [ V{ } clone [ "specializations" set-word-prop ] keep ] ?if ; - -M: vector-word subwords specializations values [ word? ] filter ; - -: add-specialization ( new-word signature word -- ) - specializations set-at ; - -ERROR: bad-vector-word word ; - -: word-schema ( word -- schema ) - vector-words ?at [ bad-vector-word ] unless ; - -: inputs ( schema -- seq ) { -> } split first ; - -: outputs ( schema -- seq ) { -> } split second ; - -: loop-vector-op ( word array-type elt-type -- word' ) - pick word-schema - [ inputs (specialize-vector-word) ] - [ outputs record-output-signature ] 3bi ; - -:: specialize-vector-word ( word array-type elt-type simd -- word/quot' ) - word simd key? [ word simd at ] [ word array-type elt-type loop-vector-op ] if ; - -:: input-signature ( word array-type elt-type -- signature ) - array-type elt-type word word-schema inputs signature-for-schema ; - -: vector-words-for-type ( elt-type -- words ) - { - ! Can't do shifts on floats - { [ dup float class<= ] [ vector-words keys { vlshift vrshift } diff ] } - ! Can't divide integers - { [ dup integer class<= ] [ vector-words keys { vsqrt n/v v/n v/ normalize } diff ] } - ! Can't compute square root of complex numbers (vsqrt uses fsqrt not sqrt) - { [ dup complex class<= ] [ vector-words keys { vsqrt } diff ] } - [ { } ] - } cond - ! Don't specialize horizontal shifts, shuffles, and conversions at all, they're only for SIMD - { - hlshift hrshift vshuffle-elements vshuffle-bytes vbroadcast - (v>integer) (v>float) - (vpack-signed) (vpack-unsigned) - (vunpack-head) (vunpack-tail) - } diff - nip ; - -:: specialize-vector-words ( array-type elt-type simd -- ) - elt-type vector-words-for-type simd keys union [ - [ array-type elt-type simd specialize-vector-word ] - [ array-type elt-type input-signature ] - [ ] - tri add-specialization - ] each ; - -: specialization-matches? ( value-infos signature -- ? ) - [ [ [ class>> ] dip class<= ] [ literal?>> ] if* ] 2all? ; - -: find-specialization ( classes word -- word/f ) - specializations - [ first specialization-matches? ] with find - swap [ second ] when ; - -: vector-word-custom-inlining ( #call -- word/f ) - [ in-d>> [ value-info ] map ] [ word>> ] bi - find-specialization ; - -vector-words keys [ - [ vector-word-custom-inlining ] - "custom-inlining" set-word-prop -] each From 42493b9778a19797866e32f63e48e44516cd6251 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Mon, 2 Nov 2009 15:09:16 -0600 Subject: [PATCH 05/46] update compiler.tree.propagation.simd, and don't load it till math.vectors.simd is loaded --- .../known-words/known-words.factor | 3 +-- .../tree/propagation/simd/simd.factor | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/basis/compiler/tree/propagation/known-words/known-words.factor b/basis/compiler/tree/propagation/known-words/known-words.factor index 5646dca3fb..aa2bc01f9e 100644 --- a/basis/compiler/tree/propagation/known-words/known-words.factor +++ b/basis/compiler/tree/propagation/known-words/known-words.factor @@ -16,8 +16,7 @@ compiler.tree.propagation.slots compiler.tree.propagation.simple compiler.tree.propagation.constraints compiler.tree.propagation.call-effect -compiler.tree.propagation.transforms -compiler.tree.propagation.simd ; +compiler.tree.propagation.transforms ; FROM: alien.c-types => (signed-interval) (unsigned-interval) ; IN: compiler.tree.propagation.known-words diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index 1637148b88..1eac88598b 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors byte-arrays combinators fry sequences compiler.tree.propagation.info cpu.architecture kernel words math -math.intervals math.vectors.simd.intrinsics ; +math.intervals math.vectors.simd ; IN: compiler.tree.propagation.simd { @@ -33,14 +33,14 @@ IN: compiler.tree.propagation.simd (simd-hrshift) (simd-vshuffle-bytes) (simd-vshuffle-elements) - (simd-(vmerge-head)) - (simd-(vmerge-tail)) - (simd-(v>float)) - (simd-(v>integer)) - (simd-(vpack-signed)) - (simd-(vpack-unsigned)) - (simd-(vunpack-head)) - (simd-(vunpack-tail)) + (simd-vmerge-head) + (simd-vmerge-tail) + (simd-v>float) + (simd-v>integer) + (simd-vpack-signed) + (simd-vpack-unsigned) + (simd-vunpack-head) + (simd-vunpack-tail) (simd-v<=) (simd-v<) (simd-v=) @@ -51,6 +51,7 @@ IN: compiler.tree.propagation.simd (simd-gather-2) (simd-gather-4) alien-vector + alien-vector-aligned } [ { byte-array } "default-output-classes" set-word-prop ] each : scalar-output-class ( rep -- class ) From e36eb438fa518997ee605eea11979d68153f4c15 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Mon, 2 Nov 2009 15:17:34 -0600 Subject: [PATCH 06/46] move all simd intrinsics to compiler.cfg.intrinsics.simd, and only load it when math.vectors.simd is loaded --- .../compiler/cfg/intrinsics/intrinsics.factor | 59 ------------------ .../compiler/cfg/intrinsics/simd/simd.factor | 62 ++++++++++++++++++- basis/cpu/x86/x86.factor | 1 - 3 files changed, 61 insertions(+), 61 deletions(-) diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index a03f04f182..632c32b12f 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -151,64 +151,5 @@ IN: compiler.cfg.intrinsics { math.integers.private:fixnum-log2 [ drop emit-fixnum-log2 ] } } enable-intrinsics ; -: enable-simd ( -- ) - { - { math.vectors.simd.intrinsics:assert-positive [ drop ] } - { math.vectors.simd.intrinsics:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vs+) [ [ ^^saturated-add-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vs-) [ [ ^^saturated-sub-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vneg) [ [ generate-neg-vector ] emit-unary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vs*) [ [ ^^saturated-mul-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vmin) [ [ generate-min-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vmax) [ [ generate-max-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-v.) [ [ ^^dot-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vabs) [ [ generate-abs-vector ] emit-unary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vbitandn) [ [ ^^andn-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vbitnot) [ [ generate-not-vector ] emit-unary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vand) [ [ ^^and-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vandn) [ [ ^^andn-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vor) [ [ ^^or-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vxor) [ [ ^^xor-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vnot) [ [ generate-not-vector ] emit-unary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-v<=) [ [ cc<= generate-compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-v<) [ [ cc< generate-compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-v=) [ [ cc= generate-compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-v>) [ [ cc> generate-compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-v>=) [ [ cc>= generate-compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vunordered?) [ [ cc/<>= generate-compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vany?) [ [ vcc-any ^^test-vector ] emit-unary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vall?) [ [ vcc-all ^^test-vector ] emit-unary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vnone?) [ [ vcc-none ^^test-vector ] emit-unary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vlshift) [ [ ^^shl-vector-imm ] [ ^^shl-vector ] emit-shift-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vrshift) [ [ ^^shr-vector-imm ] [ ^^shr-vector ] emit-shift-vector-op ] } - { math.vectors.simd.intrinsics:(simd-hlshift) [ [ ^^horizontal-shl-vector-imm ] emit-shift-vector-imm-op ] } - { math.vectors.simd.intrinsics:(simd-hrshift) [ [ ^^horizontal-shr-vector-imm ] emit-shift-vector-imm-op ] } - { math.vectors.simd.intrinsics:(simd-with) [ [ ^^with-vector ] emit-unary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] } - { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] } - { math.vectors.simd.intrinsics:(simd-vshuffle-elements) [ emit-shuffle-vector ] } - { math.vectors.simd.intrinsics:(simd-vshuffle-bytes) [ emit-shuffle-vector-var ] } - { math.vectors.simd.intrinsics:(simd-(vmerge-head)) [ [ ^^merge-vector-head ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-(vmerge-tail)) [ [ ^^merge-vector-tail ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-(v>float)) [ [ ^^integer>float-vector ] emit-unary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-(v>integer)) [ [ ^^float>integer-vector ] emit-unary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-(vpack-signed)) [ [ ^^signed-pack-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-(vpack-unsigned)) [ [ ^^unsigned-pack-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-(vunpack-head)) [ [ generate-unpack-vector-head ] emit-unary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-(vunpack-tail)) [ [ generate-unpack-vector-tail ] emit-unary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] } - { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] } - { math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] } - { math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] } - } enable-intrinsics ; - : emit-intrinsic ( node word -- ) "intrinsic" word-prop call( node -- ) ; diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index a8dfaab2dd..bac86e2457 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors alien byte-arrays fry classes.algebra cpu.architecture kernel math sequences math.vectors -math.vectors.simd.intrinsics macros generalizations combinators +math.vectors.simd macros generalizations combinators combinators.short-circuit arrays locals compiler.tree.propagation.info compiler.cfg.builder.blocks compiler.cfg.comparisons @@ -351,3 +351,63 @@ MACRO: if-literals-match ( quots -- ) [ generate-blend-vector ] 3bi ] if ; +: enable-simd ( -- ) + { + { math.vectors.simd:assert-positive [ drop ] } + { math.vectors.simd:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-vs+) [ [ ^^saturated-add-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-vs-) [ [ ^^saturated-sub-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-vneg) [ [ generate-neg-vector ] emit-unary-vector-op ] } + { math.vectors.simd:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-vs*) [ [ ^^saturated-mul-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-vmin) [ [ generate-min-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-vmax) [ [ generate-max-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-v.) [ [ ^^dot-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-vabs) [ [ generate-abs-vector ] emit-unary-vector-op ] } + { math.vectors.simd:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] } + { math.vectors.simd:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-vbitandn) [ [ ^^andn-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-vbitnot) [ [ generate-not-vector ] emit-unary-vector-op ] } + { math.vectors.simd:(simd-vand) [ [ ^^and-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-vandn) [ [ ^^andn-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-vor) [ [ ^^or-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-vxor) [ [ ^^xor-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-vnot) [ [ generate-not-vector ] emit-unary-vector-op ] } + { math.vectors.simd:(simd-v<=) [ [ cc<= generate-compare-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-v<) [ [ cc< generate-compare-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-v=) [ [ cc= generate-compare-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-v>) [ [ cc> generate-compare-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-v>=) [ [ cc>= generate-compare-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-vunordered?) [ [ cc/<>= generate-compare-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-vany?) [ [ vcc-any ^^test-vector ] emit-unary-vector-op ] } + { math.vectors.simd:(simd-vall?) [ [ vcc-all ^^test-vector ] emit-unary-vector-op ] } + { math.vectors.simd:(simd-vnone?) [ [ vcc-none ^^test-vector ] emit-unary-vector-op ] } + { math.vectors.simd:(simd-vlshift) [ [ ^^shl-vector-imm ] [ ^^shl-vector ] emit-shift-vector-op ] } + { math.vectors.simd:(simd-vrshift) [ [ ^^shr-vector-imm ] [ ^^shr-vector ] emit-shift-vector-op ] } + { math.vectors.simd:(simd-hlshift) [ [ ^^horizontal-shl-vector-imm ] emit-shift-vector-imm-op ] } + { math.vectors.simd:(simd-hrshift) [ [ ^^horizontal-shr-vector-imm ] emit-shift-vector-imm-op ] } + { math.vectors.simd:(simd-with) [ [ ^^with-vector ] emit-unary-vector-op ] } + { math.vectors.simd:(simd-gather-2) [ emit-gather-vector-2 ] } + { math.vectors.simd:(simd-gather-4) [ emit-gather-vector-4 ] } + { math.vectors.simd:(simd-vshuffle-elements) [ emit-shuffle-vector ] } + { math.vectors.simd:(simd-vshuffle-bytes) [ emit-shuffle-vector-var ] } + { math.vectors.simd:(simd-vmerge-head) [ [ ^^merge-vector-head ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-vmerge-tail) [ [ ^^merge-vector-tail ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-v>float) [ [ ^^integer>float-vector ] emit-unary-vector-op ] } + { math.vectors.simd:(simd-v>integer) [ [ ^^float>integer-vector ] emit-unary-vector-op ] } + { math.vectors.simd:(simd-vpack-signed) [ [ ^^signed-pack-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-vpack-unsigned) [ [ ^^unsigned-pack-vector ] emit-binary-vector-op ] } + { math.vectors.simd:(simd-vunpack-head) [ [ generate-unpack-vector-head ] emit-unary-vector-op ] } + { math.vectors.simd:(simd-vunpack-tail) [ [ generate-unpack-vector-tail ] emit-unary-vector-op ] } + { math.vectors.simd:(simd-select) [ emit-select-vector ] } + { math.vectors.simd:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] } + { math.vectors.simd:alien-vector [ emit-alien-vector ] } + { math.vectors.simd:set-alien-vector [ emit-set-alien-vector ] } + } enable-intrinsics ; + +enable-simd diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index b4d4b43e59..53c9c98ed3 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -1370,7 +1370,6 @@ M: x86 immediate-bitwise? ( n -- ? ) #! set up by the caller. stack-frame get total-size>> + stack@ ; -enable-simd enable-min/max enable-fixnum-log2 From d655c3c9cca88d9cd952082c9ae24d7e2bfa8b38 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Tue, 3 Nov 2009 21:38:29 -0600 Subject: [PATCH 07/46] make horizontal shift available to float vectors (it'd still be faster than the software fallback despite pipeline penalty) --- basis/cpu/x86/x86.factor | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 53c9c98ed3..b0a5dc0897 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -1166,7 +1166,7 @@ M: x86 %horizontal-shl-vector-imm ( dst src1 src2 rep -- ) M: x86 %horizontal-shl-vector-imm-reps { - { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } + { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep float-4-rep double-2-rep } } } available-reps ; M: x86 %horizontal-shr-vector-imm ( dst src1 src2 rep -- ) @@ -1174,7 +1174,7 @@ M: x86 %horizontal-shr-vector-imm ( dst src1 src2 rep -- ) M: x86 %horizontal-shr-vector-imm-reps { - { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } + { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep float-4-rep double-2-rep } } } available-reps ; M: x86 %abs-vector ( dst src rep -- ) From bd77633d5b3ab8ca1114af0bf2f3a5e7f3fc2f1f Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Tue, 3 Nov 2009 21:38:45 -0600 Subject: [PATCH 08/46] new intrinsic generators, pt1 --- .../compiler/cfg/intrinsics/simd/simd.factor | 720 +++++++++--------- 1 file changed, 345 insertions(+), 375 deletions(-) diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index bac86e2457..208e19ccc3 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -10,78 +10,27 @@ compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats compiler.cfg.instructions compiler.cfg.registers compiler.cfg.intrinsics.alien specialized-arrays ; -FROM: alien.c-types => heap-size uchar ushort uint ulonglong float double ; -SPECIALIZED-ARRAYS: uchar ushort uint ulonglong float double ; +FROM: alien.c-types => heap-size char short int longlong float double ; +SPECIALIZED-ARRAYS: char short int longlong float double ; IN: compiler.cfg.intrinsics.simd -MACRO: check-elements ( quots -- ) - [ length '[ _ firstn ] ] - [ '[ _ spread ] ] - [ length 1 - \ and [ ] like ] - tri 3append ; +! compound vector ops -MACRO: if-literals-match ( quots -- ) - [ length ] [ ] [ length ] tri - ! n quots n - '[ - ! node quot - [ - dup node-input-infos - _ tail-slice* [ literal>> ] map - dup _ check-elements - ] dip - swap [ - ! node literals quot - [ _ firstn ] dip call - drop - ] [ 2drop emit-primitive ] if - ] ; +: ^load-neg-zero-vector ( rep -- dst ) + { + { float-4-rep [ float-array{ -0.0 -0.0 -0.0 -0.0 } underlying>> ^^load-constant ] } + { double-2-rep [ double-array{ -0.0 -0.0 } underlying>> ^^load-constant ] } + } case ; -: emit-vector-op ( node quot: ( rep -- ) -- ) - { [ representation? ] } if-literals-match ; inline - -: [binary] ( quot -- quot' ) - '[ [ ds-drop 2inputs ] dip @ ds-push ] ; inline - -: emit-binary-vector-op ( node quot -- ) - [binary] emit-vector-op ; inline - -: [unary] ( quot -- quot' ) - '[ [ ds-drop ds-pop ] dip @ ds-push ] ; inline - -: emit-unary-vector-op ( node quot -- ) - [unary] emit-vector-op ; inline - -: [unary/param] ( quot -- quot' ) - '[ [ -2 inc-d ds-pop ] 2dip @ ds-push ] ; inline - -: emit-shift-vector-imm-op ( node quot -- ) - [unary/param] - { [ integer? ] [ representation? ] } if-literals-match ; inline - -:: emit-shift-vector-op ( node imm-quot var-quot -- ) - node node-input-infos 2 tail-slice* first literal>> integer? - [ node imm-quot emit-shift-vector-imm-op ] - [ node var-quot emit-binary-vector-op ] if ; inline - -: emit-gather-vector-2 ( node -- ) - [ ^^gather-vector-2 ] emit-binary-vector-op ; - -: emit-gather-vector-4 ( node -- ) - [ - ds-drop - [ - D 3 peek-loc - D 2 peek-loc - D 1 peek-loc - D 0 peek-loc - -4 inc-d - ] dip - ^^gather-vector-4 - ds-push - ] emit-vector-op ; - -: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ; +: ^load-add-sub-vector ( rep -- dst ) + unsign-rep { + { float-4-rep [ float-array{ -0.0 0.0 -0.0 0.0 } underlying>> ^^load-constant ] } + { double-2-rep [ double-array{ -0.0 0.0 } underlying>> ^^load-constant ] } + { char-16-rep [ char-array{ -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 } underlying>> ^^load-constant ] } + { short-8-rep [ short-array{ -1 0 -1 0 -1 0 -1 0 } underlying>> ^^load-constant ] } + { int-4-rep [ int-array{ -1 0 -1 0 } underlying>> ^^load-constant ] } + { longlong-2-rep [ longlong-array{ -1 0 } underlying>> ^^load-constant ] } + } case ; : >variable-shuffle ( shuffle rep -- shuffle' ) rep-component-type heap-size @@ -89,325 +38,346 @@ MACRO: if-literals-match ( quots -- ) [ iota >byte-array ] bi '[ _ n*v _ v+ ] map concat ; -: generate-shuffle-vector-imm ( src shuffle rep -- dst ) - dup %shuffle-vector-imm-reps member? - [ ^^shuffle-vector-imm ] - [ - [ >variable-shuffle ^^load-constant ] keep - ^^shuffle-vector - ] if ; +: ^load-immediate-shuffle ( shuffle rep -- dst ) + >variable-shuffle ^^load-constant ; -: emit-shuffle-vector-imm ( node -- ) - ! Pad the permutation with zeroes if it's too short, since we - ! can't throw an error at this point. - [ [ rep-components 0 pad-tail ] keep generate-shuffle-vector-imm ] [unary/param] - { [ shuffle? ] [ representation? ] } if-literals-match ; - -: emit-shuffle-vector-var ( node -- ) - [ ^^shuffle-vector ] [binary] - { [ %shuffle-vector-reps member? ] } if-literals-match ; - -: emit-shuffle-vector ( node -- ) - dup node-input-infos { - [ length 3 = ] - [ first class>> byte-array class<= ] - [ second class>> byte-array class<= ] - [ third literal>> representation? ] - } 1&& [ emit-shuffle-vector-var ] [ emit-shuffle-vector-imm ] if ; - -: ^^broadcast-vector ( src n rep -- dst ) - [ rep-components swap ] keep - generate-shuffle-vector-imm ; - -: emit-broadcast-vector ( node -- ) - [ ^^broadcast-vector ] [unary/param] - { [ integer? ] [ representation? ] } if-literals-match ; - -: ^^with-vector ( src rep -- dst ) - [ ^^scalar>vector ] keep [ 0 ] dip ^^broadcast-vector ; - -: ^^select-vector ( src n rep -- dst ) - [ ^^broadcast-vector ] keep ^^vector>scalar ; - -: emit-select-vector ( node -- ) - [ ^^select-vector ] [unary/param] - { [ integer? ] [ representation? ] } if-literals-match ; inline - -: emit-alien-vector-op ( node quot: ( rep -- ) -- ) - { [ %alien-vector-reps member? ] } if-literals-match ; inline - -: emit-alien-vector ( node -- ) - dup [ - '[ - ds-drop prepare-alien-getter - _ ^^alien-vector ds-push - ] - [ inline-alien-getter? ] inline-alien - ] with emit-alien-vector-op ; - -: emit-set-alien-vector ( node -- ) - dup [ - '[ - ds-drop prepare-alien-setter ds-pop - _ ##set-alien-vector - ] - [ byte-array inline-alien-setter? ] - inline-alien - ] with emit-alien-vector-op ; - -: generate-not-vector ( src rep -- dst ) - dup %not-vector-reps member? - [ ^^not-vector ] - [ [ ^^fill-vector ] [ ^^xor-vector ] bi ] if ; - -:: ((generate-compare-vector)) ( src1 src2 rep {cc,swap} -- dst ) - {cc,swap} first2 :> ( cc swap? ) - swap? - [ src2 src1 rep cc ^^compare-vector ] - [ src1 src2 rep cc ^^compare-vector ] if ; - -:: (generate-compare-vector) ( src1 src2 rep orig-cc -- dst ) - rep orig-cc %compare-vector-ccs :> ( ccs not? ) - - ccs empty? - [ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ] - [ - ccs unclip :> ( rest-ccs first-cc ) - src1 src2 rep first-cc ((generate-compare-vector)) :> first-dst - - rest-ccs first-dst - [ [ src1 src2 rep ] dip ((generate-compare-vector)) rep ^^or-vector ] - reduce - - not? [ rep generate-not-vector ] when - ] if ; - -: sign-bit-mask ( rep -- byte-array ) - unsign-rep { - { char-16-rep [ uchar-array{ - HEX: 80 HEX: 80 HEX: 80 HEX: 80 - HEX: 80 HEX: 80 HEX: 80 HEX: 80 - HEX: 80 HEX: 80 HEX: 80 HEX: 80 - HEX: 80 HEX: 80 HEX: 80 HEX: 80 - } underlying>> ] } - { short-8-rep [ ushort-array{ - HEX: 8000 HEX: 8000 HEX: 8000 HEX: 8000 - HEX: 8000 HEX: 8000 HEX: 8000 HEX: 8000 - } underlying>> ] } - { int-4-rep [ uint-array{ - HEX: 8000,0000 HEX: 8000,0000 - HEX: 8000,0000 HEX: 8000,0000 - } underlying>> ] } - { longlong-2-rep [ ulonglong-array{ - HEX: 8000,0000,0000,0000 - HEX: 8000,0000,0000,0000 - } underlying>> ] } - } case ; - -:: (generate-minmax-compare-vector) ( src1 src2 rep orig-cc -- dst ) - orig-cc order-cc { - { cc< [ src1 src2 rep ^^max-vector src1 rep cc/= (generate-compare-vector) ] } - { cc<= [ src1 src2 rep ^^min-vector src1 rep cc= (generate-compare-vector) ] } - { cc> [ src1 src2 rep ^^min-vector src1 rep cc/= (generate-compare-vector) ] } - { cc>= [ src1 src2 rep ^^max-vector src1 rep cc= (generate-compare-vector) ] } - } case ; - -:: generate-compare-vector ( src1 src2 rep orig-cc -- dst ) - { - { - [ rep orig-cc %compare-vector-reps member? ] - [ src1 src2 rep orig-cc (generate-compare-vector) ] - } - { - [ rep %min-vector-reps member? ] - [ src1 src2 rep orig-cc (generate-minmax-compare-vector) ] - } - { - [ rep unsign-rep orig-cc %compare-vector-reps member? ] - [ - rep sign-bit-mask ^^load-constant :> sign-bits - src1 sign-bits rep ^^xor-vector - src2 sign-bits rep ^^xor-vector - rep unsign-rep orig-cc (generate-compare-vector) - ] - } - } cond ; - -:: generate-unpack-vector-head ( src rep -- dst ) - { - { - [ rep %unpack-vector-head-reps member? ] - [ src rep ^^unpack-vector-head ] - } - { - [ rep unsigned-int-vector-rep? ] - [ - rep ^^zero-vector :> zero - src zero rep ^^merge-vector-head - ] - } - { - [ rep widen-vector-rep %shr-vector-imm-reps member? ] - [ - src src rep ^^merge-vector-head - rep rep-component-type - heap-size 8 * rep widen-vector-rep ^^shr-vector-imm - ] - } - [ - rep ^^zero-vector :> zero - zero src rep cc> ^^compare-vector :> sign - src sign rep ^^merge-vector-head - ] - } cond ; - -:: generate-unpack-vector-tail ( src rep -- dst ) - { - { - [ rep %unpack-vector-tail-reps member? ] - [ src rep ^^unpack-vector-tail ] - } - { - [ rep %unpack-vector-head-reps member? ] - [ - src rep ^^tail>head-vector :> tail - tail rep ^^unpack-vector-head - ] - } - { - [ rep unsigned-int-vector-rep? ] - [ - rep ^^zero-vector :> zero - src zero rep ^^merge-vector-tail - ] - } - { - [ rep widen-vector-rep %shr-vector-imm-reps member? ] - [ - src src rep ^^merge-vector-tail - rep rep-component-type - heap-size 8 * rep widen-vector-rep ^^shr-vector-imm - ] - } - [ - rep ^^zero-vector :> zero - zero src rep cc> ^^compare-vector :> sign - src sign rep ^^merge-vector-tail - ] - } cond ; - -:: generate-load-neg-zero-vector ( rep -- dst ) - rep { - { float-4-rep [ float-array{ -0.0 -0.0 -0.0 -0.0 } underlying>> ^^load-constant ] } - { double-2-rep [ double-array{ -0.0 -0.0 } underlying>> ^^load-constant ] } - [ drop rep ^^zero-vector ] - } case ; - -:: generate-neg-vector ( src rep -- dst ) - rep generate-load-neg-zero-vector - src rep ^^sub-vector ; - -:: generate-blend-vector ( mask true false rep -- dst ) - mask true rep ^^and-vector +:: ^blend-vector ( mask true false rep -- dst ) + true mask rep ^^and-vector mask false rep ^^andn-vector rep ^^or-vector ; -:: generate-abs-vector ( src rep -- dst ) +: ^compare-vector ( src1 src2 rep cc -- dst ) + ... ; + +: ^widened-shr-vector-imm ( src shift rep -- dst ) + widen-vector-rep ^^shr-vector-imm ; + +! intrinsic emitters + +: emit-simd-v+ ( node -- ) { - { - [ rep unsigned-int-vector-rep? ] - [ src ] - } - { - [ rep %abs-vector-reps member? ] - [ src rep ^^abs-vector ] - } - { - [ rep float-vector-rep? ] - [ - rep generate-load-neg-zero-vector - src rep ^^andn-vector - ] - } - [ + [ ^^add-vector ] + } emit-vv-vector-op ; + +: emit-simd-v- ( node -- ) + { + [ ^^sub-vector ] + } emit-vv-vector-op ; + +: emit-simd-vneg ( node -- ) + { + { float-vector-rep [ [ ^load-neg-zero-vector ] [ ^^sub-vector ] bi ] } + { int-vector-rep [ [ ^^zero-vector ] [ ^^sub-vector ] bi ] } + } emit-v-vector-op ; + +: emit-simd-v+- ( node -- ) + { + [ ^^add-sub-vector ] + { float-vector-rep [| src1 src2 rep | + rep ^load-add-sub-vector :> signs + src2 signs rep ^^xor-vector :> src2' + src1 src2' rep ^^add-vector + ] } + { int-vector-rep [| src1 src2 rep | + rep ^load-add-sub-vector :> signs + src2 signs rep ^^xor-vector :> src2' + src2' signs rep ^^sub-vector :> src2'' + src1 src2'' rep ^^add-vector + ] } + } emit-vv-vector-op ; + +: emit-simd-vs+ ( node -- ) + { + { float-vector-rep [ ^^add-vector ] } + { int-vector-rep [ ^^saturated-add-vector ] } + } emit-vv-vector-op ; + +: emit-simd-vs- ( node -- ) + { + { float-vector-rep [ ^^sub-vector ] } + { int-vector-rep [ ^^saturated-sub-vector ] } + } emit-vv-vector-op ; + +: emit-simd-vs* ( node -- ) + { + { float-vector-rep [ ^^mul-vector ] } + { int-vector-rep [ ^^saturated-mul-vector ] } + } emit-vv-vector-op ; + +: emit-simd-v* ( node -- ) + { + [ ^^mul-vector ] + } emit-vv-vector-op ; + +: emit-simd-v/ ( node -- ) + { + [ ^^div-vector ] + } emit-vv-vector-op ; + +: emit-simd-vmin ( node -- ) + { + [ ^^min-vector ] + [ + [ cc< ^compare-vector ] + [ ^blend-vector ] 3bi + ] + } emit-vv-vector-op ; + +: emit-simd-vmax ( node -- ) + { + [ ^^max-vector ] + [ + [ cc> ^compare-vector ] + [ ^blend-vector ] 3bi + ] + } emit-vv-vector-op ; + +: emit-simd-v. ( node -- ) + { + [ ^^dot-vector ] + { float-vector-rep [| src1 src2 rep | + + ] } + { int-vector-rep [| src1 src2 rep | + ... + ] } + } emit-vv-vector-op ; + +: emit-simd-vsqrt ( node -- ) + { + [ ^^sqrt-vector ] + } emit-v-vector-op ; + +: emit-simd-sum ( node -- ) + ... ; + +: emit-simd-vabs ( node -- ) + { + { unsigned-int-vector-rep [ drop ] } + [ ^^abs-vector ] + { float-vector-rep [ [ ^load-neg-zero-vector ] [ swapd ^^andn-vector ] bi ] } + { int-vector-rep [| src rep | rep ^^zero-vector :> zero zero src rep ^^sub-vector :> -src - zero src rep cc> ^^compare-vector :> sign - sign -src src rep generate-blend-vector - ] - } cond ; + zero src rep cc> ^compare-vector :> sign + sign -src src rep ^blend-vector + ] } + } emit-v-vector-op ; -: generate-min-vector ( src1 src2 rep -- dst ) - dup %min-vector-reps member? - [ ^^min-vector ] [ - [ cc< generate-compare-vector ] - [ generate-blend-vector ] 3bi - ] if ; +: emit-simd-vand ( node -- ) + { + [ ^^and-vector ] + } emit-vv-vector-op ; -: generate-max-vector ( src1 src2 rep -- dst ) - dup %max-vector-reps member? - [ ^^max-vector ] [ - [ cc> generate-compare-vector ] - [ generate-blend-vector ] 3bi - ] if ; +: emit-simd-vandn ( node -- ) + { + [ ^^andn-vector ] + } emit-vv-vector-op ; + +: emit-simd-vor ( node -- ) + { + [ ^^or-vector ] + } emit-vv-vector-op ; + +: emit-simd-vxor ( node -- ) + { + [ ^^xor-vector ] + } emit-vv-vector-op ; + +: emit-simd-vnot ( node -- ) + { + [ ^^not-vector ] + [ [ ^^fill-vector ] [ ^^xor-vector ] bi ] + } emit-v-vector-op ; + +: emit-simd-vlshift ( node -- ) + { + [ ^^shl-vector ] + } { + [ ^^shl-vector-imm ] + } emit-vn-or-vl-vector-op ; + +: emit-simd-vrshift ( node -- ) + { + [ ^^shr-vector ] + } { + [ ^^shr-vector-imm ] + } emit-vn-or-vl-vector-op ; + +: emit-simd-hlshift ( node -- ) + { + [ ^^horizontal-shl-vector-imm ] + } emit-vl-vector-op ; + +: emit-simd-hrshift ( node -- ) + { + [ ^^horizontal-shr-vector-imm ] + } emit-vl-vector-op ; + +: emit-simd-vshuffle-elements ( node -- ) + { + [ ^^shuffle-vector-imm ] + [ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] ] + } emit-vl-vector-op ; + +: emit-simd-vshuffle-bytes ( node -- ) + { + [ ^^shuffle-vector ] + } emit-vv-vector-op ; + +: emit-simd-vmerge-head ( node -- ) + { + [ ^^merge-vector-head ] + } emit-vv-vector-op ; + +: emit-simd-vmerge-tail ( node -- ) + { + [ ^^merge-vector-tail ] + } emit-vv-vector-op ; + +: emit-simd-v<= ( node -- ) + [ cc<= ^compare-vector ] (emit-vv-vector-op) ; +: emit-simd-v< ( node -- ) + [ cc< ^compare-vector ] (emit-vv-vector-op) ; +: emit-simd-v= ( node -- ) + [ cc= ^compare-vector ] (emit-vv-vector-op) ; +: emit-simd-v> ( node -- ) + [ cc> ^compare-vector ] (emit-vv-vector-op) ; +: emit-simd-v>= ( node -- ) + [ cc>= ^compare-vector ] (emit-vv-vector-op) ; +: emit-simd-vunordered? ( node -- ) + [ cc/<>= ^compare-vector ] (emit-vv-vector-op) ; + +: emit-simd-vany? ( node -- ) + [ vcc-any ^test-vector ] (emit-vv-vector-op) ; +: emit-simd-vall? ( node -- ) + [ vcc-all ^test-vector ] (emit-vv-vector-op) ; +: emit-simd-vnone? ( node -- ) + [ vcc-none ^test-vector ] (emit-vv-vector-op) ; + +: emit-simd-v>float ( node -- ) + { + { float-vector-rep [ drop ] } + { int-vector-rep [ ^^integer>float-vector ] } + } emit-vv-vector-op ; + +: emit-simd-v>integer ( node -- ) + { + { float-vector-rep [ ^^float>integer-vector ] } + { int-vector-rep [ dup ] } + } emit-vv-vector-op ; + +: emit-simd-vpack-signed ( node -- ) + { + [ ^^signed-pack-vector ] + } emit-vv-vector-op ; + +: emit-simd-vpack-unsigned ( node -- ) + { + [ ^^unsigned-pack-vector ] + } emit-vv-vector-op ; + +! XXX shr vector rep is widened! +: emit-simd-vunpack-head ( node -- ) + { + [ ^^unpack-vector-head ] + { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-head ] bi ] } + { signed-int-vector-rep [| src rep | + src src rep ^^merge-vector-head :> merged + rep rep-component-type heap-size 8 * :> bits + merged bits rep ^widened-shr-vector-imm + ] } + { signed-int-vector-rep [| src rep | + rep ^^zero-vector :> zero + zero src rep cc> ^compare-vector :> sign + src sign rep ^^merge-vector-head + ] } + } emit-v-vector-op ; + +: emit-simd-vunpack-tail ( node -- ) + { + [ ^^unpack-vector-tail ] + [ [ ^^tail>head-vector ] [ ^^unpack-vector-head ] bi ] + { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-tail ] bi ] } + { signed-int-vector-rep [| src rep | + src src rep ^^merge-vector-tail :> merged + rep rep-component-type heap-size 8 * :> bits + merged bits rep widen-vector-rep ^widened-shr-vector-imm + ] } + { signed-int-vector-rep [| src rep | + rep ^^zero-vector :> zero + zero src rep cc> ^compare-vector :> sign + src sign rep ^^merge-vector-tail + ] } + } emit-v-vector-op ; + +: emit-simd-with ( node -- ) +: emit-simd-gather-2 ( node -- ) +: emit-simd-gather-4 ( node -- ) +: emit-simd-select ( node -- ) +: emit-alien-vector ( node -- ) +: emit-set-alien-vector ( node -- ) +: emit-alien-vector-aligned ( node -- ) +: emit-set-alien-vector-aligned ( node -- ) : enable-simd ( -- ) { - { math.vectors.simd:assert-positive [ drop ] } - { math.vectors.simd:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-vs+) [ [ ^^saturated-add-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-vs-) [ [ ^^saturated-sub-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-vneg) [ [ generate-neg-vector ] emit-unary-vector-op ] } - { math.vectors.simd:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-vs*) [ [ ^^saturated-mul-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-vmin) [ [ generate-min-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-vmax) [ [ generate-max-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-v.) [ [ ^^dot-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-vabs) [ [ generate-abs-vector ] emit-unary-vector-op ] } - { math.vectors.simd:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] } - { math.vectors.simd:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-vbitandn) [ [ ^^andn-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-vbitnot) [ [ generate-not-vector ] emit-unary-vector-op ] } - { math.vectors.simd:(simd-vand) [ [ ^^and-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-vandn) [ [ ^^andn-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-vor) [ [ ^^or-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-vxor) [ [ ^^xor-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-vnot) [ [ generate-not-vector ] emit-unary-vector-op ] } - { math.vectors.simd:(simd-v<=) [ [ cc<= generate-compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-v<) [ [ cc< generate-compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-v=) [ [ cc= generate-compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-v>) [ [ cc> generate-compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-v>=) [ [ cc>= generate-compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-vunordered?) [ [ cc/<>= generate-compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-vany?) [ [ vcc-any ^^test-vector ] emit-unary-vector-op ] } - { math.vectors.simd:(simd-vall?) [ [ vcc-all ^^test-vector ] emit-unary-vector-op ] } - { math.vectors.simd:(simd-vnone?) [ [ vcc-none ^^test-vector ] emit-unary-vector-op ] } - { math.vectors.simd:(simd-vlshift) [ [ ^^shl-vector-imm ] [ ^^shl-vector ] emit-shift-vector-op ] } - { math.vectors.simd:(simd-vrshift) [ [ ^^shr-vector-imm ] [ ^^shr-vector ] emit-shift-vector-op ] } - { math.vectors.simd:(simd-hlshift) [ [ ^^horizontal-shl-vector-imm ] emit-shift-vector-imm-op ] } - { math.vectors.simd:(simd-hrshift) [ [ ^^horizontal-shr-vector-imm ] emit-shift-vector-imm-op ] } - { math.vectors.simd:(simd-with) [ [ ^^with-vector ] emit-unary-vector-op ] } - { math.vectors.simd:(simd-gather-2) [ emit-gather-vector-2 ] } - { math.vectors.simd:(simd-gather-4) [ emit-gather-vector-4 ] } - { math.vectors.simd:(simd-vshuffle-elements) [ emit-shuffle-vector ] } - { math.vectors.simd:(simd-vshuffle-bytes) [ emit-shuffle-vector-var ] } - { math.vectors.simd:(simd-vmerge-head) [ [ ^^merge-vector-head ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-vmerge-tail) [ [ ^^merge-vector-tail ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-v>float) [ [ ^^integer>float-vector ] emit-unary-vector-op ] } - { math.vectors.simd:(simd-v>integer) [ [ ^^float>integer-vector ] emit-unary-vector-op ] } - { math.vectors.simd:(simd-vpack-signed) [ [ ^^signed-pack-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-vpack-unsigned) [ [ ^^unsigned-pack-vector ] emit-binary-vector-op ] } - { math.vectors.simd:(simd-vunpack-head) [ [ generate-unpack-vector-head ] emit-unary-vector-op ] } - { math.vectors.simd:(simd-vunpack-tail) [ [ generate-unpack-vector-tail ] emit-unary-vector-op ] } - { math.vectors.simd:(simd-select) [ emit-select-vector ] } - { math.vectors.simd:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] } - { math.vectors.simd:alien-vector [ emit-alien-vector ] } - { math.vectors.simd:set-alien-vector [ emit-set-alien-vector ] } + { (simd-v+) [ emit-simd-v+ ] } + { (simd-v-) [ emit-simd-v- ] } + { (simd-vneg) [ emit-simd-vneg ] } + { (simd-v+-) [ emit-simd-v+- ] } + { (simd-vs+) [ emit-simd-vs+ ] } + { (simd-vs-) [ emit-simd-vs- ] } + { (simd-vs*) [ emit-simd-vs* ] } + { (simd-v*) [ emit-simd-v* ] } + { (simd-v/) [ emit-simd-v/ ] } + { (simd-vmin) [ emit-simd-vmin ] } + { (simd-vmax) [ emit-simd-vmax ] } + { (simd-v.) [ emit-simd-v. ] } + { (simd-vsqrt) [ emit-simd-vsqrt ] } + { (simd-sum) [ emit-simd-sum ] } + { (simd-vabs) [ emit-simd-vabs ] } + { (simd-vbitand) [ emit-simd-vand ] } + { (simd-vbitandn) [ emit-simd-vandn ] } + { (simd-vbitor) [ emit-simd-vor ] } + { (simd-vbitxor) [ emit-simd-vxor ] } + { (simd-vbitnot) [ emit-simd-vnot ] } + { (simd-vand) [ emit-simd-vand ] } + { (simd-vandn) [ emit-simd-vandn ] } + { (simd-vor) [ emit-simd-vor ] } + { (simd-vxor) [ emit-simd-vxor ] } + { (simd-vnot) [ emit-simd-vnot ] } + { (simd-vlshift) [ emit-simd-vlshift ] } + { (simd-vrshift) [ emit-simd-vrshift ] } + { (simd-hlshift) [ emit-simd-hlshift ] } + { (simd-hrshift) [ emit-simd-hrshift ] } + { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] } + { (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] } + { (simd-vmerge-head) [ emit-simd-vmerge-head ] } + { (simd-vmerge-tail) [ emit-simd-vmerge-tail ] } + { (simd-v<=) [ emit-simd-v<= ] } + { (simd-v<) [ emit-simd-v< ] } + { (simd-v=) [ emit-simd-v= ] } + { (simd-v>) [ emit-simd-v> ] } + { (simd-v>=) [ emit-simd-v>= ] } + { (simd-vunordered?) [ emit-simd-vunordered? ] } + { (simd-vany?) [ emit-simd-vany? ] } + { (simd-vall?) [ emit-simd-vall? ] } + { (simd-vnone?) [ emit-simd-vnone? ] } + { (simd-v>float) [ emit-simd-v>float ] } + { (simd-v>integer) [ emit-simd-v>integer ] } + { (simd-vpack-signed) [ emit-simd-vpack-signed ] } + { (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] } + { (simd-vunpack-head) [ emit-simd-vunpack-head ] } + { (simd-vunpack-tail) [ emit-simd-vunpack-tail ] } + { (simd-with) [ emit-simd-with ] } + { (simd-gather-2) [ emit-simd-gather-2 ] } + { (simd-gather-4) [ emit-simd-gather-4 ] } + { (simd-select) [ emit-simd-select ] } + { alien-vector [ emit-alien-vector ] } + { set-alien-vector [ emit-set-alien-vector ] } + { alien-vector-aligned [ emit-alien-vector ] } + { set-alien-vector-aligned [ emit-set-alien-vector ] } } enable-intrinsics ; enable-simd From b98742be3095ba51cc21ca5f769d01dc26ad15db Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Tue, 3 Nov 2009 21:38:55 -0600 Subject: [PATCH 09/46] typos --- basis/math/vectors/simd/simd.factor | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index 139060333c..1aff80a0a9 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -65,8 +65,8 @@ GENERIC: new-underlying ( underlying seq -- seq' ) : (simd-vnone?) ( a rep -- ? ) \ vnone? bad-simd-call ; : (simd-v>float) ( a rep -- c ) \ vconvert bad-simd-call ; : (simd-v>integer) ( a rep -- c ) \ vconvert bad-simd-call ; -: (simd-vpack-signed) ( a rep -- c ) \ vconvert bad-simd-call ; -: (simd-vpack-unsigned) ( a rep -- c ) \ vconvert bad-simd-call ; +: (simd-vpack-signed) ( a b rep -- c ) \ vconvert bad-simd-call ; +: (simd-vpack-unsigned) ( a b rep -- c ) \ vconvert bad-simd-call ; : (simd-vunpack-head) ( a rep -- c ) \ vconvert bad-simd-call ; : (simd-vunpack-tail) ( a rep -- c ) \ vconvert bad-simd-call ; : (simd-with) ( n rep -- v ) \ simd-with bad-simd-call ; @@ -337,3 +337,5 @@ M: simd-128 distance v- norm ; inline M: simd-128 vshuffle ( u perm -- v ) vshuffle-bytes ; inline +"compiler.tree.propagation.simd" require +"compiler.cfg.intrinsics.simd" require From f6643a1c72a905a543fa40b403fb5cd3dce1f45a Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 4 Nov 2009 12:18:01 -0600 Subject: [PATCH 10/46] change ##horizontal-add-vector insn to better match what the HADD SSE instructions do (add adjacent pairs, pack results) --- .../cfg/instructions/instructions.factor | 8 ++--- basis/cpu/architecture/architecture.factor | 4 +-- basis/cpu/x86/x86.factor | 31 ++++++++++++++----- 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index d4d84a088a..30fe8b590e 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -408,13 +408,13 @@ use: src1 src2 literal: rep ; PURE-INSN: ##horizontal-add-vector -def: dst/scalar-rep -use: src +def: dst +use: src1 src2 literal: rep ; PURE-INSN: ##horizontal-sub-vector -def: dst/scalar-rep -use: src +def: dst +use: src1 src2 literal: rep ; PURE-INSN: ##horizontal-shl-vector-imm diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 75fbb85542..81aea67eb5 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -277,8 +277,8 @@ HOOK: %min-vector cpu ( dst src1 src2 rep -- ) HOOK: %max-vector cpu ( dst src1 src2 rep -- ) HOOK: %dot-vector cpu ( dst src1 src2 rep -- ) HOOK: %sqrt-vector cpu ( dst src rep -- ) -HOOK: %horizontal-add-vector cpu ( dst src rep -- ) -HOOK: %horizontal-sub-vector cpu ( dst src rep -- ) +HOOK: %horizontal-add-vector cpu ( dst src1 src2 rep -- ) +HOOK: %horizontal-sub-vector cpu ( dst src1 src2 rep -- ) HOOK: %abs-vector cpu ( dst src rep -- ) HOOK: %and-vector cpu ( dst src1 src2 rep -- ) HOOK: %andn-vector cpu ( dst src1 src2 rep -- ) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index b0a5dc0897..68c2fb0438 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -1134,14 +1134,25 @@ M: x86 %dot-vector { float-4-rep [ sse4.1? [ HEX: ff DPPS ] - [ [ MULPS ] [ drop dup float-4-rep %horizontal-add-vector ] 2bi ] - if + [ + [ MULPS ] [ + drop 2dup float-4-rep + [ %horizontal-add-vector ] + [ %horizontal-add-vector ] + [ nip %vector>scalar ] 3tri + ] 2bi + ] if ] } { double-2-rep [ sse4.1? [ HEX: ff DPPD ] - [ [ MULPD ] [ drop dup double-2-rep %horizontal-add-vector ] 2bi ] - if + [ + [ MULPD ] [ + drop 2dup double-2-rep + [ %horizontal-add-vector ] + [ nip %vector>scalar ] 3bi + ] 2bi + ] if ] } } case ; @@ -1150,15 +1161,19 @@ M: x86 %dot-vector-reps { sse3? { float-4-rep double-2-rep } } } available-reps ; -M: x86 %horizontal-add-vector ( dst src rep -- ) - { - { float-4-rep [ [ float-4-rep %copy ] [ HADDPS ] [ HADDPS ] 2tri ] } - { double-2-rep [ [ double-2-rep %copy ] [ HADDPD ] 2bi ] } +M: x86 %horizontal-add-vector ( dst src1 src2 rep -- ) + [ two-operand ] keep + unsign-rep { + { float-4-rep [ HADDPS ] } + { double-2-rep [ HADDPD ] } + { int-4-rep [ PHADDD ] } + { short-8-rep [ PHADDW ] } } case ; M: x86 %horizontal-add-vector-reps { { sse3? { float-4-rep double-2-rep } } + { ssse3? { int-4-rep uint-4-rep short-8-rep ushort-8-rep } } } available-reps ; M: x86 %horizontal-shl-vector-imm ( dst src1 src2 rep -- ) From 4d54f27cd1add5af1b55a742f161a565a0ff9c17 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 5 Nov 2009 09:52:57 -0600 Subject: [PATCH 11/46] more intrinsic madness --- .../compiler/cfg/intrinsics/simd/simd.factor | 298 ++++++++++++++---- basis/cpu/x86/x86.factor | 27 +- basis/math/vectors/simd/simd.factor | 3 - 3 files changed, 245 insertions(+), 83 deletions(-) diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index 208e19ccc3..c4fcdca23e 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -1,4 +1,4 @@ -! Copyright (C) 2009 Slava Pestov. +! Copyright (C) 2009 Slava Pestov, Joe Groff. ! See http://factorcode.org/license.txt for BSD license. USING: accessors alien byte-arrays fry classes.algebra cpu.architecture kernel math sequences math.vectors @@ -16,6 +16,28 @@ IN: compiler.cfg.intrinsics.simd ! compound vector ops +: sign-bit-mask ( rep -- byte-array ) + unsign-rep { + { char-16-rep [ uchar-array{ + HEX: 80 HEX: 80 HEX: 80 HEX: 80 + HEX: 80 HEX: 80 HEX: 80 HEX: 80 + HEX: 80 HEX: 80 HEX: 80 HEX: 80 + HEX: 80 HEX: 80 HEX: 80 HEX: 80 + } underlying>> ] } + { short-8-rep [ ushort-array{ + HEX: 8000 HEX: 8000 HEX: 8000 HEX: 8000 + HEX: 8000 HEX: 8000 HEX: 8000 HEX: 8000 + } underlying>> ] } + { int-4-rep [ uint-array{ + HEX: 8000,0000 HEX: 8000,0000 + HEX: 8000,0000 HEX: 8000,0000 + } underlying>> ] } + { longlong-2-rep [ ulonglong-array{ + HEX: 8000,0000,0000,0000 + HEX: 8000,0000,0000,0000 + } underlying>> ] } + } case ; + : ^load-neg-zero-vector ( rep -- dst ) { { float-4-rep [ float-array{ -0.0 -0.0 -0.0 -0.0 } underlying>> ^^load-constant ] } @@ -46,11 +68,163 @@ IN: compiler.cfg.intrinsics.simd mask false rep ^^andn-vector rep ^^or-vector ; -: ^compare-vector ( src1 src2 rep cc -- dst ) - ... ; +: ^minmax-compare-vector ( src1 src2 rep cc -- dst ) + order-cc { + { cc< [ src1 src2 rep ^^max-vector src1 rep cc/= ^^compare-vector ] } + { cc<= [ src1 src2 rep ^^min-vector src1 rep cc= ^^compare-vector ] } + { cc> [ src1 src2 rep ^^min-vector src1 rep cc/= ^^compare-vector ] } + { cc>= [ src1 src2 rep ^^max-vector src1 rep cc= ^^compare-vector ] } + } case ; -: ^widened-shr-vector-imm ( src shift rep -- dst ) - widen-vector-rep ^^shr-vector-imm ; +: ^compare-vector ( src1 src2 rep cc -- dst ) + { + [ ^^compare-vector ] + [ ^minmax-compare-vector ] + { unsigned-int-vector-rep [| src1 src2 rep cc | + rep sign-bit-mask ^^load-constant :> sign-bits + src1 sign-bits rep ^^xor-vector + src2 sign-bits rep ^^xor-vector + rep unsign-rep cc ^^compare-vector + ] } + } vv-cc-vector-op ; + +: ^unpack-vector-head ( src rep -- dst ) + { + [ ^^unpack-vector-head ] + { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-head ] bi ] } + { signed-int-vector-rep [| src rep | + src src rep ^^merge-vector-head :> merged + rep rep-component-type heap-size 8 * :> bits + merged bits rep ^widened-shr-vector-imm + ] } + { signed-int-vector-rep [| src rep | + rep ^^zero-vector :> zero + zero src rep cc> ^compare-vector :> sign + src sign rep ^^merge-vector-head + ] } + } v-vector-op ; + +: ^unpack-vector-tail ( src rep -- dst ) + { + [ ^^unpack-vector-tail ] + [ [ ^^tail>head-vector ] [ ^^unpack-vector-head ] bi ] + { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-tail ] bi ] } + { signed-int-vector-rep [| src rep | + src src rep ^^merge-vector-tail :> merged + rep rep-component-type heap-size 8 * :> bits + merged bits rep ^widened-shr-vector-imm + ] } + { signed-int-vector-rep [| src rep | + rep ^^zero-vector :> zero + zero src rep cc> ^compare-vector :> sign + src sign rep ^^merge-vector-tail + ] } + } v-vector-op ; + +: ^(sum-2) ( src rep -- dst ) + { + [ dupd ^^horizontal-add-vector ] + [| src rep | + src src rep ^^merge-vector-head :> head + src src rep ^^merge-vector-tail :> tail + head tail rep ^^add-vector + ] + } v-vector-op ; + +: ^(sum-4) ( src rep -- dst ) + { + [ + [ dupd ^^horizontal-add-vector ] + [ dupd ^^horizontal-add-vector ] bi + ] + [| src rep | + src src rep ^^merge-vector-head :> head + src src rep ^^merge-vector-tail :> tail + head tail rep ^^add-vector :> src' + + rep widen-rep :> rep' + src' src' rep' ^^merge-vector-head :> head' + src' src' rep' ^^merge-vector-tail :> tail' + head' tail' rep ^^add-vector + ] + } v-vector-op ; + +: ^(sum-8) ( src rep -- dst ) + { + [ + [ dupd ^^horizontal-add-vector ] + [ dupd ^^horizontal-add-vector ] + [ dupd ^^horizontal-add-vector ] tri + ] + [| src rep | + src src rep ^^merge-vector-head :> head + src src rep ^^merge-vector-tail :> tail + head tail rep ^^add-vector :> src' + + rep widen-rep :> rep' + src' src' rep' ^^merge-vector-head :> head' + src' src' rep' ^^merge-vector-tail :> tail' + head' tail' rep ^^add-vector :> src'' + + rep' widen-rep :> rep'' + src'' src'' rep'' ^^merge-vector-head :> head'' + src'' src'' rep'' ^^merge-vector-tail :> tail'' + head'' tail'' rep ^^add-vector + ] + } v-vector-op ; + +: ^(sum-16) ( src rep -- dst ) + { + [ + { + [ dupd ^^horizontal-add-vector ] + [ dupd ^^horizontal-add-vector ] + [ dupd ^^horizontal-add-vector ] + [ dupd ^^horizontal-add-vector ] + } cleave + ] + [| src rep | + src src rep ^^merge-vector-head :> head + src src rep ^^merge-vector-tail :> tail + head tail rep ^^add-vector :> src' + + rep widen-rep :> rep' + src' src' rep' ^^merge-vector-head :> head' + src' src' rep' ^^merge-vector-tail :> tail' + head' tail' rep ^^add-vector :> src'' + + rep' widen-rep :> rep'' + src'' src'' rep'' ^^merge-vector-head :> head'' + src'' src'' rep'' ^^merge-vector-tail :> tail'' + head'' tail'' rep ^^add-vector :> src''' + + rep'' widen-rep :> rep''' + src''' src''' rep''' ^^merge-vector-head :> head''' + src''' src''' rep''' ^^merge-vector-tail :> tail''' + head''' tail''' rep ^^add-vector + ] + } v-vector-op ; + +: ^(sum-vector) ( src rep -- dst ) + [ + rep-length { + { 2 [ ^(sum-2) ] } + { 4 [ ^(sum-4) ] } + { 8 [ ^(sum-8) ] } + { 16 [ ^(sum-16) ] } + } case + ] [ ^^vector>scalar ] bi ; + +: ^sum-vector ( src rep -- dst ) + unsign-rep { + { float-vector-rep [ ^(sum-vector) ] } + { int-vector-rep [| src rep | + src rep ^unpack-vector-head :> head + src rep ^unpack-vector-tail :> tail + rep widen-rep :> wide-rep + head tail wide-rep ^^add-vector wide-rep ^(sum-vector) + ] } + } v-vector-op ; ! intrinsic emitters @@ -135,12 +309,7 @@ IN: compiler.cfg.intrinsics.simd : emit-simd-v. ( node -- ) { [ ^^dot-vector ] - { float-vector-rep [| src1 src2 rep | - - ] } - { int-vector-rep [| src1 src2 rep | - ... - ] } + [ [ ^^mul-vector ] [ ^sum-vector ] bi ] } emit-vv-vector-op ; : emit-simd-vsqrt ( node -- ) @@ -149,7 +318,9 @@ IN: compiler.cfg.intrinsics.simd } emit-v-vector-op ; : emit-simd-sum ( node -- ) - ... ; + { + [ ^sum-vector ] + } emit-v-vector-op ; : emit-simd-vabs ( node -- ) { @@ -195,30 +366,32 @@ IN: compiler.cfg.intrinsics.simd [ ^^shl-vector ] } { [ ^^shl-vector-imm ] - } emit-vn-or-vl-vector-op ; + } [ integer? ] emit-vv-or-vl-vector-op ; : emit-simd-vrshift ( node -- ) { [ ^^shr-vector ] } { [ ^^shr-vector-imm ] - } emit-vn-or-vl-vector-op ; + } [ integer? ] emit-vv-or-vl-vector-op ; : emit-simd-hlshift ( node -- ) { [ ^^horizontal-shl-vector-imm ] - } emit-vl-vector-op ; + } [ integer? ] emit-vl-vector-op ; : emit-simd-hrshift ( node -- ) { [ ^^horizontal-shr-vector-imm ] - } emit-vl-vector-op ; + } [ integer? ] emit-vl-vector-op ; + +: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ; : emit-simd-vshuffle-elements ( node -- ) { [ ^^shuffle-vector-imm ] [ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] ] - } emit-vl-vector-op ; + } [ shuffle? ] emit-vl-vector-op ; : emit-simd-vshuffle-bytes ( node -- ) { @@ -236,24 +409,42 @@ IN: compiler.cfg.intrinsics.simd } emit-vv-vector-op ; : emit-simd-v<= ( node -- ) - [ cc<= ^compare-vector ] (emit-vv-vector-op) ; + { + [ cc<= ^compare-vector ] + } emit-vv-vector-op ; : emit-simd-v< ( node -- ) - [ cc< ^compare-vector ] (emit-vv-vector-op) ; + { + [ cc< ^compare-vector ] + } emit-vv-vector-op ; : emit-simd-v= ( node -- ) - [ cc= ^compare-vector ] (emit-vv-vector-op) ; + { + [ cc= ^compare-vector ] + } emit-vv-vector-op ; : emit-simd-v> ( node -- ) - [ cc> ^compare-vector ] (emit-vv-vector-op) ; + { + [ cc> ^compare-vector ] + } emit-vv-vector-op ; : emit-simd-v>= ( node -- ) - [ cc>= ^compare-vector ] (emit-vv-vector-op) ; + { + [ cc>= ^compare-vector ] + } emit-vv-vector-op ; : emit-simd-vunordered? ( node -- ) - [ cc/<>= ^compare-vector ] (emit-vv-vector-op) ; + { + [ cc/<>= ^compare-vector ] + } emit-vv-vector-op ; : emit-simd-vany? ( node -- ) - [ vcc-any ^test-vector ] (emit-vv-vector-op) ; + { + [ vcc-any ^test-vector ] + } emit-vv-vector-op ; : emit-simd-vall? ( node -- ) - [ vcc-all ^test-vector ] (emit-vv-vector-op) ; + { + [ vcc-all ^test-vector ] + } emit-vv-vector-op ; : emit-simd-vnone? ( node -- ) - [ vcc-none ^test-vector ] (emit-vv-vector-op) ; + { + [ vcc-none ^test-vector ] + } emit-vv-vector-op ; : emit-simd-v>float ( node -- ) { @@ -277,48 +468,45 @@ IN: compiler.cfg.intrinsics.simd [ ^^unsigned-pack-vector ] } emit-vv-vector-op ; -! XXX shr vector rep is widened! : emit-simd-vunpack-head ( node -- ) { - [ ^^unpack-vector-head ] - { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-head ] bi ] } - { signed-int-vector-rep [| src rep | - src src rep ^^merge-vector-head :> merged - rep rep-component-type heap-size 8 * :> bits - merged bits rep ^widened-shr-vector-imm - ] } - { signed-int-vector-rep [| src rep | - rep ^^zero-vector :> zero - zero src rep cc> ^compare-vector :> sign - src sign rep ^^merge-vector-head - ] } + [ ^unpack-vector-head ] } emit-v-vector-op ; : emit-simd-vunpack-tail ( node -- ) { - [ ^^unpack-vector-tail ] - [ [ ^^tail>head-vector ] [ ^^unpack-vector-head ] bi ] - { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-tail ] bi ] } - { signed-int-vector-rep [| src rep | - src src rep ^^merge-vector-tail :> merged - rep rep-component-type heap-size 8 * :> bits - merged bits rep widen-vector-rep ^widened-shr-vector-imm - ] } - { signed-int-vector-rep [| src rep | - rep ^^zero-vector :> zero - zero src rep cc> ^compare-vector :> sign - src sign rep ^^merge-vector-tail - ] } + [ ^unpack-vector-tail ] } emit-v-vector-op ; : emit-simd-with ( node -- ) + { + [ ^^with-vector ] + } emit-v-vector-op ; + : emit-simd-gather-2 ( node -- ) + { + [ ^^gather-vector-2 ] + } emit-vv-vector-op ; + : emit-simd-gather-4 ( node -- ) + { + [ ^^gather-vector-4 ] + } emit-vvvv-vector-op ; + : emit-simd-select ( node -- ) + { + [ ^^select-vector ] + } [ integer? ] emit-vl-vector-op ; + : emit-alien-vector ( node -- ) + { + [ ^^alien-vector ] + } emit-alien-vector-op ; + : emit-set-alien-vector ( node -- ) -: emit-alien-vector-aligned ( node -- ) -: emit-set-alien-vector-aligned ( node -- ) + { + [ ^^set-alien-vector ] + } emit-set-alien-vector-op ; : enable-simd ( -- ) { @@ -376,8 +564,6 @@ IN: compiler.cfg.intrinsics.simd { (simd-select) [ emit-simd-select ] } { alien-vector [ emit-alien-vector ] } { set-alien-vector [ emit-set-alien-vector ] } - { alien-vector-aligned [ emit-alien-vector ] } - { set-alien-vector-aligned [ emit-set-alien-vector ] } } enable-intrinsics ; enable-simd diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 68c2fb0438..d78d8c852e 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -1131,34 +1131,13 @@ M: x86 %max-vector-reps M: x86 %dot-vector [ two-operand ] keep { - { float-4-rep [ - sse4.1? - [ HEX: ff DPPS ] - [ - [ MULPS ] [ - drop 2dup float-4-rep - [ %horizontal-add-vector ] - [ %horizontal-add-vector ] - [ nip %vector>scalar ] 3tri - ] 2bi - ] if - ] } - { double-2-rep [ - sse4.1? - [ HEX: ff DPPD ] - [ - [ MULPD ] [ - drop 2dup double-2-rep - [ %horizontal-add-vector ] - [ nip %vector>scalar ] 3bi - ] 2bi - ] if - ] } + { float-4-rep [ HEX: ff DPPS ] } + { double-2-rep [ HEX: ff DPPD ] } } case ; M: x86 %dot-vector-reps { - { sse3? { float-4-rep double-2-rep } } + { sse4.1? { float-4-rep double-2-rep } } } available-reps ; M: x86 %horizontal-add-vector ( dst src1 src2 rep -- ) diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index 1aff80a0a9..c155c797ff 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -77,9 +77,6 @@ GENERIC: new-underlying ( underlying seq -- seq' ) : alien-vector ( c-ptr n rep -- value ) \ alien-vector bad-simd-call ; : set-alien-vector ( c-ptr n rep -- value ) \ set-alien-vector bad-simd-call ; -: alien-vector-aligned ( c-ptr n rep -- value ) \ alien-vector-aligned bad-simd-call ; -: set-alien-vector-aligned ( c-ptr n rep -- value ) \ set-alien-vector-aligned bad-simd-call ; - ! Helper for boolean vector literals : vector-true-value ( class -- value ) From eac9bacf40bb959061a88cef4f75841d805da1fa Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Tue, 10 Nov 2009 23:35:46 -0600 Subject: [PATCH 12/46] backend for choosing available SIMD intrinsic implementations --- .../intrinsics/simd/backend/backend.factor | 135 ++++++++++++++++++ .../compiler/cfg/intrinsics/simd/simd.factor | 52 +++++-- 2 files changed, 177 insertions(+), 10 deletions(-) create mode 100644 basis/compiler/cfg/intrinsics/simd/backend/backend.factor diff --git a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor new file mode 100644 index 0000000000..4fe9774282 --- /dev/null +++ b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor @@ -0,0 +1,135 @@ +! (c)2009 Joe Groff bsd license +USING: accessors fry generalizations kernel locals math sequences +splitting words ; +IN: compiler.cfg.intrinsics.simd.backend + +! Selection of implementation based on available CPU instructions + +: can-has? ( quot -- ? ) + [ t \ can-has? ] dip '[ @ drop \ can-has? get ] with-variable ; inline + +GENERIC: create-can-has-word ( word -- word' ) + +PREDICATE: vector-op-word + { + [ name>> { [ { [ "^" head? ] [ "##" head? ] } 1|| ] [ "-vector" swap subseq? ] } 1&& ] + [ vocabulary>> { "compiler.cfg.intrinsics.simd" "cpu.architecture" } member? ] + } 1&& ; + +: reps-word ( word -- word' ) + name>> "^^" ?head drop "##" ?head drop + "%" "-reps" surround "cpu.architecture" lookup ; + +:: can-has-^^-quot ( word def effect -- def' ) + effect in>> { "rep" } split1 [ length ] bi@ 1 + + word reps-word + effect out>> length f >quotation + '[ [ _ ndrop ] _ ndip _ execute member? \ can-has? [ and ] change @ ] ; + +:: can-has-^-quot ( word def effect -- def' ) + def create-can-has ; + +M: object create-can-has ; + +M: sequence create-can-has + [ create-can-has-word ] map ; + +: (create-can-has-word) ( word -- word' created? ) + name>> "can-has-" prepend "compiler.cfg.intrinsics.simd.backend" + 2dup lookup + [ 2nip f ] [ create t ] if* ; + +: (create-can-has-quot) ( word -- def effect ) + [ ] [ def>> ] [ stack-effect ] tri [ + { + { [ pick "^^" head? ] [ can-has-^^-quot ] } + { [ pick "##" head? ] [ can-has-^^-quot ] } + { [ pick "^" head? ] [ can-has-^-quot ] } + } cond + ] keep ; + +M: vector-op-word create-can-has + [ (create-can-has-word) ] keep + '[ _ (create-can-has-quot) define-declared ] + [ nip ] if ; + +GENERIC# >can-has-cond 2 ( quot #pick #dup -- quotpair ) +M:: callable >can-has-cond + #dup quot create-can-has '[ _ ndup _ can-has? ] quot 2array ; + +M:: pair >can-has-cond ( pair #pick #dup -- quotpair ) + pair first2 :> ( class quot ) + #pick class #dup quot create-can-has + '[ _ npick _ instance? [ _ ndup _ can-has? ] dip and ] + quot 2array ; + +MACRO: v-vector-op ( trials -- ) + [ 1 2 >can-has-cond ] map '[ _ cond ] ; +MACRO: vl-vector-op ( trials -- ) + [ 1 3 >can-has-cond ] map '[ _ cond ] ; +MACRO: vv-vector-op ( trials -- ) + [ 1 3 >can-has-cond ] map '[ _ cond ] ; +MACRO: vv-cc-vector-op ( trials -- ) + [ 2 4 >can-has-cond ] map '[ _ cond ] ; +MACRO: vvvv-vector-op ( trials -- ) + [ 1 5 >can-has-cond ] map '[ _ cond ] ; + +! Special-case conditional instructions + +: can-has-^(compare-vector) ( src1 src2 rep cc -- dst ) + [ 2drop ] 2dip %compare-vector-reps member? + \ can-has? [ and ] change + f ; + +! Intrinsic code emission + +MACRO: if-literals-match ( quots -- ) + [ length ] [ ] [ length ] tri + ! n quots n + '[ + ! node quot + [ + dup node-input-infos + _ tail-slice* [ literal>> ] map + dup _ check-elements + ] dip + swap [ + ! node literals quot + [ _ firstn ] dip call + drop + ] [ 2drop emit-primitive ] if + ] ; + +CONSTANT: [unary] [ ds-drop ds-pop ] +CONSTANT: [unary/param] [ [ -2 inc-d ds-pop ] dip ] +CONSTANT: [binary] [ ds-drop 2inputs ] +CONSTANT: [quaternary] + [ + ds-drop + D 3 peek-loc + D 2 peek-loc + D 1 peek-loc + D 0 peek-loc + -4 inc-d + ] + +:: [emit-vector-op] ( trials params-quot op-quot literal-preds -- quot ) ; + params-quot trials op-quot literal-preds + '[ [ _ dip _ @ ds-push ] _ if-literals-match ] ; + +MACRO: emit-v-vector-op ( trials -- ) + [unary] [ v-vector-op ] { [ representation? ] } [emit-vector-op] ; +MACRO: emit-vl-vector-op ( trials literal-pred -- ) + [ [unary/param] [ vl-vector-op ] { [ representation? ] } ] dip prefix [emit-vector-op] ; +MACRO: emit-vv-vector-op ( trials -- ) + [binary] [ vv-vector-op ] { [ representation? ] } [emit-vector-op] ; +MACRO: emit-vvvv-vector-op ( trials -- ) + [quaternary] [ vvvv-vector-op ] { [ representation? ] } [emit-vector-op] ; + +MACRO:: emit-vv-or-vl-vector-op ( trials literal-pred -- ) + literal-pred trials literal-pred trials + '[ + dup node-input-infos 2 tail-slice* first literal>> @ + [ _ _ emit-vl-vector-op ] + [ _ emit-vv-vector-op ] if + ] ; diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index c4fcdca23e..1cf076af1d 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -9,6 +9,7 @@ compiler.cfg.comparisons compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats compiler.cfg.instructions compiler.cfg.registers compiler.cfg.intrinsics.alien +compiler.cfg.intrinsics.simd.backend specialized-arrays ; FROM: alien.c-types => heap-size char short int longlong float double ; SPECIALIZED-ARRAYS: char short int longlong float double ; @@ -76,15 +77,37 @@ IN: compiler.cfg.intrinsics.simd { cc>= [ src1 src2 rep ^^max-vector src1 rep cc= ^^compare-vector ] } } case ; +:: ^((compare-vector)) ( src1 src2 rep {cc,swap} -- dst ) + {cc,swap} first2 :> ( cc swap? ) + swap? + [ src2 src1 rep cc ^^compare-vector ] + [ src1 src2 rep cc ^^compare-vector ] if ; + +:: ^(compare-vector) ( src1 src2 rep orig-cc -- dst ) + rep orig-cc %compare-vector-ccs :> ( ccs not? ) + + ccs empty? + [ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ] + [ + ccs unclip :> ( rest-ccs first-cc ) + src1 src2 rep first-cc ^((compare-vector)) :> first-dst + + rest-ccs first-dst + [ [ src1 src2 rep ] dip ^((compare-vector)) rep ^^or-vector ] + reduce + + not? [ rep generate-not-vector ] when + ] if ; + : ^compare-vector ( src1 src2 rep cc -- dst ) { - [ ^^compare-vector ] + [ ^(compare-vector) ] [ ^minmax-compare-vector ] { unsigned-int-vector-rep [| src1 src2 rep cc | rep sign-bit-mask ^^load-constant :> sign-bits src1 sign-bits rep ^^xor-vector src2 sign-bits rep ^^xor-vector - rep unsign-rep cc ^^compare-vector + rep unsign-rep cc ^(compare-vector) ] } } vv-cc-vector-op ; @@ -95,7 +118,7 @@ IN: compiler.cfg.intrinsics.simd { signed-int-vector-rep [| src rep | src src rep ^^merge-vector-head :> merged rep rep-component-type heap-size 8 * :> bits - merged bits rep ^widened-shr-vector-imm + merged bits rep widen-rep ^shr-vector-imm ] } { signed-int-vector-rep [| src rep | rep ^^zero-vector :> zero @@ -499,14 +522,23 @@ IN: compiler.cfg.intrinsics.simd } [ integer? ] emit-vl-vector-op ; : emit-alien-vector ( node -- ) - { - [ ^^alien-vector ] - } emit-alien-vector-op ; - + dup [ + '[ + ds-drop prepare-alien-getter + _ ^^alien-vector ds-push + ] + [ inline-alien-getter? ] inline-alien + ] with { [ %alien-vector-reps member? ] } if-literals-match ; + : emit-set-alien-vector ( node -- ) - { - [ ^^set-alien-vector ] - } emit-set-alien-vector-op ; + dup [ + '[ + ds-drop prepare-alien-setter ds-pop + _ ##set-alien-vector + ] + [ byte-array inline-alien-setter? ] + inline-alien + ] with { [ %alien-vector-reps member? ] } if-literals-match ; : enable-simd ( -- ) { From 324889e2acafec116c27bc525e9b5027d35ba921 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 11 Nov 2009 15:50:20 -0600 Subject: [PATCH 13/46] '[ ] should make an empty quot --- basis/fry/fry-tests.factor | 2 ++ basis/fry/fry.factor | 12 +++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/basis/fry/fry-tests.factor b/basis/fry/fry-tests.factor index 10d9b282ad..f33eb276a0 100644 --- a/basis/fry/fry-tests.factor +++ b/basis/fry/fry-tests.factor @@ -5,6 +5,8 @@ IN: fry.tests SYMBOLS: a b c d e f g h ; +[ [ ] ] [ '[ ] ] unit-test +[ [ + ] ] [ '[ + ] ] unit-test [ [ 1 ] ] [ 1 '[ _ ] ] unit-test [ [ 1 ] ] [ [ 1 ] '[ @ ] ] unit-test [ [ 1 2 ] ] [ [ 1 ] [ 2 ] '[ @ @ ] ] unit-test diff --git a/basis/fry/fry.factor b/basis/fry/fry.factor index 931397e933..e58253692f 100644 --- a/basis/fry/fry.factor +++ b/basis/fry/fry.factor @@ -136,10 +136,12 @@ TUPLE: dredge-fry-state PRIVATE> M: callable fry ( quot -- quot' ) - 0 swap - [ dredge-fry ] [ - [ prequot>> >quotation ] - [ quot>> >quotation shallow-fry ] bi append - ] bi ; + [ [ [ ] ] ] [ + 0 swap + [ dredge-fry ] [ + [ prequot>> >quotation ] + [ quot>> >quotation shallow-fry ] bi append + ] bi + ] if-empty ; SYNTAX: '[ parse-quotation fry append! ; From e323071c44d263edc9710ce44da5c3588ebfe9f1 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 11 Nov 2009 16:08:40 -0600 Subject: [PATCH 14/46] sever lingering dependencies on simd from compiler --- .../compiler/cfg/intrinsics/intrinsics.factor | 2 - .../value-numbering/rewrite/rewrite.factor | 93 +------------- .../cfg/value-numbering/simd/simd.factor | 120 ++++++++++++++++++ .../value-numbering/simplify/simplify.factor | 12 +- .../value-numbering-tests.factor | 2 +- .../math/vectors/simd/mirrors/mirrors.factor | 3 + basis/math/vectors/simd/simd.factor | 7 +- basis/math/vectors/vectors.factor | 3 +- .../specialized-arrays/mirrors/mirrors.factor | 2 - .../specialized-arrays.factor | 9 +- 10 files changed, 135 insertions(+), 118 deletions(-) create mode 100644 basis/compiler/cfg/value-numbering/simd/simd.factor create mode 100644 basis/math/vectors/simd/mirrors/mirrors.factor diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index 632c32b12f..f4bbcdb255 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -7,7 +7,6 @@ compiler.cfg.intrinsics.alien compiler.cfg.intrinsics.allot compiler.cfg.intrinsics.fixnum compiler.cfg.intrinsics.float -compiler.cfg.intrinsics.simd compiler.cfg.intrinsics.slots compiler.cfg.intrinsics.misc compiler.cfg.comparisons ; @@ -23,7 +22,6 @@ QUALIFIED: classes.tuple.private QUALIFIED: math.private QUALIFIED: math.integers.private QUALIFIED: math.floats.private -QUALIFIED: math.vectors.simd.intrinsics QUALIFIED: math.libm IN: compiler.cfg.intrinsics diff --git a/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor b/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor index 4864a8bfb7..746fe0e5ea 100755 --- a/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor +++ b/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors combinators combinators.short-circuit arrays fry kernel layouts math namespaces sequences cpu.architecture -math.bitwise math.order math.vectors.simd.intrinsics classes +math.bitwise math.order classes vectors locals make alien.c-types io.binary grouping compiler.cfg compiler.cfg.registers @@ -463,100 +463,9 @@ M: ##alien-signed-2 rewrite rewrite-alien-addressing ; M: ##alien-signed-4 rewrite rewrite-alien-addressing ; M: ##alien-float rewrite rewrite-alien-addressing ; M: ##alien-double rewrite rewrite-alien-addressing ; -M: ##alien-vector rewrite rewrite-alien-addressing ; M: ##set-alien-integer-1 rewrite rewrite-alien-addressing ; M: ##set-alien-integer-2 rewrite rewrite-alien-addressing ; M: ##set-alien-integer-4 rewrite rewrite-alien-addressing ; M: ##set-alien-float rewrite rewrite-alien-addressing ; M: ##set-alien-double rewrite rewrite-alien-addressing ; -M: ##set-alien-vector rewrite rewrite-alien-addressing ; -! Some lame constant folding for SIMD intrinsics. Eventually this -! should be redone completely. - -: rewrite-shuffle-vector-imm ( insn expr -- insn' ) - 2dup [ rep>> ] bi@ eq? [ - [ [ dst>> ] [ src>> vn>vreg ] bi* ] - [ [ shuffle>> ] bi@ nths ] - [ drop rep>> ] - 2tri \ ##shuffle-vector-imm new-insn - ] [ 2drop f ] if ; - -: (fold-shuffle-vector-imm) ( shuffle bytes -- bytes' ) - 2dup length swap length /i group nths concat ; - -: fold-shuffle-vector-imm ( insn expr -- insn' ) - [ [ dst>> ] [ shuffle>> ] bi ] dip value>> - (fold-shuffle-vector-imm) \ ##load-constant new-insn ; - -M: ##shuffle-vector-imm rewrite - dup src>> vreg>expr { - { [ dup shuffle-vector-imm-expr? ] [ rewrite-shuffle-vector-imm ] } - { [ dup reference-expr? ] [ fold-shuffle-vector-imm ] } - { [ dup constant-expr? ] [ fold-shuffle-vector-imm ] } - [ 2drop f ] - } cond ; - -: (fold-scalar>vector) ( insn bytes -- insn' ) - [ [ dst>> ] [ rep>> rep-components ] bi ] dip concat - \ ##load-constant new-insn ; - -: fold-scalar>vector ( insn expr -- insn' ) - value>> over rep>> { - { float-4-rep [ float>bits 4 >le (fold-scalar>vector) ] } - { double-2-rep [ double>bits 8 >le (fold-scalar>vector) ] } - [ [ untag-fixnum ] dip rep-component-type heap-size >le (fold-scalar>vector) ] - } case ; - -M: ##scalar>vector rewrite - dup src>> vreg>expr dup constant-expr? - [ fold-scalar>vector ] [ 2drop f ] if ; - -M: ##xor-vector rewrite - dup [ src1>> vreg>vn ] [ src2>> vreg>vn ] bi eq? - [ [ dst>> ] [ rep>> ] bi \ ##zero-vector new-insn ] [ drop f ] if ; - -: vector-not? ( expr -- ? ) - { - [ not-vector-expr? ] - [ { - [ xor-vector-expr? ] - [ [ src1>> ] [ src2>> ] bi [ vn>expr fill-vector-expr? ] either? ] - } 1&& ] - } 1|| ; - -GENERIC: vector-not-src ( expr -- vreg ) -M: not-vector-expr vector-not-src src>> vn>vreg ; -M: xor-vector-expr vector-not-src - dup src1>> vn>expr fill-vector-expr? [ src2>> ] [ src1>> ] if vn>vreg ; - -M: ##and-vector rewrite - { - { [ dup src1>> vreg>expr vector-not? ] [ - { - [ dst>> ] - [ src1>> vreg>expr vector-not-src ] - [ src2>> ] - [ rep>> ] - } cleave \ ##andn-vector new-insn - ] } - { [ dup src2>> vreg>expr vector-not? ] [ - { - [ dst>> ] - [ src2>> vreg>expr vector-not-src ] - [ src1>> ] - [ rep>> ] - } cleave \ ##andn-vector new-insn - ] } - [ drop f ] - } cond ; - -M: ##andn-vector rewrite - dup src1>> vreg>expr vector-not? [ - { - [ dst>> ] - [ src1>> vreg>expr vector-not-src ] - [ src2>> ] - [ rep>> ] - } cleave \ ##and-vector new-insn - ] [ drop f ] if ; diff --git a/basis/compiler/cfg/value-numbering/simd/simd.factor b/basis/compiler/cfg/value-numbering/simd/simd.factor new file mode 100644 index 0000000000..9bb4453313 --- /dev/null +++ b/basis/compiler/cfg/value-numbering/simd/simd.factor @@ -0,0 +1,120 @@ +! Copyright (C) 2008, 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors combinators combinators.short-circuit arrays +fry kernel layouts math namespaces sequences cpu.architecture +math.bitwise math.order classes +vectors locals make alien.c-types io.binary grouping +math.vectors.simd +compiler.cfg +compiler.cfg.registers +compiler.cfg.comparisons +compiler.cfg.instructions +compiler.cfg.value-numbering.expressions +compiler.cfg.value-numbering.graph +compiler.cfg.value-numbering.rewrite +compiler.cfg.value-numbering.simplify ; +IN: compiler.cfg.value-numbering.simd + +M: ##alien-vector rewrite rewrite-alien-addressing ; +M: ##set-alien-vector rewrite rewrite-alien-addressing ; + +! Some lame constant folding for SIMD intrinsics. Eventually this +! should be redone completely. + +: rewrite-shuffle-vector-imm ( insn expr -- insn' ) + 2dup [ rep>> ] bi@ eq? [ + [ [ dst>> ] [ src>> vn>vreg ] bi* ] + [ [ shuffle>> ] bi@ nths ] + [ drop rep>> ] + 2tri \ ##shuffle-vector-imm new-insn + ] [ 2drop f ] if ; + +: (fold-shuffle-vector-imm) ( shuffle bytes -- bytes' ) + 2dup length swap length /i group nths concat ; + +: fold-shuffle-vector-imm ( insn expr -- insn' ) + [ [ dst>> ] [ shuffle>> ] bi ] dip value>> + (fold-shuffle-vector-imm) \ ##load-constant new-insn ; + +M: ##shuffle-vector-imm rewrite + dup src>> vreg>expr { + { [ dup shuffle-vector-imm-expr? ] [ rewrite-shuffle-vector-imm ] } + { [ dup reference-expr? ] [ fold-shuffle-vector-imm ] } + { [ dup constant-expr? ] [ fold-shuffle-vector-imm ] } + [ 2drop f ] + } cond ; + +: (fold-scalar>vector) ( insn bytes -- insn' ) + [ [ dst>> ] [ rep>> rep-length ] bi ] dip concat + \ ##load-constant new-insn ; + +: fold-scalar>vector ( insn expr -- insn' ) + value>> over rep>> { + { float-4-rep [ float>bits 4 >le (fold-scalar>vector) ] } + { double-2-rep [ double>bits 8 >le (fold-scalar>vector) ] } + [ [ untag-fixnum ] dip rep-component-type heap-size >le (fold-scalar>vector) ] + } case ; + +M: ##scalar>vector rewrite + dup src>> vreg>expr dup constant-expr? + [ fold-scalar>vector ] [ 2drop f ] if ; + +M: ##xor-vector rewrite + dup [ src1>> vreg>vn ] [ src2>> vreg>vn ] bi eq? + [ [ dst>> ] [ rep>> ] bi \ ##zero-vector new-insn ] [ drop f ] if ; + +: vector-not? ( expr -- ? ) + { + [ not-vector-expr? ] + [ { + [ xor-vector-expr? ] + [ [ src1>> ] [ src2>> ] bi [ vn>expr fill-vector-expr? ] either? ] + } 1&& ] + } 1|| ; + +GENERIC: vector-not-src ( expr -- vreg ) +M: not-vector-expr vector-not-src src>> vn>vreg ; +M: xor-vector-expr vector-not-src + dup src1>> vn>expr fill-vector-expr? [ src2>> ] [ src1>> ] if vn>vreg ; + +M: ##and-vector rewrite + { + { [ dup src1>> vreg>expr vector-not? ] [ + { + [ dst>> ] + [ src1>> vreg>expr vector-not-src ] + [ src2>> ] + [ rep>> ] + } cleave \ ##andn-vector new-insn + ] } + { [ dup src2>> vreg>expr vector-not? ] [ + { + [ dst>> ] + [ src2>> vreg>expr vector-not-src ] + [ src1>> ] + [ rep>> ] + } cleave \ ##andn-vector new-insn + ] } + [ drop f ] + } cond ; + +M: ##andn-vector rewrite + dup src1>> vreg>expr vector-not? [ + { + [ dst>> ] + [ src1>> vreg>expr vector-not-src ] + [ src2>> ] + [ rep>> ] + } cleave \ ##and-vector new-insn + ] [ drop f ] if ; + +M: scalar>vector-expr simplify* + src>> vn>expr { + { [ dup vector>scalar-expr? ] [ src>> ] } + [ drop f ] + } cond ; + +M: shuffle-vector-imm-expr simplify* + [ src>> ] [ shuffle>> ] [ rep>> rep-length iota ] tri + sequence= [ drop f ] unless ; + diff --git a/basis/compiler/cfg/value-numbering/simplify/simplify.factor b/basis/compiler/cfg/value-numbering/simplify/simplify.factor index df3dc6aab9..7a95711b01 100644 --- a/basis/compiler/cfg/value-numbering/simplify/simplify.factor +++ b/basis/compiler/cfg/value-numbering/simplify/simplify.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2008, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: kernel accessors combinators classes math layouts -sequences math.vectors.simd.intrinsics +sequences compiler.cfg.instructions compiler.cfg.value-numbering.graph compiler.cfg.value-numbering.expressions ; @@ -130,16 +130,6 @@ M: box-displaced-alien-expr simplify* [ 2drop f ] } cond ; -M: scalar>vector-expr simplify* - src>> vn>expr { - { [ dup vector>scalar-expr? ] [ src>> ] } - [ drop f ] - } cond ; - -M: shuffle-vector-imm-expr simplify* - [ src>> ] [ shuffle>> ] [ rep>> rep-components iota ] tri - sequence= [ drop f ] unless ; - M: expr simplify* drop f ; : simplify ( expr -- vn ) diff --git a/basis/compiler/cfg/value-numbering/value-numbering-tests.factor b/basis/compiler/cfg/value-numbering/value-numbering-tests.factor index b404c4d4a4..ac992ff98d 100644 --- a/basis/compiler/cfg/value-numbering/value-numbering-tests.factor +++ b/basis/compiler/cfg/value-numbering/value-numbering-tests.factor @@ -4,7 +4,7 @@ cpu.architecture tools.test kernel math combinators.short-circuit accessors sequences compiler.cfg.predecessors locals compiler.cfg.dce compiler.cfg.ssa.destruction compiler.cfg.loop-detection compiler.cfg.representations compiler.cfg assocs vectors arrays -layouts literals namespaces alien ; +layouts literals namespaces alien compiler.cfg.value-numbering.simd ; IN: compiler.cfg.value-numbering.tests : trim-temps ( insns -- insns ) diff --git a/basis/math/vectors/simd/mirrors/mirrors.factor b/basis/math/vectors/simd/mirrors/mirrors.factor new file mode 100644 index 0000000000..e8a103d449 --- /dev/null +++ b/basis/math/vectors/simd/mirrors/mirrors.factor @@ -0,0 +1,3 @@ +USING: math.vectors.simd mirrors ; +IN: math.vectors.simd.mirrors +INSTANCE: simd-128 enumerated-sequence diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index c155c797ff..4953abb4ea 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -320,7 +320,7 @@ M: simd-128 n+v [ simd-with ] keep v+ ; inline M: simd-128 n-v [ simd-with ] keep v- ; inline M: simd-128 n*v [ simd-with ] keep v* ; inline M: simd-128 n/v [ simd-with ] keep v/ ; inline -M: simd-128 v+n over simd-with v+ +M: simd-128 v+n over simd-with v+ ; inline M: simd-128 v-n over simd-with v- ; inline M: simd-128 v*n over simd-with v* ; inline M: simd-128 v/n over simd-with v/ ; inline @@ -336,3 +336,8 @@ M: simd-128 vshuffle ( u perm -- v ) "compiler.tree.propagation.simd" require "compiler.cfg.intrinsics.simd" require +"compiler.cfg.value-numbering.simd" require + +"mirrors" vocab [ + "math.vectors.simd.mirrors" require +] when diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index f3dfcda18a..d524ba309f 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -97,7 +97,7 @@ M: object vbitor [ bitor ] 2map ; GENERIC: vbitxor ( u v -- w ) M: object vbitxor [ bitxor ] 2map ; GENERIC: vbitnot ( u -- w ) -M: object vbitnot [ bitnot ] 2map ; +M: object vbitnot [ bitnot ] map ; GENERIC# vbroadcast 1 ( u n -- v ) M:: object vbroadcast ( u n -- v ) u length n u nth u like ; @@ -133,6 +133,7 @@ GENERIC: (vmerge-tail) ( u v -- t ) M: object (vmerge-tail) over length 2 /i '[ _ tail-slice ] bi@ [ zip ] keep concat-as ; GENERIC: (vmerge) ( u v -- h t ) +M: object (vmerge) [ (vmerge-head) ] [ (vmerge-tail) ] 2bi ; inline GENERIC: vmerge ( u v -- w ) diff --git a/basis/specialized-arrays/mirrors/mirrors.factor b/basis/specialized-arrays/mirrors/mirrors.factor index ee7953b501..eea9e83b58 100644 --- a/basis/specialized-arrays/mirrors/mirrors.factor +++ b/basis/specialized-arrays/mirrors/mirrors.factor @@ -4,5 +4,3 @@ USING: mirrors specialized-arrays math.vectors ; IN: specialized-arrays.mirrors INSTANCE: specialized-array enumerated-sequence -INSTANCE: simd-128 enumerated-sequence -INSTANCE: simd-256 enumerated-sequence diff --git a/basis/specialized-arrays/specialized-arrays.factor b/basis/specialized-arrays/specialized-arrays.factor index 711354d803..b6f7209cc6 100755 --- a/basis/specialized-arrays/specialized-arrays.factor +++ b/basis/specialized-arrays/specialized-arrays.factor @@ -2,8 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors alien alien.c-types alien.data alien.parser assocs byte-arrays classes compiler.units functors kernel lexer -libc math math.vectors math.vectors.private -math.vectors.specialization namespaces +libc math math.vectors math.vectors.private namespaces parser prettyprint.custom sequences sequences.private strings summary vocabs vocabs.loader vocabs.parser vocabs.generated words fry combinators make ; @@ -69,8 +68,6 @@ TUPLE: A [ drop \ T bad-byte-array-length ] unless ; inline -M: A new-underlying drop byte-array>A ; - M: A clone [ underlying>> clone ] [ length>> ] bi ; inline M: A length length>> ; inline @@ -96,8 +93,6 @@ M: A resize M: A byte-length length \ T heap-size * ; inline -M: A element-type drop \ T ; inline - M: A direct-array-syntax drop \ A@ ; M: A pprint-delims drop \ A{ \ } ; @@ -109,8 +104,6 @@ SYNTAX: A@ scan-object scan-object suffix! ; INSTANCE: A specialized-array -A T c-type-boxed-class f specialize-vector-words - ;FUNCTOR GENERIC: (underlying-type) ( c-type -- c-type' ) From 8a8699ac984aed542b748ffe2f7fa1791d8a38f1 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sat, 14 Nov 2009 20:59:03 -0600 Subject: [PATCH 15/46] backend fixups --- .../intrinsics/simd/backend/backend.factor | 60 ++--- .../compiler/cfg/intrinsics/simd/simd.factor | 220 ++++++++++-------- .../tree/propagation/simd/simd.factor | 3 +- basis/math/vectors/simd/simd-docs.factor | 37 +-- basis/math/vectors/simd/simd.factor | 67 ++++-- 5 files changed, 199 insertions(+), 188 deletions(-) diff --git a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor index 4fe9774282..90514c6cc9 100644 --- a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor +++ b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor @@ -1,5 +1,10 @@ ! (c)2009 Joe Groff bsd license -USING: accessors fry generalizations kernel locals math sequences +USING: accessors arrays classes combinators +combinators.short-circuit compiler.cfg.builder.blocks +compiler.cfg.registers compiler.cfg.stacks +compiler.cfg.stacks.local compiler.tree.propagation.info +cpu.architecture effects fry generalizations help.lint.checks +kernel locals macros math namespaces quotations sequences splitting words ; IN: compiler.cfg.intrinsics.simd.backend @@ -8,55 +13,51 @@ IN: compiler.cfg.intrinsics.simd.backend : can-has? ( quot -- ? ) [ t \ can-has? ] dip '[ @ drop \ can-has? get ] with-variable ; inline -GENERIC: create-can-has-word ( word -- word' ) +GENERIC: create-can-has ( word -- word' ) -PREDICATE: vector-op-word +PREDICATE: vector-op-word < word { [ name>> { [ { [ "^" head? ] [ "##" head? ] } 1|| ] [ "-vector" swap subseq? ] } 1&& ] - [ vocabulary>> { "compiler.cfg.intrinsics.simd" "cpu.architecture" } member? ] + [ vocabulary>> { "compiler.cfg.intrinsics.simd" "compiler.cfg.hats" } member? ] } 1&& ; : reps-word ( word -- word' ) name>> "^^" ?head drop "##" ?head drop "%" "-reps" surround "cpu.architecture" lookup ; -:: can-has-^^-quot ( word def effect -- def' ) +:: can-has-^^-quot ( word def effect -- quot ) effect in>> { "rep" } split1 [ length ] bi@ 1 + word reps-word effect out>> length f >quotation '[ [ _ ndrop ] _ ndip _ execute member? \ can-has? [ and ] change @ ] ; -:: can-has-^-quot ( word def effect -- def' ) +:: can-has-^-quot ( word def effect -- quot ) def create-can-has ; -M: object create-can-has ; +M: object create-can-has 1quotation ; -M: sequence create-can-has - [ create-can-has-word ] map ; +M: array create-can-has + [ create-can-has ] map concat ; +M: callable create-can-has + [ create-can-has ] map concat ; -: (create-can-has-word) ( word -- word' created? ) - name>> "can-has-" prepend "compiler.cfg.intrinsics.simd.backend" - 2dup lookup - [ 2nip f ] [ create t ] if* ; +: (can-has-word) ( word -- word' ) + name>> "can-has-" prepend "compiler.cfg.intrinsics.simd.backend" lookup ; -: (create-can-has-quot) ( word -- def effect ) - [ ] [ def>> ] [ stack-effect ] tri [ - { - { [ pick "^^" head? ] [ can-has-^^-quot ] } - { [ pick "##" head? ] [ can-has-^^-quot ] } - { [ pick "^" head? ] [ can-has-^-quot ] } - } cond - ] keep ; +: (can-has-quot) ( word -- quot ) + [ ] [ def>> ] [ stack-effect ] tri { + { [ pick name>> "^^" head? ] [ can-has-^^-quot ] } + { [ pick name>> "##" head? ] [ can-has-^^-quot ] } + { [ pick name>> "^" head? ] [ can-has-^-quot ] } + } cond ; M: vector-op-word create-can-has - [ (create-can-has-word) ] keep - '[ _ (create-can-has-quot) define-declared ] - [ nip ] if ; + dup (can-has-word) [ 1quotation ] [ (can-has-quot) ] ?if ; GENERIC# >can-has-cond 2 ( quot #pick #dup -- quotpair ) -M:: callable >can-has-cond +M:: callable >can-has-cond ( quot #pick #dup -- quotpair ) #dup quot create-can-has '[ _ ndup _ can-has? ] quot 2array ; - + M:: pair >can-has-cond ( pair #pick #dup -- quotpair ) pair first2 :> ( class quot ) #pick class #dup quot create-can-has @@ -113,7 +114,7 @@ CONSTANT: [quaternary] -4 inc-d ] -:: [emit-vector-op] ( trials params-quot op-quot literal-preds -- quot ) ; +:: [emit-vector-op] ( trials params-quot op-quot literal-preds -- quot ) params-quot trials op-quot literal-preds '[ [ _ dip _ @ ds-push ] _ if-literals-match ] ; @@ -126,10 +127,11 @@ MACRO: emit-vv-vector-op ( trials -- ) MACRO: emit-vvvv-vector-op ( trials -- ) [quaternary] [ vvvv-vector-op ] { [ representation? ] } [emit-vector-op] ; -MACRO:: emit-vv-or-vl-vector-op ( trials literal-pred -- ) - literal-pred trials literal-pred trials +MACRO:: emit-vv-or-vl-vector-op ( var-trials imm-trials literal-pred -- ) + literal-pred imm-trials literal-pred var-trials '[ dup node-input-infos 2 tail-slice* first literal>> @ [ _ _ emit-vl-vector-op ] [ _ emit-vv-vector-op ] if ] ; + diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index 1cf076af1d..512df6c129 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -1,18 +1,20 @@ ! Copyright (C) 2009 Slava Pestov, Joe Groff. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors alien byte-arrays fry classes.algebra -cpu.architecture kernel math sequences math.vectors -math.vectors.simd macros generalizations combinators -combinators.short-circuit arrays locals -compiler.tree.propagation.info compiler.cfg.builder.blocks +USING: accessors alien alien.c-types byte-arrays fry +classes.algebra cpu.architecture kernel math sequences +math.vectors math.vectors.simd math.vectors.simd.private +macros generalizations combinators combinators.short-circuit +arrays locals compiler.tree.propagation.info +compiler.cfg.builder.blocks compiler.cfg.comparisons compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats compiler.cfg.instructions compiler.cfg.registers +compiler.cfg.intrinsics compiler.cfg.intrinsics.alien compiler.cfg.intrinsics.simd.backend specialized-arrays ; FROM: alien.c-types => heap-size char short int longlong float double ; -SPECIALIZED-ARRAYS: char short int longlong float double ; +SPECIALIZED-ARRAYS: char uchar short ushort int uint longlong ulonglong float double ; IN: compiler.cfg.intrinsics.simd ! compound vector ops @@ -69,8 +71,14 @@ IN: compiler.cfg.intrinsics.simd mask false rep ^^andn-vector rep ^^or-vector ; -: ^minmax-compare-vector ( src1 src2 rep cc -- dst ) - order-cc { +: ^not-vector ( src rep -- dst ) + { + [ ^^not-vector ] + [ [ ^^fill-vector ] [ ^^xor-vector ] bi ] + } v-vector-op ; + +:: ^minmax-compare-vector ( src1 src2 rep cc -- dst ) + cc order-cc { { cc< [ src1 src2 rep ^^max-vector src1 rep cc/= ^^compare-vector ] } { cc<= [ src1 src2 rep ^^min-vector src1 rep cc= ^^compare-vector ] } { cc> [ src1 src2 rep ^^min-vector src1 rep cc/= ^^compare-vector ] } @@ -96,7 +104,7 @@ IN: compiler.cfg.intrinsics.simd [ [ src1 src2 rep ] dip ^((compare-vector)) rep ^^or-vector ] reduce - not? [ rep generate-not-vector ] when + not? [ rep ^not-vector ] when ] if ; : ^compare-vector ( src1 src2 rep cc -- dst ) @@ -118,7 +126,7 @@ IN: compiler.cfg.intrinsics.simd { signed-int-vector-rep [| src rep | src src rep ^^merge-vector-head :> merged rep rep-component-type heap-size 8 * :> bits - merged bits rep widen-rep ^shr-vector-imm + merged bits rep widen-vector-rep ^^shr-vector-imm ] } { signed-int-vector-rep [| src rep | rep ^^zero-vector :> zero @@ -135,7 +143,7 @@ IN: compiler.cfg.intrinsics.simd { signed-int-vector-rep [| src rep | src src rep ^^merge-vector-tail :> merged rep rep-component-type heap-size 8 * :> bits - merged bits rep ^widened-shr-vector-imm + merged bits rep widen-vector-rep ^^shr-vector-imm ] } { signed-int-vector-rep [| src rep | rep ^^zero-vector :> zero @@ -144,7 +152,7 @@ IN: compiler.cfg.intrinsics.simd ] } } v-vector-op ; -: ^(sum-2) ( src rep -- dst ) +: ^(sum-vector-2) ( src rep -- dst ) { [ dupd ^^horizontal-add-vector ] [| src rep | @@ -154,7 +162,7 @@ IN: compiler.cfg.intrinsics.simd ] } v-vector-op ; -: ^(sum-4) ( src rep -- dst ) +: ^(sum-vector-4) ( src rep -- dst ) { [ [ dupd ^^horizontal-add-vector ] @@ -165,14 +173,14 @@ IN: compiler.cfg.intrinsics.simd src src rep ^^merge-vector-tail :> tail head tail rep ^^add-vector :> src' - rep widen-rep :> rep' + rep widen-vector-rep :> rep' src' src' rep' ^^merge-vector-head :> head' src' src' rep' ^^merge-vector-tail :> tail' head' tail' rep ^^add-vector ] } v-vector-op ; -: ^(sum-8) ( src rep -- dst ) +: ^(sum-vector-8) ( src rep -- dst ) { [ [ dupd ^^horizontal-add-vector ] @@ -184,19 +192,19 @@ IN: compiler.cfg.intrinsics.simd src src rep ^^merge-vector-tail :> tail head tail rep ^^add-vector :> src' - rep widen-rep :> rep' + rep widen-vector-rep :> rep' src' src' rep' ^^merge-vector-head :> head' src' src' rep' ^^merge-vector-tail :> tail' head' tail' rep ^^add-vector :> src'' - rep' widen-rep :> rep'' + rep' widen-vector-rep :> rep'' src'' src'' rep'' ^^merge-vector-head :> head'' src'' src'' rep'' ^^merge-vector-tail :> tail'' head'' tail'' rep ^^add-vector ] } v-vector-op ; -: ^(sum-16) ( src rep -- dst ) +: ^(sum-vector-16) ( src rep -- dst ) { [ { @@ -211,17 +219,17 @@ IN: compiler.cfg.intrinsics.simd src src rep ^^merge-vector-tail :> tail head tail rep ^^add-vector :> src' - rep widen-rep :> rep' + rep widen-vector-rep :> rep' src' src' rep' ^^merge-vector-head :> head' src' src' rep' ^^merge-vector-tail :> tail' head' tail' rep ^^add-vector :> src'' - rep' widen-rep :> rep'' + rep' widen-vector-rep :> rep'' src'' src'' rep'' ^^merge-vector-head :> head'' src'' src'' rep'' ^^merge-vector-tail :> tail'' head'' tail'' rep ^^add-vector :> src''' - rep'' widen-rep :> rep''' + rep'' widen-vector-rep :> rep''' src''' src''' rep''' ^^merge-vector-head :> head''' src''' src''' rep''' ^^merge-vector-tail :> tail''' head''' tail''' rep ^^add-vector @@ -230,11 +238,11 @@ IN: compiler.cfg.intrinsics.simd : ^(sum-vector) ( src rep -- dst ) [ - rep-length { - { 2 [ ^(sum-2) ] } - { 4 [ ^(sum-4) ] } - { 8 [ ^(sum-8) ] } - { 16 [ ^(sum-16) ] } + dup rep-length { + { 2 [ ^(sum-vector-2) ] } + { 4 [ ^(sum-vector-4) ] } + { 8 [ ^(sum-vector-8) ] } + { 16 [ ^(sum-vector-16) ] } } case ] [ ^^vector>scalar ] bi ; @@ -244,11 +252,29 @@ IN: compiler.cfg.intrinsics.simd { int-vector-rep [| src rep | src rep ^unpack-vector-head :> head src rep ^unpack-vector-tail :> tail - rep widen-rep :> wide-rep + rep widen-vector-rep :> wide-rep head tail wide-rep ^^add-vector wide-rep ^(sum-vector) ] } } v-vector-op ; +: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ; + +: ^shuffle-vector-imm ( src1 src2 rep -- dst ) + { + [ ^^shuffle-vector-imm ] + [ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] bi ] + } vl-vector-op ; + +: ^broadcast-vector ( src n rep -- dst ) + [ rep-length swap ] keep + ^shuffle-vector-imm ; + +: ^with-vector ( src rep -- dst ) + [ ^^scalar>vector ] keep [ 0 ] dip ^broadcast-vector ; + +: ^select-vector ( src n rep -- dst ) + [ ^broadcast-vector ] keep ^^vector>scalar ; + ! intrinsic emitters : emit-simd-v+ ( node -- ) @@ -380,8 +406,7 @@ IN: compiler.cfg.intrinsics.simd : emit-simd-vnot ( node -- ) { - [ ^^not-vector ] - [ [ ^^fill-vector ] [ ^^xor-vector ] bi ] + [ ^not-vector ] } emit-v-vector-op ; : emit-simd-vlshift ( node -- ) @@ -408,12 +433,9 @@ IN: compiler.cfg.intrinsics.simd [ ^^horizontal-shr-vector-imm ] } [ integer? ] emit-vl-vector-op ; -: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ; - : emit-simd-vshuffle-elements ( node -- ) { - [ ^^shuffle-vector-imm ] - [ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] ] + [ ^shuffle-vector-imm ] } [ shuffle? ] emit-vl-vector-op ; : emit-simd-vshuffle-bytes ( node -- ) @@ -458,28 +480,28 @@ IN: compiler.cfg.intrinsics.simd : emit-simd-vany? ( node -- ) { - [ vcc-any ^test-vector ] + [ vcc-any ^^test-vector ] } emit-vv-vector-op ; : emit-simd-vall? ( node -- ) { - [ vcc-all ^test-vector ] + [ vcc-all ^^test-vector ] } emit-vv-vector-op ; : emit-simd-vnone? ( node -- ) { - [ vcc-none ^test-vector ] + [ vcc-none ^^test-vector ] } emit-vv-vector-op ; : emit-simd-v>float ( node -- ) { { float-vector-rep [ drop ] } { int-vector-rep [ ^^integer>float-vector ] } - } emit-vv-vector-op ; + } emit-v-vector-op ; : emit-simd-v>integer ( node -- ) { { float-vector-rep [ ^^float>integer-vector ] } { int-vector-rep [ dup ] } - } emit-vv-vector-op ; + } emit-v-vector-op ; : emit-simd-vpack-signed ( node -- ) { @@ -503,7 +525,7 @@ IN: compiler.cfg.intrinsics.simd : emit-simd-with ( node -- ) { - [ ^^with-vector ] + [ ^with-vector ] } emit-v-vector-op ; : emit-simd-gather-2 ( node -- ) @@ -518,7 +540,7 @@ IN: compiler.cfg.intrinsics.simd : emit-simd-select ( node -- ) { - [ ^^select-vector ] + [ ^select-vector ] } [ integer? ] emit-vl-vector-op ; : emit-alien-vector ( node -- ) @@ -540,62 +562,62 @@ IN: compiler.cfg.intrinsics.simd inline-alien ] with { [ %alien-vector-reps member? ] } if-literals-match ; -: enable-simd ( -- ) - { - { (simd-v+) [ emit-simd-v+ ] } - { (simd-v-) [ emit-simd-v- ] } - { (simd-vneg) [ emit-simd-vneg ] } - { (simd-v+-) [ emit-simd-v+- ] } - { (simd-vs+) [ emit-simd-vs+ ] } - { (simd-vs-) [ emit-simd-vs- ] } - { (simd-vs*) [ emit-simd-vs* ] } - { (simd-v*) [ emit-simd-v* ] } - { (simd-v/) [ emit-simd-v/ ] } - { (simd-vmin) [ emit-simd-vmin ] } - { (simd-vmax) [ emit-simd-vmax ] } - { (simd-v.) [ emit-simd-v. ] } - { (simd-vsqrt) [ emit-simd-vsqrt ] } - { (simd-sum) [ emit-simd-sum ] } - { (simd-vabs) [ emit-simd-vabs ] } - { (simd-vbitand) [ emit-simd-vand ] } - { (simd-vbitandn) [ emit-simd-vandn ] } - { (simd-vbitor) [ emit-simd-vor ] } - { (simd-vbitxor) [ emit-simd-vxor ] } - { (simd-vbitnot) [ emit-simd-vnot ] } - { (simd-vand) [ emit-simd-vand ] } - { (simd-vandn) [ emit-simd-vandn ] } - { (simd-vor) [ emit-simd-vor ] } - { (simd-vxor) [ emit-simd-vxor ] } - { (simd-vnot) [ emit-simd-vnot ] } - { (simd-vlshift) [ emit-simd-vlshift ] } - { (simd-vrshift) [ emit-simd-vrshift ] } - { (simd-hlshift) [ emit-simd-hlshift ] } - { (simd-hrshift) [ emit-simd-hrshift ] } - { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] } - { (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] } - { (simd-vmerge-head) [ emit-simd-vmerge-head ] } - { (simd-vmerge-tail) [ emit-simd-vmerge-tail ] } - { (simd-v<=) [ emit-simd-v<= ] } - { (simd-v<) [ emit-simd-v< ] } - { (simd-v=) [ emit-simd-v= ] } - { (simd-v>) [ emit-simd-v> ] } - { (simd-v>=) [ emit-simd-v>= ] } - { (simd-vunordered?) [ emit-simd-vunordered? ] } - { (simd-vany?) [ emit-simd-vany? ] } - { (simd-vall?) [ emit-simd-vall? ] } - { (simd-vnone?) [ emit-simd-vnone? ] } - { (simd-v>float) [ emit-simd-v>float ] } - { (simd-v>integer) [ emit-simd-v>integer ] } - { (simd-vpack-signed) [ emit-simd-vpack-signed ] } - { (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] } - { (simd-vunpack-head) [ emit-simd-vunpack-head ] } - { (simd-vunpack-tail) [ emit-simd-vunpack-tail ] } - { (simd-with) [ emit-simd-with ] } - { (simd-gather-2) [ emit-simd-gather-2 ] } - { (simd-gather-4) [ emit-simd-gather-4 ] } - { (simd-select) [ emit-simd-select ] } - { alien-vector [ emit-alien-vector ] } - { set-alien-vector [ emit-set-alien-vector ] } - } enable-intrinsics ; - -enable-simd +! : enable-simd ( -- ) +! { +! { (simd-v+) [ emit-simd-v+ ] } +! { (simd-v-) [ emit-simd-v- ] } +! { (simd-vneg) [ emit-simd-vneg ] } +! { (simd-v+-) [ emit-simd-v+- ] } +! { (simd-vs+) [ emit-simd-vs+ ] } +! { (simd-vs-) [ emit-simd-vs- ] } +! { (simd-vs*) [ emit-simd-vs* ] } +! { (simd-v*) [ emit-simd-v* ] } +! { (simd-v/) [ emit-simd-v/ ] } +! { (simd-vmin) [ emit-simd-vmin ] } +! { (simd-vmax) [ emit-simd-vmax ] } +! { (simd-v.) [ emit-simd-v. ] } +! { (simd-vsqrt) [ emit-simd-vsqrt ] } +! { (simd-sum) [ emit-simd-sum ] } +! { (simd-vabs) [ emit-simd-vabs ] } +! { (simd-vbitand) [ emit-simd-vand ] } +! { (simd-vbitandn) [ emit-simd-vandn ] } +! { (simd-vbitor) [ emit-simd-vor ] } +! { (simd-vbitxor) [ emit-simd-vxor ] } +! { (simd-vbitnot) [ emit-simd-vnot ] } +! { (simd-vand) [ emit-simd-vand ] } +! { (simd-vandn) [ emit-simd-vandn ] } +! { (simd-vor) [ emit-simd-vor ] } +! { (simd-vxor) [ emit-simd-vxor ] } +! { (simd-vnot) [ emit-simd-vnot ] } +! { (simd-vlshift) [ emit-simd-vlshift ] } +! { (simd-vrshift) [ emit-simd-vrshift ] } +! { (simd-hlshift) [ emit-simd-hlshift ] } +! { (simd-hrshift) [ emit-simd-hrshift ] } +! { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] } +! { (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] } +! { (simd-vmerge-head) [ emit-simd-vmerge-head ] } +! { (simd-vmerge-tail) [ emit-simd-vmerge-tail ] } +! { (simd-v<=) [ emit-simd-v<= ] } +! { (simd-v<) [ emit-simd-v< ] } +! { (simd-v=) [ emit-simd-v= ] } +! { (simd-v>) [ emit-simd-v> ] } +! { (simd-v>=) [ emit-simd-v>= ] } +! { (simd-vunordered?) [ emit-simd-vunordered? ] } +! { (simd-vany?) [ emit-simd-vany? ] } +! { (simd-vall?) [ emit-simd-vall? ] } +! { (simd-vnone?) [ emit-simd-vnone? ] } +! { (simd-v>float) [ emit-simd-v>float ] } +! { (simd-v>integer) [ emit-simd-v>integer ] } +! { (simd-vpack-signed) [ emit-simd-vpack-signed ] } +! { (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] } +! { (simd-vunpack-head) [ emit-simd-vunpack-head ] } +! { (simd-vunpack-tail) [ emit-simd-vunpack-tail ] } +! { (simd-with) [ emit-simd-with ] } +! { (simd-gather-2) [ emit-simd-gather-2 ] } +! { (simd-gather-4) [ emit-simd-gather-4 ] } +! { (simd-select) [ emit-simd-select ] } +! { alien-vector [ emit-alien-vector ] } +! { set-alien-vector [ emit-set-alien-vector ] } +! } enable-intrinsics ; +! +! enable-simd diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index 1eac88598b..b71a34e938 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors byte-arrays combinators fry sequences compiler.tree.propagation.info cpu.architecture kernel words math -math.intervals math.vectors.simd ; +math.intervals math.vectors.simd math.vectors.simd.private ; IN: compiler.tree.propagation.simd { @@ -51,7 +51,6 @@ IN: compiler.tree.propagation.simd (simd-gather-2) (simd-gather-4) alien-vector - alien-vector-aligned } [ { byte-array } "default-output-classes" set-word-prop ] each : scalar-output-class ( rep -- class ) diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor index 2fbe823965..d600b0bc24 100644 --- a/basis/math/vectors/simd/simd-docs.factor +++ b/basis/math/vectors/simd/simd-docs.factor @@ -1,6 +1,6 @@ USING: classes.tuple.private cpu.architecture help.markup help.syntax kernel.private math math.vectors -math.vectors.simd.intrinsics sequences ; +sequences ; IN: math.vectors.simd ARTICLE: "math.vectors.simd.intro" "Introduction to SIMD support" @@ -23,7 +23,7 @@ $nl $nl "SSE2 introduces double-precision SIMD (" { $snippet "double-2" } " and " { $snippet "double-4" } ") and integer SIMD (all types). Integer SIMD is missing a few features, in particular the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "." $nl -"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "." +"SSE3 introduces horizontal adds (summing all components of a single vector register), which are useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "." $nl "SSSE3 introduces " { $link vabs } " for " { $snippet "char-16" } ", " { $snippet "short-8" } " and " { $snippet "int-4" } "." $nl @@ -36,26 +36,7 @@ $nl ARTICLE: "math.vectors.simd.types" "SIMD vector types" "Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type and " { $snippet "count" } " is a vector dimension." $nl -"To use a SIMD vector type, a parsing word is used to generate the relevant code and bring it into the vocabulary search path; this is the same idea as with " { $link "specialized-arrays" } ":" -{ $subsections - POSTPONE: SIMD: - POSTPONE: SIMDS: -} -"The following scalar types are supported:" -{ $code - "char" - "uchar" - "short" - "ushort" - "int" - "uint" - "longlong" - "ulonglong" - "float" - "double" -} - -"The following vector types are generated from the above scalar types:" +"The following vector types are available:" { $code "char-16" "uchar-16" @@ -218,16 +199,4 @@ ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)" "math.vectors.simd.intrinsics" } ; -HELP: SIMD: -{ $syntax "SIMD: type" } -{ $values { "type" "a scalar C type" } } -{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of " { $snippet "type" } " into the vocabulary search path. The allowed scalar types, and the auto-generated type/length vector combinations that result, are listed in " { $link "math.vectors.simd.types" } ". Generated words are documented in " { $link "math.vectors.simd.words" } "." } ; - -HELP: SIMDS: -{ $syntax "SIMDS: type type type ... ;" } -{ $values { "type" "a scalar C type" } } -{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of each " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ; - -{ POSTPONE: SIMD: POSTPONE: SIMDS: } related-words - ABOUT: "math.vectors.simd" diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index 4953abb4ea..10305c673a 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -1,5 +1,9 @@ -! (c)2009 Slava Pestov, Joe Groff bsd license -USING: math.vectors math.vectors.private ; +USING: accessors alien.c-types byte-arrays classes combinators +cpu.architecture fry functors generalizations generic +generic.parser kernel lexer literals macros math math.functions +math.vectors math.vectors.private namespaces parser +prettyprint.custom quotations sequences sequences.private vocabs +vocabs.loader ; QUALIFIED-WITH: alien.c-types c IN: math.vectors.simd @@ -8,8 +12,11 @@ DEFER: simd-with DEFER: simd-boa DEFER: simd-cast -> @ ] keep new-underlying ; inline +PRIVATE> +>> + + + : alien-vector ( c-ptr n rep -- value ) \ alien-vector bad-simd-call ; : set-alien-vector ( c-ptr n rep -- value ) \ set-alien-vector bad-simd-call ; + ;FUNCTOR SYNTAX: SIMD-128: - scan scan-word define-simd-128 ; + scan define-simd-128 ; -PRIVATE> >> +PRIVATE> + +>> SIMD-128: char-16 SIMD-128: uchar-16 @@ -176,16 +194,14 @@ SIMD-128: ulonglong-2 SIMD-128: float-4 SIMD-128: double-2 -ERROR: bad-simd-call word ; -ERROR: bad-simd-length got expected ; - : assert-positive ( x -- y ) ; ! SIMD vectors as sequences +M: simd-128 hashcode* underlying>> hashcode* ; inline M: simd-128 clone [ clone ] change-underlying ; inline M: simd-128 length simd-rep rep-length ; inline -M: simd-128 nth-unsafe tuck simd-rep (simd-select) ; inline +M: simd-128 nth-unsafe [ nip ] 2keep simd-rep (simd-select) ; inline M: simd-128 c:byte-length drop 16 ; inline M: simd-128 new-sequence @@ -193,16 +209,13 @@ M: simd-128 new-sequence [ nip [ 16 (byte-array) ] make-underlying ] [ length bad-simd-length ] if ; inline -M: simd-128 equal? - [ v= vall? ] [ 2drop f ] if-vectors-match ; inline - M: simd-128 >pprint-sequence ; M: simd-128 pprint* pprint-object ; INSTANCE: simd-128 sequence ! Unboxers for SIMD operations - +<< v-op ( a quot: ( (a) rep -- (c) ) -- c ) [ simd-unbox ] dip 2curry make-underlying ; inline +: simd-vn->v-op ( a n quot: ( (a) n rep -- (c) ) -- c ) + [ simd-unbox ] [ swap ] [ 3curry ] tri* make-underlying ; inline + : simd-v->n-op ( a quot: ( (a) rep -- n ) -- n ) [ [ underlying>> ] [ simd-rep ] bi ] dip call ; inline @@ -241,7 +257,7 @@ INSTANCE: simd-128 sequence [ '[ _ ((simd-vv->n-op)) ] ] dip if-both-vectors-match ; inline : (simd-method-fallback) ( accum word -- accum ) - [ current-method get \ (call-next-method) [ ] 2sequence suffix! ] + [ current-method get literalize \ (call-next-method) [ ] 2sequence suffix! ] dip suffix! ; SYNTAX: simd-vv->v-op @@ -252,6 +268,10 @@ SYNTAX: simd-vv->n-op \ (simd-vv->n-op) (simd-method-fallback) ; PRIVATE> +>> + +M: simd-128 equal? + [ v= vall? ] [ 2drop f ] if-both-vectors-match ; inline ! SIMD constructors @@ -283,26 +303,26 @@ M: simd-128 vmin [ (simd-vmin) ] simd-vv->v-op ; inl M: simd-128 vmax [ (simd-vmax) ] simd-vv->v-op ; inline M: simd-128 v. [ (simd-v.) ] simd-vv->n-op ; inline M: simd-128 vsqrt [ (simd-vsqrt) ] simd-v->v-op ; inline -M: simd-128 sum [ (simd-sum) ] simd-vv->n-op ; inline +M: simd-128 sum [ (simd-sum) ] simd-v->n-op ; inline M: simd-128 vabs [ (simd-vabs) ] simd-v->v-op ; inline M: simd-128 vbitand [ (simd-vbitand) ] simd-vv->v-op ; inline M: simd-128 vbitandn [ (simd-vbitandn) ] simd-vv->v-op ; inline M: simd-128 vbitor [ (simd-vbitor) ] simd-vv->v-op ; inline M: simd-128 vbitxor [ (simd-vbitxor) ] simd-vv->v-op ; inline -M: simd-128 vbitnot [ (simd-vbitnot) ] simd-vv->v-op ; inline +M: simd-128 vbitnot [ (simd-vbitnot) ] simd-v->v-op ; inline M: simd-128 vand [ (simd-vand) ] simd-vv->v-op ; inline M: simd-128 vandn [ (simd-vandn) ] simd-vv->v-op ; inline M: simd-128 vor [ (simd-vor) ] simd-vv->v-op ; inline M: simd-128 vxor [ (simd-vxor) ] simd-vv->v-op ; inline -M: simd-128 vnot [ (simd-vnot) ] simd-vv->v-op ; inline +M: simd-128 vnot [ (simd-vnot) ] simd-v->v-op ; inline M: simd-128 vlshift [ (simd-vlshift) ] simd-vn->v-op ; inline M: simd-128 vrshift [ (simd-vrshift) ] simd-vn->v-op ; inline M: simd-128 hlshift [ (simd-hlshift) ] simd-vn->v-op ; inline M: simd-128 hrshift [ (simd-hrshift) ] simd-vn->v-op ; inline M: simd-128 vshuffle-elements [ (simd-vshuffle-elements) ] simd-vn->v-op ; inline M: simd-128 vshuffle-bytes [ (simd-vshuffle-bytes) ] simd-vv->v-op ; inline -M: simd-128 vmerge-head [ (simd-vmerge-head) ] simd-vv->v-op ; inline -M: simd-128 vmerge-tail [ (simd-vmerge-tail) ] simd-vv->v-op ; inline +M: simd-128 (vmerge-head) [ (simd-vmerge-head) ] simd-vv->v-op ; inline +M: simd-128 (vmerge-tail) [ (simd-vmerge-tail) ] simd-vv->v-op ; inline M: simd-128 v<= [ (simd-v<=) ] simd-vv->v-op ; inline M: simd-128 v< [ (simd-v<) ] simd-vv->v-op ; inline M: simd-128 v= [ (simd-v=) ] simd-vv->v-op ; inline @@ -326,7 +346,6 @@ M: simd-128 v*n over simd-with v* ; inline M: simd-128 v/n over simd-with v/ ; inline M: simd-128 norm-sq dup v. assert-positive ; inline M: simd-128 norm norm-sq sqrt ; inline -M: simd-128 normalize dup norm v/n ; inline M: simd-128 distance v- norm ; inline ! misc From 243e5b43cebc7794220cb88ba4487fb75960a290 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sat, 14 Nov 2009 22:25:00 -0600 Subject: [PATCH 16/46] fixes for stronger stack checker --- basis/math/vectors/simd/simd.factor | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index 10305c673a..c02c713b48 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -1,9 +1,9 @@ -USING: accessors alien.c-types byte-arrays classes combinators -cpu.architecture fry functors generalizations generic +USING: accessors alien.c-types arrays byte-arrays classes combinators +cpu.architecture effects fry functors generalizations generic generic.parser kernel lexer literals macros math math.functions math.vectors math.vectors.private namespaces parser prettyprint.custom quotations sequences sequences.private vocabs -vocabs.loader ; +vocabs.loader words ; QUALIFIED-WITH: alien.c-types c IN: math.vectors.simd @@ -140,6 +140,8 @@ N [ A-rep rep-length ] SET-NTH [ ELT dup c:c-setter c:array-accessor ] +BOA-EFFECT [ N "n" >array { "v" } ] + WHERE TUPLE: A < simd-128 ; @@ -159,10 +161,11 @@ M: A like drop dup \ A instance? [ >A ] unless ; inline : A-with ( n -- v ) \ A new simd-with ; inline : A-cast ( v -- v' ) \ A new simd-cast ; inline -: A-boa ( ...n -- v ) \ A new simd-boa ; inline -M: A pprint-delims drop \ A{ \ } ; -SYNTAX: A{ \ } [ >A ] parse-literal ; +\ A-boa { \ A simd-boa } >quotation BOA-EFFECT define-inline + +! M: A pprint-delims drop \ A{ \ } ; +! SYNTAX: A{ \ } [ >A ] parse-literal ; c: byte-array >>class @@ -209,8 +212,8 @@ M: simd-128 new-sequence [ nip [ 16 (byte-array) ] make-underlying ] [ length bad-simd-length ] if ; inline -M: simd-128 >pprint-sequence ; -M: simd-128 pprint* pprint-object ; +! M: simd-128 >pprint-sequence ; +! M: simd-128 pprint* pprint-object ; INSTANCE: simd-128 sequence @@ -278,11 +281,11 @@ M: simd-128 equal? : simd-with ( n seq -- v ) [ (simd-with) ] simd-construct-op ; inline -MACRO: simd-boa ( seq -- ) - dup length { - { 2 [ '[ _ dup [ (simd-gather-2) ] simd-construct-op ] ] } - { 4 [ '[ _ dup [ (simd-gather-4) ] simd-construct-op ] ] } - [ '[ _ _ nsequence ] ] +MACRO: simd-boa ( class -- ) + new dup length { + { 2 [ '[ _ [ (simd-gather-2) ] simd-construct-op ] ] } + { 4 [ '[ _ [ (simd-gather-4) ] simd-construct-op ] ] } + [ swap '[ _ _ nsequence ] ] } case ; : simd-cast ( v seq -- v' ) From d56afe9c3d45c11a076e93732b1c5d65b82f0914 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sat, 14 Nov 2009 23:43:22 -0600 Subject: [PATCH 17/46] compilation fixes --- .../intrinsics/simd/backend/backend.factor | 25 +++++++++++++------ .../compiler/cfg/intrinsics/simd/simd.factor | 8 +++--- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor index 90514c6cc9..8f9fa801e2 100644 --- a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor +++ b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor @@ -13,6 +13,9 @@ IN: compiler.cfg.intrinsics.simd.backend : can-has? ( quot -- ? ) [ t \ can-has? ] dip '[ @ drop \ can-has? get ] with-variable ; inline +: can-has-rep? ( rep reps -- ) + member? \ can-has? [ and ] change ; inline + GENERIC: create-can-has ( word -- word' ) PREDICATE: vector-op-word < word @@ -27,19 +30,22 @@ PREDICATE: vector-op-word < word :: can-has-^^-quot ( word def effect -- quot ) effect in>> { "rep" } split1 [ length ] bi@ 1 + - word reps-word + word reps-word 1quotation effect out>> length f >quotation - '[ [ _ ndrop ] _ ndip _ execute member? \ can-has? [ and ] change @ ] ; + '[ [ _ ndrop ] _ ndip @ can-has-rep? @ ] ; :: can-has-^-quot ( word def effect -- quot ) - def create-can-has ; + def create-can-has first ; + +: map-concat-like ( seq quot -- seq' ) + '[ _ map ] [ concat-as ] bi ; inline M: object create-can-has 1quotation ; M: array create-can-has - [ create-can-has ] map concat ; + [ create-can-has ] map-concat-like 1quotation ; M: callable create-can-has - [ create-can-has ] map concat ; + [ create-can-has ] map-concat-like 1quotation ; : (can-has-word) ( word -- word' ) name>> "can-has-" prepend "compiler.cfg.intrinsics.simd.backend" lookup ; @@ -56,12 +62,12 @@ M: vector-op-word create-can-has GENERIC# >can-has-cond 2 ( quot #pick #dup -- quotpair ) M:: callable >can-has-cond ( quot #pick #dup -- quotpair ) - #dup quot create-can-has '[ _ ndup _ can-has? ] quot 2array ; + #dup quot create-can-has '[ _ ndup @ can-has? ] quot 2array ; M:: pair >can-has-cond ( pair #pick #dup -- quotpair ) pair first2 :> ( class quot ) #pick class #dup quot create-can-has - '[ _ npick _ instance? [ _ ndup _ can-has? ] dip and ] + '[ _ npick _ instance? [ _ ndup @ can-has? ] dip and ] quot 2array ; MACRO: v-vector-op ( trials -- ) @@ -82,6 +88,11 @@ MACRO: vvvv-vector-op ( trials -- ) \ can-has? [ and ] change f ; +: can-has-^^test-vector ( src rep vcc -- dst ) + [ drop ] 2dip drop %test-vector-reps member? + \ can-has? [ and ] change + f ; + ! Intrinsic code emission MACRO: if-literals-match ( quots -- ) diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index 512df6c129..5130ff36b7 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -481,15 +481,15 @@ IN: compiler.cfg.intrinsics.simd : emit-simd-vany? ( node -- ) { [ vcc-any ^^test-vector ] - } emit-vv-vector-op ; + } emit-v-vector-op ; : emit-simd-vall? ( node -- ) { [ vcc-all ^^test-vector ] - } emit-vv-vector-op ; + } emit-v-vector-op ; : emit-simd-vnone? ( node -- ) { [ vcc-none ^^test-vector ] - } emit-vv-vector-op ; + } emit-v-vector-op ; : emit-simd-v>float ( node -- ) { @@ -500,7 +500,7 @@ IN: compiler.cfg.intrinsics.simd : emit-simd-v>integer ( node -- ) { { float-vector-rep [ ^^float>integer-vector ] } - { int-vector-rep [ dup ] } + { int-vector-rep [ drop ] } } emit-v-vector-op ; : emit-simd-vpack-signed ( node -- ) From cd2cf91b95f35e9bb3f99e7534454a6761b520cd Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Tue, 17 Nov 2009 06:53:22 -0600 Subject: [PATCH 18/46] start on tests for simd intrinsics --- .../cfg/intrinsics/simd/simd-tests.factor | 112 ++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 basis/compiler/cfg/intrinsics/simd/simd-tests.factor diff --git a/basis/compiler/cfg/intrinsics/simd/simd-tests.factor b/basis/compiler/cfg/intrinsics/simd/simd-tests.factor new file mode 100644 index 0000000000..c7d999f029 --- /dev/null +++ b/basis/compiler/cfg/intrinsics/simd/simd-tests.factor @@ -0,0 +1,112 @@ +! (c)2009 Joe Groff bsd license +USING: assocs biassocs byte-arrays byte-arrays.hex classes +compiler.cfg.instructions compiler.cfg.intrinsics.simd +compiler.cfg.registers compiler.cfg.stacks.local compiler.tree +compiler.tree.propagation.info cpu.architecture fry kernel +locals make namespaces sequences system tools.test words ; +IN: compiler.cfg.intrinsics.simd.tests + +:: 1test-node ( rep -- node ) + T{ #call + { in-d { 1 2 3 4 } } + { out-d { 5 } } + { info H{ + { 1 T{ value-info { class byte-array } } } + { 2 T{ value-info { class byte-array } } } + { 3 T{ value-info { class byte-array } } } + { 4 T{ value-info { class word } { literal? t } { literal rep } } } + { 5 T{ value-info { class byte-array } } } + } } + } ; +:: 2test-node ( rep cc -- node ) + T{ #call + { in-d { 1 2 3 4 5 } } + { out-d { 6 } } + { info H{ + { 1 T{ value-info { class byte-array } } } + { 2 T{ value-info { class byte-array } } } + { 3 T{ value-info { class byte-array } } } + { 4 T{ value-info { class word } { literal? t } { literal rep } } } + { 5 T{ value-info { class word } { literal? t } { literal cc } } } + { 6 T{ value-info { class byte-array } } } + } } + } ; + +: test-compiler-env ( -- x ) + H{ } clone + T{ current-height { d 0 } { r 0 } { emit-d 0 } { emit-r 0 } } \ current-height pick set-at + H{ } clone \ local-peek-set pick set-at + H{ } clone \ replace-mapping pick set-at + H{ } \ locs>vregs pick set-at ; + +: make-classes ( quot -- seq ) + { } make [ class ] map ; inline + +: 1test-emit ( cpu rep quot -- node ) + [ + [ new cpu ] 2dip '[ + test-compiler-env [ _ 1test-node @ ] bind + ] with-variable + ] make-classes ; inline + +: 2test-emit ( cpu rep cc quot -- node ) + [ + [ new cpu ] 3dip '[ + test-compiler-env [ _ _ 2test-node @ ] bind + ] with-variable + ] make-classes ; inline + +TUPLE: scalar-cpu ; + +TUPLE: simple-ops-cpu ; +M: simple-ops-cpu %zero-vector-reps { int-4-rep float-4-rep } ; +M: simple-ops-cpu %add-vector-reps { int-4-rep float-4-rep } ; +M: simple-ops-cpu %sub-vector-reps { int-4-rep float-4-rep } ; +M: simple-ops-cpu %mul-vector-reps { int-4-rep float-4-rep } ; +M: simple-ops-cpu %div-vector-reps { float-4-rep } ; + +! v+ +[ { ##add-vector } ] +[ simple-ops-cpu float-4-rep [ emit-simd-v+ ] 1test-emit ] +unit-test + +! v- +[ { ##sub-vector } ] +[ simple-ops-cpu float-4-rep [ emit-simd-v- ] 1test-emit ] +unit-test + +! vneg +[ { ##load-constant ##sub-vector } ] +[ simple-ops-cpu float-4-rep [ emit-simd-vneg ] 1test-emit ] +unit-test + +[ { ##zero-vector ##sub-vector } ] +[ simple-ops-cpu int-4-rep [ emit-simd-vneg ] 1test-emit ] +unit-test + +! v* +[ { ##mul-vector } ] +[ simple-ops-cpu float-4-rep [ emit-simd-v* ] 1test-emit ] +unit-test + +! v/ +[ { ##div-vector } ] +[ simple-ops-cpu float-4-rep [ emit-simd-v/ ] 1test-emit ] +unit-test + +TUPLE: addsub-cpu < simple-ops-cpu ; +M: addsub-cpu %add-sub-vector-reps { int-4-rep float-4-rep } ; + +! v+- +[ { ##add-sub-vector } ] +[ addsub-cpu float-4-rep [ emit-simd-v+- ] 1test-emit ] +unit-test + +[ { ##load-constant ##xor-vector ##add-vector } ] +[ simple-ops-cpu float-4-rep [ emit-simd-v+- ] 1test-emit ] +unit-test + +[ { ##load-constant ##xor-vector ##sub-vector ##add-vector } ] +[ simple-ops-cpu int-4-rep [ emit-simd-v+- ] 1test-emit ] +unit-test + From f545c5d3e5c812a968b0a5bd4c8514567b50ae63 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 18 Nov 2009 12:36:41 -0800 Subject: [PATCH 19/46] properly handle -vector-op and case words in simd.backend --- .../intrinsics/simd/backend/backend.factor | 64 +++++++++++++++++-- .../cfg/intrinsics/simd/simd-tests.factor | 9 ++- 2 files changed, 64 insertions(+), 9 deletions(-) diff --git a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor index 8f9fa801e2..f2ba9af41d 100644 --- a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor +++ b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor @@ -1,11 +1,11 @@ ! (c)2009 Joe Groff bsd license -USING: accessors arrays classes combinators +USING: accessors arrays assocs classes combinators combinators.short-circuit compiler.cfg.builder.blocks compiler.cfg.registers compiler.cfg.stacks compiler.cfg.stacks.local compiler.tree.propagation.info cpu.architecture effects fry generalizations help.lint.checks kernel locals macros math namespaces quotations sequences -splitting words ; +splitting stack-checker words ; IN: compiler.cfg.intrinsics.simd.backend ! Selection of implementation based on available CPU instructions @@ -18,20 +18,25 @@ IN: compiler.cfg.intrinsics.simd.backend GENERIC: create-can-has ( word -- word' ) -PREDICATE: vector-op-word < word +PREDICATE: hat-word < word { - [ name>> { [ { [ "^" head? ] [ "##" head? ] } 1|| ] [ "-vector" swap subseq? ] } 1&& ] + [ name>> { [ "^" head? ] [ "##" head? ] } 1|| ] [ vocabulary>> { "compiler.cfg.intrinsics.simd" "compiler.cfg.hats" } member? ] } 1&& ; +PREDICATE: vector-op-word < hat-word + name>> "-vector" swap subseq? ; + : reps-word ( word -- word' ) name>> "^^" ?head drop "##" ?head drop "%" "-reps" surround "cpu.architecture" lookup ; +SYMBOL: blub + :: can-has-^^-quot ( word def effect -- quot ) effect in>> { "rep" } split1 [ length ] bi@ 1 + word reps-word 1quotation - effect out>> length f >quotation + effect out>> length blub >quotation '[ [ _ ndrop ] _ ndip @ can-has-rep? @ ] ; :: can-has-^-quot ( word def effect -- quot ) @@ -57,6 +62,17 @@ M: callable create-can-has { [ pick name>> "^" head? ] [ can-has-^-quot ] } } cond ; +: (can-has-nop-quot) ( word -- quot ) + stack-effect in>> length '[ _ ndrop blub ] ; + +DEFER: can-has-words + +M: word create-can-has + can-has-words ?at drop 1quotation ; + +M: hat-word create-can-has + (can-has-nop-quot) ; + M: vector-op-word create-can-has dup (can-has-word) [ 1quotation ] [ (can-has-quot) ] ?if ; @@ -86,12 +102,46 @@ MACRO: vvvv-vector-op ( trials -- ) : can-has-^(compare-vector) ( src1 src2 rep cc -- dst ) [ 2drop ] 2dip %compare-vector-reps member? \ can-has? [ and ] change - f ; + blub ; : can-has-^^test-vector ( src rep vcc -- dst ) [ drop ] 2dip drop %test-vector-reps member? \ can-has? [ and ] change - f ; + blub ; + +MACRO: can-has-case ( cases -- ) + dup first second infer in>> length 1 + + '[ _ ndrop f ] suffix '[ _ case ] ; + +GENERIC# >can-has-trial 1 ( obj #pick -- quot ) + +M: callable >can-has-trial + drop '[ _ can-has? ] ; +M: pair >can-has-trial + swap first2 dup infer in>> length + '[ _ npick _ instance? [ _ can-has? ] [ _ ndrop blub ] if ] ; + +MACRO: can-has-vector-op ( trials #pick #dup -- ) + [ '[ _ >can-has-trial ] map ] dip '[ _ _ n|| \ can-has? [ and ] change blub ] ; + +: can-has-v-vector-op ( trials -- ? ) + 1 2 can-has-vector-op ; inline +: can-has-vv-vector-op ( trials -- ? ) + 1 3 can-has-vector-op ; inline +: can-has-vv-cc-vector-op ( trials -- ? ) + 2 4 can-has-vector-op ; inline +: can-has-vvvv-vector-op ( trials -- ? ) + 1 5 can-has-vector-op ; inline + +CONSTANT: can-has-words + H{ + { case can-has-case } + { v-vector-op can-has-v-vector-op } + { vl-vector-op can-has-vv-vector-op } + { vv-vector-op can-has-vv-vector-op } + { vv-cc-vector-op can-has-vv-cc-vector-op } + { vvvv-vector-op can-has-vvvv-vector-op } + } ! Intrinsic code emission diff --git a/basis/compiler/cfg/intrinsics/simd/simd-tests.factor b/basis/compiler/cfg/intrinsics/simd/simd-tests.factor index c7d999f029..fadabbe604 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd-tests.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd-tests.factor @@ -44,14 +44,14 @@ IN: compiler.cfg.intrinsics.simd.tests : 1test-emit ( cpu rep quot -- node ) [ - [ new cpu ] 2dip '[ + [ new \ cpu ] 2dip '[ test-compiler-env [ _ 1test-node @ ] bind ] with-variable ] make-classes ; inline : 2test-emit ( cpu rep cc quot -- node ) [ - [ new cpu ] 3dip '[ + [ new \ cpu ] 3dip '[ test-compiler-env [ _ _ 2test-node @ ] bind ] with-variable ] make-classes ; inline @@ -64,6 +64,11 @@ M: simple-ops-cpu %add-vector-reps { int-4-rep float-4-rep } ; M: simple-ops-cpu %sub-vector-reps { int-4-rep float-4-rep } ; M: simple-ops-cpu %mul-vector-reps { int-4-rep float-4-rep } ; M: simple-ops-cpu %div-vector-reps { float-4-rep } ; +M: simple-ops-cpu %not-vector-reps { int-4-rep float-4-rep } ; +M: simple-ops-cpu %andn-vector-reps { int-4-rep float-4-rep } ; +M: simple-ops-cpu %and-vector-reps { int-4-rep float-4-rep } ; +M: simple-ops-cpu %or-vector-reps { int-4-rep float-4-rep } ; +M: simple-ops-cpu %xor-vector-reps { int-4-rep float-4-rep } ; ! v+ [ { ##add-vector } ] From 658387505525b6a4fb9a741f72e0f73a1dbca2de Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 18 Nov 2009 14:39:39 -0800 Subject: [PATCH 20/46] tests for all simd intrinsics --- .../cfg/intrinsics/simd/simd-tests.factor | 459 ++++++++++++++++-- basis/cpu/architecture/architecture.factor | 5 + 2 files changed, 430 insertions(+), 34 deletions(-) diff --git a/basis/compiler/cfg/intrinsics/simd/simd-tests.factor b/basis/compiler/cfg/intrinsics/simd/simd-tests.factor index fadabbe604..27b3b38b01 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd-tests.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd-tests.factor @@ -1,12 +1,13 @@ ! (c)2009 Joe Groff bsd license -USING: assocs biassocs byte-arrays byte-arrays.hex classes -compiler.cfg.instructions compiler.cfg.intrinsics.simd -compiler.cfg.registers compiler.cfg.stacks.local compiler.tree +USING: arrays assocs biassocs byte-arrays byte-arrays.hex +classes compiler.cfg.comparisons compiler.cfg.instructions +compiler.cfg.intrinsics.simd compiler.cfg.registers +compiler.cfg.stacks.local compiler.tree compiler.tree.propagation.info cpu.architecture fry kernel locals make namespaces sequences system tools.test words ; IN: compiler.cfg.intrinsics.simd.tests -:: 1test-node ( rep -- node ) +:: test-node ( rep -- node ) T{ #call { in-d { 1 2 3 4 } } { out-d { 5 } } @@ -18,17 +19,18 @@ IN: compiler.cfg.intrinsics.simd.tests { 5 T{ value-info { class byte-array } } } } } } ; -:: 2test-node ( rep cc -- node ) + +:: test-node-literal ( lit rep -- node ) + lit class :> lit-class T{ #call - { in-d { 1 2 3 4 5 } } - { out-d { 6 } } + { in-d { 1 2 3 4 } } + { out-d { 5 } } { info H{ { 1 T{ value-info { class byte-array } } } { 2 T{ value-info { class byte-array } } } - { 3 T{ value-info { class byte-array } } } + { 3 T{ value-info { class lit-class } { literal? t } { literal lit } } } { 4 T{ value-info { class word } { literal? t } { literal rep } } } - { 5 T{ value-info { class word } { literal? t } { literal cc } } } - { 6 T{ value-info { class byte-array } } } + { 5 T{ value-info { class byte-array } } } } } } ; @@ -37,66 +39,85 @@ IN: compiler.cfg.intrinsics.simd.tests T{ current-height { d 0 } { r 0 } { emit-d 0 } { emit-r 0 } } \ current-height pick set-at H{ } clone \ local-peek-set pick set-at H{ } clone \ replace-mapping pick set-at - H{ } \ locs>vregs pick set-at ; + H{ } \ locs>vregs pick set-at + H{ } clone \ peek-sets pick set-at + H{ } clone \ replace-sets pick set-at + H{ } clone \ kill-sets pick set-at ; : make-classes ( quot -- seq ) { } make [ class ] map ; inline -: 1test-emit ( cpu rep quot -- node ) +: test-emit ( cpu rep quot -- node ) [ [ new \ cpu ] 2dip '[ - test-compiler-env [ _ 1test-node @ ] bind + test-compiler-env [ _ test-node @ ] bind ] with-variable ] make-classes ; inline -: 2test-emit ( cpu rep cc quot -- node ) +: test-emit-literal ( cpu lit rep quot -- node ) [ [ new \ cpu ] 3dip '[ - test-compiler-env [ _ _ 2test-node @ ] bind + test-compiler-env [ _ _ test-node-literal @ ] bind ] with-variable ] make-classes ; inline +CONSTANT: signed-reps + { char-16-rep short-8-rep int-4-rep longlong-2-rep float-4-rep double-2-rep } +CONSTANT: all-reps + { + char-16-rep short-8-rep int-4-rep longlong-2-rep float-4-rep double-2-rep + uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep + } + TUPLE: scalar-cpu ; TUPLE: simple-ops-cpu ; -M: simple-ops-cpu %zero-vector-reps { int-4-rep float-4-rep } ; -M: simple-ops-cpu %add-vector-reps { int-4-rep float-4-rep } ; -M: simple-ops-cpu %sub-vector-reps { int-4-rep float-4-rep } ; -M: simple-ops-cpu %mul-vector-reps { int-4-rep float-4-rep } ; -M: simple-ops-cpu %div-vector-reps { float-4-rep } ; -M: simple-ops-cpu %not-vector-reps { int-4-rep float-4-rep } ; -M: simple-ops-cpu %andn-vector-reps { int-4-rep float-4-rep } ; -M: simple-ops-cpu %and-vector-reps { int-4-rep float-4-rep } ; -M: simple-ops-cpu %or-vector-reps { int-4-rep float-4-rep } ; -M: simple-ops-cpu %xor-vector-reps { int-4-rep float-4-rep } ; +M: simple-ops-cpu %zero-vector-reps all-reps ; +M: simple-ops-cpu %fill-vector-reps all-reps ; +M: simple-ops-cpu %add-vector-reps all-reps ; +M: simple-ops-cpu %sub-vector-reps all-reps ; +M: simple-ops-cpu %mul-vector-reps all-reps ; +M: simple-ops-cpu %div-vector-reps all-reps ; +M: simple-ops-cpu %andn-vector-reps all-reps ; +M: simple-ops-cpu %and-vector-reps all-reps ; +M: simple-ops-cpu %or-vector-reps all-reps ; +M: simple-ops-cpu %xor-vector-reps all-reps ; +M: simple-ops-cpu %merge-vector-reps all-reps ; +M: simple-ops-cpu %sqrt-vector-reps all-reps ; +M: simple-ops-cpu %test-vector-reps all-reps ; +M: simple-ops-cpu %signed-pack-vector-reps all-reps ; +M: simple-ops-cpu %unsigned-pack-vector-reps all-reps ; +M: simple-ops-cpu %gather-vector-2-reps { longlong-2-rep ulonglong-2-rep double-2-rep } ; +M: simple-ops-cpu %gather-vector-4-reps { int-4-rep uint-4-rep float-4-rep } ; +M: simple-ops-cpu %alien-vector-reps all-reps ; ! v+ [ { ##add-vector } ] -[ simple-ops-cpu float-4-rep [ emit-simd-v+ ] 1test-emit ] +[ simple-ops-cpu float-4-rep [ emit-simd-v+ ] test-emit ] unit-test ! v- [ { ##sub-vector } ] -[ simple-ops-cpu float-4-rep [ emit-simd-v- ] 1test-emit ] +[ simple-ops-cpu float-4-rep [ emit-simd-v- ] test-emit ] unit-test ! vneg [ { ##load-constant ##sub-vector } ] -[ simple-ops-cpu float-4-rep [ emit-simd-vneg ] 1test-emit ] +[ simple-ops-cpu float-4-rep [ emit-simd-vneg ] test-emit ] unit-test [ { ##zero-vector ##sub-vector } ] -[ simple-ops-cpu int-4-rep [ emit-simd-vneg ] 1test-emit ] +[ simple-ops-cpu int-4-rep [ emit-simd-vneg ] test-emit ] unit-test ! v* [ { ##mul-vector } ] -[ simple-ops-cpu float-4-rep [ emit-simd-v* ] 1test-emit ] +[ simple-ops-cpu float-4-rep [ emit-simd-v* ] test-emit ] unit-test ! v/ [ { ##div-vector } ] -[ simple-ops-cpu float-4-rep [ emit-simd-v/ ] 1test-emit ] +[ simple-ops-cpu float-4-rep [ emit-simd-v/ ] test-emit ] unit-test TUPLE: addsub-cpu < simple-ops-cpu ; @@ -104,14 +125,384 @@ M: addsub-cpu %add-sub-vector-reps { int-4-rep float-4-rep } ; ! v+- [ { ##add-sub-vector } ] -[ addsub-cpu float-4-rep [ emit-simd-v+- ] 1test-emit ] +[ addsub-cpu float-4-rep [ emit-simd-v+- ] test-emit ] unit-test [ { ##load-constant ##xor-vector ##add-vector } ] -[ simple-ops-cpu float-4-rep [ emit-simd-v+- ] 1test-emit ] +[ simple-ops-cpu float-4-rep [ emit-simd-v+- ] test-emit ] unit-test [ { ##load-constant ##xor-vector ##sub-vector ##add-vector } ] -[ simple-ops-cpu int-4-rep [ emit-simd-v+- ] 1test-emit ] +[ simple-ops-cpu int-4-rep [ emit-simd-v+- ] test-emit ] unit-test +TUPLE: saturating-cpu < simple-ops-cpu ; +M: saturating-cpu %saturated-add-vector-reps { int-4-rep } ; +M: saturating-cpu %saturated-sub-vector-reps { int-4-rep } ; +M: saturating-cpu %saturated-mul-vector-reps { int-4-rep } ; + +! vs+ +[ { ##add-vector } ] +[ simple-ops-cpu float-4-rep [ emit-simd-vs+ ] test-emit ] +unit-test + +[ { ##add-vector } ] +[ saturating-cpu float-4-rep [ emit-simd-vs+ ] test-emit ] +unit-test + +[ { ##saturated-add-vector } ] +[ saturating-cpu int-4-rep [ emit-simd-vs+ ] test-emit ] +unit-test + +! vs- +[ { ##sub-vector } ] +[ simple-ops-cpu float-4-rep [ emit-simd-vs- ] test-emit ] +unit-test + +[ { ##sub-vector } ] +[ saturating-cpu float-4-rep [ emit-simd-vs- ] test-emit ] +unit-test + +[ { ##saturated-sub-vector } ] +[ saturating-cpu int-4-rep [ emit-simd-vs- ] test-emit ] +unit-test + +! vs* +[ { ##mul-vector } ] +[ simple-ops-cpu float-4-rep [ emit-simd-vs* ] test-emit ] +unit-test + +[ { ##mul-vector } ] +[ saturating-cpu float-4-rep [ emit-simd-vs* ] test-emit ] +unit-test + +[ { ##saturated-mul-vector } ] +[ saturating-cpu int-4-rep [ emit-simd-vs* ] test-emit ] +unit-test + +TUPLE: minmax-cpu < simple-ops-cpu ; +M: minmax-cpu %min-vector-reps signed-reps ; +M: minmax-cpu %max-vector-reps signed-reps ; +M: minmax-cpu %compare-vector-reps { cc= cc/= } member? [ signed-reps ] [ { } ] if ; +M: minmax-cpu %compare-vector-ccs nip f 2array 1array f ; + +TUPLE: compare-cpu < simple-ops-cpu ; +M: compare-cpu %compare-vector-reps drop signed-reps ; +M: compare-cpu %compare-vector-ccs nip f 2array 1array f ; + +! vmin +[ { ##min-vector } ] +[ minmax-cpu float-4-rep [ emit-simd-vmin ] test-emit ] +unit-test + +[ { ##compare-vector ##and-vector ##andn-vector ##or-vector } ] +[ compare-cpu float-4-rep [ emit-simd-vmin ] test-emit ] +unit-test + +! vmax +[ { ##max-vector } ] +[ minmax-cpu float-4-rep [ emit-simd-vmax ] test-emit ] +unit-test + +[ { ##compare-vector ##and-vector ##andn-vector ##or-vector } ] +[ compare-cpu float-4-rep [ emit-simd-vmax ] test-emit ] +unit-test + +TUPLE: dot-cpu < simple-ops-cpu ; +M: dot-cpu %dot-vector-reps { float-4-rep } ; + +TUPLE: horizontal-cpu < simple-ops-cpu ; +M: horizontal-cpu %horizontal-add-vector-reps signed-reps ; +M: horizontal-cpu %unpack-vector-head-reps signed-reps ; +M: horizontal-cpu %unpack-vector-tail-reps signed-reps ; + +! v. +[ { ##dot-vector } ] +[ dot-cpu float-4-rep [ emit-simd-v. ] test-emit ] +unit-test + +[ { ##mul-vector ##horizontal-add-vector ##horizontal-add-vector ##vector>scalar } ] +[ horizontal-cpu float-4-rep [ emit-simd-v. ] test-emit ] +unit-test + +[ { + ##mul-vector + ##merge-vector-head ##merge-vector-tail ##add-vector + ##merge-vector-head ##merge-vector-tail ##add-vector + ##vector>scalar +} ] +[ simple-ops-cpu float-4-rep [ emit-simd-v. ] test-emit ] +unit-test + +! vsqrt +[ { ##sqrt-vector } ] +[ simple-ops-cpu float-4-rep [ emit-simd-vsqrt ] test-emit ] +unit-test + +! sum +[ { ##horizontal-add-vector ##vector>scalar } ] +[ horizontal-cpu double-2-rep [ emit-simd-sum ] test-emit ] +unit-test + +[ { ##horizontal-add-vector ##horizontal-add-vector ##vector>scalar } ] +[ horizontal-cpu float-4-rep [ emit-simd-sum ] test-emit ] +unit-test + +[ { + ##unpack-vector-head ##unpack-vector-tail ##add-vector + ##horizontal-add-vector + ##vector>scalar +} ] +[ horizontal-cpu int-4-rep [ emit-simd-sum ] test-emit ] +unit-test + +[ { + ##unpack-vector-head ##unpack-vector-tail ##add-vector + ##horizontal-add-vector ##horizontal-add-vector + ##vector>scalar +} ] +[ horizontal-cpu short-8-rep [ emit-simd-sum ] test-emit ] +unit-test + +[ { + ##unpack-vector-head ##unpack-vector-tail ##add-vector + ##horizontal-add-vector ##horizontal-add-vector ##horizontal-add-vector + ##vector>scalar +} ] +[ horizontal-cpu char-16-rep [ emit-simd-sum ] test-emit ] +unit-test + +TUPLE: abs-cpu < simple-ops-cpu ; +M: abs-cpu %abs-vector-reps signed-reps ; + +! vabs +[ { } ] +[ simple-ops-cpu uint-4-rep [ emit-simd-vabs ] test-emit ] +unit-test + +[ { ##abs-vector } ] +[ abs-cpu float-4-rep [ emit-simd-vabs ] test-emit ] +unit-test + +[ { ##load-constant ##andn-vector } ] +[ simple-ops-cpu float-4-rep [ emit-simd-vabs ] test-emit ] +unit-test + +[ { ##zero-vector ##sub-vector ##compare-vector ##and-vector ##andn-vector ##or-vector } ] +[ compare-cpu int-4-rep [ emit-simd-vabs ] test-emit ] +unit-test + +! vand +[ { ##and-vector } ] +[ simple-ops-cpu float-4-rep [ emit-simd-vand ] test-emit ] +unit-test + +! vandn +[ { ##andn-vector } ] +[ simple-ops-cpu float-4-rep [ emit-simd-vandn ] test-emit ] +unit-test + +! vor +[ { ##or-vector } ] +[ simple-ops-cpu float-4-rep [ emit-simd-vor ] test-emit ] +unit-test + +! vxor +[ { ##xor-vector } ] +[ simple-ops-cpu float-4-rep [ emit-simd-vxor ] test-emit ] +unit-test + +TUPLE: not-cpu < simple-ops-cpu ; +M: not-cpu %not-vector-reps signed-reps ; + +! vnot +[ { ##not-vector } ] +[ not-cpu float-4-rep [ emit-simd-vnot ] test-emit ] +unit-test + +[ { ##fill-vector ##xor-vector } ] +[ simple-ops-cpu float-4-rep [ emit-simd-vnot ] test-emit ] +unit-test + +TUPLE: shift-cpu < simple-ops-cpu ; +M: shift-cpu %shl-vector-reps signed-reps ; +M: shift-cpu %shr-vector-reps signed-reps ; + +TUPLE: shift-imm-cpu < simple-ops-cpu ; +M: shift-imm-cpu %shl-vector-imm-reps signed-reps ; +M: shift-imm-cpu %shr-vector-imm-reps signed-reps ; + +TUPLE: horizontal-shift-cpu < simple-ops-cpu ; +M: horizontal-shift-cpu %horizontal-shl-vector-imm-reps signed-reps ; +M: horizontal-shift-cpu %horizontal-shr-vector-imm-reps signed-reps ; + +! vlshift +[ { ##shl-vector-imm } ] +[ shift-imm-cpu 2 int-4-rep [ emit-simd-vlshift ] test-emit-literal ] +unit-test + +[ { ##shl-vector } ] +[ shift-cpu int-4-rep [ emit-simd-vlshift ] test-emit ] +unit-test + +! vrshift +[ { ##shr-vector-imm } ] +[ shift-imm-cpu 2 int-4-rep [ emit-simd-vrshift ] test-emit-literal ] +unit-test + +[ { ##shr-vector } ] +[ shift-cpu int-4-rep [ emit-simd-vrshift ] test-emit ] +unit-test + +! hlshift +[ { ##horizontal-shl-vector-imm } ] +[ horizontal-shift-cpu 2 int-4-rep [ emit-simd-hlshift ] test-emit-literal ] +unit-test + +! hrshift +[ { ##horizontal-shr-vector-imm } ] +[ horizontal-shift-cpu 2 int-4-rep [ emit-simd-hrshift ] test-emit-literal ] +unit-test + +TUPLE: shuffle-imm-cpu < simple-ops-cpu ; +M: shuffle-imm-cpu %shuffle-vector-imm-reps signed-reps ; + +TUPLE: shuffle-cpu < simple-ops-cpu ; +M: shuffle-cpu %shuffle-vector-reps signed-reps ; + +! vshuffle-elements +[ { ##load-constant ##shuffle-vector } ] +[ shuffle-cpu { 0 1 2 3 } int-4-rep [ emit-simd-vshuffle-elements ] test-emit-literal ] +unit-test + +[ { ##shuffle-vector-imm } ] +[ shuffle-imm-cpu { 0 1 2 3 } int-4-rep [ emit-simd-vshuffle-elements ] test-emit-literal ] +unit-test + +! vshuffle-bytes +[ { ##shuffle-vector } ] +[ shuffle-cpu int-4-rep [ emit-simd-vshuffle-bytes ] test-emit ] +unit-test + +! vmerge-head +[ { ##merge-vector-head } ] +[ simple-ops-cpu float-4-rep [ emit-simd-vmerge-head ] test-emit ] +unit-test + +! vmerge-tail +[ { ##merge-vector-tail } ] +[ simple-ops-cpu float-4-rep [ emit-simd-vmerge-tail ] test-emit ] +unit-test + +! v<= etc. +[ { ##compare-vector } ] +[ compare-cpu int-4-rep [ emit-simd-v<= ] test-emit ] +unit-test + +[ { ##min-vector ##compare-vector } ] +[ minmax-cpu int-4-rep [ emit-simd-v<= ] test-emit ] +unit-test + +[ { ##load-constant ##xor-vector ##xor-vector ##compare-vector } ] +[ compare-cpu uint-4-rep [ emit-simd-v<= ] test-emit ] +unit-test + +! vany? etc. +[ { ##test-vector } ] +[ simple-ops-cpu int-4-rep [ emit-simd-vany? ] test-emit ] +unit-test + +TUPLE: convert-cpu < simple-ops-cpu ; +M: convert-cpu %integer>float-vector-reps { int-4-rep } ; +M: convert-cpu %float>integer-vector-reps { float-4-rep } ; + +! v>float +[ { } ] +[ convert-cpu float-4-rep [ emit-simd-v>float ] test-emit ] +unit-test + +[ { ##integer>float-vector } ] +[ convert-cpu int-4-rep [ emit-simd-v>float ] test-emit ] +unit-test + +! v>integer +[ { } ] +[ convert-cpu int-4-rep [ emit-simd-v>integer ] test-emit ] +unit-test + +[ { ##float>integer-vector } ] +[ convert-cpu float-4-rep [ emit-simd-v>integer ] test-emit ] +unit-test + +! vpack-signed +[ { ##signed-pack-vector } ] +[ simple-ops-cpu int-4-rep [ emit-simd-vpack-signed ] test-emit ] +unit-test + +! vpack-unsigned +[ { ##unsigned-pack-vector } ] +[ simple-ops-cpu int-4-rep [ emit-simd-vpack-unsigned ] test-emit ] +unit-test + +TUPLE: unpack-head-cpu < simple-ops-cpu ; +M: unpack-head-cpu %unpack-vector-head-reps all-reps ; +TUPLE: unpack-cpu < unpack-head-cpu ; +M: unpack-cpu %unpack-vector-tail-reps all-reps ; + +! vunpack-head +[ { ##unpack-vector-head } ] +[ unpack-head-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ] +unit-test + +[ { ##zero-vector ##merge-vector-head } ] +[ simple-ops-cpu uint-4-rep [ emit-simd-vunpack-head ] test-emit ] +unit-test + +[ { ##merge-vector-head ##shr-vector-imm } ] +[ shift-imm-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ] +unit-test + +[ { ##zero-vector ##compare-vector ##merge-vector-head } ] +[ compare-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ] +unit-test + +! vunpack-tail +[ { ##unpack-vector-tail } ] +[ unpack-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ] +unit-test + +[ { ##tail>head-vector ##unpack-vector-head } ] +[ unpack-head-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ] +unit-test + +[ { ##zero-vector ##merge-vector-tail } ] +[ simple-ops-cpu uint-4-rep [ emit-simd-vunpack-tail ] test-emit ] +unit-test + +[ { ##merge-vector-tail ##shr-vector-imm } ] +[ shift-imm-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ] +unit-test + +[ { ##zero-vector ##compare-vector ##merge-vector-tail } ] +[ compare-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ] +unit-test + +! with +[ { ##scalar>vector ##shuffle-vector-imm } ] +[ shuffle-imm-cpu int-4-rep [ emit-simd-with ] test-emit ] +unit-test + +! gather-2 +[ { ##gather-vector-2 } ] +[ simple-ops-cpu longlong-2-rep [ emit-simd-gather-2 ] test-emit ] +unit-test + +! gather-4 +[ { ##gather-vector-4 } ] +[ simple-ops-cpu int-4-rep [ emit-simd-gather-4 ] test-emit ] +unit-test + +! select +[ { ##shuffle-vector-imm ##vector>scalar } ] +[ shuffle-imm-cpu int-4-rep [ emit-simd-select ] test-emit ] +unit-test diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index b5bb0157af..f4d150033b 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -115,6 +115,7 @@ scalar-rep ; { uchar-16-rep ushort-8-rep } { ushort-8-rep uint-4-rep } { uint-4-rep ulonglong-2-rep } + { float-4-rep double-2-rep } } at ; ! Register classes @@ -385,6 +386,10 @@ M: object %shr-vector-imm-reps { } ; M: object %horizontal-shl-vector-imm-reps { } ; M: object %horizontal-shr-vector-imm-reps { } ; +ALIAS: %merge-vector-head-reps %merge-vector-reps +ALIAS: %merge-vector-tail-reps %merge-vector-reps +ALIAS: %tail>head-vector-reps %unpack-vector-head-reps + HOOK: %unbox-alien cpu ( dst src -- ) HOOK: %unbox-any-c-ptr cpu ( dst src -- ) HOOK: %box-alien cpu ( dst src temp -- ) From a3e4ecfc7d93ec4fae18a9adf75b39f1b5f58bd6 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 18 Nov 2009 20:32:05 -0800 Subject: [PATCH 21/46] enable simd intrinsics and fix first-pass compiler errors --- .../compiler/cfg/intrinsics/simd/simd.factor | 118 ++++++------- basis/math/vectors/simd/simd-tests.factor | 161 ++++++++---------- basis/math/vectors/simd/simd.factor | 37 ++-- 3 files changed, 156 insertions(+), 160 deletions(-) diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index 5130ff36b7..54f105ca02 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -562,62 +562,62 @@ IN: compiler.cfg.intrinsics.simd inline-alien ] with { [ %alien-vector-reps member? ] } if-literals-match ; -! : enable-simd ( -- ) -! { -! { (simd-v+) [ emit-simd-v+ ] } -! { (simd-v-) [ emit-simd-v- ] } -! { (simd-vneg) [ emit-simd-vneg ] } -! { (simd-v+-) [ emit-simd-v+- ] } -! { (simd-vs+) [ emit-simd-vs+ ] } -! { (simd-vs-) [ emit-simd-vs- ] } -! { (simd-vs*) [ emit-simd-vs* ] } -! { (simd-v*) [ emit-simd-v* ] } -! { (simd-v/) [ emit-simd-v/ ] } -! { (simd-vmin) [ emit-simd-vmin ] } -! { (simd-vmax) [ emit-simd-vmax ] } -! { (simd-v.) [ emit-simd-v. ] } -! { (simd-vsqrt) [ emit-simd-vsqrt ] } -! { (simd-sum) [ emit-simd-sum ] } -! { (simd-vabs) [ emit-simd-vabs ] } -! { (simd-vbitand) [ emit-simd-vand ] } -! { (simd-vbitandn) [ emit-simd-vandn ] } -! { (simd-vbitor) [ emit-simd-vor ] } -! { (simd-vbitxor) [ emit-simd-vxor ] } -! { (simd-vbitnot) [ emit-simd-vnot ] } -! { (simd-vand) [ emit-simd-vand ] } -! { (simd-vandn) [ emit-simd-vandn ] } -! { (simd-vor) [ emit-simd-vor ] } -! { (simd-vxor) [ emit-simd-vxor ] } -! { (simd-vnot) [ emit-simd-vnot ] } -! { (simd-vlshift) [ emit-simd-vlshift ] } -! { (simd-vrshift) [ emit-simd-vrshift ] } -! { (simd-hlshift) [ emit-simd-hlshift ] } -! { (simd-hrshift) [ emit-simd-hrshift ] } -! { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] } -! { (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] } -! { (simd-vmerge-head) [ emit-simd-vmerge-head ] } -! { (simd-vmerge-tail) [ emit-simd-vmerge-tail ] } -! { (simd-v<=) [ emit-simd-v<= ] } -! { (simd-v<) [ emit-simd-v< ] } -! { (simd-v=) [ emit-simd-v= ] } -! { (simd-v>) [ emit-simd-v> ] } -! { (simd-v>=) [ emit-simd-v>= ] } -! { (simd-vunordered?) [ emit-simd-vunordered? ] } -! { (simd-vany?) [ emit-simd-vany? ] } -! { (simd-vall?) [ emit-simd-vall? ] } -! { (simd-vnone?) [ emit-simd-vnone? ] } -! { (simd-v>float) [ emit-simd-v>float ] } -! { (simd-v>integer) [ emit-simd-v>integer ] } -! { (simd-vpack-signed) [ emit-simd-vpack-signed ] } -! { (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] } -! { (simd-vunpack-head) [ emit-simd-vunpack-head ] } -! { (simd-vunpack-tail) [ emit-simd-vunpack-tail ] } -! { (simd-with) [ emit-simd-with ] } -! { (simd-gather-2) [ emit-simd-gather-2 ] } -! { (simd-gather-4) [ emit-simd-gather-4 ] } -! { (simd-select) [ emit-simd-select ] } -! { alien-vector [ emit-alien-vector ] } -! { set-alien-vector [ emit-set-alien-vector ] } -! } enable-intrinsics ; -! -! enable-simd +: enable-simd ( -- ) + { + { (simd-v+) [ emit-simd-v+ ] } + { (simd-v-) [ emit-simd-v- ] } + { (simd-vneg) [ emit-simd-vneg ] } + { (simd-v+-) [ emit-simd-v+- ] } + { (simd-vs+) [ emit-simd-vs+ ] } + { (simd-vs-) [ emit-simd-vs- ] } + { (simd-vs*) [ emit-simd-vs* ] } + { (simd-v*) [ emit-simd-v* ] } + { (simd-v/) [ emit-simd-v/ ] } + { (simd-vmin) [ emit-simd-vmin ] } + { (simd-vmax) [ emit-simd-vmax ] } + { (simd-v.) [ emit-simd-v. ] } + { (simd-vsqrt) [ emit-simd-vsqrt ] } + { (simd-sum) [ emit-simd-sum ] } + { (simd-vabs) [ emit-simd-vabs ] } + { (simd-vbitand) [ emit-simd-vand ] } + { (simd-vbitandn) [ emit-simd-vandn ] } + { (simd-vbitor) [ emit-simd-vor ] } + { (simd-vbitxor) [ emit-simd-vxor ] } + { (simd-vbitnot) [ emit-simd-vnot ] } + { (simd-vand) [ emit-simd-vand ] } + { (simd-vandn) [ emit-simd-vandn ] } + { (simd-vor) [ emit-simd-vor ] } + { (simd-vxor) [ emit-simd-vxor ] } + { (simd-vnot) [ emit-simd-vnot ] } + { (simd-vlshift) [ emit-simd-vlshift ] } + { (simd-vrshift) [ emit-simd-vrshift ] } + { (simd-hlshift) [ emit-simd-hlshift ] } + { (simd-hrshift) [ emit-simd-hrshift ] } + { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] } + { (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] } + { (simd-vmerge-head) [ emit-simd-vmerge-head ] } + { (simd-vmerge-tail) [ emit-simd-vmerge-tail ] } + { (simd-v<=) [ emit-simd-v<= ] } + { (simd-v<) [ emit-simd-v< ] } + { (simd-v=) [ emit-simd-v= ] } + { (simd-v>) [ emit-simd-v> ] } + { (simd-v>=) [ emit-simd-v>= ] } + { (simd-vunordered?) [ emit-simd-vunordered? ] } + { (simd-vany?) [ emit-simd-vany? ] } + { (simd-vall?) [ emit-simd-vall? ] } + { (simd-vnone?) [ emit-simd-vnone? ] } + { (simd-v>float) [ emit-simd-v>float ] } + { (simd-v>integer) [ emit-simd-v>integer ] } + { (simd-vpack-signed) [ emit-simd-vpack-signed ] } + { (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] } + { (simd-vunpack-head) [ emit-simd-vunpack-head ] } + { (simd-vunpack-tail) [ emit-simd-vunpack-tail ] } + { (simd-with) [ emit-simd-with ] } + { (simd-gather-2) [ emit-simd-gather-2 ] } + { (simd-gather-4) [ emit-simd-gather-4 ] } + { (simd-select) [ emit-simd-select ] } + { alien-vector [ emit-alien-vector ] } + { set-alien-vector [ emit-set-alien-vector ] } + } enable-intrinsics ; + +enable-simd diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index 46cced3cb7..1fb947921c 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -3,22 +3,13 @@ effects fry io kernel kernel.private math math.functions math.private math.vectors math.vectors.simd math.vectors.simd.private prettyprint random sequences system tools.test vocabs assocs compiler.cfg.debugger words -locals math.vectors.specialization combinators cpu.architecture -math.vectors.conversion.backend -math.vectors.simd.intrinsics namespaces byte-arrays alien +locals combinators cpu.architecture namespaces byte-arrays alien specialized-arrays classes.struct eval classes.algebra sets quotations math.constants compiler.units ; QUALIFIED-WITH: alien.c-types c SPECIALIZED-ARRAY: c:float -SIMD: c:char -SIMDS: c:uchar c:short c:ushort c:int c:uint c:longlong c:ulonglong c:float c:double ; IN: math.vectors.simd.tests -! Make sure the functor doesn't generate bogus vocabularies -2 [ [ "USE: math.vectors.simd SIMD: rubinius" eval( -- ) ] must-fail ] times - -[ f ] [ "math.vectors.simd.instances.rubinius" vocab ] unit-test - ! Test type propagation [ V{ float } ] [ [ { float-4 } declare norm-sq ] final-classes ] unit-test @@ -38,10 +29,6 @@ IN: math.vectors.simd.tests [ V{ integer } ] [ [ { longlong-2 } declare second ] final-classes ] unit-test -[ V{ int-8 } ] [ [ { int-8 int-8 } declare v+ ] final-classes ] unit-test - -[ t ] [ [ { int-8 } declare second ] final-classes first integer class<= ] unit-test - ! Test puns; only on x86 cpu x86? [ [ double-2{ 4 1024 } ] [ @@ -55,26 +42,76 @@ CONSTANT: simd-classes { char-16 uchar-16 - char-32 - uchar-32 short-8 ushort-8 - short-16 - ushort-16 int-4 uint-4 - int-8 - uint-8 longlong-2 ulonglong-2 - longlong-4 - ulonglong-4 float-4 - float-8 double-2 - double-4 } +SYMBOLS: -> +vector+ +any-vector+ +scalar+ +boolean+ +nonnegative+ +literal+ ; + +CONSTANT: vector-words + H{ + { [v-] { +vector+ +vector+ -> +vector+ } } + { distance { +vector+ +vector+ -> +nonnegative+ } } + { n*v { +scalar+ +vector+ -> +vector+ } } + { n+v { +scalar+ +vector+ -> +vector+ } } + { n-v { +scalar+ +vector+ -> +vector+ } } + { n/v { +scalar+ +vector+ -> +vector+ } } + { norm { +vector+ -> +nonnegative+ } } + { norm-sq { +vector+ -> +nonnegative+ } } + { normalize { +vector+ -> +vector+ } } + { v* { +vector+ +vector+ -> +vector+ } } + { vs* { +vector+ +vector+ -> +vector+ } } + { v*n { +vector+ +scalar+ -> +vector+ } } + { v+ { +vector+ +vector+ -> +vector+ } } + { vs+ { +vector+ +vector+ -> +vector+ } } + { v+- { +vector+ +vector+ -> +vector+ } } + { v+n { +vector+ +scalar+ -> +vector+ } } + { v- { +vector+ +vector+ -> +vector+ } } + { vneg { +vector+ -> +vector+ } } + { vs- { +vector+ +vector+ -> +vector+ } } + { v-n { +vector+ +scalar+ -> +vector+ } } + { v. { +vector+ +vector+ -> +scalar+ } } + { v/ { +vector+ +vector+ -> +vector+ } } + { v/n { +vector+ +scalar+ -> +vector+ } } + { vceiling { +vector+ -> +vector+ } } + { vfloor { +vector+ -> +vector+ } } + { vmax { +vector+ +vector+ -> +vector+ } } + { vmin { +vector+ +vector+ -> +vector+ } } + { vneg { +vector+ -> +vector+ } } + { vtruncate { +vector+ -> +vector+ } } + { sum { +vector+ -> +scalar+ } } + { vabs { +vector+ -> +vector+ } } + { vsqrt { +vector+ -> +vector+ } } + { vbitand { +vector+ +vector+ -> +vector+ } } + { vbitandn { +vector+ +vector+ -> +vector+ } } + { vbitor { +vector+ +vector+ -> +vector+ } } + { vbitxor { +vector+ +vector+ -> +vector+ } } + { vbitnot { +vector+ -> +vector+ } } + { vand { +vector+ +vector+ -> +vector+ } } + { vandn { +vector+ +vector+ -> +vector+ } } + { vor { +vector+ +vector+ -> +vector+ } } + { vxor { +vector+ +vector+ -> +vector+ } } + { vnot { +vector+ -> +vector+ } } + { vlshift { +vector+ +scalar+ -> +vector+ } } + { vrshift { +vector+ +scalar+ -> +vector+ } } + { (vmerge-head) { +vector+ +vector+ -> +vector+ } } + { (vmerge-tail) { +vector+ +vector+ -> +vector+ } } + { v<= { +vector+ +vector+ -> +vector+ } } + { v< { +vector+ +vector+ -> +vector+ } } + { v= { +vector+ +vector+ -> +vector+ } } + { v> { +vector+ +vector+ -> +vector+ } } + { v>= { +vector+ +vector+ -> +vector+ } } + { vunordered? { +vector+ +vector+ -> +vector+ } } + } + +: vector-word-inputs ( schema -- seq ) { -> } split first ; + : with-ctors ( -- seq ) simd-classes [ [ name>> "-with" append ] [ vocabulary>> ] bi lookup ] map ; @@ -166,26 +203,15 @@ CONSTANT: simd-classes : remove-boolean-words ( alist -- alist' ) boolean-ops unique assoc-diff ; -: remove-special-words ( alist -- alist' ) - ! These have their own tests later - { - hlshift hrshift vshuffle-bytes vshuffle-elements vbroadcast - vany? vall? vnone? - (v>float) (v>integer) - (vpack-signed) (vpack-unsigned) - (vunpack-head) (vunpack-tail) - } unique assoc-diff ; - : ops-to-check ( elt-class -- alist ) [ vector-words >alist ] dip float = [ remove-integer-words ] [ remove-float-words ] if - remove-boolean-words - remove-special-words ; + remove-boolean-words ; : check-vector-ops ( class elt-class compare-quot -- ) [ [ nip ops-to-check ] 2keep - '[ first2 inputs _ _ check-vector-op ] + '[ first2 vector-word-inputs _ _ check-vector-op ] ] dip check-optimizer ; inline : (approx=) ( x y -- ? ) @@ -427,27 +453,6 @@ TUPLE: inconsistent-vector-test bool branch ; [ t f f ] [ int-4{ f f f f } { int-4 } test-vector-tests ] unit-test -[ f t t ] -[ float-8{ t t t t t t t t } { float-8 } test-vector-tests ] unit-test -[ f t f ] -[ float-8{ f t t t t f t t } { float-8 } test-vector-tests ] unit-test -[ t f f ] -[ float-8{ f f f f f f f f } { float-8 } test-vector-tests ] unit-test - -[ f t t ] -[ double-4{ t t t t } { double-4 } test-vector-tests ] unit-test -[ f t f ] -[ double-4{ f t t f } { double-4 } test-vector-tests ] unit-test -[ t f f ] -[ double-4{ f f f f } { double-4 } test-vector-tests ] unit-test - -[ f t t ] -[ int-8{ t t t t t t t t } { int-8 } test-vector-tests ] unit-test -[ f t f ] -[ int-8{ f t t t t f f f } { int-8 } test-vector-tests ] unit-test -[ t f f ] -[ int-8{ f f f f f f f f } { int-8 } test-vector-tests ] unit-test - "== Checking element access" print ! Test element access -- it should box bignums for int-4 on x86 @@ -467,14 +472,6 @@ TUPLE: inconsistent-vector-test bool branch ; [ { } ] [ longlong-2{ 1 2 } test-accesses ] unit-test [ { } ] [ ulonglong-2{ 1 2 } test-accesses ] unit-test -[ { } ] [ float-8{ 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 } test-accesses ] unit-test -[ { } ] [ int-8{ 1 2 3 4 5 6 7 8 } test-accesses ] unit-test -[ { } ] [ uint-8{ 1 2 3 4 5 6 7 8 } test-accesses ] unit-test - -[ { } ] [ double-4{ 1.0 2.0 3.0 4.0 } test-accesses ] unit-test -[ { } ] [ longlong-4{ 1 2 3 4 } test-accesses ] unit-test -[ { } ] [ ulonglong-4{ 1 2 3 4 } test-accesses ] unit-test - "== Checking broadcast" print : test-broadcast ( seq -- failures ) [ length >array ] keep @@ -488,14 +485,6 @@ TUPLE: inconsistent-vector-test bool branch ; [ { } ] [ longlong-2{ 1 2 } test-broadcast ] unit-test [ { } ] [ ulonglong-2{ 1 2 } test-broadcast ] unit-test -[ { } ] [ float-8{ 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 } test-broadcast ] unit-test -[ { } ] [ int-8{ 1 2 3 4 5 6 7 8 } test-broadcast ] unit-test -[ { } ] [ uint-8{ 1 2 3 4 5 6 7 8 } test-broadcast ] unit-test - -[ { } ] [ double-4{ 1.0 2.0 3.0 4.0 } test-broadcast ] unit-test -[ { } ] [ longlong-4{ 1 2 3 4 } test-broadcast ] unit-test -[ { } ] [ ulonglong-4{ 1 2 3 4 } test-broadcast ] unit-test - ! Make sure we use the fallback in the correct situations [ int-4{ 3 3 3 3 } ] [ int-4{ 12 34 3 17 } 2 [ { int-4 fixnum } declare vbroadcast ] compile-call ] unit-test @@ -529,37 +518,37 @@ TUPLE: inconsistent-vector-test bool branch ; STRUCT: simd-struct { x float-4 } { y longlong-2 } -{ z double-4 } -{ w int-8 } ; +{ z double-2 } +{ w int-4 } ; [ t ] [ [ simd-struct ] compile-call >c-ptr [ 0 = ] all? ] unit-test [ float-4{ 1 2 3 4 } longlong-2{ 2 1 } - double-4{ 4 3 2 1 } - int-8{ 1 2 3 4 5 6 7 8 } + double-2{ 4 3 } + int-4{ 1 2 3 4 } ] [ simd-struct float-4{ 1 2 3 4 } >>x longlong-2{ 2 1 } >>y - double-4{ 4 3 2 1 } >>z - int-8{ 1 2 3 4 5 6 7 8 } >>w + double-2{ 4 3 } >>z + int-4{ 1 2 3 4 } >>w { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave ] unit-test [ float-4{ 1 2 3 4 } longlong-2{ 2 1 } - double-4{ 4 3 2 1 } - int-8{ 1 2 3 4 5 6 7 8 } + double-2{ 4 3 } + int-4{ 1 2 3 4 } ] [ [ simd-struct float-4{ 1 2 3 4 } >>x longlong-2{ 2 1 } >>y - double-4{ 4 3 2 1 } >>z - int-8{ 1 2 3 4 5 6 7 8 } >>w + double-2{ 4 3 } >>z + int-4{ 1 2 3 4 } >>w { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave ] compile-call ] unit-test @@ -570,8 +559,8 @@ STRUCT: simd-struct ! CSSA bug [ 8000000 ] [ - int-8{ 1000 1000 1000 1000 1000 1000 1000 1000 } - [ { int-8 } declare dup [ * ] [ + ] 2map-reduce ] compile-call + int-4{ 1000 1000 1000 1000 } + [ { int-4 } declare dup [ * ] [ + ] 2map-reduce ] compile-call ] unit-test ! Coalescing was too aggressive diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index c02c713b48..532e1a2232 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -123,6 +123,8 @@ GENERIC: simd-rep ( simd -- rep ) element ] 2dip @@ -162,10 +163,14 @@ M: A like drop dup \ A instance? [ >A ] unless ; inline : A-with ( n -- v ) \ A new simd-with ; inline : A-cast ( v -- v' ) \ A new simd-cast ; inline -\ A-boa { \ A simd-boa } >quotation BOA-EFFECT define-inline +\ A-boa \ A new N { + { 2 [ '[ _ [ (simd-gather-2) ] simd-construct-op ] ] } + { 4 [ '[ _ [ (simd-gather-4) ] simd-construct-op ] ] } + [ swap '[ _ _ nsequence ] ] +} case BOA-EFFECT define-inline -! M: A pprint-delims drop \ A{ \ } ; -! SYNTAX: A{ \ } [ >A ] parse-literal ; +M: A pprint-delims drop \ A{ \ } ; +SYNTAX: A{ \ } [ >A ] parse-literal ; c: byte-array >>class @@ -186,17 +191,6 @@ PRIVATE> >> -SIMD-128: char-16 -SIMD-128: uchar-16 -SIMD-128: short-8 -SIMD-128: ushort-8 -SIMD-128: int-4 -SIMD-128: uint-4 -SIMD-128: longlong-2 -SIMD-128: ulonglong-2 -SIMD-128: float-4 -SIMD-128: double-2 - : assert-positive ( x -- y ) ; ! SIMD vectors as sequences @@ -351,6 +345,19 @@ M: simd-128 norm-sq dup v. assert-positive ; inline M: simd-128 norm norm-sq sqrt ; inline M: simd-128 distance v- norm ; inline +! SIMD instances + +SIMD-128: char-16 +SIMD-128: uchar-16 +SIMD-128: short-8 +SIMD-128: ushort-8 +SIMD-128: int-4 +SIMD-128: uint-4 +SIMD-128: longlong-2 +SIMD-128: ulonglong-2 +SIMD-128: float-4 +SIMD-128: double-2 + ! misc M: simd-128 vshuffle ( u perm -- v ) From 59d85f7ad62f3dd1154d4b0e37a2ae8d10fbef11 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 18 Nov 2009 21:29:51 -0800 Subject: [PATCH 22/46] fix primitive emit fallback for simd intrinsics --- .../intrinsics/simd/backend/backend.factor | 8 +++- .../cfg/intrinsics/simd/simd-tests.factor | 46 +++++++++++++++++-- 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor index f2ba9af41d..1a229672b9 100644 --- a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor +++ b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor @@ -3,7 +3,7 @@ USING: accessors arrays assocs classes combinators combinators.short-circuit compiler.cfg.builder.blocks compiler.cfg.registers compiler.cfg.stacks compiler.cfg.stacks.local compiler.tree.propagation.info -cpu.architecture effects fry generalizations help.lint.checks +cpu.architecture effects fry generalizations kernel locals macros math namespaces quotations sequences splitting stack-checker words ; IN: compiler.cfg.intrinsics.simd.backend @@ -145,6 +145,12 @@ CONSTANT: can-has-words ! Intrinsic code emission +MACRO: check-elements ( quots -- ) + [ length '[ _ firstn ] ] + [ '[ _ spread ] ] + [ length 1 - \ and [ ] like ] + tri 3append ; + MACRO: if-literals-match ( quots -- ) [ length ] [ ] [ length ] tri ! n quots n diff --git a/basis/compiler/cfg/intrinsics/simd/simd-tests.factor b/basis/compiler/cfg/intrinsics/simd/simd-tests.factor index 27b3b38b01..2d38e541a4 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd-tests.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd-tests.factor @@ -1,9 +1,9 @@ ! (c)2009 Joe Groff bsd license USING: arrays assocs biassocs byte-arrays byte-arrays.hex -classes compiler.cfg.comparisons compiler.cfg.instructions +classes compiler.cfg compiler.cfg.comparisons compiler.cfg.instructions compiler.cfg.intrinsics.simd compiler.cfg.registers -compiler.cfg.stacks.local compiler.tree -compiler.tree.propagation.info cpu.architecture fry kernel +compiler.cfg.stacks.height compiler.cfg.stacks.local compiler.tree +compiler.tree.propagation.info cpu.architecture fry hashtables kernel locals make namespaces sequences system tools.test words ; IN: compiler.cfg.intrinsics.simd.tests @@ -34,8 +34,25 @@ IN: compiler.cfg.intrinsics.simd.tests } } } ; +: test-node-nonliteral-rep ( -- node ) + T{ #call + { in-d { 1 2 3 4 } } + { out-d { 5 } } + { info H{ + { 1 T{ value-info { class byte-array } } } + { 2 T{ value-info { class byte-array } } } + { 3 T{ value-info { class byte-array } } } + { 4 T{ value-info { class object } } } + { 5 T{ value-info { class byte-array } } } + } } + } ; + : test-compiler-env ( -- x ) H{ } clone + T{ basic-block { id 0 } } + [ \ basic-block pick set-at ] + [ 0 swap associate \ ds-heights pick set-at ] + [ 0 swap associate \ rs-heights pick set-at ] tri T{ current-height { d 0 } { r 0 } { emit-d 0 } { emit-r 0 } } \ current-height pick set-at H{ } clone \ local-peek-set pick set-at H{ } clone \ replace-mapping pick set-at @@ -61,6 +78,13 @@ IN: compiler.cfg.intrinsics.simd.tests ] with-variable ] make-classes ; inline +: test-emit-nonliteral-rep ( cpu quot -- node ) + [ + [ new \ cpu ] dip '[ + test-compiler-env [ test-node-nonliteral-rep @ ] bind + ] with-variable + ] make-classes ; inline + CONSTANT: signed-reps { char-16-rep short-8-rep int-4-rep longlong-2-rep float-4-rep double-2-rep } CONSTANT: all-reps @@ -504,5 +528,19 @@ unit-test ! select [ { ##shuffle-vector-imm ##vector>scalar } ] -[ shuffle-imm-cpu int-4-rep [ emit-simd-select ] test-emit ] +[ shuffle-imm-cpu 1 int-4-rep [ emit-simd-select ] test-emit-literal ] unit-test + +! test with nonliteral/invalid reps +[ { ##inc-d ##branch } ] +[ simple-ops-cpu [ emit-simd-v+ ] test-emit-nonliteral-rep ] +unit-test + +[ { ##branch } ] +[ simple-ops-cpu f [ emit-simd-v+ ] test-emit ] +unit-test + +[ { ##branch } ] +[ simple-ops-cpu 3 [ emit-simd-v+ ] test-emit ] +unit-test + From c07f3212f9f718a09ebf39037ce07af815bc0882 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 18 Nov 2009 22:18:42 -0800 Subject: [PATCH 23/46] fix effect of set-alien-vector --- basis/math/vectors/simd/simd.factor | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index 532e1a2232..84dfce065e 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -87,8 +87,8 @@ PRIVATE> PRIVATE> -: alien-vector ( c-ptr n rep -- value ) \ alien-vector bad-simd-call ; -: set-alien-vector ( c-ptr n rep -- value ) \ set-alien-vector bad-simd-call ; +: alien-vector ( c-ptr n rep -- value ) \ alien-vector bad-simd-call ; +: set-alien-vector ( value c-ptr n rep -- ) \ set-alien-vector bad-simd-call ; Date: Thu, 19 Nov 2009 11:29:15 -0800 Subject: [PATCH 24/46] move methods back into simd functor so the intrinsics can compile --- basis/math/vectors/simd/simd.factor | 264 +++++++++++++--------------- 1 file changed, 127 insertions(+), 137 deletions(-) diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index 84dfce065e..5d96df8b7f 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -118,13 +118,61 @@ GENERIC: simd-element-type ( obj -- c-type ) GENERIC: simd-rep ( simd -- rep ) << +: assert-positive ( x -- y ) ; + : rep-length ( rep -- n ) 16 swap rep-component-type heap-size /i ; foldable +>> +<< > ] bi ; inline + +: v->v-op ( a rep quot: ( (a) rep -- (c) ) fallback-quot -- c ) + drop [ simd-unbox ] 2dip 2curry make-underlying ; inline + +: vn->v-op ( a n rep quot: ( (a) n rep -- (c) ) fallback-quot -- c ) + drop [ simd-unbox ] 3dip 3curry make-underlying ; inline + +: v->n-op ( a rep quot: ( (a) rep -- n ) fallback-quot -- n ) + drop [ underlying>> ] 2dip call ; inline + +: (vv->v-op) ( a b rep quot: ( (a) (b) rep -- (c) ) -- c ) + [ [ simd-unbox ] [ underlying>> ] bi* ] 2dip 3curry make-underlying ; inline + +: (vv->n-op) ( a b rep quot: ( (a) (b) rep -- n ) -- n ) + [ [ underlying>> ] bi@ ] 2dip 3curry call ; inline + +: vv->v-op ( a b rep quot: ( (a) (b) rep -- (c) ) fallback-quot -- c ) + [ '[ _ (vv->v-op) ] ] [ '[ drop @ ] ] bi* if-both-vectors-match ; inline + +: vv'->v-op ( a b rep quot: ( (a) (b) rep -- (c) ) fallback-quot -- c ) + [ '[ _ (vv->v-op) ] ] [ '[ drop @ ] ] bi* if-both-vectors ; inline + +: vv->n-op ( a b rep quot: ( (a) (b) rep -- n ) fallback-quot -- n ) + [ '[ _ (vv->n-op) ] ] [ '[ drop @ ] ] bi* if-both-vectors-match ; inline + +PRIVATE> +>> + +<< +A ] unless ; inline : A-with ( n -- v ) \ A new simd-with ; inline : A-cast ( v -- v' ) \ A new simd-cast ; inline +! SIMD vectors as sequences + +M: A hashcode* underlying>> hashcode* ; inline +M: A clone [ clone ] change-underlying ; inline +M: A length drop N ; inline +M: A nth-unsafe swap \ A-rep (simd-select) ; inline +M: A c:byte-length drop 16 ; inline + +M: A new-sequence + 2dup length = + [ nip [ 16 (byte-array) ] make-underlying ] + [ length bad-simd-length ] if ; inline + +M: A equal? + \ A [ drop v= vall? ] [ 3drop f ] if-both-vectors-match ; inline + +! SIMD primitive operations + +M: A v+ \ A [ (simd-v+) ] [ call-next-method ] vv->v-op ; inline +M: A v- \ A [ (simd-v-) ] [ call-next-method ] vv->v-op ; inline +M: A vneg \ A [ (simd-vneg) ] [ call-next-method ] v->v-op ; inline +M: A v+- \ A [ (simd-v+-) ] [ call-next-method ] vv->v-op ; inline +M: A vs+ \ A [ (simd-vs+) ] [ call-next-method ] vv->v-op ; inline +M: A vs- \ A [ (simd-vs-) ] [ call-next-method ] vv->v-op ; inline +M: A vs* \ A [ (simd-vs*) ] [ call-next-method ] vv->v-op ; inline +M: A v* \ A [ (simd-v*) ] [ call-next-method ] vv->v-op ; inline +M: A v/ \ A [ (simd-v/) ] [ call-next-method ] vv->v-op ; inline +M: A vmin \ A [ (simd-vmin) ] [ call-next-method ] vv->v-op ; inline +M: A vmax \ A [ (simd-vmax) ] [ call-next-method ] vv->v-op ; inline +M: A v. \ A [ (simd-v.) ] [ call-next-method ] vv->n-op ; inline +M: A vsqrt \ A [ (simd-vsqrt) ] [ call-next-method ] v->v-op ; inline +M: A sum \ A [ (simd-sum) ] [ call-next-method ] v->n-op ; inline +M: A vabs \ A [ (simd-vabs) ] [ call-next-method ] v->v-op ; inline +M: A vbitand \ A [ (simd-vbitand) ] [ call-next-method ] vv->v-op ; inline +M: A vbitandn \ A [ (simd-vbitandn) ] [ call-next-method ] vv->v-op ; inline +M: A vbitor \ A [ (simd-vbitor) ] [ call-next-method ] vv->v-op ; inline +M: A vbitxor \ A [ (simd-vbitxor) ] [ call-next-method ] vv->v-op ; inline +M: A vbitnot \ A [ (simd-vbitnot) ] [ call-next-method ] v->v-op ; inline +M: A vand \ A [ (simd-vand) ] [ call-next-method ] vv->v-op ; inline +M: A vandn \ A [ (simd-vandn) ] [ call-next-method ] vv->v-op ; inline +M: A vor \ A [ (simd-vor) ] [ call-next-method ] vv->v-op ; inline +M: A vxor \ A [ (simd-vxor) ] [ call-next-method ] vv->v-op ; inline +M: A vnot \ A [ (simd-vnot) ] [ call-next-method ] v->v-op ; inline +M: A vlshift \ A [ (simd-vlshift) ] [ call-next-method ] vn->v-op ; inline +M: A vrshift \ A [ (simd-vrshift) ] [ call-next-method ] vn->v-op ; inline +M: A hlshift \ A [ (simd-hlshift) ] [ call-next-method ] vn->v-op ; inline +M: A hrshift \ A [ (simd-hrshift) ] [ call-next-method ] vn->v-op ; inline +M: A vshuffle-elements \ A [ (simd-vshuffle-elements) ] [ call-next-method ] vn->v-op ; inline +M: A vshuffle-bytes \ A [ (simd-vshuffle-bytes) ] [ call-next-method ] vv->v-op ; inline +M: A (vmerge-head) \ A [ (simd-vmerge-head) ] [ call-next-method ] vv->v-op ; inline +M: A (vmerge-tail) \ A [ (simd-vmerge-tail) ] [ call-next-method ] vv->v-op ; inline +M: A v<= \ A [ (simd-v<=) ] [ call-next-method ] vv->v-op ; inline +M: A v< \ A [ (simd-v<) ] [ call-next-method ] vv->v-op ; inline +M: A v= \ A [ (simd-v=) ] [ call-next-method ] vv->v-op ; inline +M: A v> \ A [ (simd-v>) ] [ call-next-method ] vv->v-op ; inline +M: A v>= \ A [ (simd-v>=) ] [ call-next-method ] vv->v-op ; inline +M: A vunordered? \ A [ (simd-vunordered?) ] [ call-next-method ] vv->v-op ; inline +M: A vany? \ A [ (simd-vany?) ] [ call-next-method ] v->n-op ; inline +M: A vall? \ A [ (simd-vall?) ] [ call-next-method ] v->n-op ; inline +M: A vnone? \ A [ (simd-vnone?) ] [ call-next-method ] v->n-op ; inline + +! SIMD high-level specializations + +M: A vbroadcast [ swap nth ] keep simd-with ; inline +M: A n+v [ simd-with ] keep v+ ; inline +M: A n-v [ simd-with ] keep v- ; inline +M: A n*v [ simd-with ] keep v* ; inline +M: A n/v [ simd-with ] keep v/ ; inline +M: A v+n over simd-with v+ ; inline +M: A v-n over simd-with v- ; inline +M: A v*n over simd-with v* ; inline +M: A v/n over simd-with v/ ; inline +M: A norm-sq dup v. assert-positive ; inline +M: A norm norm-sq sqrt ; inline +M: A distance v- norm ; inline + +! M: simd-128 >pprint-sequence ; +! M: simd-128 pprint* pprint-object ; + \ A-boa \ A new N { { 2 [ '[ _ [ (simd-gather-2) ] simd-construct-op ] ] } { 4 [ '[ _ [ (simd-gather-4) ] simd-construct-op ] ] } @@ -191,85 +318,8 @@ PRIVATE> >> -: assert-positive ( x -- y ) ; - -! SIMD vectors as sequences - -M: simd-128 hashcode* underlying>> hashcode* ; inline -M: simd-128 clone [ clone ] change-underlying ; inline -M: simd-128 length simd-rep rep-length ; inline -M: simd-128 nth-unsafe [ nip ] 2keep simd-rep (simd-select) ; inline -M: simd-128 c:byte-length drop 16 ; inline - -M: simd-128 new-sequence - 2dup length = - [ nip [ 16 (byte-array) ] make-underlying ] - [ length bad-simd-length ] if ; inline - -! M: simd-128 >pprint-sequence ; -! M: simd-128 pprint* pprint-object ; - INSTANCE: simd-128 sequence -! Unboxers for SIMD operations -<< -> ] [ simd-rep ] tri ; inline - -: simd-v->v-op ( a quot: ( (a) rep -- (c) ) -- c ) - [ simd-unbox ] dip 2curry make-underlying ; inline - -: simd-vn->v-op ( a n quot: ( (a) n rep -- (c) ) -- c ) - [ simd-unbox ] [ swap ] [ 3curry ] tri* make-underlying ; inline - -: simd-v->n-op ( a quot: ( (a) rep -- n ) -- n ) - [ [ underlying>> ] [ simd-rep ] bi ] dip call ; inline - -: ((simd-vv->v-op)) ( a b quot: ( (a) (b) rep -- (c) ) -- c ) - [ simd-unbox ] [ underlying>> swap ] [ 3curry ] tri* make-underlying ; inline - -: ((simd-vv->n-op)) ( a b quot: ( (a) (b) rep -- n ) -- n ) - [ [ underlying>> ] [ simd-rep ] bi ] - [ underlying>> swap ] [ ] tri* call ; inline - -: (simd-vv->v-op) ( a b quot: ( (a) (b) rep -- (c) ) fallback-quot -- c ) - [ '[ _ ((simd-vv->v-op)) ] ] dip if-both-vectors-match ; inline - -: (simd-vv'->v-op) ( a b quot: ( (a) (b) rep -- (c) ) fallback-quot -- c ) - [ '[ _ ((simd-vv->v-op)) ] ] dip if-both-vectors ; inline - -: (simd-vv->n-op) ( a b quot: ( (a) (b) rep -- n ) fallback-quot -- n ) - [ '[ _ ((simd-vv->n-op)) ] ] dip if-both-vectors-match ; inline - -: (simd-method-fallback) ( accum word -- accum ) - [ current-method get literalize \ (call-next-method) [ ] 2sequence suffix! ] - dip suffix! ; - -SYNTAX: simd-vv->v-op - \ (simd-vv->v-op) (simd-method-fallback) ; -SYNTAX: simd-vv'->v-op - \ (simd-vv'->v-op) (simd-method-fallback) ; -SYNTAX: simd-vv->n-op - \ (simd-vv->n-op) (simd-method-fallback) ; - -PRIVATE> ->> - -M: simd-128 equal? - [ v= vall? ] [ 2drop f ] if-both-vectors-match ; inline - ! SIMD constructors : simd-with ( n seq -- v ) @@ -285,66 +335,6 @@ MACRO: simd-boa ( class -- ) : simd-cast ( v seq -- v' ) [ underlying>> ] dip new-underlying ; inline -! SIMD primitive operations - -M: simd-128 v+ [ (simd-v+) ] simd-vv->v-op ; inline -M: simd-128 v- [ (simd-v-) ] simd-vv->v-op ; inline -M: simd-128 vneg [ (simd-vneg) ] simd-v->v-op ; inline -M: simd-128 v+- [ (simd-v+-) ] simd-vv->v-op ; inline -M: simd-128 vs+ [ (simd-vs+) ] simd-vv->v-op ; inline -M: simd-128 vs- [ (simd-vs-) ] simd-vv->v-op ; inline -M: simd-128 vs* [ (simd-vs*) ] simd-vv->v-op ; inline -M: simd-128 v* [ (simd-v*) ] simd-vv->v-op ; inline -M: simd-128 v/ [ (simd-v/) ] simd-vv->v-op ; inline -M: simd-128 vmin [ (simd-vmin) ] simd-vv->v-op ; inline -M: simd-128 vmax [ (simd-vmax) ] simd-vv->v-op ; inline -M: simd-128 v. [ (simd-v.) ] simd-vv->n-op ; inline -M: simd-128 vsqrt [ (simd-vsqrt) ] simd-v->v-op ; inline -M: simd-128 sum [ (simd-sum) ] simd-v->n-op ; inline -M: simd-128 vabs [ (simd-vabs) ] simd-v->v-op ; inline -M: simd-128 vbitand [ (simd-vbitand) ] simd-vv->v-op ; inline -M: simd-128 vbitandn [ (simd-vbitandn) ] simd-vv->v-op ; inline -M: simd-128 vbitor [ (simd-vbitor) ] simd-vv->v-op ; inline -M: simd-128 vbitxor [ (simd-vbitxor) ] simd-vv->v-op ; inline -M: simd-128 vbitnot [ (simd-vbitnot) ] simd-v->v-op ; inline -M: simd-128 vand [ (simd-vand) ] simd-vv->v-op ; inline -M: simd-128 vandn [ (simd-vandn) ] simd-vv->v-op ; inline -M: simd-128 vor [ (simd-vor) ] simd-vv->v-op ; inline -M: simd-128 vxor [ (simd-vxor) ] simd-vv->v-op ; inline -M: simd-128 vnot [ (simd-vnot) ] simd-v->v-op ; inline -M: simd-128 vlshift [ (simd-vlshift) ] simd-vn->v-op ; inline -M: simd-128 vrshift [ (simd-vrshift) ] simd-vn->v-op ; inline -M: simd-128 hlshift [ (simd-hlshift) ] simd-vn->v-op ; inline -M: simd-128 hrshift [ (simd-hrshift) ] simd-vn->v-op ; inline -M: simd-128 vshuffle-elements [ (simd-vshuffle-elements) ] simd-vn->v-op ; inline -M: simd-128 vshuffle-bytes [ (simd-vshuffle-bytes) ] simd-vv->v-op ; inline -M: simd-128 (vmerge-head) [ (simd-vmerge-head) ] simd-vv->v-op ; inline -M: simd-128 (vmerge-tail) [ (simd-vmerge-tail) ] simd-vv->v-op ; inline -M: simd-128 v<= [ (simd-v<=) ] simd-vv->v-op ; inline -M: simd-128 v< [ (simd-v<) ] simd-vv->v-op ; inline -M: simd-128 v= [ (simd-v=) ] simd-vv->v-op ; inline -M: simd-128 v> [ (simd-v>) ] simd-vv->v-op ; inline -M: simd-128 v>= [ (simd-v>=) ] simd-vv->v-op ; inline -M: simd-128 vunordered? [ (simd-vunordered?) ] simd-vv->v-op ; inline -M: simd-128 vany? [ (simd-vany?) ] simd-v->n-op ; inline -M: simd-128 vall? [ (simd-vall?) ] simd-v->n-op ; inline -M: simd-128 vnone? [ (simd-vnone?) ] simd-v->n-op ; inline - -! SIMD high-level specializations - -M: simd-128 vbroadcast [ swap nth ] keep simd-with ; inline -M: simd-128 n+v [ simd-with ] keep v+ ; inline -M: simd-128 n-v [ simd-with ] keep v- ; inline -M: simd-128 n*v [ simd-with ] keep v* ; inline -M: simd-128 n/v [ simd-with ] keep v/ ; inline -M: simd-128 v+n over simd-with v+ ; inline -M: simd-128 v-n over simd-with v- ; inline -M: simd-128 v*n over simd-with v* ; inline -M: simd-128 v/n over simd-with v/ ; inline -M: simd-128 norm-sq dup v. assert-positive ; inline -M: simd-128 norm norm-sq sqrt ; inline -M: simd-128 distance v- norm ; inline - ! SIMD instances SIMD-128: char-16 From 152b0d2df59a74a115a18444cc8b79fcb949c1a3 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 19 Nov 2009 11:53:46 -0800 Subject: [PATCH 25/46] break simd intrinsics into a separate vocab so they can be intrinsified before the simd methods compile --- .../compiler/cfg/intrinsics/simd/simd.factor | 2 +- .../cfg/value-numbering/simd/simd.factor | 2 +- .../tree/propagation/simd/simd.factor | 2 +- basis/cpu/architecture/architecture.factor | 5 +- basis/math/vectors/simd/simd.factor | 101 ++++-------------- 5 files changed, 27 insertions(+), 85 deletions(-) diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index 54f105ca02..5f4b71a846 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors alien alien.c-types byte-arrays fry classes.algebra cpu.architecture kernel math sequences -math.vectors math.vectors.simd math.vectors.simd.private +math.vectors math.vectors.simd.intrinsics macros generalizations combinators combinators.short-circuit arrays locals compiler.tree.propagation.info compiler.cfg.builder.blocks diff --git a/basis/compiler/cfg/value-numbering/simd/simd.factor b/basis/compiler/cfg/value-numbering/simd/simd.factor index 9bb4453313..16d38bc5bb 100644 --- a/basis/compiler/cfg/value-numbering/simd/simd.factor +++ b/basis/compiler/cfg/value-numbering/simd/simd.factor @@ -4,7 +4,7 @@ USING: accessors combinators combinators.short-circuit arrays fry kernel layouts math namespaces sequences cpu.architecture math.bitwise math.order classes vectors locals make alien.c-types io.binary grouping -math.vectors.simd +math.vectors.simd.intrinsics compiler.cfg compiler.cfg.registers compiler.cfg.comparisons diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index b71a34e938..679cc09273 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors byte-arrays combinators fry sequences compiler.tree.propagation.info cpu.architecture kernel words math -math.intervals math.vectors.simd math.vectors.simd.private ; +math.intervals math.vectors.simd.intrinsics ; IN: compiler.tree.propagation.simd { diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index f4d150033b..86d68caa66 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2006, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors arrays assocs generic kernel kernel.private +USING: accessors alien.c-types arrays assocs generic kernel kernel.private math memory namespaces make sequences layouts system hashtables classes alien byte-arrays combinators words sets fry ; IN: cpu.architecture @@ -160,6 +160,9 @@ M: ulonglong-scalar-rep rep-size drop 8 ; GENERIC: rep-component-type ( rep -- n ) +: rep-length ( rep -- n ) + 16 swap rep-component-type heap-size /i ; foldable + ! Methods defined in alien.c-types GENERIC: scalar-rep-of ( rep -- rep' ) diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index 5d96df8b7f..9265df7343 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -1,18 +1,12 @@ USING: accessors alien.c-types arrays byte-arrays classes combinators cpu.architecture effects fry functors generalizations generic generic.parser kernel lexer literals macros math math.functions -math.vectors math.vectors.private namespaces parser +math.vectors math.vectors.private math.vectors.simd.intrinsics namespaces parser prettyprint.custom quotations sequences sequences.private vocabs vocabs.loader words ; QUALIFIED-WITH: alien.c-types c IN: math.vectors.simd -DEFER: vconvert -DEFER: simd-with -DEFER: simd-boa -DEFER: simd-cast - -ERROR: bad-simd-call word ; ERROR: bad-simd-length got expected ; << @@ -30,68 +24,6 @@ PRIVATE> ) ( a b rep -- c ) \ v> bad-simd-call ; -: (simd-v>=) ( a b rep -- c ) \ v>= bad-simd-call ; -: (simd-vunordered?) ( a b rep -- c ) \ vunordered? bad-simd-call ; -: (simd-vany?) ( a rep -- ? ) \ vany? bad-simd-call ; -: (simd-vall?) ( a rep -- ? ) \ vall? bad-simd-call ; -: (simd-vnone?) ( a rep -- ? ) \ vnone? bad-simd-call ; -: (simd-v>float) ( a rep -- c ) \ vconvert bad-simd-call ; -: (simd-v>integer) ( a rep -- c ) \ vconvert bad-simd-call ; -: (simd-vpack-signed) ( a b rep -- c ) \ vconvert bad-simd-call ; -: (simd-vpack-unsigned) ( a b rep -- c ) \ vconvert bad-simd-call ; -: (simd-vunpack-head) ( a rep -- c ) \ vconvert bad-simd-call ; -: (simd-vunpack-tail) ( a rep -- c ) \ vconvert bad-simd-call ; -: (simd-with) ( n rep -- v ) \ simd-with bad-simd-call ; -: (simd-gather-2) ( m n rep -- v ) \ simd-boa bad-simd-call ; -: (simd-gather-4) ( m n o p rep -- v ) \ simd-boa bad-simd-call ; -: (simd-select) ( a n rep -- n ) \ nth bad-simd-call ; - -PRIVATE> - -: alien-vector ( c-ptr n rep -- value ) \ alien-vector bad-simd-call ; -: set-alien-vector ( value c-ptr n rep -- ) \ set-alien-vector bad-simd-call ; - -> - << A ] unless ; inline M: A hashcode* underlying>> hashcode* ; inline M: A clone [ clone ] change-underlying ; inline M: A length drop N ; inline -M: A nth-unsafe swap \ A-rep (simd-select) ; inline +M: A nth-unsafe + swap { + { 0 [ 0 \ A-rep (simd-select) ] } + { 1 [ 1 \ A-rep (simd-select) ] } + { 2 [ 2 \ A-rep (simd-select) ] } + { 3 [ 3 \ A-rep (simd-select) ] } + { 4 [ 4 \ A-rep (simd-select) ] } + { 5 [ 5 \ A-rep (simd-select) ] } + { 6 [ 6 \ A-rep (simd-select) ] } + { 7 [ 7 \ A-rep (simd-select) ] } + { 8 [ 8 \ A-rep (simd-select) ] } + { 9 [ 9 \ A-rep (simd-select) ] } + { 10 [ 10 \ A-rep (simd-select) ] } + { 11 [ 11 \ A-rep (simd-select) ] } + { 12 [ 12 \ A-rep (simd-select) ] } + { 13 [ 13 \ A-rep (simd-select) ] } + { 14 [ 14 \ A-rep (simd-select) ] } + { 15 [ 15 \ A-rep (simd-select) ] } + } case ; inline M: A c:byte-length drop 16 ; inline M: A new-sequence @@ -353,10 +296,6 @@ SIMD-128: double-2 M: simd-128 vshuffle ( u perm -- v ) vshuffle-bytes ; inline -"compiler.tree.propagation.simd" require -"compiler.cfg.intrinsics.simd" require -"compiler.cfg.value-numbering.simd" require - "mirrors" vocab [ "math.vectors.simd.mirrors" require ] when From 7f546daead278cf3b293dd24b57c9321bd45996b Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 19 Nov 2009 12:15:24 -0800 Subject: [PATCH 26/46] fill in longlong cases for %scalar>integer (won't work on 32bit yet!) --- basis/cpu/x86/x86.factor | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index a55489dcee..b752935f9c 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -1323,8 +1323,15 @@ M: x86 %shr-vector-imm-reps %shr-vector-reps ; M: x86 %integer>scalar drop MOVD ; +! XXX the longlong versions won't work on x86.32 M:: x86 %scalar>integer ( dst src rep -- ) rep { + { longlong-scalar-rep [ + dst src MOVD + ] } + { ulonglong-scalar-rep [ + dst src MOVD + ] } { int-scalar-rep [ dst 32-bit-version-of src MOVD dst dst 32-bit-version-of From 6794d1465245b3d6323b559504e9aad18601d21b Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 19 Nov 2009 12:17:14 -0800 Subject: [PATCH 27/46] typo --- basis/math/vectors/simd/simd.factor | 84 ++++++++++++++--------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index 9265df7343..0e40e787df 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -172,48 +172,48 @@ M: A equal? ! SIMD primitive operations -M: A v+ \ A [ (simd-v+) ] [ call-next-method ] vv->v-op ; inline -M: A v- \ A [ (simd-v-) ] [ call-next-method ] vv->v-op ; inline -M: A vneg \ A [ (simd-vneg) ] [ call-next-method ] v->v-op ; inline -M: A v+- \ A [ (simd-v+-) ] [ call-next-method ] vv->v-op ; inline -M: A vs+ \ A [ (simd-vs+) ] [ call-next-method ] vv->v-op ; inline -M: A vs- \ A [ (simd-vs-) ] [ call-next-method ] vv->v-op ; inline -M: A vs* \ A [ (simd-vs*) ] [ call-next-method ] vv->v-op ; inline -M: A v* \ A [ (simd-v*) ] [ call-next-method ] vv->v-op ; inline -M: A v/ \ A [ (simd-v/) ] [ call-next-method ] vv->v-op ; inline -M: A vmin \ A [ (simd-vmin) ] [ call-next-method ] vv->v-op ; inline -M: A vmax \ A [ (simd-vmax) ] [ call-next-method ] vv->v-op ; inline -M: A v. \ A [ (simd-v.) ] [ call-next-method ] vv->n-op ; inline -M: A vsqrt \ A [ (simd-vsqrt) ] [ call-next-method ] v->v-op ; inline -M: A sum \ A [ (simd-sum) ] [ call-next-method ] v->n-op ; inline -M: A vabs \ A [ (simd-vabs) ] [ call-next-method ] v->v-op ; inline -M: A vbitand \ A [ (simd-vbitand) ] [ call-next-method ] vv->v-op ; inline -M: A vbitandn \ A [ (simd-vbitandn) ] [ call-next-method ] vv->v-op ; inline -M: A vbitor \ A [ (simd-vbitor) ] [ call-next-method ] vv->v-op ; inline -M: A vbitxor \ A [ (simd-vbitxor) ] [ call-next-method ] vv->v-op ; inline -M: A vbitnot \ A [ (simd-vbitnot) ] [ call-next-method ] v->v-op ; inline -M: A vand \ A [ (simd-vand) ] [ call-next-method ] vv->v-op ; inline -M: A vandn \ A [ (simd-vandn) ] [ call-next-method ] vv->v-op ; inline -M: A vor \ A [ (simd-vor) ] [ call-next-method ] vv->v-op ; inline -M: A vxor \ A [ (simd-vxor) ] [ call-next-method ] vv->v-op ; inline -M: A vnot \ A [ (simd-vnot) ] [ call-next-method ] v->v-op ; inline -M: A vlshift \ A [ (simd-vlshift) ] [ call-next-method ] vn->v-op ; inline -M: A vrshift \ A [ (simd-vrshift) ] [ call-next-method ] vn->v-op ; inline -M: A hlshift \ A [ (simd-hlshift) ] [ call-next-method ] vn->v-op ; inline -M: A hrshift \ A [ (simd-hrshift) ] [ call-next-method ] vn->v-op ; inline -M: A vshuffle-elements \ A [ (simd-vshuffle-elements) ] [ call-next-method ] vn->v-op ; inline -M: A vshuffle-bytes \ A [ (simd-vshuffle-bytes) ] [ call-next-method ] vv->v-op ; inline -M: A (vmerge-head) \ A [ (simd-vmerge-head) ] [ call-next-method ] vv->v-op ; inline -M: A (vmerge-tail) \ A [ (simd-vmerge-tail) ] [ call-next-method ] vv->v-op ; inline -M: A v<= \ A [ (simd-v<=) ] [ call-next-method ] vv->v-op ; inline -M: A v< \ A [ (simd-v<) ] [ call-next-method ] vv->v-op ; inline -M: A v= \ A [ (simd-v=) ] [ call-next-method ] vv->v-op ; inline -M: A v> \ A [ (simd-v>) ] [ call-next-method ] vv->v-op ; inline -M: A v>= \ A [ (simd-v>=) ] [ call-next-method ] vv->v-op ; inline -M: A vunordered? \ A [ (simd-vunordered?) ] [ call-next-method ] vv->v-op ; inline -M: A vany? \ A [ (simd-vany?) ] [ call-next-method ] v->n-op ; inline -M: A vall? \ A [ (simd-vall?) ] [ call-next-method ] v->n-op ; inline -M: A vnone? \ A [ (simd-vnone?) ] [ call-next-method ] v->n-op ; inline +M: A v+ \ A-rep [ (simd-v+) ] [ call-next-method ] vv->v-op ; inline +M: A v- \ A-rep [ (simd-v-) ] [ call-next-method ] vv->v-op ; inline +M: A vneg \ A-rep [ (simd-vneg) ] [ call-next-method ] v->v-op ; inline +M: A v+- \ A-rep [ (simd-v+-) ] [ call-next-method ] vv->v-op ; inline +M: A vs+ \ A-rep [ (simd-vs+) ] [ call-next-method ] vv->v-op ; inline +M: A vs- \ A-rep [ (simd-vs-) ] [ call-next-method ] vv->v-op ; inline +M: A vs* \ A-rep [ (simd-vs*) ] [ call-next-method ] vv->v-op ; inline +M: A v* \ A-rep [ (simd-v*) ] [ call-next-method ] vv->v-op ; inline +M: A v/ \ A-rep [ (simd-v/) ] [ call-next-method ] vv->v-op ; inline +M: A vmin \ A-rep [ (simd-vmin) ] [ call-next-method ] vv->v-op ; inline +M: A vmax \ A-rep [ (simd-vmax) ] [ call-next-method ] vv->v-op ; inline +M: A v. \ A-rep [ (simd-v.) ] [ call-next-method ] vv->n-op ; inline +M: A vsqrt \ A-rep [ (simd-vsqrt) ] [ call-next-method ] v->v-op ; inline +M: A sum \ A-rep [ (simd-sum) ] [ call-next-method ] v->n-op ; inline +M: A vabs \ A-rep [ (simd-vabs) ] [ call-next-method ] v->v-op ; inline +M: A vbitand \ A-rep [ (simd-vbitand) ] [ call-next-method ] vv->v-op ; inline +M: A vbitandn \ A-rep [ (simd-vbitandn) ] [ call-next-method ] vv->v-op ; inline +M: A vbitor \ A-rep [ (simd-vbitor) ] [ call-next-method ] vv->v-op ; inline +M: A vbitxor \ A-rep [ (simd-vbitxor) ] [ call-next-method ] vv->v-op ; inline +M: A vbitnot \ A-rep [ (simd-vbitnot) ] [ call-next-method ] v->v-op ; inline +M: A vand \ A-rep [ (simd-vand) ] [ call-next-method ] vv->v-op ; inline +M: A vandn \ A-rep [ (simd-vandn) ] [ call-next-method ] vv->v-op ; inline +M: A vor \ A-rep [ (simd-vor) ] [ call-next-method ] vv->v-op ; inline +M: A vxor \ A-rep [ (simd-vxor) ] [ call-next-method ] vv->v-op ; inline +M: A vnot \ A-rep [ (simd-vnot) ] [ call-next-method ] v->v-op ; inline +M: A vlshift \ A-rep [ (simd-vlshift) ] [ call-next-method ] vn->v-op ; inline +M: A vrshift \ A-rep [ (simd-vrshift) ] [ call-next-method ] vn->v-op ; inline +M: A hlshift \ A-rep [ (simd-hlshift) ] [ call-next-method ] vn->v-op ; inline +M: A hrshift \ A-rep [ (simd-hrshift) ] [ call-next-method ] vn->v-op ; inline +M: A vshuffle-elements \ A-rep [ (simd-vshuffle-elements) ] [ call-next-method ] vn->v-op ; inline +M: A vshuffle-bytes \ A-rep [ (simd-vshuffle-bytes) ] [ call-next-method ] vv->v-op ; inline +M: A (vmerge-head) \ A-rep [ (simd-vmerge-head) ] [ call-next-method ] vv->v-op ; inline +M: A (vmerge-tail) \ A-rep [ (simd-vmerge-tail) ] [ call-next-method ] vv->v-op ; inline +M: A v<= \ A-rep [ (simd-v<=) ] [ call-next-method ] vv->v-op ; inline +M: A v< \ A-rep [ (simd-v<) ] [ call-next-method ] vv->v-op ; inline +M: A v= \ A-rep [ (simd-v=) ] [ call-next-method ] vv->v-op ; inline +M: A v> \ A-rep [ (simd-v>) ] [ call-next-method ] vv->v-op ; inline +M: A v>= \ A-rep [ (simd-v>=) ] [ call-next-method ] vv->v-op ; inline +M: A vunordered? \ A-rep [ (simd-vunordered?) ] [ call-next-method ] vv->v-op ; inline +M: A vany? \ A-rep [ (simd-vany?) ] [ call-next-method ] v->n-op ; inline +M: A vall? \ A-rep [ (simd-vall?) ] [ call-next-method ] v->n-op ; inline +M: A vnone? \ A-rep [ (simd-vnone?) ] [ call-next-method ] v->n-op ; inline ! SIMD high-level specializations From d94ffe6d786eaff0c2497e303826ded907b80e53 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Mon, 23 Nov 2009 21:24:55 -0800 Subject: [PATCH 28/46] scalar fallbacks for simd intrinsics --- .../tree/propagation/simd/simd.factor | 133 +++++----- basis/cpu/architecture/architecture.factor | 15 +- .../vectors/simd/intrinsics/intrinsics.factor | 232 ++++++++++++++++++ basis/math/vectors/simd/simd-docs.factor | 3 +- basis/math/vectors/simd/simd.factor | 3 + 5 files changed, 327 insertions(+), 59 deletions(-) create mode 100644 basis/math/vectors/simd/intrinsics/intrinsics.factor diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index 679cc09273..93eb2a8ecc 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -1,57 +1,76 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors byte-arrays combinators fry sequences -compiler.tree.propagation.info cpu.architecture kernel words math +USING: accessors byte-arrays combinators continuations fry sequences +compiler.tree.propagation.info cpu.architecture kernel words make math math.intervals math.vectors.simd.intrinsics ; IN: compiler.tree.propagation.simd -{ - (simd-v+) - (simd-v-) - (simd-vneg) - (simd-vabs) - (simd-v+-) - (simd-v*) - (simd-v/) - (simd-vmin) - (simd-vmax) - (simd-sum) - (simd-vsqrt) - (simd-vbitand) - (simd-vbitandn) - (simd-vbitor) - (simd-vbitxor) - (simd-vbitnot) - (simd-vand) - (simd-vandn) - (simd-vor) - (simd-vxor) - (simd-vnot) - (simd-vlshift) - (simd-vrshift) - (simd-hlshift) - (simd-hrshift) - (simd-vshuffle-bytes) - (simd-vshuffle-elements) - (simd-vmerge-head) - (simd-vmerge-tail) - (simd-v>float) - (simd-v>integer) - (simd-vpack-signed) - (simd-vpack-unsigned) - (simd-vunpack-head) - (simd-vunpack-tail) - (simd-v<=) - (simd-v<) - (simd-v=) - (simd-v>) - (simd-v>=) - (simd-vunordered?) - (simd-with) - (simd-gather-2) - (simd-gather-4) - alien-vector -} [ { byte-array } "default-output-classes" set-word-prop ] each +CONSTANT: vector>vector-intrinsics + { + (simd-v+) + (simd-v-) + (simd-vneg) + (simd-v+-) + (simd-vs+) + (simd-vs-) + (simd-vs*) + (simd-v*) + (simd-v/) + (simd-vmin) + (simd-vmax) + (simd-vsqrt) + (simd-vabs) + (simd-vbitand) + (simd-vbitandn) + (simd-vbitor) + (simd-vbitxor) + (simd-vbitnot) + (simd-vand) + (simd-vandn) + (simd-vor) + (simd-vxor) + (simd-vnot) + (simd-vlshift) + (simd-vrshift) + (simd-hlshift) + (simd-hrshift) + (simd-vshuffle-elements) + (simd-vshuffle-bytes) + (simd-vmerge-head) + (simd-vmerge-tail) + (simd-v<=) + (simd-v<) + (simd-v=) + (simd-v>) + (simd-v>=) + (simd-vunordered?) + (simd-v>float) + (simd-v>integer) + (simd-vpack-signed) + (simd-vpack-unsigned) + (simd-vunpack-head) + (simd-vunpack-tail) + (simd-with) + (simd-gather-2) + (simd-gather-4) + alien-vector + } + +CONSTANT: vector-other-intrinsics + { + (simd-v.) + (simd-sum) + (simd-vany?) + (simd-vall?) + (simd-vnone?) + (simd-select) + set-alien-vector + } + +: vector-intrinsics ( -- x ) + vector>vector-intrinsics vector-other-intrinsics append ; + +vector>vector-intrinsics [ { byte-array } "default-output-classes" set-word-prop ] each : scalar-output-class ( rep -- class ) dup literal?>> [ @@ -79,12 +98,16 @@ IN: compiler.tree.propagation.simd real [0,inf] value-info-intersect ] "outputs" set-word-prop -! If SIMD is not available, inline alien-vector and set-alien-vector -! to get a speedup +: try-intrinsic ( node intrinsic-quot -- ? ) + '[ [ _ call( node -- ) ] { } make drop t ] [ 2drop f ] recover ; + : inline-unless-intrinsic ( word -- ) - dup '[ drop _ dup "intrinsic" word-prop [ drop f ] [ def>> ] if ] + dup '[ + _ swap over "intrinsic" word-prop + ! word node intrinsic + [ try-intrinsic [ drop f ] [ def>> ] if ] + [ def>> ] if* + ] "custom-inlining" set-word-prop ; -\ alien-vector inline-unless-intrinsic - -\ set-alien-vector inline-unless-intrinsic +vector-intrinsics [ inline-unless-intrinsic ] each diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 86d68caa66..6631cec189 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -105,7 +105,7 @@ scalar-rep ; { ushort-scalar-rep short-scalar-rep } { uint-scalar-rep int-scalar-rep } { ulonglong-scalar-rep longlong-scalar-rep } - } ?at drop ; + } ?at drop ; foldable : widen-vector-rep ( rep -- rep' ) { @@ -116,7 +116,18 @@ scalar-rep ; { ushort-8-rep uint-4-rep } { uint-4-rep ulonglong-2-rep } { float-4-rep double-2-rep } - } at ; + } at ; foldable + +: narrow-vector-rep ( rep -- rep' ) + { + { short-8-rep char-16-rep } + { int-4-rep short-8-rep } + { longlong-2-rep int-4-rep } + { ushort-8-rep uchar-16-rep } + { uint-4-rep ushort-8-rep } + { ulonglong-2-rep uint-4-rep } + { double-2-rep float-4-rep } + } at ; foldable ! Register classes SINGLETONS: int-regs float-regs ; diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor new file mode 100644 index 0000000000..a236db00c9 --- /dev/null +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -0,0 +1,232 @@ +! (c)2009 Slava Pestov, Joe Groff bsd license +USING: accessors alien alien.c-types alien.data combinators +cords cpu.architecture fry generalizations kernel libc locals +math math.libm math.order math.ranges math.vectors sequences +sequences.private specialized-arrays vocabs.loader ; +QUALIFIED-WITH: alien.c-types c +SPECIALIZED-ARRAYS: + c:char c:short c:int c:longlong + c:uchar c:ushort c:uint c:ulonglong + c:float c:double ; +IN: math.vectors.simd.intrinsics + +: assert-positive ( x -- y ) ; + +bitwise-vector-rep ( rep -- rep' ) + { + { float-4-rep [ uint-4-rep ] } + { double-2-rep [ ulonglong-2-rep ] } + [ ] + } case ; foldable + +: >uint-vector-rep ( rep -- rep' ) + { + { longlong-2-rep [ ulonglong-2-rep ] } + { int-4-rep [ uint-4-rep ] } + { short-8-rep [ ushort-8-rep ] } + { char-16-rep [ uchar-16-rep ] } + [ ] + } case ; foldable + +: >int-vector-rep ( rep -- rep' ) + { + { float-4-rep [ int-4-rep ] } + { double-2-rep [ longlong-2-rep ] } + } case ; foldable + +: >float-vector-rep ( rep -- rep' ) + { + { int-4-rep [ float-4-rep ] } + { longlong-2-rep [ double-2-rep ] } + } case ; foldable + +: [byte>rep-array] ( rep -- class ) + { + { char-16-rep [ [ byte-array>char-array ] ] } + { uchar-16-rep [ [ byte-array>uchar-array ] ] } + { short-8-rep [ [ byte-array>short-array ] ] } + { ushort-8-rep [ [ byte-array>ushort-array ] ] } + { int-4-rep [ [ byte-array>int-array ] ] } + { uint-4-rep [ [ byte-array>uint-array ] ] } + { longlong-2-rep [ [ byte-array>longlong-array ] ] } + { ulonglong-2-rep [ [ byte-array>ulonglong-array ] ] } + { float-4-rep [ [ byte-array>float-array ] ] } + { double-2-rep [ [ byte-array>double-array ] ] } + } case ; foldable + +: [>rep-array] ( rep -- class ) + { + { char-16-rep [ [ >char-array ] ] } + { uchar-16-rep [ [ >uchar-array ] ] } + { short-8-rep [ [ >short-array ] ] } + { ushort-8-rep [ [ >ushort-array ] ] } + { int-4-rep [ [ >int-array ] ] } + { uint-4-rep [ [ >uint-array ] ] } + { longlong-2-rep [ [ >longlong-array ] ] } + { ulonglong-2-rep [ [ >ulonglong-array ] ] } + { float-4-rep [ [ >float-array ] ] } + { double-2-rep [ [ >double-array ] ] } + } case ; foldable + +: [] ( rep -- class ) + { + { char-16-rep [ [ 16 (char-array) ] ] } + { uchar-16-rep [ [ 16 (uchar-array) ] ] } + { short-8-rep [ [ 8 (short-array) ] ] } + { ushort-8-rep [ [ 8 (ushort-array) ] ] } + { int-4-rep [ [ 4 (int-array) ] ] } + { uint-4-rep [ [ 4 (uint-array) ] ] } + { longlong-2-rep [ [ 2 (longlong-array) ] ] } + { ulonglong-2-rep [ [ 2 (ulonglong-array) ] ] } + { float-4-rep [ [ 4 (float-array) ] ] } + { double-2-rep [ [ 2 (double-array) ] ] } + } case ; foldable + +: rep-tf-values ( rep -- t f ) + float-vector-rep? [ -1 bits>double 0.0 ] [ -1 0 ] if ; + +: >rep-array ( a rep -- a' ) + [byte>rep-array] call( a -- a' ) ; inline +: 2>rep-array ( a b rep -- a' b' ) + [byte>rep-array] '[ _ call( a -- a' ) ] bi@ ; inline +: ( rep -- a' ) + [] call( -- a' ) ; inline + +: components-map ( a rep quot -- c ) + [ >rep-array ] dip map underlying>> ; inline +: components-2map ( a b rep quot -- c ) + [ 2>rep-array ] dip 2map underlying>> ; inline +: components-reduce ( a rep quot -- x ) + [ >rep-array [ ] ] dip map-reduce ; inline + +: bitwise-components-map ( a rep quot -- c ) + [ >bitwise-vector-rep >rep-array ] dip map underlying>> ; inline +: bitwise-components-2map ( a b rep quot -- c ) + [ >bitwise-vector-rep 2>rep-array ] dip 2map underlying>> ; inline +: bitwise-components-reduce ( a rep quot -- x ) + [ >bitwise-vector-rep >rep-array [ ] ] dip map-reduce ; inline + +:: (vshuffle) ( a elts rep -- c ) + a rep >rep-array :> a' + rep :> c' + elts [| from to | + from a' nth-unsafe + rep rep-length 1 - bitand + to c' set-nth-unsafe + ] each-index + c' underlying>> ; inline + +PRIVATE> + +: (simd-v+) ( a b rep -- c ) [ + ] components-2map ; +: (simd-v-) ( a b rep -- c ) [ - ] components-2map ; +: (simd-vneg) ( a rep -- c ) [ neg ] components-map ; +:: (simd-v+-) ( a b rep -- c ) + a b rep 2>rep-array :> ( a' b' ) + rep :> c' + 0 rep length 1 - 2 [| n | + n a' nth-unsafe n b' nth-unsafe - + n c' set-nth-unsafe + + n 1 + a' nth-unsafe n 1 + b' nth-unsafe + + n 1 + c' set-nth-unsafe + ] each + c' underlying>> ; +: (simd-vs+) ( a b rep -- c ) dup '[ + _ c-type-clamp ] components-2map ; +: (simd-vs-) ( a b rep -- c ) dup '[ - _ c-type-clamp ] components-2map ; +: (simd-vs*) ( a b rep -- c ) dup '[ - _ c-type-clamp ] components-2map ; +: (simd-v*) ( a b rep -- c ) [ * ] components-2map ; +: (simd-v/) ( a b rep -- c ) [ / ] components-2map ; +: (simd-vmin) ( a b rep -- c ) [ min ] components-2map ; +: (simd-vmax) ( a b rep -- c ) [ max ] components-2map ; +: (simd-v.) ( a b rep -- n ) + [ 2>rep-array [ [ first ] bi@ * ] 2keep ] keep + 1 swap rep-length [a,b) [ '[ _ swap nth-unsafe ] bi@ * + ] with with each ; +: (simd-vsqrt) ( a rep -- c ) [ fsqrt ] components-map ; +: (simd-sum) ( a rep -- n ) [ + ] components-reduce ; +: (simd-vabs) ( a rep -- c ) [ abs ] components-map ; +: (simd-vbitand) ( a b rep -- c ) [ bitand ] bitwise-components-2map ; +: (simd-vbitandn) ( a b rep -- c ) [ [ not ] dip bitand ] bitwise-components-2map ; +: (simd-vbitor) ( a b rep -- c ) [ bitor ] bitwise-components-2map ; +: (simd-vbitxor) ( a b rep -- c ) [ bitxor ] bitwise-components-2map ; +: (simd-vbitnot) ( a rep -- c ) [ bitnot ] bitwise-components-map ; +: (simd-vand) ( a b rep -- c ) [ bitand ] bitwise-components-2map ; +: (simd-vandn) ( a b rep -- c ) [ [ not ] dip bitand ] bitwise-components-2map ; +: (simd-vor) ( a b rep -- c ) [ bitor ] bitwise-components-2map ; +: (simd-vxor) ( a b rep -- c ) [ bitxor ] bitwise-components-2map ; +: (simd-vnot) ( a rep -- c ) [ bitnot ] bitwise-components-map ; +: (simd-vlshift) ( a n rep -- c ) swap '[ _ shift ] bitwise-components-map ; +: (simd-vrshift) ( a n rep -- c ) swap '[ _ neg shift ] bitwise-components-map ; +: (simd-hlshift) ( a n rep -- c ) + drop tail-slice 16 0 pad-tail ; +: (simd-hrshift) ( a n rep -- c ) + drop head-slice 16 0 pad-head ; +: (simd-vshuffle-elements) ( a n rep -- c ) [ rep-length 0 pad-tail ] keep (vshuffle) ; +: (simd-vshuffle-bytes) ( a b rep -- c ) drop uchar-16-rep (vshuffle) ; +:: (simd-vmerge-head) ( a b rep -- c ) + a b rep 2>rep-array :> ( a' b' ) + rep :> c' + rep rep-length 2 /i iota [| n | + n a' nth-unsafe n 2 * c' set-nth-unsafe + n b' nth-unsafe n 2 * 1 + c' set-nth-unsafe + ] each + c' underlying>> ; +:: (simd-vmerge-tail) ( a b rep -- c ) + a b rep 2>rep-array :> ( a' b' ) + rep :> c' + rep rep-length 2 /i :> len + len iota [| n | + n len + a' nth-unsafe n 2 * c' set-nth-unsafe + n len + b' nth-unsafe n 2 * 1 + c' set-nth-unsafe + ] each + c' underlying>> ; +: (simd-v<=) ( a b rep -- c ) + dup rep-tf-values '[ <= _ _ ? ] components-2map ; +: (simd-v<) ( a b rep -- c ) + dup rep-tf-values '[ < _ _ ? ] components-2map ; +: (simd-v=) ( a b rep -- c ) + dup rep-tf-values '[ = _ _ ? ] components-2map ; +: (simd-v>) ( a b rep -- c ) + dup rep-tf-values '[ > _ _ ? ] components-2map ; +: (simd-v>=) ( a b rep -- c ) + dup rep-tf-values '[ >= _ _ ? ] components-2map ; +: (simd-vunordered?) ( a b rep -- c ) + dup rep-tf-values '[ unordered? _ _ ? ] components-2map ; +: (simd-vany?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? not ; +: (simd-vall?) ( a rep -- ? ) [ bitand ] bitwise-components-reduce zero? not ; +: (simd-vnone?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? ; +: (simd-v>float) ( a rep -- c ) + [ >rep-array ] [ >float-vector-rep [>rep-array] ] bi call( i -- f ) ; +: (simd-v>integer) ( a rep -- c ) + [ >rep-array ] [ >int-vector-rep [>rep-array] ] bi call( i -- f ) ; +: (simd-vpack-signed) ( a b rep -- c ) + [ 2>rep-array cord-append ] + [ narrow-vector-rep [ [] ] [ rep-component-type ] bi ] bi + '[ _ c-type-clamp ] swap map-as ; +: (simd-vpack-unsigned) ( a b rep -- c ) + [ 2>rep-array cord-append ] + [ narrow-vector-rep >uint-vector-rep [ [] ] [ rep-component-type ] bi ] bi + '[ _ c-type-clamp ] swap map-as ; +: (simd-vunpack-head) ( a rep -- c ) + [ >rep-array ] [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ] bi + [ head-slice ] dip call( a' -- c' ) underlying>> ; +: (simd-vunpack-tail) ( a rep -- c ) + [ >rep-array ] [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ] bi + [ tail-slice ] dip call( a' -- c' ) underlying>> ; +: (simd-with) ( n rep -- v ) + [ rep-length iota swap '[ _ ] ] [ ] bi replicate-as ; +: (simd-gather-2) ( m n rep -- v ) [ 2 set-firstn ] keep underlying>> ; +: (simd-gather-4) ( m n o p rep -- v ) [ 4 set-firstn ] keep underlying>> ; +: (simd-select) ( a n rep -- x ) [ swap ] dip >rep-array nth-unsafe ; + +: alien-vector ( c-ptr n rep -- value ) + [ swap ] dip rep-size memory>byte-array ; +: set-alien-vector ( value c-ptr n rep -- ) + [ swap swap ] dip rep-size memcpy ; + +"compiler.cfg.intrinsics.simd" require +"compiler.tree.propagation.simd" require +"compiler.cfg.value-numbering.simd" require + diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor index d600b0bc24..8aeea4267d 100644 --- a/basis/math/vectors/simd/simd-docs.factor +++ b/basis/math/vectors/simd/simd-docs.factor @@ -1,5 +1,5 @@ USING: classes.tuple.private cpu.architecture help.markup -help.syntax kernel.private math math.vectors +help.syntax kernel.private math math.vectors math.vectors.simd.intrinsics sequences ; IN: math.vectors.simd @@ -163,7 +163,6 @@ ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives" { $list "They operate on raw byte arrays, with a separate “representation” parameter passed in to determine the type of the operands and result." "They are unsafe; passing values which are not byte arrays, or byte arrays with the wrong size, will dereference invalid memory and possibly crash Factor." - { "They do not have software fallbacks; if the current CPU does not have SIMD support, a " { $link bad-simd-call } " error will be thrown." } } "The compiler converts " { $link "math-vectors" } " into SIMD primitives automatically in cases where it is safe; this means that the input types are known to be SIMD vectors, and the CPU supports SIMD." $nl diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index 0e40e787df..5289f3f393 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -95,6 +95,9 @@ DEFER: simd-construct-op PRIVATE> >> +DEFER: simd-with +DEFER: simd-cast + << Date: Tue, 24 Nov 2009 11:37:28 -0800 Subject: [PATCH 29/46] fix simd intrinsic compilation --- basis/compiler/cfg/builder/builder.factor | 6 ++++++ .../cfg/intrinsics/simd/backend/backend.factor | 4 +++- basis/compiler/cfg/intrinsics/simd/simd.factor | 16 ++++++++-------- basis/compiler/tree/propagation/simd/simd.factor | 16 ++++++++++++---- 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/basis/compiler/cfg/builder/builder.factor b/basis/compiler/cfg/builder/builder.factor index cf6215c5cd..f1b3447fc7 100755 --- a/basis/compiler/cfg/builder/builder.factor +++ b/basis/compiler/cfg/builder/builder.factor @@ -45,6 +45,12 @@ SYMBOL: loops end-stack-analysis ] with-scope ; inline +: with-dummy-cfg-builder ( node quot -- ) + [ + [ V{ } clone procedures ] 2dip + '[ _ t t [ _ call( node -- ) ] with-cfg-builder ] with-variable + ] { } make drop ; + GENERIC: emit-node ( node -- ) : emit-nodes ( nodes -- ) diff --git a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor index 1a229672b9..cf61a560d2 100644 --- a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor +++ b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor @@ -151,6 +151,8 @@ MACRO: check-elements ( quots -- ) [ length 1 - \ and [ ] like ] tri 3append ; +ERROR: bad-simd-intrinsic node ; + MACRO: if-literals-match ( quots -- ) [ length ] [ ] [ length ] tri ! n quots n @@ -165,7 +167,7 @@ MACRO: if-literals-match ( quots -- ) ! node literals quot [ _ firstn ] dip call drop - ] [ 2drop emit-primitive ] if + ] [ 2drop bad-simd-intrinsic ] if ] ; CONSTANT: [unary] [ ds-drop ds-pop ] diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index 5f4b71a846..a96a0b7cb3 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -77,14 +77,6 @@ IN: compiler.cfg.intrinsics.simd [ [ ^^fill-vector ] [ ^^xor-vector ] bi ] } v-vector-op ; -:: ^minmax-compare-vector ( src1 src2 rep cc -- dst ) - cc order-cc { - { cc< [ src1 src2 rep ^^max-vector src1 rep cc/= ^^compare-vector ] } - { cc<= [ src1 src2 rep ^^min-vector src1 rep cc= ^^compare-vector ] } - { cc> [ src1 src2 rep ^^min-vector src1 rep cc/= ^^compare-vector ] } - { cc>= [ src1 src2 rep ^^max-vector src1 rep cc= ^^compare-vector ] } - } case ; - :: ^((compare-vector)) ( src1 src2 rep {cc,swap} -- dst ) {cc,swap} first2 :> ( cc swap? ) swap? @@ -107,6 +99,14 @@ IN: compiler.cfg.intrinsics.simd not? [ rep ^not-vector ] when ] if ; +:: ^minmax-compare-vector ( src1 src2 rep cc -- dst ) + cc order-cc { + { cc< [ src1 src2 rep ^^max-vector src1 rep cc/= ^(compare-vector) ] } + { cc<= [ src1 src2 rep ^^min-vector src1 rep cc= ^(compare-vector) ] } + { cc> [ src1 src2 rep ^^min-vector src1 rep cc/= ^(compare-vector) ] } + { cc>= [ src1 src2 rep ^^max-vector src1 rep cc= ^(compare-vector) ] } + } case ; + : ^compare-vector ( src1 src2 rep cc -- dst ) { [ ^(compare-vector) ] diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index 93eb2a8ecc..6002b15c1c 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -1,8 +1,9 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors byte-arrays combinators continuations fry sequences -compiler.tree.propagation.info cpu.architecture kernel words make math -math.intervals math.vectors.simd.intrinsics ; +USING: accessors assocs byte-arrays combinators compiler.cfg.builder +continuations fry sequences compiler.tree.propagation.info +cpu.architecture kernel words make math math.intervals +math.vectors.simd.intrinsics ; IN: compiler.tree.propagation.simd CONSTANT: vector>vector-intrinsics @@ -98,8 +99,15 @@ vector>vector-intrinsics [ { byte-array } "default-output-classes" set-word-prop real [0,inf] value-info-intersect ] "outputs" set-word-prop +: clone-with-value-infos ( node -- node' ) + clone dup in-d>> [ dup value-info ] H{ } map>assoc >>info ; + : try-intrinsic ( node intrinsic-quot -- ? ) - '[ [ _ call( node -- ) ] { } make drop t ] [ 2drop f ] recover ; + '[ + _ clone-with-value-infos + _ with-dummy-cfg-builder + t + ] [ drop f ] recover ; : inline-unless-intrinsic ( word -- ) dup '[ From 9c388bf7813cf2f291c0eea22a24cd796e2ce8b0 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Tue, 24 Nov 2009 12:50:27 -0800 Subject: [PATCH 30/46] update compiler.cfg.intrinsics.simd tests --- .../cfg/intrinsics/simd/simd-tests.factor | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/basis/compiler/cfg/intrinsics/simd/simd-tests.factor b/basis/compiler/cfg/intrinsics/simd/simd-tests.factor index 2d38e541a4..c2e233902e 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd-tests.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd-tests.factor @@ -1,10 +1,11 @@ ! (c)2009 Joe Groff bsd license USING: arrays assocs biassocs byte-arrays byte-arrays.hex classes compiler.cfg compiler.cfg.comparisons compiler.cfg.instructions -compiler.cfg.intrinsics.simd compiler.cfg.registers -compiler.cfg.stacks.height compiler.cfg.stacks.local compiler.tree -compiler.tree.propagation.info cpu.architecture fry hashtables kernel -locals make namespaces sequences system tools.test words ; +compiler.cfg.intrinsics.simd compiler.cfg.intrinsics.simd.backend +compiler.cfg.registers compiler.cfg.stacks.height +compiler.cfg.stacks.local compiler.tree compiler.tree.propagation.info +cpu.architecture fry hashtables kernel locals make namespaces sequences +system tools.test words ; IN: compiler.cfg.intrinsics.simd.tests :: test-node ( rep -- node ) @@ -532,15 +533,12 @@ unit-test unit-test ! test with nonliteral/invalid reps -[ { ##inc-d ##branch } ] [ simple-ops-cpu [ emit-simd-v+ ] test-emit-nonliteral-rep ] -unit-test +[ bad-simd-intrinsic? ] must-fail-with -[ { ##branch } ] [ simple-ops-cpu f [ emit-simd-v+ ] test-emit ] -unit-test +[ bad-simd-intrinsic? ] must-fail-with -[ { ##branch } ] [ simple-ops-cpu 3 [ emit-simd-v+ ] test-emit ] -unit-test +[ bad-simd-intrinsic? ] must-fail-with From c98eb8494337e128263b7f8cdbf0fa0db2011623 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Tue, 24 Nov 2009 18:30:12 -0800 Subject: [PATCH 31/46] make math.vectors.simd tests pass again --- .../compiler/cfg/intrinsics/simd/simd.factor | 9 +- .../vectors/simd/intrinsics/intrinsics.factor | 28 +++--- basis/math/vectors/simd/simd-tests.factor | 9 +- basis/math/vectors/simd/simd.factor | 91 +++++++------------ basis/math/vectors/vectors.factor | 6 -- 5 files changed, 57 insertions(+), 86 deletions(-) diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index a96a0b7cb3..109ac6ce8e 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -253,14 +253,15 @@ IN: compiler.cfg.intrinsics.simd src rep ^unpack-vector-head :> head src rep ^unpack-vector-tail :> tail rep widen-vector-rep :> wide-rep - head tail wide-rep ^^add-vector wide-rep ^(sum-vector) + head tail wide-rep ^^add-vector wide-rep + ^(sum-vector) ] } } v-vector-op ; : shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ; -: ^shuffle-vector-imm ( src1 src2 rep -- dst ) - { +: ^shuffle-vector-imm ( src1 shuffle rep -- dst ) + [ rep-length 0 pad-tail ] keep { [ ^^shuffle-vector-imm ] [ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] bi ] } vl-vector-op ; @@ -358,7 +359,7 @@ IN: compiler.cfg.intrinsics.simd : emit-simd-v. ( node -- ) { [ ^^dot-vector ] - [ [ ^^mul-vector ] [ ^sum-vector ] bi ] + { float-vector-rep [ [ ^^mul-vector ] [ ^sum-vector ] bi ] } } emit-vv-vector-op ; : emit-simd-vsqrt ( node -- ) diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index a236db00c9..187c6db586 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -112,8 +112,8 @@ IN: math.vectors.simd.intrinsics a rep >rep-array :> a' rep :> c' elts [| from to | - from a' nth-unsafe - rep rep-length 1 - bitand + from rep rep-length 1 - bitand + a' nth-unsafe to c' set-nth-unsafe ] each-index c' underlying>> ; inline @@ -134,9 +134,12 @@ PRIVATE> n 1 + c' set-nth-unsafe ] each c' underlying>> ; -: (simd-vs+) ( a b rep -- c ) dup '[ + _ c-type-clamp ] components-2map ; -: (simd-vs-) ( a b rep -- c ) dup '[ - _ c-type-clamp ] components-2map ; -: (simd-vs*) ( a b rep -- c ) dup '[ - _ c-type-clamp ] components-2map ; +: (simd-vs+) ( a b rep -- c ) + dup rep-component-type '[ + _ c-type-clamp ] components-2map ; +: (simd-vs-) ( a b rep -- c ) + dup rep-component-type '[ - _ c-type-clamp ] components-2map ; +: (simd-vs*) ( a b rep -- c ) + dup rep-component-type '[ * _ c-type-clamp ] components-2map ; : (simd-v*) ( a b rep -- c ) [ * ] components-2map ; : (simd-v/) ( a b rep -- c ) [ / ] components-2map ; : (simd-vmin) ( a b rep -- c ) [ min ] components-2map ; @@ -160,9 +163,9 @@ PRIVATE> : (simd-vlshift) ( a n rep -- c ) swap '[ _ shift ] bitwise-components-map ; : (simd-vrshift) ( a n rep -- c ) swap '[ _ neg shift ] bitwise-components-map ; : (simd-hlshift) ( a n rep -- c ) - drop tail-slice 16 0 pad-tail ; + drop head-slice* 16 0 pad-head ; : (simd-hrshift) ( a n rep -- c ) - drop head-slice 16 0 pad-head ; + drop tail-slice 16 0 pad-tail ; : (simd-vshuffle-elements) ( a n rep -- c ) [ rep-length 0 pad-tail ] keep (vshuffle) ; : (simd-vshuffle-bytes) ( a b rep -- c ) drop uchar-16-rep (vshuffle) ; :: (simd-vmerge-head) ( a b rep -- c ) @@ -198,17 +201,17 @@ PRIVATE> : (simd-vall?) ( a rep -- ? ) [ bitand ] bitwise-components-reduce zero? not ; : (simd-vnone?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? ; : (simd-v>float) ( a rep -- c ) - [ >rep-array ] [ >float-vector-rep [>rep-array] ] bi call( i -- f ) ; + [ >rep-array ] [ >float-vector-rep [>rep-array] ] bi call( i -- f ) underlying>> ; : (simd-v>integer) ( a rep -- c ) - [ >rep-array ] [ >int-vector-rep [>rep-array] ] bi call( i -- f ) ; + [ >rep-array ] [ >int-vector-rep [>rep-array] ] bi call( i -- f ) underlying>> ; : (simd-vpack-signed) ( a b rep -- c ) [ 2>rep-array cord-append ] [ narrow-vector-rep [ [] ] [ rep-component-type ] bi ] bi - '[ _ c-type-clamp ] swap map-as ; + '[ _ c-type-clamp ] swap map-as underlying>> ; : (simd-vpack-unsigned) ( a b rep -- c ) [ 2>rep-array cord-append ] [ narrow-vector-rep >uint-vector-rep [ [] ] [ rep-component-type ] bi ] bi - '[ _ c-type-clamp ] swap map-as ; + '[ _ c-type-clamp ] swap map-as underlying>> ; : (simd-vunpack-head) ( a rep -- c ) [ >rep-array ] [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ] bi [ head-slice ] dip call( a' -- c' ) underlying>> ; @@ -216,7 +219,8 @@ PRIVATE> [ >rep-array ] [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ] bi [ tail-slice ] dip call( a' -- c' ) underlying>> ; : (simd-with) ( n rep -- v ) - [ rep-length iota swap '[ _ ] ] [ ] bi replicate-as ; + [ rep-length iota swap '[ _ ] ] [ ] bi replicate-as + underlying>> ; : (simd-gather-2) ( m n rep -- v ) [ 2 set-firstn ] keep underlying>> ; : (simd-gather-4) ( m n o p rep -- v ) [ 4 set-firstn ] keep underlying>> ; : (simd-select) ( a n rep -- x ) [ swap ] dip >rep-array nth-unsafe ; diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index 1fb947921c..b590589345 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -5,7 +5,8 @@ math.vectors.simd.private prettyprint random sequences system tools.test vocabs assocs compiler.cfg.debugger words locals combinators cpu.architecture namespaces byte-arrays alien specialized-arrays classes.struct eval classes.algebra sets -quotations math.constants compiler.units ; +quotations math.constants compiler.units splitting ; +FROM: math.vectors.simd.intrinsics => alien-vector set-alien-vector ; QUALIFIED-WITH: alien.c-types c SPECIALIZED-ARRAY: c:float IN: math.vectors.simd.tests @@ -261,8 +262,8 @@ simd-classes&reps [ : check-boolean-ops ( class elt-class compare-quot -- seq ) [ - [ boolean-ops [ dup word-schema ] { } map>assoc ] 2dip - '[ first2 inputs _ _ check-boolean-op ] + [ boolean-ops [ dup vector-words at ] { } map>assoc ] 2dip + '[ first2 vector-word-inputs _ _ check-boolean-op ] ] dip check-optimizer ; inline simd-classes&reps [ @@ -558,7 +559,7 @@ STRUCT: simd-struct [ ] [ char-16 new 1array stack. ] unit-test ! CSSA bug -[ 8000000 ] [ +[ 4000000 ] [ int-4{ 1000 1000 1000 1000 } [ { int-4 } declare dup [ * ] [ + ] 2map-reduce ] compile-call ] unit-test diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index 5289f3f393..bde69b5dbd 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -49,6 +49,9 @@ TUPLE: simd-128 GENERIC: simd-element-type ( obj -- c-type ) GENERIC: simd-rep ( simd -- rep ) +M: object simd-element-type drop f ; +M: object simd-rep drop f ; + << > ] bi ; inline @@ -74,6 +74,9 @@ DEFER: simd-construct-op : vn->v-op ( a n rep quot: ( (a) n rep -- (c) ) fallback-quot -- c ) drop [ simd-unbox ] 3dip 3curry make-underlying ; inline +: vn->n-op ( a n rep quot: ( (a) n rep -- n ) fallback-quot -- n ) + drop [ underlying>> ] 3dip call ; inline + : v->n-op ( a rep quot: ( (a) rep -- n ) fallback-quot -- n ) drop [ underlying>> ] 2dip call ; inline @@ -95,9 +98,6 @@ DEFER: simd-construct-op PRIVATE> >> -DEFER: simd-with -DEFER: simd-cast - << A ] unless ; inline -: A-with ( n -- v ) \ A new simd-with ; inline -: A-cast ( v -- v' ) \ A new simd-cast ; inline +: A-with ( n -- v ) COERCER call \ A-rep (simd-with) \ A boa ; inline +: A-cast ( v -- v' ) underlying>> \ A boa ; inline ! SIMD vectors as sequences @@ -145,24 +146,7 @@ M: A hashcode* underlying>> hashcode* ; inline M: A clone [ clone ] change-underlying ; inline M: A length drop N ; inline M: A nth-unsafe - swap { - { 0 [ 0 \ A-rep (simd-select) ] } - { 1 [ 1 \ A-rep (simd-select) ] } - { 2 [ 2 \ A-rep (simd-select) ] } - { 3 [ 3 \ A-rep (simd-select) ] } - { 4 [ 4 \ A-rep (simd-select) ] } - { 5 [ 5 \ A-rep (simd-select) ] } - { 6 [ 6 \ A-rep (simd-select) ] } - { 7 [ 7 \ A-rep (simd-select) ] } - { 8 [ 8 \ A-rep (simd-select) ] } - { 9 [ 9 \ A-rep (simd-select) ] } - { 10 [ 10 \ A-rep (simd-select) ] } - { 11 [ 11 \ A-rep (simd-select) ] } - { 12 [ 12 \ A-rep (simd-select) ] } - { 13 [ 13 \ A-rep (simd-select) ] } - { 14 [ 14 \ A-rep (simd-select) ] } - { 15 [ 15 \ A-rep (simd-select) ] } - } case ; inline + swap \ A-rep [ (simd-select) ] [ call-next-method ] vn->n-op ; inline M: A c:byte-length drop 16 ; inline M: A new-sequence @@ -171,7 +155,7 @@ M: A new-sequence [ length bad-simd-length ] if ; inline M: A equal? - \ A [ drop v= vall? ] [ 3drop f ] if-both-vectors-match ; inline + \ A-rep [ drop v= vall? ] [ 3drop f ] if-both-vectors-match ; inline ! SIMD primitive operations @@ -205,7 +189,7 @@ M: A vrshift \ A-rep [ (simd-vrshift) ] [ call-next-method ] M: A hlshift \ A-rep [ (simd-hlshift) ] [ call-next-method ] vn->v-op ; inline M: A hrshift \ A-rep [ (simd-hrshift) ] [ call-next-method ] vn->v-op ; inline M: A vshuffle-elements \ A-rep [ (simd-vshuffle-elements) ] [ call-next-method ] vn->v-op ; inline -M: A vshuffle-bytes \ A-rep [ (simd-vshuffle-bytes) ] [ call-next-method ] vv->v-op ; inline +M: A vshuffle-bytes \ A-rep [ (simd-vshuffle-bytes) ] [ call-next-method ] vv'->v-op ; inline M: A (vmerge-head) \ A-rep [ (simd-vmerge-head) ] [ call-next-method ] vv->v-op ; inline M: A (vmerge-tail) \ A-rep [ (simd-vmerge-tail) ] [ call-next-method ] vv->v-op ; inline M: A v<= \ A-rep [ (simd-v<=) ] [ call-next-method ] vv->v-op ; inline @@ -220,15 +204,15 @@ M: A vnone? \ A-rep [ (simd-vnone?) ] [ call-next-method ] ! SIMD high-level specializations -M: A vbroadcast [ swap nth ] keep simd-with ; inline -M: A n+v [ simd-with ] keep v+ ; inline -M: A n-v [ simd-with ] keep v- ; inline -M: A n*v [ simd-with ] keep v* ; inline -M: A n/v [ simd-with ] keep v/ ; inline -M: A v+n over simd-with v+ ; inline -M: A v-n over simd-with v- ; inline -M: A v*n over simd-with v* ; inline -M: A v/n over simd-with v/ ; inline +M: A vbroadcast swap nth A-with ; inline +M: A n+v [ A-with ] dip v+ ; inline +M: A n-v [ A-with ] dip v- ; inline +M: A n*v [ A-with ] dip v* ; inline +M: A n/v [ A-with ] dip v/ ; inline +M: A v+n A-with v+ ; inline +M: A v-n A-with v- ; inline +M: A v*n A-with v* ; inline +M: A v/n A-with v/ ; inline M: A norm-sq dup v. assert-positive ; inline M: A norm norm-sq sqrt ; inline M: A distance v- norm ; inline @@ -236,11 +220,13 @@ M: A distance v- norm ; inline ! M: simd-128 >pprint-sequence ; ! M: simd-128 pprint* pprint-object ; -\ A-boa \ A new N { - { 2 [ '[ _ [ (simd-gather-2) ] simd-construct-op ] ] } - { 4 [ '[ _ [ (simd-gather-4) ] simd-construct-op ] ] } - [ swap '[ _ _ nsequence ] ] -} case BOA-EFFECT define-inline +\ A-boa +[ COERCER N napply ] N { + { 2 [ [ A-rep (simd-gather-2) A boa ] ] } + { 4 [ [ A-rep (simd-gather-4) A boa ] ] } + [ \ A new '[ _ _ nsequence ] ] +} case compose +BOA-EFFECT define-inline M: A pprint-delims drop \ A{ \ } ; SYNTAX: A{ \ } [ >A ] parse-literal ; @@ -248,7 +234,7 @@ SYNTAX: A{ \ } [ >A ] parse-literal ; c: byte-array >>class A >>boxed-class - [ A-rep alien-vector \ A boa ] >>getter + [ A-rep alien-vector A boa ] >>getter [ [ underlying>> ] 2dip A-rep set-alien-vector ] >>setter 16 >>size 16 >>align @@ -266,21 +252,6 @@ PRIVATE> INSTANCE: simd-128 sequence -! SIMD constructors - -: simd-with ( n seq -- v ) - [ (simd-with) ] simd-construct-op ; inline - -MACRO: simd-boa ( class -- ) - new dup length { - { 2 [ '[ _ [ (simd-gather-2) ] simd-construct-op ] ] } - { 4 [ '[ _ [ (simd-gather-4) ] simd-construct-op ] ] } - [ swap '[ _ _ nsequence ] ] - } case ; - -: simd-cast ( v seq -- v' ) - [ underlying>> ] dip new-underlying ; inline - ! SIMD instances SIMD-128: char-16 diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index d524ba309f..c0b129e6d2 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -108,10 +108,6 @@ M: object vshuffle-elements swap [ '[ _ nth ] ] keep map-as ; GENERIC# vshuffle-bytes 1 ( u perm -- v ) -M: object vshuffle-bytes - underlying>> [ - swap [ '[ 15 bitand _ nth ] ] keep map-as - ] curry change-underlying ; GENERIC: vshuffle ( u perm -- v ) M: array vshuffle ( u perm -- v ) @@ -123,9 +119,7 @@ GENERIC# vrshift 1 ( u n -- w ) M: object vrshift neg '[ _ shift ] map ; GENERIC# hlshift 1 ( u n -- w ) -M: object hlshift '[ _ prepend 16 head ] change-underlying ; GENERIC# hrshift 1 ( u n -- w ) -M: object hrshift '[ _ append 16 tail* ] change-underlying ; GENERIC: (vmerge-head) ( u v -- h ) M: object (vmerge-head) over length 2 /i '[ _ head-slice ] bi@ [ zip ] keep concat-as ; From d344023b1c3f4b20bbe7d318d68f591b893c7d9e Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Tue, 24 Nov 2009 20:53:40 -0800 Subject: [PATCH 32/46] update math.vectors.conversion --- .../vectors/conversion/backend/backend.factor | 21 --------- .../conversion/conversion-tests.factor | 46 +++++-------------- .../math/vectors/conversion/conversion.factor | 30 ++++++++---- .../vectors/simd/intrinsics/intrinsics.factor | 8 ++-- basis/math/vectors/simd/simd.factor | 4 +- 5 files changed, 37 insertions(+), 72 deletions(-) delete mode 100644 basis/math/vectors/conversion/backend/backend.factor diff --git a/basis/math/vectors/conversion/backend/backend.factor b/basis/math/vectors/conversion/backend/backend.factor deleted file mode 100644 index d47fab1b0e..0000000000 --- a/basis/math/vectors/conversion/backend/backend.factor +++ /dev/null @@ -1,21 +0,0 @@ -! (c)Joe Groff bsd license -USING: accessors alien.c-types arrays assocs classes combinators -cords fry kernel math math.vectors sequences ; -IN: math.vectors.conversion.backend - -: saturate-map-as ( v quot result -- w ) - [ element-type '[ @ _ c-type-clamp ] ] keep map-as ; inline - -: (v>float) ( i to-type -- f ) - [ >float ] swap new map-as ; -: (v>integer) ( f to-type -- i ) - [ >integer ] swap new map-as ; -: (vpack-signed) ( a b to-type -- ab ) - [ cord-append [ ] ] dip new saturate-map-as ; -: (vpack-unsigned) ( a b to-type -- ab ) - [ cord-append [ ] ] dip new saturate-map-as ; -: (vunpack-head) ( ab to-type -- a ) - [ dup length 2 /i head-slice ] dip new like ; -: (vunpack-tail) ( ab to-type -- b ) - [ dup length 2 /i tail-slice ] dip new like ; - diff --git a/basis/math/vectors/conversion/conversion-tests.factor b/basis/math/vectors/conversion/conversion-tests.factor index 0f48b47756..c91bdb369e 100644 --- a/basis/math/vectors/conversion/conversion-tests.factor +++ b/basis/math/vectors/conversion/conversion-tests.factor @@ -3,16 +3,6 @@ USING: accessors arrays compiler continuations generalizations kernel kernel.private locals math.vectors.conversion math.vectors.simd sequences stack-checker tools.test ; FROM: alien.c-types => char uchar short ushort int uint longlong ulonglong float double ; -SIMD: uchar -SIMD: char -SIMD: ushort -SIMD: short -SIMD: uint -SIMD: int -SIMD: ulonglong -SIMD: longlong -SIMD: float -SIMD: double IN: math.vectors.conversion.tests ERROR: optimized-vconvert-inconsistent @@ -59,12 +49,12 @@ MACRO:: test-vconvert ( from-type to-type -- ) [ double-2{ -5.0 1.0 } ] [ longlong-2{ -5 1 } longlong-2 double-2 test-vconvert ] unit-test -[ longlong-4{ -5 1 2 6 } ] -[ double-4{ -5.0 1.0 2.3 6.7 } double-4 longlong-4 test-vconvert ] unit-test +[ longlong-2{ -5 1 } ] +[ double-2{ -5.0 1.0 } double-2 longlong-2 test-vconvert ] unit-test ! TODO we should be able to do double->int pack -! [ int-8{ -5 1 2 6 12 34 -56 78 } ] -[ double-4{ -5.0 1.0 2.0 6.0 } double-4{ 12.0 34.0 -56.0 78.0 } double-4 int-8 test-vconvert ] +! [ int-4{ -5 1 12 34 } ] +[ double-2{ -5.0 1.0 } double-2{ 12.0 34.0 } double-2 int-4 test-vconvert ] [ error>> bad-vconvert? ] must-fail-with [ float-4{ -1.25 2.0 3.0 -4.0 } ] @@ -76,10 +66,10 @@ MACRO:: test-vconvert ( from-type to-type -- ) [ short-8{ -1 2 3 -32768 5 32767 -7 32767 } ] [ int-4{ -1 2 3 -40000 } int-4{ 5 60000 -7 80000 } int-4 short-8 test-vconvert ] unit-test -[ short-16{ -1 2 3 -32768 3 2 1 0 5 32767 -7 32767 7 6 5 4 } ] +[ short-8{ -1 2 3 -32768 5 32767 -7 32767 } ] [ - int-8{ -1 2 3 -40000 3 2 1 0 } - int-8{ 5 60000 -7 80000 7 6 5 4 } int-8 short-16 test-vconvert + int-4{ -1 2 3 -40000 } + int-4{ 5 60000 -7 80000 } int-4 short-8 test-vconvert ] unit-test [ ushort-8{ 0 2 3 0 5 60000 0 65535 } ] @@ -97,15 +87,6 @@ MACRO:: test-vconvert ( from-type to-type -- ) uchar-16 ushort-8 test-vconvert ] unit-test -! TODO we should be able to do 256->128 pack -! [ float-4{ -1.25 2.0 3.0 -4.0 } ] -[ double-4{ -1.25 2.0 3.0 -4.0 } double-4 float-4 test-vconvert ] -[ error>> bad-vconvert? ] must-fail-with - -! [ int-4{ -1 2 3 -4 } ] -[ longlong-4{ -1 2 3 -4 } longlong-4 int-4 test-vconvert ] -[ error>> bad-vconvert? ] must-fail-with - [ double-2{ -1.25 2.0 } double-2{ 3.0 -4.0 } ] [ float-4{ -1.25 2.0 3.0 -4.0 } float-4 double-2 test-vconvert ] unit-test @@ -121,8 +102,8 @@ MACRO:: test-vconvert ( from-type to-type -- ) [ ulonglong-2{ 1 2 } ulonglong-2{ 3 4 } ] [ uint-4{ 1 2 3 4 } uint-4 ulonglong-2 test-vconvert ] unit-test -[ longlong-4{ 1 2 3 4 } longlong-4{ 3 4 5 6 } ] -[ uint-8{ 1 2 3 4 3 4 5 6 } uint-8 longlong-4 test-vconvert ] unit-test +[ longlong-2{ 1 2 } longlong-2{ 3 4 } ] +[ uint-4{ 1 2 3 4 } uint-4 longlong-2 test-vconvert ] unit-test [ int-4{ 1 2 -3 -4 } int-4{ 5 -6 7 -8 } ] [ short-8{ 1 2 -3 -4 5 -6 7 -8 } short-8 int-4 test-vconvert ] unit-test @@ -130,13 +111,8 @@ MACRO:: test-vconvert ( from-type to-type -- ) [ uint-4{ 1 2 3 4 } uint-4{ 5 6 7 8 } ] [ ushort-8{ 1 2 3 4 5 6 7 8 } ushort-8 uint-4 test-vconvert ] unit-test -[ longlong-4{ 1 2 3 4 } longlong-4{ 3 4 5 6 } ] -[ uint-8{ 1 2 3 4 3 4 5 6 } uint-8 longlong-4 test-vconvert ] unit-test - -! TODO we should be able to do 128->256 unpack -! [ longlong-4{ 1 2 3 4 } ] -[ uint-4{ 1 2 3 4 } uint-4 longlong-4 test-vconvert ] -[ error>> bad-vconvert? ] must-fail-with +[ longlong-2{ 1 2 } longlong-2{ 3 4 } ] +[ uint-4{ 1 2 3 4 } uint-4 longlong-2 test-vconvert ] unit-test ! TODO we should be able to do multi-tier pack/unpack ! [ longlong-2{ 1 2 } longlong-2{ 3 4 } longlong-2{ 5 6 } longlong-2{ 7 8 } ] diff --git a/basis/math/vectors/conversion/conversion.factor b/basis/math/vectors/conversion/conversion.factor index fd58b11dc8..50bb9c8726 100644 --- a/basis/math/vectors/conversion/conversion.factor +++ b/basis/math/vectors/conversion/conversion.factor @@ -1,7 +1,7 @@ ! (c)Joe Groff bsd license USING: accessors alien.c-types arrays assocs classes combinators combinators.short-circuit cords fry kernel locals math -math.vectors math.vectors.conversion.backend sequences ; +math.vectors math.vectors.simd math.vectors.simd.intrinsics sequences ; FROM: alien.c-types => char uchar short ushort int uint longlong ulonglong float double ; IN: math.vectors.conversion @@ -30,11 +30,11 @@ ERROR: bad-vconvert-input value expected-type ; } { [ from-element float-type? ] - [ [ to-type (v>integer) ] ] + [ from-type new simd-rep '[ underlying>> _ (simd-v>integer) to-type boa ] ] } { [ to-element float-type? ] - [ [ to-type (v>float) ] ] + [ from-type new simd-rep '[ underlying>> _ (simd-v>float) to-type boa ] ] } } cond [ from-type check-vconvert-type ] prepose ; @@ -47,10 +47,18 @@ ERROR: bad-vconvert-input value expected-type ; } 0|| [ from-type to-type bad-vconvert ] when ; :: [[vpack-unsigned]] ( from-type to-type -- quot ) - [ [ from-type check-vconvert-type ] bi@ to-type (vpack-unsigned) ] ; + from-type new simd-rep + '[ + [ from-type check-vconvert-type underlying>> ] bi@ + _ (simd-vpack-unsigned) to-type boa + ] ; :: [[vpack-signed]] ( from-type to-type -- quot ) - [ [ from-type check-vconvert-type ] bi@ to-type (vpack-signed) ] ; + from-type new simd-rep + '[ + [ from-type check-vconvert-type underlying>> ] bi@ + _ (simd-vpack-signed) to-type boa + ] ; :: [vpack] ( from-element to-element from-size to-size from-type to-type -- quot ) from-size to-size /i log2 :> steps @@ -68,9 +76,11 @@ ERROR: bad-vconvert-input value expected-type ; } 0|| [ from-type to-type bad-vconvert ] when ; :: [[vunpack]] ( from-type to-type -- quot ) - [ - from-type check-vconvert-type - [ to-type (vunpack-head) ] [ to-type (vunpack-tail) ] bi + from-type new simd-rep + '[ + from-type check-vconvert-type underlying>> _ + [ (simd-vunpack-head) to-type boa ] + [ (simd-vunpack-tail) to-type boa ] 2bi ] ; :: [vunpack] ( from-element to-element from-size to-size from-type to-type -- quot ) @@ -81,8 +91,8 @@ ERROR: bad-vconvert-input value expected-type ; PRIVATE> MACRO:: vconvert ( from-type to-type -- ) - from-type new [ element-type ] [ byte-length ] bi :> ( from-element from-length ) - to-type new [ element-type ] [ byte-length ] bi :> ( to-element to-length ) + from-type new [ simd-element-type ] [ byte-length ] bi :> ( from-element from-length ) + to-type new [ simd-element-type ] [ byte-length ] bi :> ( to-element to-length ) from-element heap-size :> from-size to-element heap-size :> to-size diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index 187c6db586..d2e0305fa3 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -201,16 +201,16 @@ PRIVATE> : (simd-vall?) ( a rep -- ? ) [ bitand ] bitwise-components-reduce zero? not ; : (simd-vnone?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? ; : (simd-v>float) ( a rep -- c ) - [ >rep-array ] [ >float-vector-rep [>rep-array] ] bi call( i -- f ) underlying>> ; + [ >rep-array [ >float ] ] [ >float-vector-rep ] bi map-as underlying>> ; : (simd-v>integer) ( a rep -- c ) - [ >rep-array ] [ >int-vector-rep [>rep-array] ] bi call( i -- f ) underlying>> ; + [ >rep-array [ >integer ] ] [ >int-vector-rep ] bi map-as underlying>> ; : (simd-vpack-signed) ( a b rep -- c ) [ 2>rep-array cord-append ] - [ narrow-vector-rep [ [] ] [ rep-component-type ] bi ] bi + [ narrow-vector-rep [ ] [ rep-component-type ] bi ] bi '[ _ c-type-clamp ] swap map-as underlying>> ; : (simd-vpack-unsigned) ( a b rep -- c ) [ 2>rep-array cord-append ] - [ narrow-vector-rep >uint-vector-rep [ [] ] [ rep-component-type ] bi ] bi + [ narrow-vector-rep >uint-vector-rep [ ] [ rep-component-type ] bi ] bi '[ _ c-type-clamp ] swap map-as underlying>> ; : (simd-vunpack-head) ( a rep -- c ) [ >rep-array ] [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ] bi diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index bde69b5dbd..e89edd3de3 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -217,8 +217,8 @@ M: A norm-sq dup v. assert-positive ; inline M: A norm norm-sq sqrt ; inline M: A distance v- norm ; inline -! M: simd-128 >pprint-sequence ; -! M: simd-128 pprint* pprint-object ; +M: A >pprint-sequence ; +M: A pprint* pprint-object ; \ A-boa [ COERCER N napply ] N { From 3f5b3c2c1bfa9b6b9394a5d53a11984895f988f4 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Tue, 24 Nov 2009 22:20:43 -0800 Subject: [PATCH 33/46] fix math.vectors.conversion help-lint --- .../vectors/conversion/conversion-docs.factor | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/basis/math/vectors/conversion/conversion-docs.factor b/basis/math/vectors/conversion/conversion-docs.factor index 9fe5ac4c17..7f2a349c52 100644 --- a/basis/math/vectors/conversion/conversion-docs.factor +++ b/basis/math/vectors/conversion/conversion-docs.factor @@ -22,7 +22,7 @@ HELP: vconvert } { $description "Converts SIMD vectors of " { $snippet "from-type" } " to " { $snippet "to-type" } ". The number of inputs and outputs depends on the relationship of the two types:" { $list -{ "If " { $snippet "to-type" } " is a floating-point vector type with the same byte length and element count as the integer vector type " { $snippet "from-type" } " (for example, from " { $snippet "int-8" } " to " { $snippet "float-8" } " or from " { $snippet "longlong-2" } " to " { $snippet "double-2" } "), " { $snippet "vconvert" } " takes one vector of " { $snippet "from-type" } " and converts its elements to floating-point, outputting one vector of " { $snippet "to-type" } "." } +{ "If " { $snippet "to-type" } " is a floating-point vector type with the same byte length and element count as the integer vector type " { $snippet "from-type" } " (for example, from " { $snippet "int-4" } " to " { $snippet "float-4" } " or from " { $snippet "longlong-2" } " to " { $snippet "double-2" } "), " { $snippet "vconvert" } " takes one vector of " { $snippet "from-type" } " and converts its elements to floating-point, outputting one vector of " { $snippet "to-type" } "." } { "Likewise, if " { $snippet "to-type" } " is an integer vector type with the same byte length and element count as the floating-point vector type " { $snippet "from-type" } ", " { $snippet "vconvert" } " takes one vector of " { $snippet "from-type" } " and truncates its elements to integers, outputting one vector of " { $snippet "to-type" } "." } { "If " { $snippet "to-type" } " is a vector type with the same byte length as and twice the element count of the vector type " { $snippet "from-type" } " (for example, from " { $snippet "int-4" } " to " { $snippet "ushort-8" } ", from " { $snippet "double-2" } " to " { $snippet "float-4" } ", or from " { $snippet "short-8" } " to " { $snippet "char-16" } "), " { $snippet "vconvert" } " takes two vectors of " { $snippet "from-type" } " and packs them into one vector of " { $snippet "to-type" } ", saturating values too large or small to be representable as elements of " { $snippet "to-type" } "." } { "If " { $snippet "to-type" } " is a vector type with the same byte length as and half the element count of the vector type " { $snippet "from-type" } " (for example, from " { $snippet "ushort-8" } " to " { $snippet "int-4" } ", from " { $snippet "float-4" } " to " { $snippet "double-2" } ", or from " { $snippet "char-16" } " to " { $snippet "short-8" } "), " { $snippet "vconvert" } " takes one vector of " { $snippet "from-type" } " and unpacks it into two vectors of " { $snippet "to-type" } "." } @@ -39,26 +39,23 @@ HELP: vconvert "Conversion between integer and float vectors:" { $example """USING: alien.c-types math.vectors.conversion math.vectors.simd prettyprint ; -SIMDS: int float longlong double ; -int-8{ 0 1 2 3 4 5 6 7 } int-8 float-8 vconvert . +int-4{ 0 1 2 3 } int-4 float-4 vconvert . double-2{ 1.25 3.75 } double-2 longlong-2 vconvert .""" -"""float-8{ 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 } +"""float-4{ 0.0 1.0 2.0 3.0 } longlong-2{ 1 3 }""" } "Packing conversions:" { $example """USING: alien.c-types math.vectors.conversion math.vectors.simd prettyprint ; -SIMDS: ushort int float double ; int-4{ -8 70000 6000 50 } int-4{ 4 3 2 -1 } int-4 ushort-8 vconvert . -double-4{ 0.0 1.5 1.0e100 2.0 } -double-4{ -1.0e100 0.0 1.0 2.0 } double-4 float-8 vconvert .""" +double-2{ 0.0 1.0e100 } +double-2{ -1.0e100 0.0 } double-2 float-4 vconvert .""" """ushort-8{ 0 65535 6000 50 4 3 2 0 } -float-8{ 0.0 1.5 1/0. 2.0 -1/0. 0.0 1.0 2.0 }""" } +float-4{ 0.0 1/0. -1/0. 0.0 }""" } "Unpacking conversions:" { $example """USING: alien.c-types kernel math.vectors.conversion math.vectors.simd prettyprint ; -SIMDS: uchar short ; uchar-16{ 8 70 60 50 4 30 200 1 9 10 110 102 133 143 115 0 } uchar-16 short-8 vconvert [ . ] bi@""" From 9128f1f160e4098e19dfeeeac1c03f0125d57c58 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Tue, 24 Nov 2009 22:23:22 -0800 Subject: [PATCH 34/46] remove mentions of 256-bit vectors from math.vectors.simd --- basis/math/vectors/simd/simd-docs.factor | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor index 8aeea4267d..98a7b9273c 100644 --- a/basis/math/vectors/simd/simd-docs.factor +++ b/basis/math/vectors/simd/simd-docs.factor @@ -19,9 +19,9 @@ $nl ARTICLE: "math.vectors.simd.support" "Supported SIMD instruction sets and operations" "At present, the SIMD support makes use of a subset of SSE up to SSE4.1. The subset used depends on the current CPU type." $nl -"SSE1 only supports single-precision SIMD (" { $snippet "float-4" } " and " { $snippet "float-8" } ")." +"SSE1 only supports single-precision SIMD (" { $snippet "float-4" } ")." $nl -"SSE2 introduces double-precision SIMD (" { $snippet "double-2" } " and " { $snippet "double-4" } ") and integer SIMD (all types). Integer SIMD is missing a few features, in particular the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "." +"SSE2 introduces double-precision SIMD (" { $snippet "double-2" } ") and integer SIMD (all types). Integer SIMD is missing a few features; in particular, the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "." $nl "SSE3 introduces horizontal adds (summing all components of a single vector register), which are useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "." $nl @@ -40,24 +40,14 @@ $nl { $code "char-16" "uchar-16" - "char-32" - "uchar-32" "short-8" "ushort-8" - "short-16" - "ushort-16" "int-4" "uint-4" - "int-8" - "uint-8" "longlong-2" "ulonglong-2" - "longlong-4" - "ulonglong-4" "float-4" - "float-8" "double-2" - "double-4" } ; ARTICLE: "math.vectors.simd.words" "SIMD vector words" @@ -88,8 +78,8 @@ SIMD: double SYMBOLS: x y ; [ - double-4{ 1.5 2.0 3.7 0.4 } x set - double-4{ 1.5 2.0 3.7 0.4 } y set + float-4{ 1.5 2.0 3.7 0.4 } x set + float-4{ 1.5 2.0 3.7 0.4 } y set x get y get v+ ] optimizer-report.""" } "The following word benefits from SIMD optimization, because it begins with an unsafe declaration:" @@ -183,7 +173,7 @@ $nl ARTICLE: "math.vectors.simd.accuracy" "Numerical accuracy of SIMD primitives" "No guarantees are made that " { $vocab-link "math.vectors.simd" } " words will give identical results on different SSE versions, or between the hardware intrinsics and the software fallbacks." $nl -"In particular, horizontal operations on " { $snippet "float-4" } " and " { $snippet "float-8" } " are affected by this. They are computed with lower precision in intrinsics than the software fallback. Horizontal operations include anything involving adding together the components of a vector, such as " { $link sum } " or " { $link normalize } "." ; +"In particular, horizontal operations on " { $snippet "float-4" } " vectors are affected by this. They are computed with lower precision in intrinsics than the software fallback. Horizontal operations include anything involving adding together the components of a vector, such as " { $link sum } " or " { $link normalize } "." ; ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)" "The " { $vocab-link "math.vectors.simd" } " vocabulary extends the " { $vocab-link "math.vectors" } " vocabulary to support efficient vector arithmetic on small, fixed-size vectors." From 50f7dff4228860f041e372fc61ebef9c6b97052d Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Tue, 24 Nov 2009 22:44:12 -0800 Subject: [PATCH 35/46] change name of 'unsign-rep' to more sensible 'signed-rep' --- basis/compiler/cfg/intrinsics/simd/simd.factor | 8 ++++---- basis/cpu/architecture/architecture.factor | 2 +- basis/cpu/x86/x86.factor | 18 +++++++++--------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index 109ac6ce8e..845902c2e6 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -20,7 +20,7 @@ IN: compiler.cfg.intrinsics.simd ! compound vector ops : sign-bit-mask ( rep -- byte-array ) - unsign-rep { + signed-rep { { char-16-rep [ uchar-array{ HEX: 80 HEX: 80 HEX: 80 HEX: 80 HEX: 80 HEX: 80 HEX: 80 HEX: 80 @@ -48,7 +48,7 @@ IN: compiler.cfg.intrinsics.simd } case ; : ^load-add-sub-vector ( rep -- dst ) - unsign-rep { + signed-rep { { float-4-rep [ float-array{ -0.0 0.0 -0.0 0.0 } underlying>> ^^load-constant ] } { double-2-rep [ double-array{ -0.0 0.0 } underlying>> ^^load-constant ] } { char-16-rep [ char-array{ -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 } underlying>> ^^load-constant ] } @@ -115,7 +115,7 @@ IN: compiler.cfg.intrinsics.simd rep sign-bit-mask ^^load-constant :> sign-bits src1 sign-bits rep ^^xor-vector src2 sign-bits rep ^^xor-vector - rep unsign-rep cc ^(compare-vector) + rep signed-rep cc ^(compare-vector) ] } } vv-cc-vector-op ; @@ -247,7 +247,7 @@ IN: compiler.cfg.intrinsics.simd ] [ ^^vector>scalar ] bi ; : ^sum-vector ( src rep -- dst ) - unsign-rep { + signed-rep { { float-vector-rep [ ^(sum-vector) ] } { int-vector-rep [| src rep | src rep ^unpack-vector-head :> head diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 6631cec189..9158379f70 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -95,7 +95,7 @@ double-rep vector-rep scalar-rep ; -: unsign-rep ( rep -- rep' ) +: signed-rep ( rep -- rep' ) { { uint-4-rep int-4-rep } { ulonglong-2-rep longlong-2-rep } diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index b752935f9c..b1735b88f2 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -650,7 +650,7 @@ M: x86 %fill-vector-reps } available-reps ; ! M:: x86 %broadcast-vector ( dst src rep -- ) -! rep unsign-rep { +! rep signed-rep { ! { float-4-rep [ ! dst src float-4-rep %copy ! dst dst { 0 0 0 0 } SHUFPS @@ -687,7 +687,7 @@ M: x86 %fill-vector-reps ! } available-reps ; M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- ) - rep unsign-rep { + rep signed-rep { { float-4-rep [ dst src1 float-4-rep %copy dst src2 UNPCKLPS @@ -710,7 +710,7 @@ M: x86 %gather-vector-4-reps } available-reps ; M:: x86 %gather-vector-2 ( dst src1 src2 rep -- ) - rep unsign-rep { + rep signed-rep { { double-2-rep [ dst src1 double-2-rep %copy dst src2 MOVLHPS @@ -763,7 +763,7 @@ M: x86 %gather-vector-2-reps M:: x86 %shuffle-vector-imm ( dst src shuffle rep -- ) dst src rep %copy - dst shuffle rep unsign-rep { + dst shuffle rep signed-rep { { double-2-rep [ >float-4-shuffle float-4-shuffle ] } { float-4-rep [ float-4-shuffle ] } { int-4-rep [ int-4-shuffle ] } @@ -786,7 +786,7 @@ M: x86 %shuffle-vector-reps M: x86 %merge-vector-head [ two-operand ] keep - unsign-rep { + signed-rep { { double-2-rep [ MOVLHPS ] } { float-4-rep [ UNPCKLPS ] } { longlong-2-rep [ PUNPCKLQDQ ] } @@ -797,7 +797,7 @@ M: x86 %merge-vector-head M: x86 %merge-vector-tail [ two-operand ] keep - unsign-rep { + signed-rep { { double-2-rep [ UNPCKHPD ] } { float-4-rep [ UNPCKHPS ] } { longlong-2-rep [ PUNPCKHQDQ ] } @@ -826,7 +826,7 @@ M: x86 %signed-pack-vector-reps M: x86 %unsigned-pack-vector [ two-operand ] keep - unsign-rep { + signed-rep { { int-4-rep [ PACKUSDW ] } { short-8-rep [ PACKUSWB ] } } case ; @@ -896,7 +896,7 @@ M: x86 %float>integer-vector-reps } case ; :: (%compare-int-vector) ( dst src rep int64 int32 int16 int8 -- ) - rep unsign-rep :> rep' + rep signed-rep :> rep' dst src rep' { { longlong-2-rep [ int64 call ] } { int-4-rep [ int32 call ] } @@ -1173,7 +1173,7 @@ M: x86 %dot-vector-reps M: x86 %horizontal-add-vector ( dst src1 src2 rep -- ) [ two-operand ] keep - unsign-rep { + signed-rep { { float-4-rep [ HADDPS ] } { double-2-rep [ HADDPD ] } { int-4-rep [ PHADDD ] } From 46f0aa64215131739b08c64ac3aa88f844a6c6a2 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Tue, 24 Nov 2009 22:56:42 -0800 Subject: [PATCH 36/46] move cords to sequences.cords --- basis/math/vectors/conversion/conversion.factor | 6 ++++-- basis/math/vectors/simd/intrinsics/intrinsics.factor | 6 +++--- basis/{ => sequences}/cords/authors.txt | 0 basis/{ => sequences}/cords/cords-tests.factor | 4 ++-- basis/{ => sequences}/cords/cords.factor | 2 +- basis/{ => sequences}/cords/summary.txt | 0 basis/{ => sequences}/cords/tags.txt | 0 7 files changed, 10 insertions(+), 8 deletions(-) rename basis/{ => sequences}/cords/authors.txt (100%) rename basis/{ => sequences}/cords/cords-tests.factor (62%) rename basis/{ => sequences}/cords/cords.factor (98%) rename basis/{ => sequences}/cords/summary.txt (100%) rename basis/{ => sequences}/cords/tags.txt (100%) diff --git a/basis/math/vectors/conversion/conversion.factor b/basis/math/vectors/conversion/conversion.factor index 50bb9c8726..6148962ee0 100644 --- a/basis/math/vectors/conversion/conversion.factor +++ b/basis/math/vectors/conversion/conversion.factor @@ -1,8 +1,10 @@ ! (c)Joe Groff bsd license USING: accessors alien.c-types arrays assocs classes combinators -combinators.short-circuit cords fry kernel locals math +combinators.short-circuit fry kernel locals math math.vectors math.vectors.simd math.vectors.simd.intrinsics sequences ; -FROM: alien.c-types => char uchar short ushort int uint longlong ulonglong float double ; +FROM: alien.c-types => + char uchar short ushort int uint longlong ulonglong + float double ; IN: math.vectors.conversion ERROR: bad-vconvert from-type to-type ; diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index d2e0305fa3..30db6d5e13 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -1,8 +1,8 @@ ! (c)2009 Slava Pestov, Joe Groff bsd license USING: accessors alien alien.c-types alien.data combinators -cords cpu.architecture fry generalizations kernel libc locals -math math.libm math.order math.ranges math.vectors sequences -sequences.private specialized-arrays vocabs.loader ; +sequences.cords cpu.architecture fry generalizations kernel +libc locals math math.libm math.order math.ranges math.vectors +sequences sequences.private specialized-arrays vocabs.loader ; QUALIFIED-WITH: alien.c-types c SPECIALIZED-ARRAYS: c:char c:short c:int c:longlong diff --git a/basis/cords/authors.txt b/basis/sequences/cords/authors.txt similarity index 100% rename from basis/cords/authors.txt rename to basis/sequences/cords/authors.txt diff --git a/basis/cords/cords-tests.factor b/basis/sequences/cords/cords-tests.factor similarity index 62% rename from basis/cords/cords-tests.factor rename to basis/sequences/cords/cords-tests.factor index 898e4e51c8..2999365926 100644 --- a/basis/cords/cords-tests.factor +++ b/basis/sequences/cords/cords-tests.factor @@ -1,5 +1,5 @@ -USING: cords strings tools.test kernel sequences ; -IN: cords.tests +USING: sequences.cords strings tools.test kernel sequences ; +IN: sequences.cords.tests [ "hello world" ] [ "hello" " world" cord-append dup like ] unit-test [ "hello world" ] [ { "he" "llo" " world" } cord-concat dup like ] unit-test diff --git a/basis/cords/cords.factor b/basis/sequences/cords/cords.factor similarity index 98% rename from basis/cords/cords.factor rename to basis/sequences/cords/cords.factor index ad17da9652..4b88432313 100644 --- a/basis/cords/cords.factor +++ b/basis/sequences/cords/cords.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors assocs sequences sorting binary-search math math.order arrays combinators kernel ; -IN: cords +IN: sequences.cords Date: Tue, 24 Nov 2009 23:18:01 -0800 Subject: [PATCH 37/46] streamline cords, add a functor for making specialized cords --- basis/sequences/cords/cords-tests.factor | 1 - basis/sequences/cords/cords.factor | 87 ++++++++---------------- 2 files changed, 29 insertions(+), 59 deletions(-) diff --git a/basis/sequences/cords/cords-tests.factor b/basis/sequences/cords/cords-tests.factor index 2999365926..fb9c440733 100644 --- a/basis/sequences/cords/cords-tests.factor +++ b/basis/sequences/cords/cords-tests.factor @@ -2,4 +2,3 @@ USING: sequences.cords strings tools.test kernel sequences ; IN: sequences.cords.tests [ "hello world" ] [ "hello" " world" cord-append dup like ] unit-test -[ "hello world" ] [ { "he" "llo" " world" } cord-concat dup like ] unit-test diff --git a/basis/sequences/cords/cords.factor b/basis/sequences/cords/cords.factor index 4b88432313..f183e4fd2d 100644 --- a/basis/sequences/cords/cords.factor +++ b/basis/sequences/cords/cords.factor @@ -1,72 +1,43 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: accessors assocs sequences sorting binary-search math -math.order arrays combinators kernel ; +math.order arrays classes combinators kernel functors ; IN: sequences.cords -> length ] [ second>> length ] bi + ; inline +M: cord length + [ head>> length ] [ tail>> length ] bi + ; inline -M: simple-cord virtual-exemplar first>> ; inline +M: cord virtual-exemplar head>> ; inline -M: simple-cord virtual@ - 2dup first>> length < - [ first>> ] [ [ first>> length - ] [ second>> ] bi ] if ; inline - -TUPLE: multi-cord - { count read-only } { seqs read-only } ; - -M: multi-cord length count>> ; inline - -M: multi-cord virtual@ - dupd - seqs>> [ first <=> ] with search nip - [ first - ] [ second ] bi ; inline - -M: multi-cord virtual-exemplar - seqs>> [ f ] [ first second ] if-empty ; inline - -: ( seqs -- cord ) - dup length 2 = [ - first2 simple-cord boa - ] [ - [ 0 [ length + ] accumulate ] keep zip multi-cord boa - ] if ; inline - -PRIVATE> - -UNION: cord simple-cord multi-cord ; +M: cord virtual@ + 2dup head>> length < + [ head>> ] [ [ head>> length - ] [ tail>> ] bi ] if ; inline INSTANCE: cord virtual-sequence -INSTANCE: multi-cord virtual-sequence +GENERIC: cord-append ( seq1 seq2 -- cord ) -: cord-append ( seq1 seq2 -- cord ) - { - { [ over empty? ] [ nip ] } - { [ dup empty? ] [ drop ] } - { [ 2dup [ cord? ] both? ] [ [ seqs>> values ] bi@ append ] } - { [ over cord? ] [ [ seqs>> values ] dip suffix ] } - { [ dup cord? ] [ seqs>> values swap prefix ] } - [ 2array ] - } cond ; inline +M: object cord-append + generic-cord boa ; inline -: cord-concat ( seqs -- cord ) - { - { [ dup empty? ] [ drop f ] } - { [ dup length 1 = ] [ first ] } - [ - [ - { - { [ dup cord? ] [ seqs>> values ] } - { [ dup empty? ] [ drop { } ] } - [ 1array ] - } cond - ] map concat - ] - } cond ; inline +FUNCTOR: define-specialized-cord ( T C -- ) + +T-cord DEFINES-CLASS ${C} + +WHERE + +TUPLE: T-cord + { head T read-only } { tail T read-only } ; +INSTANCE: T-cord cord + +M: T cord-append + 2dup [ T instance? ] both? + [ T-cord boa ] [ generic-cord boa ] if ; inline + +;FUNCTOR From 5f32a6d7265896f497650c80d1afdbdedbc06b3b Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 25 Nov 2009 15:44:57 -0800 Subject: [PATCH 38/46] vector op specializations on cords --- basis/sequences/cords/cords.factor | 74 +++++++++++++++++++++++++++++- 1 file changed, 72 insertions(+), 2 deletions(-) diff --git a/basis/sequences/cords/cords.factor b/basis/sequences/cords/cords.factor index f183e4fd2d..e59858677b 100644 --- a/basis/sequences/cords/cords.factor +++ b/basis/sequences/cords/cords.factor @@ -1,7 +1,8 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors assocs sequences sorting binary-search math -math.order arrays classes combinators kernel functors ; +USING: accessors assocs sequences sorting binary-search fry math +math.order arrays classes combinators kernel functors math.functions +math.vectors ; IN: sequences.cords MIXIN: cord @@ -41,3 +42,72 @@ M: T cord-append [ T-cord boa ] [ generic-cord boa ] if ; inline ;FUNCTOR + +: cord-map ( cord quot -- cord' ) + [ [ head>> ] dip call ] + [ [ tail>> ] dip call ] 2bi cord-append ; inline + +: cord-2map ( cord cord quot -- cord' ) + [ [ [ head>> ] bi@ ] dip call ] + [ [ [ tail>> ] bi@ ] dip call ] 3bi cord-append ; inline + +: cord-both ( cord quot -- h t ) + [ [ head>> ] [ tail>> ] bi ] dip bi@ ; inline + +: cord-2both ( cord cord quot -- h t ) + [ [ [ head>> ] bi@ ] dip call ] + [ [ [ tail>> ] bi@ ] dip call ] 3bi ; inline + +M: cord v+ [ v+ ] cord-2map ; inline +M: cord v- [ v- ] cord-2map ; inline +M: cord vneg [ vneg ] cord-map ; inline +M: cord v+- [ v+- ] cord-2map ; inline +M: cord vs+ [ vs+ ] cord-2map ; inline +M: cord vs- [ vs- ] cord-2map ; inline +M: cord vs* [ vs* ] cord-2map ; inline +M: cord v* [ v* ] cord-2map ; inline +M: cord v/ [ v/ ] cord-2map ; inline +M: cord vmin [ vmin ] cord-2map ; inline +M: cord vmax [ vmax ] cord-2map ; inline +M: cord v. [ v. ] cord-2both + ; inline +M: cord vsqrt [ vsqrt ] cord-map ; inline +M: cord sum [ sum ] cord-both + ; inline +M: cord vabs [ vabs ] cord-map ; inline +M: cord vbitand [ vbitand ] cord-2map ; inline +M: cord vbitandn [ vbitandn ] cord-2map ; inline +M: cord vbitor [ vbitor ] cord-2map ; inline +M: cord vbitxor [ vbitxor ] cord-2map ; inline +M: cord vbitnot [ vbitnot ] cord-map ; inline +M: cord vand [ vand ] cord-2map ; inline +M: cord vandn [ vandn ] cord-2map ; inline +M: cord vor [ vor ] cord-2map ; inline +M: cord vxor [ vxor ] cord-2map ; inline +M: cord vnot [ vnot ] cord-map ; inline +M: cord vlshift '[ _ vlshift ] cord-map ; inline +M: cord vrshift '[ _ vrshift ] cord-map ; inline +M: cord (vmerge-head) [ head>> ] bi@ (vmerge) cord-append ; inline +M: cord (vmerge-tail) [ tail>> ] bi@ (vmerge) cord-append ; inline +M: cord v<= [ v<= ] cord-2map ; inline +M: cord v< [ v< ] cord-2map ; inline +M: cord v= [ v= ] cord-2map ; inline +M: cord v> [ v> ] cord-2map ; inline +M: cord v>= [ v>= ] cord-2map ; inline +M: cord vunordered? [ vunordered? ] cord-2map ; inline +M: cord vany? [ vany? ] cord-both or ; inline +M: cord vall? [ vall? ] cord-both and ; inline +M: cord vnone? [ vnone? ] cord-both and ; inline + +M: cord n+v [ n+v ] with cord-map ; inline +M: cord n-v [ n-v ] with cord-map ; inline +M: cord n*v [ n*v ] with cord-map ; inline +M: cord n/v [ n/v ] with cord-map ; inline +M: cord v+n '[ _ v+n ] cord-map ; inline +M: cord v-n '[ _ v-n ] cord-map ; inline +M: cord v*n '[ _ v*n ] cord-map ; inline +M: cord v/n '[ _ v/n ] cord-map ; inline + +M: cord norm-sq dup v. ; inline +M: cord norm norm-sq sqrt ; inline +M: cord distance v- norm ; inline + + From 578a0e3fd5c99e96c73d0c1ea78f653e3e2d7dec Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 25 Nov 2009 16:36:34 -0800 Subject: [PATCH 39/46] math.vectors.simd.cords vocab that defines vector cords --- basis/math/vectors/simd/cords/cords.factor | 87 ++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 basis/math/vectors/simd/cords/cords.factor diff --git a/basis/math/vectors/simd/cords/cords.factor b/basis/math/vectors/simd/cords/cords.factor new file mode 100644 index 0000000000..e099f6e830 --- /dev/null +++ b/basis/math/vectors/simd/cords/cords.factor @@ -0,0 +1,87 @@ +USING: accessors alien.c-types arrays byte-arrays +cpu.architecture effects functors generalizations kernel lexer +math math.vectors.simd math.vectors.simd.intrinsics parser +prettyprint.custom quotations sequences sequences.cords words ; +IN: math.vectors.simd.cords + +<< +A/2 IS >${A/2} +A/2-boa IS ${A/2}-boa +A/2-with IS ${A/2}-with +A/2-cast IS ${A/2}-cast + +>A DEFINES >${A} +A-boa DEFINES ${A}-boa +A-with DEFINES ${A}-with +A-cast DEFINES ${A}-cast +A{ DEFINES ${A}{ + +N [ A-rep rep-length ] +BOA-EFFECT [ N 2 * "n" >array { "v" } ] + +WHERE + +: >A ( seq -- A ) + [ N head >A/2 ] + [ N tail >A/2 ] bi cord-append ; + +\ A-boa +{ N ndip A/2-boa cord-append } { A/2-boa } >quotation prefix >quotation +BOA-EFFECT define-inline + +: A-with ( n -- v ) + [ A/2-with ] [ A/2-with ] bi cord-append ; + +: A-cast ( v -- v' ) + [ A/2-cast ] cord-map ; + +M: A >pprint-sequence ; +M: A pprint* pprint-object ; + +M: A pprint-delims drop \ A{ \ } ; +SYNTAX: A{ \ } [ >A ] parse-literal ; + + + byte-array >>class + A >>boxed-class + [ + [ A-rep alien-vector A/2 boa ] + [ 16 + A-rep alien-vector A/2 boa ] 2bi cord-append + ] >>getter + [ + [ [ head>> underlying>> ] 2dip A-rep set-alien-vector ] + [ [ tail>> underlying>> ] 2dip 16 + A-rep set-alien-vector ] 3bi + ] >>setter + 32 >>size + 16 >>align + A-rep >>rep +\ A typedef + +;FUNCTOR + +: define-simd-128-cord ( A/2 T -- ) + [ define-specialized-cord ] + [ create-in (define-simd-128-cord) ] 2bi ; + +SYNTAX: SIMD-128-CORD: + scan-word scan define-simd-128-cord ; + +PRIVATE> +>> + +SIMD-128-CORD: char-16 char-32 +SIMD-128-CORD: uchar-16 uchar-32 +SIMD-128-CORD: short-8 short-16 +SIMD-128-CORD: ushort-8 ushort-16 +SIMD-128-CORD: int-4 int-8 +SIMD-128-CORD: uint-4 uint-8 +SIMD-128-CORD: longlong-2 longlong-4 +SIMD-128-CORD: ulonglong-2 ulonglong-4 +SIMD-128-CORD: float-4 float-8 +SIMD-128-CORD: double-2 double-4 + From cb8f757c72edce832656238378e6cb2368b8cd35 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 25 Nov 2009 18:02:27 -0800 Subject: [PATCH 40/46] update benchmark.nbody-simd and benchmark.raytracer-simd --- extra/benchmark/nbody-simd/nbody-simd.factor | 5 ++--- extra/benchmark/raytracer-simd/raytracer-simd.factor | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/extra/benchmark/nbody-simd/nbody-simd.factor b/extra/benchmark/nbody-simd/nbody-simd.factor index 6648c52639..2797558a4b 100644 --- a/extra/benchmark/nbody-simd/nbody-simd.factor +++ b/extra/benchmark/nbody-simd/nbody-simd.factor @@ -2,9 +2,8 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors alien.c-types fry kernel locals math math.constants math.functions math.vectors math.vectors.simd -prettyprint combinators.smart sequences hints classes.struct -specialized-arrays ; -SIMD: double +math.vectors.simd.cords prettyprint combinators.smart sequences +hints classes.struct specialized-arrays ; IN: benchmark.nbody-simd : solar-mass ( -- x ) 4 pi sq * ; inline diff --git a/extra/benchmark/raytracer-simd/raytracer-simd.factor b/extra/benchmark/raytracer-simd/raytracer-simd.factor index 5a3c232b5a..45407e5ad2 100644 --- a/extra/benchmark/raytracer-simd/raytracer-simd.factor +++ b/extra/benchmark/raytracer-simd/raytracer-simd.factor @@ -3,10 +3,9 @@ USING: arrays accessors io io.files io.files.temp io.encodings.binary kernel math math.constants math.functions -math.vectors math.vectors.simd math.parser make sequences -sequences.private words hints classes.struct ; +math.vectors math.vectors.simd math.vectors.simd.cords math.parser +make sequences sequences.private words hints classes.struct ; QUALIFIED-WITH: alien.c-types c -SIMD: c:double IN: benchmark.raytracer-simd ! parameters From c916c7c85670f73ac68bb1d75e359d3c17862cee Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 25 Nov 2009 20:06:11 -0800 Subject: [PATCH 41/46] don't try to rewrite redundant test-vector-exprs --- .../cfg/value-numbering/rewrite/rewrite.factor | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor b/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor index 746fe0e5ea..0fa0314c3e 100755 --- a/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor +++ b/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor @@ -42,6 +42,14 @@ M: insn rewrite drop f ; ] [ drop f ] if ; inline : general-compare-expr? ( insn -- ? ) + { + [ compare-expr? ] + [ compare-imm-expr? ] + [ compare-float-unordered-expr? ] + [ compare-float-ordered-expr? ] + } 1|| ; + +: general-or-vector-compare-expr? ( insn -- ? ) { [ compare-expr? ] [ compare-imm-expr? ] @@ -52,7 +60,7 @@ M: insn rewrite drop f ; : rewrite-boolean-comparison? ( insn -- ? ) dup ##branch-t? [ - src1>> vreg>expr general-compare-expr? + src1>> vreg>expr general-or-vector-compare-expr? ] [ drop f ] if ; inline : >compare-expr< ( expr -- in1 in2 cc ) From 4027002db7474eb37633c5e8eccb8eff85876ac0 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 25 Nov 2009 20:21:54 -0800 Subject: [PATCH 42/46] remove SIMD:s --- basis/io/mmap/mmap-docs.factor | 1 - basis/math/bitwise/bitwise-tests.factor | 1 - basis/math/vectors/simd/simd-docs.factor | 4 ---- extra/benchmark/3d-matrix-vector/3d-matrix-vector.factor | 1 - extra/benchmark/simd-1/simd-1.factor | 1 - extra/benchmark/terrain-generation/terrain-generation.factor | 1 - extra/gpu/demos/bunny/bunny.factor | 1 - extra/grid-meshes/grid-meshes.factor | 1 - extra/math/matrices/simd/simd-tests.factor | 1 - extra/math/matrices/simd/simd.factor | 1 - extra/terrain/terrain.factor | 1 - 11 files changed, 14 deletions(-) diff --git a/basis/io/mmap/mmap-docs.factor b/basis/io/mmap/mmap-docs.factor index 33ba6850a5..3eabfc4e7f 100644 --- a/basis/io/mmap/mmap-docs.factor +++ b/basis/io/mmap/mmap-docs.factor @@ -87,7 +87,6 @@ ARTICLE: "io.mmap.examples" "Memory-mapped file examples" "Normalize a file containing packed quadrupes of floats:" { $code "USING: kernel io.mmap math.vectors math.vectors.simd" "sequences specialized-arrays ;" - "SIMD: float" "SPECIALIZED-ARRAY: float-4" "" "\"mydata.dat\" float-4 [" diff --git a/basis/math/bitwise/bitwise-tests.factor b/basis/math/bitwise/bitwise-tests.factor index d10e4ccc87..a5919d3ec3 100644 --- a/basis/math/bitwise/bitwise-tests.factor +++ b/basis/math/bitwise/bitwise-tests.factor @@ -41,7 +41,6 @@ CONSTANT: b 2 [ 0 ] [ BIN: 0 bit-count ] unit-test [ 1 ] [ BIN: 1 bit-count ] unit-test -SIMD: uint SPECIALIZED-ARRAY: uint SPECIALIZED-ARRAY: uint-4 diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor index 98a7b9273c..540838bdd5 100644 --- a/basis/math/vectors/simd/simd-docs.factor +++ b/basis/math/vectors/simd/simd-docs.factor @@ -74,7 +74,6 @@ $nl { $code """USING: compiler.tree.debugger math.vectors math.vectors.simd ; -SIMD: double SYMBOLS: x y ; [ @@ -86,7 +85,6 @@ SYMBOLS: x y ; { $code """USING: compiler.tree.debugger kernel.private math.vectors math.vectors.simd ; -SIMD: float IN: simd-demo : interpolate ( v a b -- w ) @@ -100,7 +98,6 @@ $nl { $code """USING: compiler.tree.debugger hints math.vectors math.vectors.simd ; -SIMD: float IN: simd-demo : interpolate ( v a b -- w ) @@ -116,7 +113,6 @@ $nl "In the " { $snippet "interpolate" } " word, there is still a call to the " { $link } " primitive, because the return value at the end is being boxed on the heap. In the next example, no memory allocation occurs at all because the SIMD vectors are stored inside a struct class (see " { $link "classes.struct" } "); also note the use of inlining:" { $code """USING: compiler.tree.debugger math.vectors math.vectors.simd ; -SIMD: float IN: simd-demo STRUCT: actor diff --git a/extra/benchmark/3d-matrix-vector/3d-matrix-vector.factor b/extra/benchmark/3d-matrix-vector/3d-matrix-vector.factor index 1b57bb902f..563bf4558c 100644 --- a/extra/benchmark/3d-matrix-vector/3d-matrix-vector.factor +++ b/extra/benchmark/3d-matrix-vector/3d-matrix-vector.factor @@ -1,7 +1,6 @@ USING: kernel locals math math.matrices.simd math.order math.vectors math.vectors.simd prettyprint sequences typed ; QUALIFIED-WITH: alien.c-types c -SIMD: c:float IN: benchmark.3d-matrix-vector : v2min ( xy -- xx ) diff --git a/extra/benchmark/simd-1/simd-1.factor b/extra/benchmark/simd-1/simd-1.factor index ff0cb98a00..e20b82c3c4 100644 --- a/extra/benchmark/simd-1/simd-1.factor +++ b/extra/benchmark/simd-1/simd-1.factor @@ -3,7 +3,6 @@ USING: kernel io math math.functions math.parser math.vectors math.vectors.simd sequences specialized-arrays ; QUALIFIED-WITH: alien.c-types c -SIMD: c:float SPECIALIZED-ARRAY: float-4 IN: benchmark.simd-1 diff --git a/extra/benchmark/terrain-generation/terrain-generation.factor b/extra/benchmark/terrain-generation/terrain-generation.factor index b158dba5dd..41c1152cbd 100644 --- a/extra/benchmark/terrain-generation/terrain-generation.factor +++ b/extra/benchmark/terrain-generation/terrain-generation.factor @@ -1,7 +1,6 @@ ! (c)Joe Groff bsd license USING: io kernel math.vectors.simd terrain.generation threads ; FROM: alien.c-types => float ; -SIMD: float IN: benchmark.terrain-generation : terrain-generation-benchmark ( -- ) diff --git a/extra/gpu/demos/bunny/bunny.factor b/extra/gpu/demos/bunny/bunny.factor index 09853263ce..ea91e226a8 100755 --- a/extra/gpu/demos/bunny/bunny.factor +++ b/extra/gpu/demos/bunny/bunny.factor @@ -11,7 +11,6 @@ specialized-vectors ; FROM: alien.c-types => float ; SPECIALIZED-ARRAY: float SPECIALIZED-VECTOR: uint -SIMD: float IN: gpu.demos.bunny GLSL-SHADER-FILE: bunny-vertex-shader vertex-shader "bunny.v.glsl" diff --git a/extra/grid-meshes/grid-meshes.factor b/extra/grid-meshes/grid-meshes.factor index ebde0b2641..47f649868e 100644 --- a/extra/grid-meshes/grid-meshes.factor +++ b/extra/grid-meshes/grid-meshes.factor @@ -3,7 +3,6 @@ USING: accessors alien.data.map arrays destructors fry grouping kernel math math.ranges math.vectors.simd opengl opengl.gl sequences sequences.product specialized-arrays ; FROM: alien.c-types => float ; -SIMD: float SPECIALIZED-ARRAY: float-4 IN: grid-meshes diff --git a/extra/math/matrices/simd/simd-tests.factor b/extra/math/matrices/simd/simd-tests.factor index 25482c8e1e..b27abcae67 100644 --- a/extra/math/matrices/simd/simd-tests.factor +++ b/extra/math/matrices/simd/simd-tests.factor @@ -3,7 +3,6 @@ USING: classes.struct math.matrices.simd math.vectors.simd math literals math.constants math.functions specialized-arrays tools.test ; QUALIFIED-WITH: alien.c-types c FROM: math.matrices => m~ ; -SIMD: c:float SPECIALIZED-ARRAY: float-4 IN: math.matrices.simd.tests diff --git a/extra/math/matrices/simd/simd.factor b/extra/math/matrices/simd/simd.factor index 97290964eb..4e1fd0e96c 100644 --- a/extra/math/matrices/simd/simd.factor +++ b/extra/math/matrices/simd/simd.factor @@ -4,7 +4,6 @@ math math.combinatorics math.functions math.matrices.simd math.vectors math.vectors.simd sequences sequences.private specialized-arrays typed ; QUALIFIED-WITH: alien.c-types c -SIMD: c:float SPECIALIZED-ARRAY: float-4 IN: math.matrices.simd diff --git a/extra/terrain/terrain.factor b/extra/terrain/terrain.factor index f1da877c3e..55d54d3be1 100644 --- a/extra/terrain/terrain.factor +++ b/extra/terrain/terrain.factor @@ -11,7 +11,6 @@ math.matrices.simd noise ui.gestures combinators.short-circuit destructors grid-meshes math.vectors.simd ; QUALIFIED-WITH: alien.c-types c SPECIALIZED-ARRAY: c:float -SIMD: c:float IN: terrain CONSTANT: FOV $[ 2.0 sqrt 1 + ] From 7da80f65e736b9d3f54e491eb49c55acd289052d Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 25 Nov 2009 20:24:09 -0800 Subject: [PATCH 43/46] trick math.vectors.simd into making nicer quotations --- basis/math/vectors/simd/simd.factor | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index e89edd3de3..7213286e15 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -234,8 +234,8 @@ SYNTAX: A{ \ } [ >A ] parse-literal ; c: byte-array >>class A >>boxed-class - [ A-rep alien-vector A boa ] >>getter - [ [ underlying>> ] 2dip A-rep set-alien-vector ] >>setter + { A-rep alien-vector A boa } >quotation >>getter + { [ underlying>> ] 2dip A-rep set-alien-vector } >quotation >>setter 16 >>size 16 >>align A-rep >>rep From 0795c60b83bf657b4c103a68546961d865e4db64 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 26 Nov 2009 11:15:35 -0800 Subject: [PATCH 44/46] "norm" doesn't need to be generic, "norm-sq sqrt" always works --- basis/math/vectors/simd/simd.factor | 3 +-- basis/math/vectors/vectors.factor | 5 ++--- basis/sequences/cords/cords.factor | 3 +-- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index 7213286e15..036ff22f78 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -214,8 +214,7 @@ M: A v-n A-with v- ; inline M: A v*n A-with v* ; inline M: A v/n A-with v/ ; inline M: A norm-sq dup v. assert-positive ; inline -M: A norm norm-sq sqrt ; inline -M: A distance v- norm ; inline +M: A distance v- norm ; inline M: A >pprint-sequence ; M: A pprint* pprint-object ; diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index c0b129e6d2..a69a99c64b 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -199,8 +199,7 @@ M: object v. [ conjugate * ] [ + ] 2map-reduce ; GENERIC: norm-sq ( v -- x ) M: object norm-sq [ absq ] [ + ] map-reduce ; -GENERIC: norm ( v -- x ) -M: object norm norm-sq sqrt ; +: norm ( v -- x ) norm-sq sqrt ; inline : normalize ( u -- v ) dup norm v/n ; inline @@ -240,7 +239,7 @@ PRIVATE> HINTS: M\ object vneg { array } ; HINTS: M\ object norm-sq { array } ; -HINTS: M\ object norm { array } ; +HINTS: norm { array } ; HINTS: M\ object distance { array array } ; HINTS: M\ object n*v { object array } ; diff --git a/basis/sequences/cords/cords.factor b/basis/sequences/cords/cords.factor index e59858677b..fca005fa6e 100644 --- a/basis/sequences/cords/cords.factor +++ b/basis/sequences/cords/cords.factor @@ -106,8 +106,7 @@ M: cord v-n '[ _ v-n ] cord-map ; inline M: cord v*n '[ _ v*n ] cord-map ; inline M: cord v/n '[ _ v/n ] cord-map ; inline -M: cord norm-sq dup v. ; inline -M: cord norm norm-sq sqrt ; inline +M: cord norm-sq [ norm-sq ] cord-both + ; inline M: cord distance v- norm ; inline From ac5d3d56525c25260faf3ff1e9ab6d83a68b4943 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 26 Nov 2009 11:15:46 -0800 Subject: [PATCH 45/46] remove SIMDS: --- basis/math/vectors/vectors-docs.factor | 1 - basis/random/sfmt/sfmt.factor | 1 - extra/alien/data/map/map-tests.factor | 1 - extra/noise/noise.factor | 1 - extra/terrain/generation/generation.factor | 1 - 5 files changed, 5 deletions(-) diff --git a/basis/math/vectors/vectors-docs.factor b/basis/math/vectors/vectors-docs.factor index b831ac7dbe..6ef7f9ca50 100644 --- a/basis/math/vectors/vectors-docs.factor +++ b/basis/math/vectors/vectors-docs.factor @@ -436,7 +436,6 @@ HELP: vshuffle { $example "USING: alien.c-types combinators math.vectors math.vectors.simd" "namespaces prettyprint prettyprint.config ;" - "SIMDS: int uchar ;" "IN: scratchpad" "" ": endian-swap ( size -- vector )" diff --git a/basis/random/sfmt/sfmt.factor b/basis/random/sfmt/sfmt.factor index 55606217c9..146db91172 100644 --- a/basis/random/sfmt/sfmt.factor +++ b/basis/random/sfmt/sfmt.factor @@ -4,7 +4,6 @@ USING: accessors alien.c-types kernel locals math math.ranges math.bitwise math.vectors math.vectors.simd random sequences specialized-arrays sequences.private classes.struct combinators.short-circuit fry ; -SIMDS: uchar uint ; SPECIALIZED-ARRAY: uint SPECIALIZED-ARRAY: uint-4 IN: random.sfmt diff --git a/extra/alien/data/map/map-tests.factor b/extra/alien/data/map/map-tests.factor index 7a492ab0c5..b97a356e6e 100644 --- a/extra/alien/data/map/map-tests.factor +++ b/extra/alien/data/map/map-tests.factor @@ -3,7 +3,6 @@ USING: alien.data.map fry generalizations kernel locals math.vectors math.vectors.conversion math math.vectors.simd sequences specialized-arrays tools.test ; FROM: alien.c-types => uchar short int float ; -SIMDS: float int short uchar ; SPECIALIZED-ARRAYS: int float float-4 uchar-16 ; IN: alien.data.map.tests diff --git a/extra/noise/noise.factor b/extra/noise/noise.factor index 91e040d35f..a27cc186a0 100644 --- a/extra/noise/noise.factor +++ b/extra/noise/noise.factor @@ -4,7 +4,6 @@ math.libm math.matrices.simd math.vectors math.vectors.conversion math.vectors.s memoize random random.mersenne-twister sequences sequences.private specialized-arrays typed ; QUALIFIED-WITH: alien.c-types c -SIMDS: c:float c:int c:short c:ushort c:uchar ; SPECIALIZED-ARRAYS: c:float c:uchar float-4 uchar-16 ; IN: noise diff --git a/extra/terrain/generation/generation.factor b/extra/terrain/generation/generation.factor index 86f532bada..3ed4af3b1d 100644 --- a/extra/terrain/generation/generation.factor +++ b/extra/terrain/generation/generation.factor @@ -3,7 +3,6 @@ combinators.smart fry grouping images kernel math math.matrices.simd math.order math.vectors noise random sequences math.vectors.simd typed ; FROM: alien.c-types => float uchar ; -SIMDS: float uchar ; IN: terrain.generation CONSTANT: terrain-segment-size { 512 512 } From 66d0cafa94963c9a8464e4cdb9fba7f7f659a5dc Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 26 Nov 2009 13:28:40 -0800 Subject: [PATCH 46/46] fix buggy simd intrinsics --- basis/alien/c-types/c-types.factor | 4 +++- basis/compiler/cfg/intrinsics/simd/simd.factor | 6 +++--- basis/compiler/tree/propagation/simd/simd.factor | 5 +++-- basis/math/vectors/simd/intrinsics/intrinsics.factor | 6 +++--- basis/math/vectors/simd/simd-tests.factor | 5 +++-- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/basis/alien/c-types/c-types.factor b/basis/alien/c-types/c-types.factor index 027fe046b6..0ee2373b41 100755 --- a/basis/alien/c-types/c-types.factor +++ b/basis/alien/c-types/c-types.factor @@ -553,4 +553,6 @@ M: double-2-rep rep-component-type drop double ; { [ dup { uchar ushort uint ulong ulonglong } member-eq? ] [ unsigned-interval ] } } cond ; foldable -: c-type-clamp ( value c-type -- value' ) c-type-interval clamp ; inline +: c-type-clamp ( value c-type -- value' ) + dup { float double } member-eq? + [ drop ] [ c-type-interval clamp ] if ; inline diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index 845902c2e6..a64c657556 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -247,7 +247,7 @@ IN: compiler.cfg.intrinsics.simd ] [ ^^vector>scalar ] bi ; : ^sum-vector ( src rep -- dst ) - signed-rep { + { { float-vector-rep [ ^(sum-vector) ] } { int-vector-rep [| src rep | src rep ^unpack-vector-head :> head @@ -290,8 +290,8 @@ IN: compiler.cfg.intrinsics.simd : emit-simd-vneg ( node -- ) { - { float-vector-rep [ [ ^load-neg-zero-vector ] [ ^^sub-vector ] bi ] } - { int-vector-rep [ [ ^^zero-vector ] [ ^^sub-vector ] bi ] } + { float-vector-rep [ [ ^load-neg-zero-vector swap ] [ ^^sub-vector ] bi ] } + { int-vector-rep [ [ ^^zero-vector swap ] [ ^^sub-vector ] bi ] } } emit-v-vector-op ; : emit-simd-v+- ( node -- ) diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index 6002b15c1c..9aab173d7c 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -3,7 +3,7 @@ USING: accessors assocs byte-arrays combinators compiler.cfg.builder continuations fry sequences compiler.tree.propagation.info cpu.architecture kernel words make math math.intervals -math.vectors.simd.intrinsics ; +math.vectors.simd.intrinsics namespaces ; IN: compiler.tree.propagation.simd CONSTANT: vector>vector-intrinsics @@ -112,9 +112,10 @@ vector>vector-intrinsics [ { byte-array } "default-output-classes" set-word-prop : inline-unless-intrinsic ( word -- ) dup '[ _ swap over "intrinsic" word-prop + "always-inline-simd-intrinsics" get not swap and ! word node intrinsic [ try-intrinsic [ drop f ] [ def>> ] if ] - [ def>> ] if* + [ drop def>> ] if* ] "custom-inlining" set-word-prop ; diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index 30db6d5e13..eb0e7b1dc8 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -126,7 +126,7 @@ PRIVATE> :: (simd-v+-) ( a b rep -- c ) a b rep 2>rep-array :> ( a' b' ) rep :> c' - 0 rep length 1 - 2 [| n | + 0 rep rep-length 1 - 2 [| n | n a' nth-unsafe n b' nth-unsafe - n c' set-nth-unsafe @@ -151,12 +151,12 @@ PRIVATE> : (simd-sum) ( a rep -- n ) [ + ] components-reduce ; : (simd-vabs) ( a rep -- c ) [ abs ] components-map ; : (simd-vbitand) ( a b rep -- c ) [ bitand ] bitwise-components-2map ; -: (simd-vbitandn) ( a b rep -- c ) [ [ not ] dip bitand ] bitwise-components-2map ; +: (simd-vbitandn) ( a b rep -- c ) [ [ bitnot ] dip bitand ] bitwise-components-2map ; : (simd-vbitor) ( a b rep -- c ) [ bitor ] bitwise-components-2map ; : (simd-vbitxor) ( a b rep -- c ) [ bitxor ] bitwise-components-2map ; : (simd-vbitnot) ( a rep -- c ) [ bitnot ] bitwise-components-map ; : (simd-vand) ( a b rep -- c ) [ bitand ] bitwise-components-2map ; -: (simd-vandn) ( a b rep -- c ) [ [ not ] dip bitand ] bitwise-components-2map ; +: (simd-vandn) ( a b rep -- c ) [ [ bitnot ] dip bitand ] bitwise-components-2map ; : (simd-vor) ( a b rep -- c ) [ bitor ] bitwise-components-2map ; : (simd-vxor) ( a b rep -- c ) [ bitxor ] bitwise-components-2map ; : (simd-vnot) ( a rep -- c ) [ bitnot ] bitwise-components-map ; diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index b590589345..98ed68a906 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -120,7 +120,7 @@ CONSTANT: vector-words simd-classes [ [ name>> "-boa" append ] [ vocabulary>> ] bi lookup ] map ; : check-optimizer ( seq quot eq-quot -- failures ) - '[ + dup '[ @ [ dup [ class ] { } map-as ] dip '[ _ declare @ ] { @@ -128,8 +128,9 @@ CONSTANT: vector-words [ "print-checks" get [ [ . ] bi@ ] [ 2drop ] if ] [ [ [ call ] dip call ] call( quot quot -- result ) ] [ [ [ call ] dip compile-call ] call( quot quot -- result ) ] + [ [ t "always-inline-simd-intrinsics" [ [ call ] dip compile-call ] with-variable ] call( quot quot -- result ) ] } 2cleave - @ not + [ drop @ ] [ nip @ ] 3bi and not ] filter ; inline "== Checking -new constructors" print