From 9184254f2cb29add8f4c68a6b3833cf9f8267efc Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Sun, 1 Nov 2009 22:10:28 -0600
Subject: [PATCH 01/46] move define-inline-method from classes.struct.private
 to generic.parser

---
 basis/classes/struct/struct.factor | 3 ---
 core/generic/parser/parser.factor  | 3 +++
 2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/basis/classes/struct/struct.factor b/basis/classes/struct/struct.factor
index d5e5fdc6c3..f86f3c9d97 100755
--- a/basis/classes/struct/struct.factor
+++ b/basis/classes/struct/struct.factor
@@ -189,9 +189,6 @@ M: struct-c-type c-struct? drop t ;
     \ cleave [ ] 2sequence
     \ output>array [ ] 2sequence ;
 
-: define-inline-method ( class generic quot -- )
-    [ create-method-in ] dip [ define ] [ drop make-inline ] 2bi ;
-
 : (define-struct-slot-values-method) ( class -- )
     [ \ struct-slot-values ] [ struct-slot-values-quot ] bi
     define-inline-method ;
diff --git a/core/generic/parser/parser.factor b/core/generic/parser/parser.factor
index ce048c41da..11fb2b5b42 100644
--- a/core/generic/parser/parser.factor
+++ b/core/generic/parser/parser.factor
@@ -13,6 +13,9 @@ ERROR: not-in-a-method-error ;
 : create-method-in ( class generic -- method )
     create-method dup set-word dup save-location ;
 
+: define-inline-method ( class generic quot -- )
+    [ create-method-in ] dip [ define ] [ drop make-inline ] 2bi ;
+
 : CREATE-METHOD ( -- method )
     scan-word bootstrap-word scan-word create-method-in ;
 

From e0ba0c5539809c661625d29b2492e2c3c0feeb59 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Mon, 2 Nov 2009 14:21:19 -0600
Subject: [PATCH 02/46] genericize vector ops

---
 basis/math/vectors/vectors.factor | 265 +++++++++++++++++-------------
 core/sequences/sequences.factor   |   3 +-
 2 files changed, 157 insertions(+), 111 deletions(-)

diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor
index 63564f064d..f3dfcda18a 100644
--- a/basis/math/vectors/vectors.factor
+++ b/basis/math/vectors/vectors.factor
@@ -6,29 +6,47 @@ byte-arrays accessors locals ;
 QUALIFIED-WITH: alien.c-types c
 IN: math.vectors
 
-MIXIN: simd-128
-MIXIN: simd-256
+GENERIC: vneg ( u -- v )
+M: object vneg [ neg ] map ;
 
-GENERIC: element-type ( obj -- c-type )
-M: object element-type drop f ; inline
+GENERIC# v+n 1 ( u n -- v )
+M: object v+n [ + ] curry map ;
 
-: vneg ( u -- v ) [ neg ] map ;
+GENERIC: n+v ( n v -- w )
+M: object n+v [ + ] with map ;
 
-: v+n ( u n -- v ) [ + ] curry map ;
-: n+v ( n u -- v ) [ + ] with map ;
-: v-n ( u n -- v ) [ - ] curry map ;
-: n-v ( n u -- v ) [ - ] with map ;
+GENERIC# v-n 1 ( u n -- w )
+M: object v-n [ - ] curry map ;
 
-: v*n ( u n -- v ) [ * ] curry map ;
-: n*v ( n u -- v ) [ * ] with map ;
-: v/n ( u n -- v ) [ / ] curry map ;
-: n/v ( n u -- v ) [ / ] with map ;
+GENERIC: n-v ( n v -- w )
+M: object n-v [ - ] with map ;
 
-: v+   ( u v -- w ) [ + ] 2map ;
-: v-   ( u v -- w ) [ - ] 2map ;
-: [v-] ( u v -- w ) [ [-] ] 2map ;
-: v*   ( u v -- w ) [ * ] 2map ;
-: v/   ( u v -- w ) [ / ] 2map ;
+GENERIC# v*n 1 ( u n -- v )
+M: object v*n [ * ] curry map ;
+
+GENERIC: n*v ( n v -- w )
+M: object n*v [ * ] with map ;
+
+GENERIC# v/n 1 ( u n -- v )
+M: object v/n [ / ] curry map ;
+
+GENERIC: n/v ( n v -- w )
+M: object n/v [ / ] with map ;
+
+GENERIC: v+  ( u v -- w )
+M: object v+ [ + ] 2map ;
+
+GENERIC: v-  ( u v -- w )
+M: object v- [ - ] 2map ;
+
+GENERIC: [v-] ( u v -- w )
+M: object [v-] [ [-] ] 2map ;
+
+GENERIC: v* ( u v -- w )
+M: object v* [ * ] 2map ;
+
+GENERIC: v/ ( u v -- w )
+M: object v/ [ / ] 2map ;
 
 <PRIVATE
 
@@ -37,69 +55,60 @@ M: object element-type drop f ; inline
 
 PRIVATE>
 
-: vmax ( u v -- w ) [ [ float-max ] [ max ] if-both-floats ] 2map ;
-: vmin ( u v -- w ) [ [ float-min ] [ min ] if-both-floats ] 2map ;
+GENERIC: vmax ( u v -- w )
+M: object vmax [ [ float-max ] [ max ] if-both-floats ] 2map ;
 
-: v+- ( u v -- w )
+GENERIC: vmin ( u v -- w )
+M: object vmin [ [ float-min ] [ min ] if-both-floats ] 2map ;
+
+GENERIC: v+- ( u v -- w )
+M: object v+-
     [ t ] 2dip
     [ [ not ] 2dip pick [ + ] [ - ] if ] 2map
     nip ;
 
-<PRIVATE
+GENERIC: vs+ ( u v -- w )
+M: object vs+ [ + ] 2map ;
 
-: 2saturate-map ( u v quot -- w )
-    pick element-type '[ @ _ c-type-clamp ] 2map ; inline
+GENERIC: vs- ( u v -- w )
+M: object vs- [ - ] 2map ;
 
-PRIVATE>
+GENERIC: vs* ( u v -- w )
+M: object vs* [ * ] 2map ;
 
-: vs+ ( u v -- w ) [ + ] 2saturate-map ;
-: vs- ( u v -- w ) [ - ] 2saturate-map ;
-: vs* ( u v -- w ) [ * ] 2saturate-map ;
+GENERIC: vabs ( u -- v )
+M: object vabs [ abs ] map ;
 
-: vabs ( u -- v ) [ abs ] map ;
-: vsqrt ( u -- v ) [ >float fsqrt ] map ;
+GENERIC: vsqrt ( u -- v )
+M: object vsqrt [ >float fsqrt ] map ;
 
 <PRIVATE
 
-: fp-bitwise-op ( x y seq quot -- z )
-    swap element-type {
-        { c:double [ [ [ double>bits ] bi@ ] dip call bits>double ] }
-        { c:float  [ [ [ float>bits ] bi@ ] dip call bits>float   ] }
-        [ drop call ]
-    } case ; inline
-
-: fp-bitwise-unary ( x seq quot -- z )
-    swap element-type {
-        { c:double [ [ double>bits ] dip call bits>double ] }
-        { c:float  [ [ float>bits  ] dip call bits>float  ] }
-        [ drop call ]
-    } case ; inline
-
-: element>bool ( x seq -- ? )
-    element-type [ [ f ] when-zero ] when ; inline
-
 : bitandn ( x y -- z ) [ bitnot ] dip bitand ; inline
 
-GENERIC: new-underlying ( underlying seq -- seq' )
-
-: change-underlying ( seq quot -- seq' )
-    '[ underlying>> @ ] keep new-underlying ; inline
-
 PRIVATE>
 
-: vbitand ( u v -- w ) over '[ _ [ bitand ] fp-bitwise-op ] 2map ;
-: vbitandn ( u v -- w ) over '[ _ [ bitandn ] fp-bitwise-op ] 2map ;
-: vbitor ( u v -- w ) over '[ _ [ bitor ] fp-bitwise-op ] 2map ;
-: vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ;
-: vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ;
+GENERIC: vbitand ( u v -- w )
+M: object vbitand [ bitand ] 2map ;
+GENERIC: vbitandn ( u v -- w )
+M: object vbitandn [ bitandn ] 2map ;
+GENERIC: vbitor ( u v -- w )
+M: object vbitor [ bitor ] 2map ;
+GENERIC: vbitxor ( u v -- w )
+M: object vbitxor [ bitxor ] 2map ;
+GENERIC: vbitnot ( u -- w )
+M: object vbitnot [ bitnot ] 2map ;
 
-:: vbroadcast ( u n -- v ) u length n u nth <repetition> u like ;
+GENERIC# vbroadcast 1 ( u n -- v )
+M:: object vbroadcast ( u n -- v ) u length n u nth <repetition> u like ;
 
-: vshuffle-elements ( u perm -- v )
+GENERIC# vshuffle-elements 1 ( u perm -- v )
+M: object vshuffle-elements
     over length 0 pad-tail
     swap [ '[ _ nth ] ] keep map-as ;
 
-: vshuffle-bytes ( u perm -- v )
+GENERIC# vshuffle-bytes 1 ( u perm -- v )
+M: object vshuffle-bytes
     underlying>> [
         swap [ '[ 15 bitand _ nth ] ] keep map-as
     ] curry change-underlying ;
@@ -107,43 +116,72 @@ PRIVATE>
 GENERIC: vshuffle ( u perm -- v )
 M: array vshuffle ( u perm -- v )
     vshuffle-elements ; inline
-M: simd-128 vshuffle ( u perm -- v )
-    vshuffle-bytes ; inline
 
-: vlshift ( u n -- w ) '[ _ shift ] map ;
-: vrshift ( u n -- w ) neg '[ _ shift ] map ;
+GENERIC# vlshift 1 ( u n -- w )
+M: object vlshift '[ _ shift ] map ;
+GENERIC# vrshift 1 ( u n -- w )
+M: object vrshift neg '[ _ shift ] map ;
 
-: hlshift ( u n -- w ) '[ _ <byte-array> prepend 16 head ] change-underlying ;
-: hrshift ( u n -- w ) '[ _ <byte-array> append 16 tail* ] change-underlying ;
+GENERIC# hlshift 1 ( u n -- w )
+M: object hlshift '[ _ <byte-array> prepend 16 head ] change-underlying ;
+GENERIC# hrshift 1 ( u n -- w )
+M: object hrshift '[ _ <byte-array> append 16 tail* ] change-underlying ;
 
-: (vmerge-head) ( u v -- h )
-    over length 2 /i '[ _ head-slice ] bi@ [ zip ] keep concat-as ;
-: (vmerge-tail) ( u v -- t )
-    over length 2 /i '[ _ tail-slice ] bi@ [ zip ] keep concat-as ;
+GENERIC: (vmerge-head) ( u v -- h )
+M: object (vmerge-head) over length 2 /i '[ _ head-slice ] bi@ [ zip ] keep concat-as ;
+GENERIC: (vmerge-tail) ( u v -- t )
+M: object (vmerge-tail) over length 2 /i '[ _ tail-slice ] bi@ [ zip ] keep concat-as ;
 
-: (vmerge) ( u v -- h t )
+GENERIC: (vmerge) ( u v -- h t )
     [ (vmerge-head) ] [ (vmerge-tail) ] 2bi ; inline
 
-: vmerge ( u v -- w ) [ zip ] keep concat-as ;
+GENERIC: vmerge ( u v -- w )
+M: object vmerge [ zip ] keep concat-as ;
 
-: vand ( u v -- w )  over '[ [ _ element>bool ] bi@ and ] 2map ;
-: vandn ( u v -- w ) over '[ [ _ element>bool ] bi@ [ not ] dip and ] 2map ;
-: vor  ( u v -- w )  over '[ [ _ element>bool ] bi@ or  ] 2map ;
-: vxor ( u v -- w )  over '[ [ _ element>bool ] bi@ xor ] 2map ;
-: vnot ( u -- w )    dup '[ _ element>bool not ] map ;
+GENERIC: vand ( u v -- w )
+M: object vand [ and ] 2map ;
 
-: vall? ( v -- ? ) dup '[ _ element>bool ] all? ;
-: vany? ( v -- ? ) dup '[ _ element>bool ] any? ;
-: vnone? ( v -- ? ) dup '[ _ element>bool not ] all? ;
+GENERIC: vandn ( u v -- w )
+M: object vandn [ [ not ] dip and ] 2map ;
 
-: v<  ( u v -- w ) [ <   ] 2map ;
-: v<= ( u v -- w ) [ <=  ] 2map ;
-: v>= ( u v -- w ) [ >=  ] 2map ;
-: v>  ( u v -- w ) [ >   ] 2map ;
-: vunordered? ( u v -- w ) [ unordered? ] 2map ;
-: v=  ( u v -- w ) [ =   ] 2map ;
+GENERIC: vor  ( u v -- w )
+M: object vor  [ or  ] 2map ;
 
-: v? ( mask true false -- result )
+GENERIC: vxor ( u v -- w )
+M: object vxor [ xor ] 2map ;
+
+GENERIC: vnot ( u -- w )
+M: object vnot [ not ] map ;
+
+GENERIC: vall? ( v -- ? )
+M: object vall? [ ] all? ;
+
+GENERIC: vany? ( v -- ? )
+M: object vany? [ ] any? ;
+
+GENERIC: vnone? ( v -- ? )
+M: object vnone? [ not ] all? ;
+
+GENERIC: v<  ( u v -- w )
+M: object v<  [ <   ] 2map ;
+
+GENERIC: v<= ( u v -- w )
+M: object v<= [ <=  ] 2map ;
+
+GENERIC: v>= ( u v -- w )
+M: object v>= [ >=  ] 2map ;
+
+GENERIC: v>  ( u v -- w )
+M: object v>  [ >   ] 2map ;
+
+GENERIC: vunordered? ( u v -- w )
+M: object vunordered? [ unordered? ] 2map ;
+
+GENERIC: v=  ( u v -- w )
+M: object v=  [ =   ] 2map ;
+
+GENERIC: v? ( mask true false -- result )
+M: object v? 
     [ vand ] [ vandn ] bi-curry* bi vor ; inline
 
 :: vif ( mask true-quot false-quot -- result )
@@ -157,15 +195,22 @@ M: simd-128 vshuffle ( u perm -- v )
 : vceiling  ( u -- v ) [ ceiling ] map ;
 : vtruncate ( u -- v ) [ truncate ] map ;
 
-: vsupremum ( seq -- vmax ) [ ] [ vmax ] map-reduce ; 
-: vinfimum ( seq -- vmin ) [ ] [ vmin ] map-reduce ; 
+: vsupremum ( seq -- vmax ) [ ] [ vmax ] map-reduce ; inline
+: vinfimum ( seq -- vmin ) [ ] [ vmin ] map-reduce ; inline
 
-: v. ( u v -- x ) [ conjugate * ] [ + ] 2map-reduce ;
-: norm-sq ( v -- x ) [ absq ] [ + ] map-reduce ;
-: norm ( v -- x ) norm-sq sqrt ;
-: normalize ( u -- v ) dup norm v/n ;
+GENERIC: v. ( u v -- x )
+M: object v. [ conjugate * ] [ + ] 2map-reduce ;
 
-: distance ( u v -- x ) [ - absq ] [ + ] 2map-reduce sqrt ;
+GENERIC: norm-sq ( v -- x )
+M: object norm-sq [ absq ] [ + ] map-reduce ;
+
+GENERIC: norm ( v -- x )
+M: object norm norm-sq sqrt ;
+
+: normalize ( u -- v ) dup norm v/n ; inline
+
+GENERIC: distance ( u v -- x )
+M: object distance [ - absq ] [ + ] 2map-reduce sqrt ;
 
 : set-axis ( u v axis -- w )
     [ [ zero? 2over ? ] dip swap nth ] map-index 2nip ;
@@ -198,27 +243,27 @@ PRIVATE>
 : v~ ( a b epsilon -- ? )
     [ ~ ] curry 2all? ; inline
 
-HINTS: vneg { array } ;
-HINTS: norm-sq { array } ;
-HINTS: norm { array } ;
-HINTS: normalize { array } ;
-HINTS: distance { array array } ;
+HINTS: M\ object vneg { array } ;
+HINTS: M\ object norm-sq { array } ;
+HINTS: M\ object norm { array } ;
+HINTS: M\ object distance { array array } ;
 
-HINTS: n*v { object array } ;
-HINTS: v*n { array object } ;
-HINTS: n/v { array } ;
-HINTS: v/n { array object } ;
+HINTS: M\ object n*v { object array } ;
+HINTS: M\ object v*n { array object } ;
+HINTS: M\ object n/v { object array } ;
+HINTS: M\ object v/n { array object } ;
 
-HINTS: v+ { array array } ;
-HINTS: v- { array array } ;
-HINTS: v* { array array } ;
-HINTS: v/ { array array } ;
-HINTS: vmax { array array } ;
-HINTS: vmin { array array } ;
-HINTS: v. { array array } ;
+HINTS: M\ object v+ { array array } ;
+HINTS: M\ object v- { array array } ;
+HINTS: M\ object v* { array array } ;
+HINTS: M\ object v/ { array array } ;
+HINTS: M\ object vmax { array array } ;
+HINTS: M\ object vmin { array array } ;
+HINTS: M\ object v. { array array } ;
 
 HINTS: vlerp { array array array } ;
 HINTS: vnlerp { array array object } ;
 
 HINTS: bilerp { object object object object array } ;
 HINTS: trilerp { object object object object object object object object array } ;
+
diff --git a/core/sequences/sequences.factor b/core/sequences/sequences.factor
index 1bcedb1d15..e8c24a3b96 100755
--- a/core/sequences/sequences.factor
+++ b/core/sequences/sequences.factor
@@ -929,7 +929,8 @@ PRIVATE>
 : trim ( seq quot -- newseq )
     [ trim-slice ] [ drop ] 2bi like ; inline
 
-: sum ( seq -- n ) 0 [ + ] binary-reduce ;
+GENERIC: sum ( seq -- n )
+M: object sum 0 [ + ] binary-reduce ; inline
 
 : product ( seq -- n ) 1 [ * ] binary-reduce ;
 

From 9cf3ab3da1afb5699919d3f5511df6bca17fbf19 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Mon, 2 Nov 2009 14:24:29 -0600
Subject: [PATCH 03/46] redo math.vectors.simd to use generics for
 specialization

---
 basis/math/vectors/simd/simd.factor | 353 +++++++++++++++++++++++++---
 1 file changed, 325 insertions(+), 28 deletions(-)

diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor
index 388fed5f31..139060333c 100644
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@@ -1,42 +1,339 @@
-! Copyright (C) 2009 Slava Pestov.
-! See http://factorcode.org/license.txt for BSD license.
-USING: alien.c-types combinators fry kernel parser math math.parser
-math.vectors.simd.functor sequences splitting vocabs.generated
-vocabs.loader vocabs.parser words accessors vocabs compiler.units
-definitions ;
+! (c)2009 Slava Pestov, Joe Groff bsd license
+USING: math.vectors math.vectors.private ;
 QUALIFIED-WITH: alien.c-types c
 IN: math.vectors.simd
 
-ERROR: bad-base-type type ;
+DEFER: vconvert
+DEFER: simd-with
+DEFER: simd-boa
+DEFER: simd-cast
 
 <PRIVATE
 
-: simd-vocab ( base-type -- vocab )
-    name>> "math.vectors.simd.instances." prepend ;
+! Primitive SIMD constructors
 
-: parse-base-type ( c-type -- c-type )
-    dup { c:char c:uchar c:short c:ushort c:int c:uint c:longlong c:ulonglong c:float c:double } member-eq?
-    [ bad-base-type ] unless ;
+GENERIC: new-underlying ( underlying seq -- seq' )
 
-: forget-instances ( -- )
-    [
-        "math.vectors.simd.instances" child-vocabs
-        [ forget-vocab ] each
-    ] with-compilation-unit ;
+: make-underlying ( seq quot -- seq' )
+    dip new-underlying ; inline
+: change-underlying ( seq quot -- seq' )
+    '[ underlying>> @ ] keep new-underlying ; inline
+
+! SIMD intrinsics
+
+: (simd-v+)                ( a b rep -- c ) \ v+ bad-simd-call ;
+: (simd-v-)                ( a b rep -- c ) \ v- bad-simd-call ;
+: (simd-vneg)              ( a   rep -- c ) \ vneg bad-simd-call ;
+: (simd-v+-)               ( a b rep -- c ) \ v+- bad-simd-call ;
+: (simd-vs+)               ( a b rep -- c ) \ vs+ bad-simd-call ;
+: (simd-vs-)               ( a b rep -- c ) \ vs- bad-simd-call ;
+: (simd-vs*)               ( a b rep -- c ) \ vs* bad-simd-call ;
+: (simd-v*)                ( a b rep -- c ) \ v* bad-simd-call ;
+: (simd-v/)                ( a b rep -- c ) \ v/ bad-simd-call ;
+: (simd-vmin)              ( a b rep -- c ) \ vmin bad-simd-call ;
+: (simd-vmax)              ( a b rep -- c ) \ vmax bad-simd-call ;
+: (simd-v.)                ( a b rep -- n ) \ v. bad-simd-call ;
+: (simd-vsqrt)             ( a   rep -- c ) \ vsqrt bad-simd-call ;
+: (simd-sum)               ( a b rep -- n ) \ sum bad-simd-call ;
+: (simd-vabs)              ( a   rep -- c ) \ vabs bad-simd-call ;
+: (simd-vbitand)           ( a b rep -- c ) \ vbitand bad-simd-call ;
+: (simd-vbitandn)          ( a b rep -- c ) \ vbitandn bad-simd-call ;
+: (simd-vbitor)            ( a b rep -- c ) \ vbitor bad-simd-call ;
+: (simd-vbitxor)           ( a b rep -- c ) \ vbitxor bad-simd-call ;
+: (simd-vbitnot)           ( a b rep -- c ) \ vbitnot bad-simd-call ;
+: (simd-vand)              ( a b rep -- c ) \ vand bad-simd-call ;
+: (simd-vandn)             ( a b rep -- c ) \ vandn bad-simd-call ;
+: (simd-vor)               ( a b rep -- c ) \ vor bad-simd-call ;
+: (simd-vxor)              ( a b rep -- c ) \ vxor bad-simd-call ;
+: (simd-vnot)              ( a b rep -- c ) \ vnot bad-simd-call ;
+: (simd-vlshift)           ( a n rep -- c ) \ vlshift bad-simd-call ;
+: (simd-vrshift)           ( a n rep -- c ) \ vrshift bad-simd-call ;
+: (simd-hlshift)           ( a n rep -- c ) \ hlshift bad-simd-call ;
+: (simd-hrshift)           ( a n rep -- c ) \ hrshift bad-simd-call ;
+: (simd-vshuffle-elements) ( a n rep -- c ) \ vshuffle-elements bad-simd-call ;
+: (simd-vshuffle-bytes)    ( a b rep -- c ) \ vshuffle-bytes bad-simd-call ;
+: (simd-vmerge-head)       ( a b rep -- c ) \ (vmerge-head) bad-simd-call ;
+: (simd-vmerge-tail)       ( a b rep -- c ) \ (vmerge-tail) bad-simd-call ;
+: (simd-v<=)               ( a b rep -- c ) \ v<= bad-simd-call ;
+: (simd-v<)                ( a b rep -- c ) \ v< bad-simd-call ;
+: (simd-v=)                ( a b rep -- c ) \ v= bad-simd-call ;
+: (simd-v>)                ( a b rep -- c ) \ v> bad-simd-call ;
+: (simd-v>=)               ( a b rep -- c ) \ v>= bad-simd-call ;
+: (simd-vunordered?)       ( a b rep -- c ) \ vunordered? bad-simd-call ;
+: (simd-vany?)             ( a   rep -- ? ) \ vany? bad-simd-call ;
+: (simd-vall?)             ( a   rep -- ? ) \ vall? bad-simd-call ;
+: (simd-vnone?)            ( a   rep -- ? ) \ vnone? bad-simd-call ;
+: (simd-v>float)           ( a   rep -- c ) \ vconvert bad-simd-call ;
+: (simd-v>integer)         ( a   rep -- c ) \ vconvert bad-simd-call ;
+: (simd-vpack-signed)      ( a   rep -- c ) \ vconvert bad-simd-call ;
+: (simd-vpack-unsigned)    ( a   rep -- c ) \ vconvert bad-simd-call ;
+: (simd-vunpack-head)      ( a   rep -- c ) \ vconvert bad-simd-call ;
+: (simd-vunpack-tail)      ( a   rep -- c ) \ vconvert bad-simd-call ;
+: (simd-with)              (   n rep -- v ) \ simd-with bad-simd-call ;
+: (simd-gather-2)          ( m n rep -- v ) \ simd-boa bad-simd-call ;
+: (simd-gather-4)          ( m n o p rep -- v ) \ simd-boa bad-simd-call ;
+: (simd-select)            ( a n rep -- n ) \ nth bad-simd-call ;
+
+: alien-vector     ( c-ptr n rep -- value ) \ alien-vector bad-simd-call ;
+: set-alien-vector ( c-ptr n rep -- value ) \ set-alien-vector bad-simd-call ;
+
+: alien-vector-aligned     ( c-ptr n rep -- value ) \ alien-vector-aligned bad-simd-call ;
+: set-alien-vector-aligned ( c-ptr n rep -- value ) \ set-alien-vector-aligned bad-simd-call ;
+
+! Helper for boolean vector literals
+
+: vector-true-value ( class -- value )
+    { c:float c:double } member? [ -1 bits>double ] [ -1 ] if ; foldable
+
+: vector-false-value ( type -- value )
+    { c:float c:double } member? [ 0.0 ] [ 0 ] if ; foldable
+
+: boolean>element ( bool/elt type -- elt )
+    swap {
+        { t [ vector-true-value  ] }
+        { f [ vector-false-value ] }
+        [ nip ]
+    } case ; inline
 
 PRIVATE>
 
-: define-simd-vocab ( type -- vocab )
-    parse-base-type
-    [ simd-vocab ] keep '[
-        _
-        [ define-simd-128 ]
-        [ define-simd-256 ] bi
-    ] generate-vocab ;
+! SIMD base type
 
-SYNTAX: SIMD:
-    scan-word define-simd-vocab use-vocab ;
+TUPLE: simd-128
+    { underlying byte-array read-only initial: $[ 16 <byte-array> ] } ;
 
-SYNTAX: SIMDS:
-    \ ; parse-until [ define-simd-vocab use-vocab ] each ;
+GENERIC: simd-element-type ( obj -- c-type )
+GENERIC: simd-rep ( simd -- rep )
+
+: rep-length ( rep -- n )
+    16 swap rep-component-type heap-size /i ; foldable
+
+<< <PRIVATE
+
+! SIMD concrete type functor
+
+FUNCTOR: define-simd-128 ( T -- )
+
+A      DEFINES-CLASS ${T}
+A-rep  IS            ${T}-rep
+>A     DEFINES       >${T}
+A-boa  DEFINES       ${T}-boa
+A-with DEFINES       ${T}-with
+A-cast DEFINES       ${T}-cast
+A{     DEFINES       ${T}{
+
+ELT   [ A-rep rep-component-type ]
+N     [ A-rep rep-length ]
+
+SET-NTH [ ELT dup c:c-setter c:array-accessor ]
+
+WHERE
+
+TUPLE: A < simd-128 ;
+
+M: A new-underlying    drop \ A boa ; inline
+M: A simd-rep          drop A-rep ; inline
+M: A simd-element-type drop ELT ; inline
+M: A length            drop N ; inline
+
+M: A set-nth-unsafe
+    [ ELT boolean>element ] 2dip
+    underlying>> SET-NTH call ; inline
+
+: >A ( seq -- simd ) \ A new clone-like ; inline
+
+M: A like drop dup \ A instance? [ >A ] unless ; inline
+
+: A-with ( n -- v ) \ A new simd-with ; inline
+: A-cast ( v -- v' ) \ A new simd-cast ; inline
+: A-boa ( ...n -- v ) \ A new simd-boa ; inline
+
+M: A pprint-delims drop \ A{ \ } ;
+SYNTAX: A{ \ } [ >A ] parse-literal ;
+
+c:<c-type>
+    byte-array >>class
+    A >>boxed-class
+    [ A-rep alien-vector \ A boa ] >>getter
+    [ [ underlying>> ] 2dip A-rep set-alien-vector ] >>setter
+    16 >>size
+    16 >>align
+    A-rep >>rep
+\ A c:typedef
+
+;FUNCTOR
+
+SYNTAX: SIMD-128:
+    scan scan-word define-simd-128 ;
+
+PRIVATE> >>
+
+SIMD-128: char-16
+SIMD-128: uchar-16
+SIMD-128: short-8
+SIMD-128: ushort-8
+SIMD-128: int-4
+SIMD-128: uint-4
+SIMD-128: longlong-2
+SIMD-128: ulonglong-2
+SIMD-128: float-4
+SIMD-128: double-2
+
+ERROR: bad-simd-call word ;
+ERROR: bad-simd-length got expected ;
+
+: assert-positive ( x -- y ) ;
+
+! SIMD vectors as sequences
+
+M: simd-128 clone [ clone ] change-underlying ; inline
+M: simd-128 length simd-rep rep-length ; inline
+M: simd-128 nth-unsafe tuck simd-rep (simd-select) ; inline
+M: simd-128 c:byte-length drop 16 ; inline
+
+M: simd-128 new-sequence
+    2dup length =
+    [ nip [ 16 (byte-array) ] make-underlying ]
+    [ length bad-simd-length ] if ; inline
+
+M: simd-128 equal?
+    [ v= vall? ] [ 2drop f ] if-vectors-match ; inline
+
+M: simd-128 >pprint-sequence ;
+M: simd-128 pprint* pprint-object ;
+
+INSTANCE: simd-128 sequence
+
+! Unboxers for SIMD operations
+
+<PRIVATE
+
+: if-both-vectors ( a b t f -- )
+    [ 2dup [ simd-128? ] both? ] 2dip if ; inline
+
+: if-both-vectors-match ( a b t f -- )
+    [ 2dup [ [ simd-128? ] both? ] [ [ simd-rep ] bi@ eq? ] 2bi and ]
+    2dip if ; inline
+
+: simd-construct-op ( exemplar quot: ( rep -- v ) -- v )
+    [ dup simd-rep ] dip curry make-underlying ; inline
+
+: simd-unbox ( a -- a (a) a-rep )
+    [ ] [ underlying>> ] [ simd-rep ] tri ; inline
+
+: simd-v->v-op ( a quot: ( (a) rep -- (c) ) -- c )
+    [ simd-unbox ] dip 2curry make-underlying ; inline
+
+: simd-v->n-op ( a quot: ( (a) rep -- n ) -- n )
+    [ [ underlying>> ] [ simd-rep ] bi ] dip call ; inline
+
+: ((simd-vv->v-op)) ( a b quot: ( (a) (b) rep -- (c) ) -- c )
+    [ simd-unbox ] [ underlying>> swap ] [ 3curry ] tri* make-underlying ; inline
+
+: ((simd-vv->n-op)) ( a b quot: ( (a) (b) rep -- n ) -- n )
+    [ [ underlying>> ] [ simd-rep ] bi ]
+    [ underlying>> swap ] [ ] tri* call ; inline
+    
+: (simd-vv->v-op) ( a b quot: ( (a) (b) rep -- (c) ) fallback-quot -- c )
+    [ '[ _ ((simd-vv->v-op)) ] ] dip if-both-vectors-match ; inline
+
+: (simd-vv'->v-op) ( a b quot: ( (a) (b) rep -- (c) ) fallback-quot -- c )
+    [ '[ _ ((simd-vv->v-op)) ] ] dip if-both-vectors ; inline
+
+: (simd-vv->n-op) ( a b quot: ( (a) (b) rep -- n ) fallback-quot -- n )
+    [ '[ _ ((simd-vv->n-op)) ] ] dip if-both-vectors-match ; inline
+
+: (simd-method-fallback) ( accum word -- accum )
+    [ current-method get \ (call-next-method) [ ] 2sequence suffix! ]
+    dip suffix! ; 
+
+SYNTAX: simd-vv->v-op
+    \ (simd-vv->v-op) (simd-method-fallback) ; 
+SYNTAX: simd-vv'->v-op
+    \ (simd-vv'->v-op) (simd-method-fallback) ;
+SYNTAX: simd-vv->n-op
+    \ (simd-vv->n-op) (simd-method-fallback) ; 
+
+PRIVATE>
+
+! SIMD constructors
+
+: simd-with ( n seq -- v )
+    [ (simd-with) ] simd-construct-op ; inline
+
+MACRO: simd-boa ( seq -- )
+    dup length {
+        { 2 [ '[ _ dup [ (simd-gather-2) ] simd-construct-op ] ] }
+        { 4 [ '[ _ dup [ (simd-gather-4) ] simd-construct-op ] ] }
+        [ '[ _ _ nsequence ] ]
+    } case ;
+
+: simd-cast ( v seq -- v' )
+    [ underlying>> ] dip new-underlying ; inline
+
+! SIMD primitive operations
+
+M: simd-128 v+                 [ (simd-v+)                 ] simd-vv->v-op ; inline
+M: simd-128 v-                 [ (simd-v-)                 ] simd-vv->v-op ; inline
+M: simd-128 vneg               [ (simd-vneg)               ] simd-v->v-op  ; inline
+M: simd-128 v+-                [ (simd-v+-)                ] simd-vv->v-op ; inline
+M: simd-128 vs+                [ (simd-vs+)                ] simd-vv->v-op ; inline
+M: simd-128 vs-                [ (simd-vs-)                ] simd-vv->v-op ; inline
+M: simd-128 vs*                [ (simd-vs*)                ] simd-vv->v-op ; inline
+M: simd-128 v*                 [ (simd-v*)                 ] simd-vv->v-op ; inline
+M: simd-128 v/                 [ (simd-v/)                 ] simd-vv->v-op ; inline
+M: simd-128 vmin               [ (simd-vmin)               ] simd-vv->v-op ; inline
+M: simd-128 vmax               [ (simd-vmax)               ] simd-vv->v-op ; inline
+M: simd-128 v.                 [ (simd-v.)                 ] simd-vv->n-op ; inline
+M: simd-128 vsqrt              [ (simd-vsqrt)              ] simd-v->v-op  ; inline
+M: simd-128 sum                [ (simd-sum)                ] simd-vv->n-op ; inline
+M: simd-128 vabs               [ (simd-vabs)               ] simd-v->v-op  ; inline
+M: simd-128 vbitand            [ (simd-vbitand)            ] simd-vv->v-op ; inline
+M: simd-128 vbitandn           [ (simd-vbitandn)           ] simd-vv->v-op ; inline
+M: simd-128 vbitor             [ (simd-vbitor)             ] simd-vv->v-op ; inline
+M: simd-128 vbitxor            [ (simd-vbitxor)            ] simd-vv->v-op ; inline
+M: simd-128 vbitnot            [ (simd-vbitnot)            ] simd-vv->v-op ; inline
+M: simd-128 vand               [ (simd-vand)               ] simd-vv->v-op ; inline
+M: simd-128 vandn              [ (simd-vandn)              ] simd-vv->v-op ; inline
+M: simd-128 vor                [ (simd-vor)                ] simd-vv->v-op ; inline
+M: simd-128 vxor               [ (simd-vxor)               ] simd-vv->v-op ; inline
+M: simd-128 vnot               [ (simd-vnot)               ] simd-vv->v-op ; inline
+M: simd-128 vlshift            [ (simd-vlshift)            ] simd-vn->v-op ; inline
+M: simd-128 vrshift            [ (simd-vrshift)            ] simd-vn->v-op ; inline
+M: simd-128 hlshift            [ (simd-hlshift)            ] simd-vn->v-op ; inline
+M: simd-128 hrshift            [ (simd-hrshift)            ] simd-vn->v-op ; inline
+M: simd-128 vshuffle-elements  [ (simd-vshuffle-elements)  ] simd-vn->v-op ; inline
+M: simd-128 vshuffle-bytes     [ (simd-vshuffle-bytes)     ] simd-vv->v-op ; inline
+M: simd-128 vmerge-head        [ (simd-vmerge-head)        ] simd-vv->v-op ; inline
+M: simd-128 vmerge-tail        [ (simd-vmerge-tail)        ] simd-vv->v-op ; inline
+M: simd-128 v<=                [ (simd-v<=)                ] simd-vv->v-op ; inline
+M: simd-128 v<                 [ (simd-v<)                 ] simd-vv->v-op ; inline
+M: simd-128 v=                 [ (simd-v=)                 ] simd-vv->v-op ; inline
+M: simd-128 v>                 [ (simd-v>)                 ] simd-vv->v-op ; inline
+M: simd-128 v>=                [ (simd-v>=)                ] simd-vv->v-op ; inline
+M: simd-128 vunordered?        [ (simd-vunordered?)        ] simd-vv->v-op ; inline
+M: simd-128 vany?              [ (simd-vany?)              ] simd-v->n-op  ; inline
+M: simd-128 vall?              [ (simd-vall?)              ] simd-v->n-op  ; inline
+M: simd-128 vnone?             [ (simd-vnone?)             ] simd-v->n-op  ; inline
+
+! SIMD high-level specializations
+
+M: simd-128 vbroadcast [ swap nth ] keep simd-with ; inline
+M: simd-128 n+v [ simd-with ] keep v+ ; inline
+M: simd-128 n-v [ simd-with ] keep v- ; inline
+M: simd-128 n*v [ simd-with ] keep v* ; inline
+M: simd-128 n/v [ simd-with ] keep v/ ; inline
+M: simd-128 v+n over simd-with v+
+M: simd-128 v-n over simd-with v- ; inline
+M: simd-128 v*n over simd-with v* ; inline
+M: simd-128 v/n over simd-with v/ ; inline
+M: simd-128 norm-sq dup v. assert-positive ; inline
+M: simd-128 norm      norm-sq sqrt ; inline
+M: simd-128 normalize dup norm v/n ; inline
+M: simd-128 distance  v- norm ; inline
+
+! misc
+
+M: simd-128 vshuffle ( u perm -- v )
+    vshuffle-bytes ; inline
 

From 73d2a756440f41a0913781afd15673937b739fae Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Mon, 2 Nov 2009 15:00:39 -0600
Subject: [PATCH 04/46] remove math.vectors .specialization, .simd.functor,
 .simd.intrinsics

---
 basis/math/vectors/simd/functor/authors.txt   |   1 -
 .../math/vectors/simd/functor/functor.factor  | 522 ------------------
 .../math/vectors/simd/intrinsics/authors.txt  |   1 -
 .../simd/intrinsics/intrinsics-tests.factor   |  18 -
 .../vectors/simd/intrinsics/intrinsics.factor | 207 -------
 .../specialization-tests.factor               |  28 -
 .../specialization/specialization.factor      | 207 -------
 7 files changed, 984 deletions(-)
 delete mode 100644 basis/math/vectors/simd/functor/authors.txt
 delete mode 100644 basis/math/vectors/simd/functor/functor.factor
 delete mode 100644 basis/math/vectors/simd/intrinsics/authors.txt
 delete mode 100644 basis/math/vectors/simd/intrinsics/intrinsics-tests.factor
 delete mode 100644 basis/math/vectors/simd/intrinsics/intrinsics.factor
 delete mode 100644 basis/math/vectors/specialization/specialization-tests.factor
 delete mode 100644 basis/math/vectors/specialization/specialization.factor

diff --git a/basis/math/vectors/simd/functor/authors.txt b/basis/math/vectors/simd/functor/authors.txt
deleted file mode 100644
index d4f5d6b3ae..0000000000
--- a/basis/math/vectors/simd/functor/authors.txt
+++ /dev/null
@@ -1 +0,0 @@
-Slava Pestov
\ No newline at end of file
diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor
deleted file mode 100644
index 480981d165..0000000000
--- a/basis/math/vectors/simd/functor/functor.factor
+++ /dev/null
@@ -1,522 +0,0 @@
-! Copyright (C) 2009 Slava Pestov.
-! See http://factorcode.org/license.txt for BSD license.
-USING: accessors assocs byte-arrays classes classes.algebra effects fry
-functors generalizations kernel literals locals math math.functions
-math.vectors math.vectors.private math.vectors.simd.intrinsics
-math.vectors.conversion.backend
-math.vectors.specialization parser prettyprint.custom sequences
-sequences.private strings words definitions macros cpu.architecture
-namespaces arrays quotations combinators combinators.short-circuit sets
-layouts ;
-QUALIFIED-WITH: alien.c-types c
-QUALIFIED: math.private
-IN: math.vectors.simd.functor
-
-ERROR: bad-length got expected ;
-
-: vector-true-value ( class -- value )
-    {
-        { [ dup integer class<= ] [ drop -1 ] }
-        { [ dup float   class<= ] [ drop -1 bits>double ] }
-    } cond ; foldable
-
-: vector-false-value ( class -- value )
-    {
-        { [ dup integer class<= ] [ drop 0   ] }
-        { [ dup float   class<= ] [ drop 0.0 ] }
-    } cond ; foldable
-
-: boolean>element ( bool/elt class -- elt )
-    swap {
-        { t [ vector-true-value  ] }
-        { f [ vector-false-value ] }
-        [ nip ]
-    } case ; inline
-
-MACRO: simd-boa ( rep class -- simd-array )
-    [ rep-components ] [ new ] bi* '[ _ _ nsequence ] ;
-
-: can-be-unboxed? ( type -- ? )
-    {
-        { c:float [ \ math.private:float+ "intrinsic" word-prop ] }
-        { c:double [ \ math.private:float+ "intrinsic" word-prop ] }
-        [ c:heap-size cell < ]
-    } case ;
-
-: simd-boa-fast? ( rep -- ? )
-    [ dup rep-gather-word supported-simd-op? ]
-    [ rep-component-type can-be-unboxed? ]
-    bi and ;
-
-:: define-boa-custom-inlining ( word rep class -- )
-    word [
-        drop
-        rep simd-boa-fast? [
-            [ rep (simd-boa) class boa ]
-        ] [ word def>> ] if
-    ] "custom-inlining" set-word-prop ;
-
-: simd-with ( rep class x -- simd-array )
-    [ rep-components ] [ new ] [ '[ _ ] ] tri* swap replicate-as ; inline
-
-: simd-with/nth-fast? ( rep -- ? )
-    [ \ (simd-vshuffle-elements) supported-simd-op? ]
-    [ rep-component-type can-be-unboxed? ]
-    bi and ;
-
-:: define-with-custom-inlining ( word rep class -- )
-    word [
-        drop
-        rep simd-with/nth-fast? [
-            [ rep rep-coerce rep (simd-with) class boa ]
-        ] [ word def>> ] if
-    ] "custom-inlining" set-word-prop ;
-
-: simd-nth-fast ( rep -- quot )
-    [ rep-components ] keep
-    '[ swap _ '[ _ _ (simd-select) ] 2array ] map-index
-    '[ swap >fixnum _ case ] ;
-
-: simd-nth-slow ( rep -- quot )
-    rep-component-type dup c:c-type-getter-boxer c:array-accessor ;
-
-MACRO: simd-nth ( rep -- x )
-    dup simd-with/nth-fast? [ simd-nth-fast ] [ simd-nth-slow ] if ;
-
-: boa-effect ( rep n -- effect )
-    [ rep-components ] dip *
-    [ CHAR: a + 1string ] map
-    { "simd-vector" } <effect> ;
-
-: supported-simd-ops ( assoc rep -- assoc' )
-    [ simd-ops get ] dip 
-    '[ nip _ swap supported-simd-op? ] assoc-filter
-    '[ drop _ key? ] assoc-filter ;
-
-ERROR: bad-schema op schema ;
-
-:: op-wrapper ( op specials schemas -- wrapper )
-    op {
-        [ specials at ]
-        [ word-schema schemas at ]
-        [ dup word-schema bad-schema ]
-    } 1|| ;
-
-: low-level-ops ( simd-ops specials schemas -- alist )
-    '[ 1quotation over _ _ op-wrapper [ ] 2sequence ] assoc-map ;
-
-:: high-level-ops ( ctor elt-class -- assoc )
-    ! Some SIMD operations are defined in terms of others.
-    {
-        { vbroadcast [ swap nth ctor execute ] }
-        { n+v [ [ ctor execute ] dip v+ ] }
-        { v+n [ ctor execute v+ ] }
-        { n-v [ [ ctor execute ] dip v- ] }
-        { v-n [ ctor execute v- ] }
-        { n*v [ [ ctor execute ] dip v* ] }
-        { v*n [ ctor execute v* ] }
-        { n/v [ [ ctor execute ] dip v/ ] }
-        { v/n [ ctor execute v/ ] }
-        { norm-sq [ dup v. assert-positive ] }
-        { norm [ norm-sq sqrt ] }
-        { normalize [ dup norm v/n ] }
-    }
-    ! To compute dot product and distance with integer vectors, we
-    ! have to do things less efficiently, with integer overflow checks,
-    ! in the general case.
-    elt-class float = [ { distance [ v- norm ] } suffix ] when ;
-
-TUPLE: simd class elt-class ops special-wrappers schema-wrappers ctor rep ;
-
-: define-simd ( simd -- )
-    dup rep>> rep-component-type c:c-type-boxed-class >>elt-class
-    {
-        [ class>> ]
-        [ elt-class>> ]
-        [ [ ops>> ] [ special-wrappers>> ] [ schema-wrappers>> ] tri low-level-ops ]
-        [ rep>> supported-simd-ops ]
-        [ [ ctor>> ] [ elt-class>> ] bi high-level-ops assoc-union ]
-    } cleave
-    specialize-vector-words ;
-
-:: define-simd-128-type ( class rep -- )
-    c:<c-type>
-        byte-array >>class
-        class >>boxed-class
-        [ rep alien-vector class boa ] >>getter
-        [ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
-        16 >>size
-        8 >>align
-        rep >>rep
-    class c:typedef ;
-
-: (define-simd-128) ( simd -- )
-    simd-ops get >>ops
-    [ define-simd ]
-    [ [ class>> ] [ rep>> ] bi define-simd-128-type ] bi ;
-
-FUNCTOR: define-simd-128 ( T -- )
-
-N            [ 16 T c:heap-size /i ]
-
-A            DEFINES-CLASS ${T}-${N}
-A-boa        DEFINES ${A}-boa
-A-with       DEFINES ${A}-with
-A-cast       DEFINES ${A}-cast
->A           DEFINES >${A}
-A{           DEFINES ${A}{
-
-SET-NTH      [ T dup c:c-setter c:array-accessor ]
-
-A-rep        [ A name>> "-rep" append "cpu.architecture" lookup ]
-A-vv->v-op   DEFINES-PRIVATE ${A}-vv->v-op
-A-vn->v-op   DEFINES-PRIVATE ${A}-vn->v-op
-A-vv->n-op   DEFINES-PRIVATE ${A}-vv->n-op
-A-v->v-op    DEFINES-PRIVATE ${A}-v->v-op
-A-v->n-op    DEFINES-PRIVATE ${A}-v->n-op
-A-v-conversion-op DEFINES-PRIVATE ${A}-v-conversion-op
-A-vv-conversion-op DEFINES-PRIVATE ${A}-vv-conversion-op
-
-A-element-class [ A-rep rep-component-type c:c-type-boxed-class ]
-
-WHERE
-
-TUPLE: A
-{ underlying byte-array read-only initial: $[ 16 <byte-array> ] } ;
-
-INSTANCE: A simd-128
-
-M: A clone underlying>> clone \ A boa ; inline
-
-M: A length drop N ; inline
-
-M: A equal?
-    over \ A instance? [ v= vall? ] [ 2drop f ] if ;
-
-M: A nth-unsafe underlying>> A-rep simd-nth ; inline
-
-M: A set-nth-unsafe
-    [ A-element-class boolean>element ] 2dip
-    underlying>> SET-NTH call ; inline
-
-: >A ( seq -- simd-array ) \ A new clone-like ;
-
-M: A like drop dup \ A instance? [ >A ] unless ; inline
-
-M: A new-underlying drop \ A boa ; inline
-
-M: A new-sequence
-    drop dup N =
-    [ drop 16 <byte-array> \ A boa ]
-    [ N bad-length ]
-    if ; inline
-
-M: A c:byte-length underlying>> length ; inline
-
-M: A element-type drop A-rep rep-component-type ;
-
-M: A pprint-delims drop \ A{ \ } ;
-
-M: A >pprint-sequence ;
-
-M: A pprint* pprint-object ;
-
-SYNTAX: A{ \ } [ >A ] parse-literal ;
-
-: A-with ( x -- simd-array ) [ A-rep A ] dip simd-with ;
-
-\ A-with \ A-rep \ A define-with-custom-inlining
-
-\ A-boa [ \ A-rep \ A simd-boa ] \ A-rep 1 boa-effect define-declared
-
-\ A-rep rep-gather-word [
-    \ A-boa \ A-rep \ A define-boa-custom-inlining
-] when
-
-: A-cast ( simd-array -- simd-array' )
-    underlying>> \ A boa ; inline
-
-INSTANCE: A sequence
-
-<PRIVATE
-
-: A-vv->v-op ( v1 v2 quot -- v3 )
-    [ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline
-
-: A-vn->v-op ( v1 v2 quot -- v3 )
-    [ [ underlying>> ] dip A-rep ] dip call \ A boa ; inline
-
-: A-vv->n-op ( v1 v2 quot -- n )
-    [ [ underlying>> ] bi@ A-rep ] dip call ; inline
-
-: A-v->v-op ( v1 quot -- v2 )
-    [ underlying>> A-rep ] dip call \ A boa ; inline
-
-: A-v->n-op ( v quot -- n )
-    [ underlying>> A-rep ] dip call ; inline
-
-: A-v-conversion-op ( v1 to-type quot -- v2 )
-    swap [ underlying>> A-rep ] [ call ] [ '[ _ boa ] call( u -- v ) ] tri* ; inline
-
-: A-vv-conversion-op ( v1 v2 to-type quot -- v2 )
-    swap {
-        [ underlying>> ]
-        [ underlying>> A-rep ]
-        [ call ]
-        [ '[ _ boa ] call( u -- v ) ]
-    } spread ; inline
-
-simd new
-    \ A >>class
-    \ A-with >>ctor
-    \ A-rep >>rep
-    {
-        { (v>float) A-v-conversion-op }
-        { (v>integer) A-v-conversion-op }
-        { (vpack-signed) A-vv-conversion-op }
-        { (vpack-unsigned) A-vv-conversion-op }
-        { (vunpack-head) A-v-conversion-op }
-        { (vunpack-tail) A-v-conversion-op }
-    } >>special-wrappers
-    {
-        { { +vector+ +vector+ -> +vector+ } A-vv->v-op }
-        { { +vector+ +any-vector+ -> +vector+ } A-vv->v-op }
-        { { +vector+ +scalar+ -> +vector+ } A-vn->v-op }
-        { { +vector+ +literal+ -> +vector+ } A-vn->v-op }
-        { { +vector+ +vector+ -> +scalar+ } A-vv->n-op }
-        { { +vector+ +vector+ -> +boolean+ } A-vv->n-op }
-        { { +vector+ -> +vector+ } A-v->v-op }
-        { { +vector+ -> +scalar+ } A-v->n-op }
-        { { +vector+ -> +boolean+ } A-v->n-op }
-        { { +vector+ -> +nonnegative+ } A-v->n-op }
-    } >>schema-wrappers
-(define-simd-128)
-
-PRIVATE>
-
-;FUNCTOR
-
-! Synthesize 256-bit vectors from a pair of 128-bit vectors
-SLOT: underlying1
-SLOT: underlying2
-
-:: define-simd-256-type ( class rep -- )
-    c:<c-type>
-        class >>class
-        class >>boxed-class
-        [
-            [ rep alien-vector ]
-            [ 16 + >fixnum rep alien-vector ] 2bi
-            class boa
-        ] >>getter
-        [
-            [ [ underlying1>> ] 2dip rep set-alien-vector ]
-            [ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ]
-            3bi
-        ] >>setter
-        32 >>size
-        8 >>align
-        rep >>rep
-    class c:typedef ;
-
-: (define-simd-256) ( simd -- )
-    simd-ops get { vshuffle-elements vshuffle-bytes hlshift hrshift } unique assoc-diff >>ops
-    [ define-simd ]
-    [ [ class>> ] [ rep>> ] bi define-simd-256-type ] bi ;
-
-FUNCTOR: define-simd-256 ( T -- )
-
-N            [ 32 T c:heap-size /i ]
-
-N/2          [ N 2 /i ]
-A/2          IS ${T}-${N/2}
-A/2-boa      IS ${A/2}-boa
-A/2-with     IS ${A/2}-with
-
-A            DEFINES-CLASS ${T}-${N}
-A-boa        DEFINES ${A}-boa
-A-with       DEFINES ${A}-with
-A-cast       DEFINES ${A}-cast
->A           DEFINES >${A}
-A{           DEFINES ${A}{
-
-A-deref      DEFINES-PRIVATE ${A}-deref
-
-A-rep        [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
-A-vv->v-op   DEFINES-PRIVATE ${A}-vv->v-op
-A-vn->v-op   DEFINES-PRIVATE ${A}-vn->v-op
-A-v->v-op    DEFINES-PRIVATE ${A}-v->v-op
-A-v.-op      DEFINES-PRIVATE ${A}-v.-op
-(A-v->n-op)  DEFINES-PRIVATE (${A}-v->v-op)
-A-sum-op     DEFINES-PRIVATE ${A}-sum-op
-A-vany-op    DEFINES-PRIVATE ${A}-vany-op
-A-vall-op    DEFINES-PRIVATE ${A}-vall-op
-A-vmerge-head-op    DEFINES-PRIVATE ${A}-vmerge-head-op
-A-vmerge-tail-op    DEFINES-PRIVATE ${A}-vmerge-tail-op
-A-v-conversion-op   DEFINES-PRIVATE ${A}-v-conversion-op
-A-vpack-op          DEFINES-PRIVATE ${A}-vpack-op
-A-vunpack-head-op   DEFINES-PRIVATE ${A}-vunpack-head-op
-A-vunpack-tail-op   DEFINES-PRIVATE ${A}-vunpack-tail-op
-
-WHERE
-
-SLOT: underlying1
-SLOT: underlying2
-
-TUPLE: A
-{ underlying1 byte-array initial: $[ 16 <byte-array> ] read-only }
-{ underlying2 byte-array initial: $[ 16 <byte-array> ] read-only } ;
-
-INSTANCE: A simd-256
-
-M: A clone
-    [ underlying1>> clone ] [ underlying2>> clone ] bi
-    \ A boa ; inline
-
-M: A length drop N ; inline
-
-M: A equal?
-    over \ A instance? [ v= vall? ] [ 2drop f ] if ;
-
-: A-deref ( n seq -- n' seq' )
-    over N/2 < [ underlying1>> ] [ [ N/2 - ] dip underlying2>> ] if \ A/2 boa ; inline
-
-M: A nth-unsafe A-deref nth-unsafe ; inline
-
-M: A set-nth-unsafe A-deref set-nth-unsafe ; inline
-
-: >A ( seq -- simd-array ) \ A new clone-like ;
-
-M: A like drop dup \ A instance? [ >A ] unless ; inline
-
-M: A new-sequence
-    drop dup N =
-    [ drop 16 <byte-array> 16 <byte-array> \ A boa ]
-    [ N bad-length ]
-    if ; inline
-
-M: A c:byte-length drop 32 ; inline
-
-M: A element-type drop A-rep rep-component-type ;
-
-SYNTAX: A{ \ } [ >A ] parse-literal ;
-
-M: A pprint-delims drop \ A{ \ } ;
-
-M: A >pprint-sequence ;
-
-M: A pprint* pprint-object ;
-
-: A-with ( x -- simd-array )
-    [ A/2-with ] [ A/2-with ] bi [ underlying>> ] bi@
-    \ A boa ; inline
-
-: A-boa ( ... -- simd-array )
-    [ A/2-boa ] N/2 ndip A/2-boa [ underlying>> ] bi@
-    \ A boa ; inline
-
-\ A-rep 2 boa-effect \ A-boa set-stack-effect
-
-: A-cast ( simd-array -- simd-array' )
-    [ underlying1>> ] [ underlying2>> ] bi \ A boa ; inline
-
-INSTANCE: A sequence
-
-: A-vv->v-op ( v1 v2 quot -- v3 )
-    [ [ [ underlying1>> ] bi@ A-rep ] dip call ]
-    [ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
-    \ A boa ; inline
-
-: A-vn->v-op ( v1 v2 quot -- v3 )
-    [ [ [ underlying1>> ] dip A-rep ] dip call ]
-    [ [ [ underlying2>> ] dip A-rep ] dip call ] 3bi
-    \ A boa ; inline
-
-: A-v->v-op ( v1 combine-quot -- v2 )
-    [ [ underlying1>> A-rep ] dip call ]
-    [ [ underlying2>> A-rep ] dip call ] 2bi
-    \ A boa ; inline
-
-: A-v.-op ( v1 v2 quot -- n )
-    [ [ [ underlying1>> ] bi@ A-rep ] dip call ]
-    [ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
-    + ; inline
-
-: (A-v->n-op) ( v1 quot reduce-quot -- n )
-    '[ [ underlying1>> ] [ underlying2>> ] bi A-rep @ A-rep ] dip call ; inline
-
-: A-sum-op ( v1 quot -- n )
-    [ (simd-v+) ] (A-v->n-op) ; inline
-
-: A-vany-op ( v1 quot -- n )
-    [ (simd-vbitor) ] (A-v->n-op) ; inline
-: A-vall-op ( v1 quot -- n )
-    [ (simd-vbitand) ] (A-v->n-op) ; inline
-
-: A-vmerge-head-op ( v1 v2 quot -- v )
-    drop
-    [ underlying1>> ] bi@
-    [ A-rep (simd-(vmerge-head)) ]
-    [ A-rep (simd-(vmerge-tail)) ] 2bi
-    \ A boa ; inline
-    
-: A-vmerge-tail-op ( v1 v2 quot -- v )
-    drop
-    [ underlying2>> ] bi@
-    [ A-rep (simd-(vmerge-head)) ]
-    [ A-rep (simd-(vmerge-tail)) ] 2bi
-    \ A boa ; inline
-
-: A-v-conversion-op ( v1 to-type quot -- v )
-    swap [ 
-        [ [ underlying1>> A-rep ] dip call ]
-        [ [ underlying2>> A-rep ] dip call ] 2bi
-    ] dip '[ _ boa ] call( u1 u2 -- v ) ; inline
-
-: A-vpack-op ( v1 v2 to-type quot -- v )
-    swap [ 
-        '[ [ underlying1>> ] [ underlying2>> ] bi A-rep @ ] bi*
-    ] dip '[ _ boa ] call( u1 u2 -- v ) ; inline
-
-: A-vunpack-head-op ( v1 to-type quot -- v )
-    '[
-        underlying1>>
-        [ A-rep @ ]
-        [ A-rep (simd-(vunpack-tail)) ] bi
-    ] dip '[ _ boa ] call( u1 u2 -- v ) ; inline
-
-: A-vunpack-tail-op ( v1 to-type quot -- v )
-    '[
-        underlying2>>
-        [ A-rep (simd-(vunpack-head)) ]
-        [ A-rep @ ] bi
-    ] dip '[ _ boa ] call( u1 u2 -- v ) ; inline
-
-simd new
-    \ A >>class
-    \ A-with >>ctor
-    \ A-rep >>rep
-    {
-        { v.     A-v.-op   }
-        { sum    A-sum-op  }
-        { vnone? A-vany-op }
-        { vany?  A-vany-op }
-        { vall?  A-vall-op }
-        { (vmerge-head) A-vmerge-head-op }
-        { (vmerge-tail) A-vmerge-tail-op }
-        { (v>integer) A-v-conversion-op }
-        { (v>float) A-v-conversion-op }
-        { (vpack-signed) A-vpack-op }
-        { (vpack-unsigned) A-vpack-op }
-        { (vunpack-head) A-vunpack-head-op }
-        { (vunpack-tail) A-vunpack-tail-op }
-    } >>special-wrappers
-    {
-        { { +vector+ +vector+ -> +vector+ } A-vv->v-op }
-        { { +vector+ +scalar+ -> +vector+ } A-vn->v-op }
-        { { +vector+ +literal+ -> +vector+ } A-vn->v-op }
-        { { +vector+ -> +vector+ } A-v->v-op }
-    } >>schema-wrappers
-(define-simd-256)
-
-;FUNCTOR
diff --git a/basis/math/vectors/simd/intrinsics/authors.txt b/basis/math/vectors/simd/intrinsics/authors.txt
deleted file mode 100644
index d4f5d6b3ae..0000000000
--- a/basis/math/vectors/simd/intrinsics/authors.txt
+++ /dev/null
@@ -1 +0,0 @@
-Slava Pestov
\ No newline at end of file
diff --git a/basis/math/vectors/simd/intrinsics/intrinsics-tests.factor b/basis/math/vectors/simd/intrinsics/intrinsics-tests.factor
deleted file mode 100644
index 84eee935a0..0000000000
--- a/basis/math/vectors/simd/intrinsics/intrinsics-tests.factor
+++ /dev/null
@@ -1,18 +0,0 @@
-IN: math.vectors.simd.intrinsics.tests
-USING: math.vectors.simd.intrinsics cpu.architecture tools.test ;
-
-[ 16 ] [ uchar-16-rep rep-components ] unit-test
-[ 16 ] [ char-16-rep rep-components ] unit-test
-[ 8 ] [ ushort-8-rep rep-components ] unit-test
-[ 8 ] [ short-8-rep rep-components ] unit-test
-[ 4 ] [ uint-4-rep rep-components ] unit-test
-[ 4 ] [ int-4-rep rep-components ] unit-test
-[ 4 ] [ float-4-rep rep-components ] unit-test
-[ 2 ] [ double-2-rep rep-components ] unit-test
-
-{ 4 1 } [ uint-4-rep (simd-boa) ] must-infer-as
-{ 4 1 } [ int-4-rep (simd-boa) ] must-infer-as
-{ 4 1 } [ float-4-rep (simd-boa) ] must-infer-as
-{ 2 1 } [ double-2-rep (simd-boa) ] must-infer-as
-
-
diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor
deleted file mode 100644
index 003b42fe83..0000000000
--- a/basis/math/vectors/simd/intrinsics/intrinsics.factor
+++ /dev/null
@@ -1,207 +0,0 @@
-! Copyright (C) 2009 Slava Pestov.
-! See http://factorcode.org/license.txt for BSD license.
-USING: alien alien.c-types alien.data assocs combinators
-cpu.architecture compiler.cfg.comparisons fry generalizations
-kernel libc macros math
-math.vectors.conversion.backend
-sequences sets effects accessors namespaces
-lexer parser vocabs.parser words arrays math.vectors ;
-IN: math.vectors.simd.intrinsics
-
-ERROR: bad-simd-call word ;
-
-<<
-
-: simd-effect ( word -- effect )
-    stack-effect [ in>> "rep" suffix ] [ out>> ] bi <effect> ;
-: simd-conversion-effect ( word -- effect )
-    stack-effect [ in>> but-last "rep" suffix ] [ out>> ] bi <effect> ;
-
-SYMBOL: simd-ops
-
-V{ } clone simd-ops set-global
-
-: (SIMD-OP:) ( accum quot -- accum )
-    [
-        scan-word dup name>> "(simd-" ")" surround create-in
-        [ nip dup '[ _ bad-simd-call ] define ]
-    ] dip
-    '[ _ dip set-stack-effect ]
-    [ 2array simd-ops get push ]
-    2tri ; inline
-
-SYNTAX: SIMD-OP:
-    [ simd-effect ] (SIMD-OP:) ;
-
-SYNTAX: SIMD-CONVERSION-OP:
-    [ simd-conversion-effect ] (SIMD-OP:) ;
-
->>
-
-SIMD-OP: v+
-SIMD-OP: v-
-SIMD-OP: vneg
-SIMD-OP: v+-
-SIMD-OP: vs+
-SIMD-OP: vs-
-SIMD-OP: vs*
-SIMD-OP: v*
-SIMD-OP: v/
-SIMD-OP: vmin
-SIMD-OP: vmax
-SIMD-OP: v.
-SIMD-OP: vsqrt
-SIMD-OP: sum
-SIMD-OP: vabs
-SIMD-OP: vbitand
-SIMD-OP: vbitandn
-SIMD-OP: vbitor
-SIMD-OP: vbitxor
-SIMD-OP: vbitnot
-SIMD-OP: vand
-SIMD-OP: vandn
-SIMD-OP: vor
-SIMD-OP: vxor
-SIMD-OP: vnot
-SIMD-OP: vlshift
-SIMD-OP: vrshift
-SIMD-OP: hlshift
-SIMD-OP: hrshift
-SIMD-OP: vshuffle-elements
-SIMD-OP: vshuffle-bytes
-SIMD-OP: (vmerge-head)
-SIMD-OP: (vmerge-tail)
-SIMD-OP: v<=
-SIMD-OP: v<
-SIMD-OP: v=
-SIMD-OP: v>
-SIMD-OP: v>=
-SIMD-OP: vunordered?
-SIMD-OP: vany?
-SIMD-OP: vall?
-SIMD-OP: vnone?
-
-SIMD-CONVERSION-OP: (v>float)
-SIMD-CONVERSION-OP: (v>integer)
-SIMD-CONVERSION-OP: (vpack-signed)
-SIMD-CONVERSION-OP: (vpack-unsigned)
-SIMD-CONVERSION-OP: (vunpack-head)
-SIMD-CONVERSION-OP: (vunpack-tail)
-
-: (simd-with) ( x rep -- v ) bad-simd-call ;
-: (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
-: (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ;
-: (simd-select) ( v n rep -- x ) bad-simd-call ;
-
-: assert-positive ( x -- y ) ;
-
-: alien-vector ( c-ptr n rep -- value )
-    ! Inefficient version for when intrinsics are missing
-    [ swap <displaced-alien> ] dip rep-size memory>byte-array ;
-
-: set-alien-vector ( value c-ptr n rep -- )
-    ! Inefficient version for when intrinsics are missing
-    [ swap <displaced-alien> swap ] dip rep-size memcpy ;
-
-<<
-
-: rep-components ( rep -- n )
-    16 swap rep-component-type heap-size /i ; foldable
-
-: rep-coercer ( rep -- quot )
-    {
-        { [ dup int-vector-rep? ] [ [ >fixnum ] ] }
-        { [ dup float-vector-rep? ] [ [ >float ] ] }
-    } cond nip ; foldable
-
-: rep-coerce ( value rep -- value' )
-    rep-coercer call( value -- value' ) ; inline
-
-CONSTANT: rep-gather-words
-    {
-        { 2 (simd-gather-2) }
-        { 4 (simd-gather-4) }
-    }
-
-: rep-gather-word ( rep -- word )
-    rep-components rep-gather-words at ;
-
->>
-
-MACRO: (simd-boa) ( rep -- quot )
-    {
-        [ rep-coercer ]
-        [ rep-components ]
-        [ ]
-        [ rep-gather-word ]
-    } cleave
-    '[ _ _ napply _ _ execute ] ;
-
-GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
-
-: (%unpack-reps) ( -- reps )
-    %merge-vector-reps [ int-vector-rep? ] filter
-    %unpack-vector-head-reps union ;
-
-: (%abs-reps) ( -- reps )
-    cc> %compare-vector-reps [ int-vector-rep? ] filter
-    %xor-vector-reps [ float-vector-rep? ] filter
-    union
-    [ { } ] [ { uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep } union ] if-empty ;
-
-: (%shuffle-imm-reps) ( -- reps )
-    %shuffle-vector-reps %shuffle-vector-imm-reps union ;
-
-M: vector-rep supported-simd-op?
-    {
-        { \ (simd-v+)            [ %add-vector-reps            ] }
-        { \ (simd-vs+)           [ %saturated-add-vector-reps  ] }
-        { \ (simd-v+-)           [ %add-sub-vector-reps        ] }
-        { \ (simd-v-)            [ %sub-vector-reps            ] }
-        { \ (simd-vs-)           [ %saturated-sub-vector-reps  ] }
-        { \ (simd-vneg)          [ %sub-vector-reps            ] }
-        { \ (simd-v*)            [ %mul-vector-reps            ] }
-        { \ (simd-vs*)           [ %saturated-mul-vector-reps  ] }
-        { \ (simd-v/)            [ %div-vector-reps            ] }
-        { \ (simd-vmin)          [ %min-vector-reps cc< %compare-vector-reps union ] }
-        { \ (simd-vmax)          [ %max-vector-reps cc> %compare-vector-reps union ] }
-        { \ (simd-v.)            [ %dot-vector-reps            ] }
-        { \ (simd-vsqrt)         [ %sqrt-vector-reps           ] }
-        { \ (simd-sum)           [ %horizontal-add-vector-reps ] }
-        { \ (simd-vabs)          [ (%abs-reps)                 ] }
-        { \ (simd-vbitand)       [ %and-vector-reps            ] }
-        { \ (simd-vbitandn)      [ %andn-vector-reps           ] }
-        { \ (simd-vbitor)        [ %or-vector-reps             ] }
-        { \ (simd-vbitxor)       [ %xor-vector-reps            ] }
-        { \ (simd-vbitnot)       [ %xor-vector-reps            ] }
-        { \ (simd-vand)          [ %and-vector-reps            ] }
-        { \ (simd-vandn)         [ %andn-vector-reps           ] }
-        { \ (simd-vor)           [ %or-vector-reps             ] }
-        { \ (simd-vxor)          [ %xor-vector-reps            ] }
-        { \ (simd-vnot)          [ %xor-vector-reps            ] }
-        { \ (simd-vlshift)       [ %shl-vector-reps            ] }
-        { \ (simd-vrshift)       [ %shr-vector-reps            ] }
-        { \ (simd-hlshift)       [ %horizontal-shl-vector-imm-reps ] }
-        { \ (simd-hrshift)       [ %horizontal-shr-vector-imm-reps ] }
-        { \ (simd-vshuffle-elements) [ (%shuffle-imm-reps)         ] }
-        { \ (simd-vshuffle-bytes)    [ %shuffle-vector-reps        ] }
-        { \ (simd-(vmerge-head)) [ %merge-vector-reps          ] }
-        { \ (simd-(vmerge-tail)) [ %merge-vector-reps          ] }
-        { \ (simd-(v>float))        [ %integer>float-vector-reps ] }
-        { \ (simd-(v>integer))      [ %float>integer-vector-reps ] }
-        { \ (simd-(vpack-signed))   [ %signed-pack-vector-reps   ] }
-        { \ (simd-(vpack-unsigned)) [ %unsigned-pack-vector-reps ] }
-        { \ (simd-(vunpack-head))   [ (%unpack-reps)             ] }
-        { \ (simd-(vunpack-tail))   [ (%unpack-reps)             ] }
-        { \ (simd-v<=)           [ unsign-rep cc<= %compare-vector-reps   ] }
-        { \ (simd-v<)            [ unsign-rep cc< %compare-vector-reps    ] }
-        { \ (simd-v=)            [ unsign-rep cc= %compare-vector-reps    ] }
-        { \ (simd-v>)            [ unsign-rep cc> %compare-vector-reps    ] }
-        { \ (simd-v>=)           [ unsign-rep cc>= %compare-vector-reps   ] }
-        { \ (simd-vunordered?)   [ unsign-rep cc/<>= %compare-vector-reps ] }
-        { \ (simd-gather-2)      [ %gather-vector-2-reps       ] }
-        { \ (simd-gather-4)      [ %gather-vector-4-reps       ] }
-        { \ (simd-vany?)         [ %test-vector-reps           ] }
-        { \ (simd-vall?)         [ %test-vector-reps           ] }
-        { \ (simd-vnone?)        [ %test-vector-reps           ] }
-    } case member? ;
diff --git a/basis/math/vectors/specialization/specialization-tests.factor b/basis/math/vectors/specialization/specialization-tests.factor
deleted file mode 100644
index f4d4fd93e8..0000000000
--- a/basis/math/vectors/specialization/specialization-tests.factor
+++ /dev/null
@@ -1,28 +0,0 @@
-IN: math.vectors.specialization.tests
-USING: compiler.tree.debugger math.vectors tools.test kernel
-kernel.private math specialized-arrays ;
-QUALIFIED-WITH: alien.c-types c
-QUALIFIED-WITH: alien.complex c
-SPECIALIZED-ARRAY: c:double
-SPECIALIZED-ARRAY: c:complex-float
-SPECIALIZED-ARRAY: c:float
-
-[ V{ t } ] [
-    [ { double-array double-array } declare distance 0.0 < not ] final-literals
-] unit-test
-
-[ V{ float } ] [
-    [ { float-array float } declare v*n norm ] final-classes
-] unit-test
-
-[ V{ complex } ] [
-    [ { complex-float-array complex-float-array } declare v. ] final-classes
-] unit-test
-
-[ V{ float } ] [
-    [ { float-array float } declare v*n norm ] final-classes
-] unit-test
-
-[ V{ float } ] [
-    [ { complex-float-array complex } declare v*n norm ] final-classes
-] unit-test
\ No newline at end of file
diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor
deleted file mode 100644
index 602fd9802c..0000000000
--- a/basis/math/vectors/specialization/specialization.factor
+++ /dev/null
@@ -1,207 +0,0 @@
-! Copyright (C) 2009 Slava Pestov.
-! See http://factorcode.org/license.txt for BSD license.
-USING: words kernel make sequences effects sets kernel.private
-accessors combinators math math.intervals math.vectors
-math.vectors.conversion.backend namespaces assocs fry splitting
-classes.algebra generalizations locals
-compiler.tree.propagation.info ;
-IN: math.vectors.specialization
-
-SYMBOLS: -> +vector+ +any-vector+ +scalar+ +boolean+ +nonnegative+ +literal+ ;
-
-: parent-vector-class ( type -- type' )
-    {
-        { [ dup simd-128 class<= ] [ drop simd-128 ] }
-        { [ dup simd-256 class<= ] [ drop simd-256 ] }
-        [ "Not a vector class" throw ]
-    } cond ;
-
-: signature-for-schema ( array-type elt-type schema -- signature )
-    [
-        {
-            { +vector+ [ drop ] }
-            { +any-vector+ [ drop parent-vector-class ] }
-            { +scalar+ [ nip ] }
-            { +boolean+ [ 2drop boolean ] }
-            { +nonnegative+ [ nip ] }
-            { +literal+ [ 2drop f ] }
-        } case
-    ] with with map ;
-
-: (specialize-vector-word) ( word array-type elt-type schema -- word' )
-    signature-for-schema
-    [ [ name>> ] [ [ name>> ] map "," join ] bi* "=>" glue f <word> ]
-    [ [ , \ declare , def>> % ] [ ] make ]
-    [ drop stack-effect ]
-    2tri
-    [ define-declared ] [ 2drop ] 3bi ;
-
-: output-infos ( array-type elt-type schema -- value-infos )
-    [
-        {
-            { +vector+ [ drop <class-info> ] }
-            { +any-vector+ [ drop parent-vector-class <class-info> ] }
-            { +scalar+ [ nip <class-info> ] }
-            { +boolean+ [ 2drop boolean <class-info> ] }
-            {
-                +nonnegative+
-                [
-                    nip
-                    dup complex class<= [ drop float ] when
-                    [0,inf] <class/interval-info>
-                ]
-            }
-        } case
-    ] with with map ;
-
-: record-output-signature ( word array-type elt-type schema -- word )
-    output-infos
-    [ drop ]
-    [ drop ]
-    [ [ stack-effect in>> length '[ _ ndrop ] ] dip append ] 2tri
-    "outputs" set-word-prop ;
-
-CONSTANT: vector-words
-H{
-    { [v-] { +vector+ +vector+ -> +vector+ } }
-    { distance { +vector+ +vector+ -> +nonnegative+ } }
-    { n*v { +scalar+ +vector+ -> +vector+ } }
-    { n+v { +scalar+ +vector+ -> +vector+ } }
-    { n-v { +scalar+ +vector+ -> +vector+ } }
-    { n/v { +scalar+ +vector+ -> +vector+ } }
-    { norm { +vector+ -> +nonnegative+ } }
-    { norm-sq { +vector+ -> +nonnegative+ } }
-    { normalize { +vector+ -> +vector+ } }
-    { v* { +vector+ +vector+ -> +vector+ } }
-    { vs* { +vector+ +vector+ -> +vector+ } }
-    { v*n { +vector+ +scalar+ -> +vector+ } }
-    { v+ { +vector+ +vector+ -> +vector+ } }
-    { vs+ { +vector+ +vector+ -> +vector+ } }
-    { v+- { +vector+ +vector+ -> +vector+ } }
-    { v+n { +vector+ +scalar+ -> +vector+ } }
-    { v- { +vector+ +vector+ -> +vector+ } }
-    { vneg { +vector+ -> +vector+ } }
-    { vs- { +vector+ +vector+ -> +vector+ } }
-    { v-n { +vector+ +scalar+ -> +vector+ } }
-    { v. { +vector+ +vector+ -> +scalar+ } }
-    { v/ { +vector+ +vector+ -> +vector+ } }
-    { v/n { +vector+ +scalar+ -> +vector+ } }
-    { vceiling { +vector+ -> +vector+ } }
-    { vfloor { +vector+ -> +vector+ } }
-    { vmax { +vector+ +vector+ -> +vector+ } }
-    { vmin { +vector+ +vector+ -> +vector+ } }
-    { vneg { +vector+ -> +vector+ } }
-    { vtruncate { +vector+ -> +vector+ } }
-    { sum { +vector+ -> +scalar+ } }
-    { vabs { +vector+ -> +vector+ } }
-    { vsqrt { +vector+ -> +vector+ } }
-    { vbitand { +vector+ +vector+ -> +vector+ } }
-    { vbitandn { +vector+ +vector+ -> +vector+ } }
-    { vbitor { +vector+ +vector+ -> +vector+ } }
-    { vbitxor { +vector+ +vector+ -> +vector+ } }
-    { vbitnot { +vector+ -> +vector+ } }
-    { vand { +vector+ +vector+ -> +vector+ } }
-    { vandn { +vector+ +vector+ -> +vector+ } }
-    { vor { +vector+ +vector+ -> +vector+ } }
-    { vxor { +vector+ +vector+ -> +vector+ } }
-    { vnot { +vector+ -> +vector+ } }
-    { vlshift { +vector+ +scalar+ -> +vector+ } }
-    { vrshift { +vector+ +scalar+ -> +vector+ } }
-    { hlshift { +vector+ +literal+ -> +vector+ } }
-    { hrshift { +vector+ +literal+ -> +vector+ } }
-    { vshuffle-elements { +vector+ +literal+ -> +vector+ } }
-    { vshuffle-bytes    { +vector+ +any-vector+  -> +vector+ } }
-    { vbroadcast { +vector+ +literal+ -> +vector+ } }
-    { (vmerge-head) { +vector+ +vector+ -> +vector+ } }
-    { (vmerge-tail) { +vector+ +vector+ -> +vector+ } }
-    { (v>float) { +vector+ +literal+ -> +vector+ } }
-    { (v>integer) { +vector+ +literal+ -> +vector+ } }
-    { (vpack-signed) { +vector+ +vector+ +literal+ -> +vector+ } }
-    { (vpack-unsigned) { +vector+ +vector+ +literal+ -> +vector+ } }
-    { (vunpack-head) { +vector+ +literal+ -> +vector+ } }
-    { (vunpack-tail) { +vector+ +literal+ -> +vector+ } }
-    { v<= { +vector+ +vector+ -> +vector+ } }
-    { v< { +vector+ +vector+ -> +vector+ } }
-    { v= { +vector+ +vector+ -> +vector+ } }
-    { v> { +vector+ +vector+ -> +vector+ } }
-    { v>= { +vector+ +vector+ -> +vector+ } }
-    { vunordered? { +vector+ +vector+ -> +vector+ } }
-    { vany?  { +vector+ -> +boolean+ } }
-    { vall?  { +vector+ -> +boolean+ } }
-    { vnone? { +vector+ -> +boolean+ } }
-}
-
-PREDICATE: vector-word < word vector-words key? ;
-
-: specializations ( word -- assoc )
-    dup "specializations" word-prop
-    [ ] [ V{ } clone [ "specializations" set-word-prop ] keep ] ?if ;
-
-M: vector-word subwords specializations values [ word? ] filter ;
-
-: add-specialization ( new-word signature word -- )
-    specializations set-at ;
-
-ERROR: bad-vector-word word ;
-
-: word-schema ( word -- schema )
-    vector-words ?at [ bad-vector-word ] unless ;
-
-: inputs ( schema -- seq ) { -> } split first ;
-
-: outputs ( schema -- seq ) { -> } split second ;
-
-: loop-vector-op ( word array-type elt-type -- word' )
-    pick word-schema
-    [ inputs (specialize-vector-word) ]
-    [ outputs record-output-signature ] 3bi ;
-
-:: specialize-vector-word ( word array-type elt-type simd -- word/quot' )
-    word simd key? [ word simd at ] [ word array-type elt-type loop-vector-op ] if ;
-
-:: input-signature ( word array-type elt-type -- signature )
-    array-type elt-type word word-schema inputs signature-for-schema ;
-
-: vector-words-for-type ( elt-type -- words )
-    {
-        ! Can't do shifts on floats
-        { [ dup float class<= ] [ vector-words keys { vlshift vrshift } diff ] }
-        ! Can't divide integers
-        { [ dup integer class<= ] [ vector-words keys { vsqrt n/v v/n v/ normalize } diff ] }
-        ! Can't compute square root of complex numbers (vsqrt uses fsqrt not sqrt)
-        { [ dup complex class<= ] [ vector-words keys { vsqrt } diff ] }
-        [ { } ]
-    } cond
-    ! Don't specialize horizontal shifts, shuffles, and conversions at all, they're only for SIMD
-    {
-        hlshift hrshift vshuffle-elements vshuffle-bytes vbroadcast
-        (v>integer) (v>float)
-        (vpack-signed) (vpack-unsigned)
-        (vunpack-head) (vunpack-tail)
-    } diff
-    nip ;
-
-:: specialize-vector-words ( array-type elt-type simd -- )
-    elt-type vector-words-for-type simd keys union [
-        [ array-type elt-type simd specialize-vector-word ]
-        [ array-type elt-type input-signature ]
-        [ ]
-        tri add-specialization
-    ] each ;
-
-: specialization-matches? ( value-infos signature -- ? )
-    [ [ [ class>> ] dip class<= ] [ literal?>> ] if* ] 2all? ;
-
-: find-specialization ( classes word -- word/f )
-    specializations
-    [ first specialization-matches? ] with find
-    swap [ second ] when ;
-
-: vector-word-custom-inlining ( #call -- word/f )
-    [ in-d>> [ value-info ] map ] [ word>> ] bi
-    find-specialization ;
-
-vector-words keys [
-    [ vector-word-custom-inlining ]
-    "custom-inlining" set-word-prop
-] each

From 42493b9778a19797866e32f63e48e44516cd6251 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Mon, 2 Nov 2009 15:09:16 -0600
Subject: [PATCH 05/46] update compiler.tree.propagation.simd, and don't load
 it till math.vectors.simd is loaded

---
 .../known-words/known-words.factor            |  3 +--
 .../tree/propagation/simd/simd.factor         | 19 ++++++++++---------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/basis/compiler/tree/propagation/known-words/known-words.factor b/basis/compiler/tree/propagation/known-words/known-words.factor
index 5646dca3fb..aa2bc01f9e 100644
--- a/basis/compiler/tree/propagation/known-words/known-words.factor
+++ b/basis/compiler/tree/propagation/known-words/known-words.factor
@@ -16,8 +16,7 @@ compiler.tree.propagation.slots
 compiler.tree.propagation.simple
 compiler.tree.propagation.constraints
 compiler.tree.propagation.call-effect
-compiler.tree.propagation.transforms
-compiler.tree.propagation.simd ;
+compiler.tree.propagation.transforms ;
 FROM: alien.c-types => (signed-interval) (unsigned-interval) ;
 IN: compiler.tree.propagation.known-words
 
diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor
index 1637148b88..1eac88598b 100644
--- a/basis/compiler/tree/propagation/simd/simd.factor
+++ b/basis/compiler/tree/propagation/simd/simd.factor
@@ -2,7 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors byte-arrays combinators fry sequences
 compiler.tree.propagation.info cpu.architecture kernel words math
-math.intervals math.vectors.simd.intrinsics ;
+math.intervals math.vectors.simd ;
 IN: compiler.tree.propagation.simd
 
 {
@@ -33,14 +33,14 @@ IN: compiler.tree.propagation.simd
     (simd-hrshift)
     (simd-vshuffle-bytes)
     (simd-vshuffle-elements)
-    (simd-(vmerge-head))
-    (simd-(vmerge-tail))
-    (simd-(v>float))
-    (simd-(v>integer))
-    (simd-(vpack-signed))
-    (simd-(vpack-unsigned))
-    (simd-(vunpack-head))
-    (simd-(vunpack-tail))
+    (simd-vmerge-head)
+    (simd-vmerge-tail)
+    (simd-v>float)
+    (simd-v>integer)
+    (simd-vpack-signed)
+    (simd-vpack-unsigned)
+    (simd-vunpack-head)
+    (simd-vunpack-tail)
     (simd-v<=)
     (simd-v<)
     (simd-v=)
@@ -51,6 +51,7 @@ IN: compiler.tree.propagation.simd
     (simd-gather-2)
     (simd-gather-4)
     alien-vector
+    alien-vector-aligned
 } [ { byte-array } "default-output-classes" set-word-prop ] each
 
 : scalar-output-class ( rep -- class )

From e36eb438fa518997ee605eea11979d68153f4c15 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Mon, 2 Nov 2009 15:17:34 -0600
Subject: [PATCH 06/46] move all simd intrinsics to
 compiler.cfg.intrinsics.simd, and only load it when math.vectors.simd is
 loaded

---
 .../compiler/cfg/intrinsics/intrinsics.factor | 59 ------------------
 .../compiler/cfg/intrinsics/simd/simd.factor  | 62 ++++++++++++++++++-
 basis/cpu/x86/x86.factor                      |  1 -
 3 files changed, 61 insertions(+), 61 deletions(-)

diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor
index a03f04f182..632c32b12f 100644
--- a/basis/compiler/cfg/intrinsics/intrinsics.factor
+++ b/basis/compiler/cfg/intrinsics/intrinsics.factor
@@ -151,64 +151,5 @@ IN: compiler.cfg.intrinsics
         { math.integers.private:fixnum-log2 [ drop emit-fixnum-log2 ] }
     } enable-intrinsics ;
 
-: enable-simd ( -- )
-    {
-        { math.vectors.simd.intrinsics:assert-positive [ drop ] }
-        { math.vectors.simd.intrinsics:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vs+) [ [ ^^saturated-add-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vs-) [ [ ^^saturated-sub-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vneg) [ [ generate-neg-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vs*) [ [ ^^saturated-mul-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vmin) [ [ generate-min-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vmax) [ [ generate-max-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-v.) [ [ ^^dot-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vabs) [ [ generate-abs-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vbitandn) [ [ ^^andn-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vbitnot) [ [ generate-not-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vand) [ [ ^^and-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vandn) [ [ ^^andn-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vor) [ [ ^^or-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vnot) [ [ generate-not-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-v<=) [ [ cc<= generate-compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-v<) [ [ cc< generate-compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-v=) [ [ cc= generate-compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-v>) [ [ cc> generate-compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-v>=) [ [ cc>= generate-compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vunordered?) [ [ cc/<>= generate-compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vany?) [ [ vcc-any ^^test-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vall?) [ [ vcc-all ^^test-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vnone?) [ [ vcc-none ^^test-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vlshift) [ [ ^^shl-vector-imm ] [ ^^shl-vector ] emit-shift-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vrshift) [ [ ^^shr-vector-imm ] [ ^^shr-vector ] emit-shift-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-hlshift) [ [ ^^horizontal-shl-vector-imm ] emit-shift-vector-imm-op ] }
-        { math.vectors.simd.intrinsics:(simd-hrshift) [ [ ^^horizontal-shr-vector-imm ] emit-shift-vector-imm-op ] }
-        { math.vectors.simd.intrinsics:(simd-with) [ [ ^^with-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
-        { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
-        { math.vectors.simd.intrinsics:(simd-vshuffle-elements) [ emit-shuffle-vector ] }
-        { math.vectors.simd.intrinsics:(simd-vshuffle-bytes) [ emit-shuffle-vector-var ] }
-        { math.vectors.simd.intrinsics:(simd-(vmerge-head)) [ [ ^^merge-vector-head ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-(vmerge-tail)) [ [ ^^merge-vector-tail ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-(v>float)) [ [ ^^integer>float-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-(v>integer)) [ [ ^^float>integer-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-(vpack-signed)) [ [ ^^signed-pack-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-(vpack-unsigned)) [ [ ^^unsigned-pack-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-(vunpack-head)) [ [ generate-unpack-vector-head ] emit-unary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-(vunpack-tail)) [ [ generate-unpack-vector-tail ] emit-unary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] }
-        { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
-        { math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] }
-    } enable-intrinsics ;
-
 : emit-intrinsic ( node word -- )
     "intrinsic" word-prop call( node -- ) ;
diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor
index a8dfaab2dd..bac86e2457 100644
--- a/basis/compiler/cfg/intrinsics/simd/simd.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd.factor
@@ -2,7 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors alien byte-arrays fry classes.algebra
 cpu.architecture kernel math sequences math.vectors
-math.vectors.simd.intrinsics macros generalizations combinators
+math.vectors.simd macros generalizations combinators
 combinators.short-circuit arrays locals
 compiler.tree.propagation.info compiler.cfg.builder.blocks
 compiler.cfg.comparisons
@@ -351,3 +351,63 @@ MACRO: if-literals-match ( quots -- )
         [ generate-blend-vector ] 3bi
     ] if ;
 
+: enable-simd ( -- )
+    {
+        { math.vectors.simd:assert-positive [ drop ] }
+        { math.vectors.simd:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-vs+) [ [ ^^saturated-add-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-vs-) [ [ ^^saturated-sub-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-vneg) [ [ generate-neg-vector ] emit-unary-vector-op ] }
+        { math.vectors.simd:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-vs*) [ [ ^^saturated-mul-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-vmin) [ [ generate-min-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-vmax) [ [ generate-max-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-v.) [ [ ^^dot-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-vabs) [ [ generate-abs-vector ] emit-unary-vector-op ] }
+        { math.vectors.simd:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] }
+        { math.vectors.simd:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-vbitandn) [ [ ^^andn-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-vbitnot) [ [ generate-not-vector ] emit-unary-vector-op ] }
+        { math.vectors.simd:(simd-vand) [ [ ^^and-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-vandn) [ [ ^^andn-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-vor) [ [ ^^or-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-vxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-vnot) [ [ generate-not-vector ] emit-unary-vector-op ] }
+        { math.vectors.simd:(simd-v<=) [ [ cc<= generate-compare-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-v<) [ [ cc< generate-compare-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-v=) [ [ cc= generate-compare-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-v>) [ [ cc> generate-compare-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-v>=) [ [ cc>= generate-compare-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-vunordered?) [ [ cc/<>= generate-compare-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-vany?) [ [ vcc-any ^^test-vector ] emit-unary-vector-op ] }
+        { math.vectors.simd:(simd-vall?) [ [ vcc-all ^^test-vector ] emit-unary-vector-op ] }
+        { math.vectors.simd:(simd-vnone?) [ [ vcc-none ^^test-vector ] emit-unary-vector-op ] }
+        { math.vectors.simd:(simd-vlshift) [ [ ^^shl-vector-imm ] [ ^^shl-vector ] emit-shift-vector-op ] }
+        { math.vectors.simd:(simd-vrshift) [ [ ^^shr-vector-imm ] [ ^^shr-vector ] emit-shift-vector-op ] }
+        { math.vectors.simd:(simd-hlshift) [ [ ^^horizontal-shl-vector-imm ] emit-shift-vector-imm-op ] }
+        { math.vectors.simd:(simd-hrshift) [ [ ^^horizontal-shr-vector-imm ] emit-shift-vector-imm-op ] }
+        { math.vectors.simd:(simd-with) [ [ ^^with-vector ] emit-unary-vector-op ] }
+        { math.vectors.simd:(simd-gather-2) [ emit-gather-vector-2 ] }
+        { math.vectors.simd:(simd-gather-4) [ emit-gather-vector-4 ] }
+        { math.vectors.simd:(simd-vshuffle-elements) [ emit-shuffle-vector ] }
+        { math.vectors.simd:(simd-vshuffle-bytes) [ emit-shuffle-vector-var ] }
+        { math.vectors.simd:(simd-vmerge-head) [ [ ^^merge-vector-head ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-vmerge-tail) [ [ ^^merge-vector-tail ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-v>float) [ [ ^^integer>float-vector ] emit-unary-vector-op ] }
+        { math.vectors.simd:(simd-v>integer) [ [ ^^float>integer-vector ] emit-unary-vector-op ] }
+        { math.vectors.simd:(simd-vpack-signed) [ [ ^^signed-pack-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-vpack-unsigned) [ [ ^^unsigned-pack-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd:(simd-vunpack-head) [ [ generate-unpack-vector-head ] emit-unary-vector-op ] }
+        { math.vectors.simd:(simd-vunpack-tail) [ [ generate-unpack-vector-tail ] emit-unary-vector-op ] }
+        { math.vectors.simd:(simd-select) [ emit-select-vector ] }
+        { math.vectors.simd:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
+        { math.vectors.simd:alien-vector [ emit-alien-vector ] }
+        { math.vectors.simd:set-alien-vector [ emit-set-alien-vector ] }
+    } enable-intrinsics ;
+
+enable-simd
diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor
index b4d4b43e59..53c9c98ed3 100644
--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@@ -1370,7 +1370,6 @@ M: x86 immediate-bitwise? ( n -- ? )
     #! set up by the caller.
     stack-frame get total-size>> + stack@ ;
 
-enable-simd
 enable-min/max
 enable-fixnum-log2
 

From d655c3c9cca88d9cd952082c9ae24d7e2bfa8b38 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Tue, 3 Nov 2009 21:38:29 -0600
Subject: [PATCH 07/46] make horizontal shift available to float vectors (it'd
 still be faster than the software fallback despite pipeline penalty)

---
 basis/cpu/x86/x86.factor | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor
index 53c9c98ed3..b0a5dc0897 100644
--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@@ -1166,7 +1166,7 @@ M: x86 %horizontal-shl-vector-imm ( dst src1 src2 rep -- )
 
 M: x86 %horizontal-shl-vector-imm-reps
     {
-        { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+        { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep float-4-rep double-2-rep } }
     } available-reps ;
 
 M: x86 %horizontal-shr-vector-imm ( dst src1 src2 rep -- )
@@ -1174,7 +1174,7 @@ M: x86 %horizontal-shr-vector-imm ( dst src1 src2 rep -- )
 
 M: x86 %horizontal-shr-vector-imm-reps
     {
-        { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+        { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep float-4-rep double-2-rep } }
     } available-reps ;
 
 M: x86 %abs-vector ( dst src rep -- )

From bd77633d5b3ab8ca1114af0bf2f3a5e7f3fc2f1f Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Tue, 3 Nov 2009 21:38:45 -0600
Subject: [PATCH 08/46] new intrinsic generators, pt1

---
 .../compiler/cfg/intrinsics/simd/simd.factor  | 720 +++++++++---------
 1 file changed, 345 insertions(+), 375 deletions(-)

diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor
index bac86e2457..208e19ccc3 100644
--- a/basis/compiler/cfg/intrinsics/simd/simd.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd.factor
@@ -10,78 +10,27 @@ compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
 compiler.cfg.instructions compiler.cfg.registers
 compiler.cfg.intrinsics.alien
 specialized-arrays ;
-FROM: alien.c-types => heap-size uchar ushort uint ulonglong float double ;
-SPECIALIZED-ARRAYS: uchar ushort uint ulonglong float double ;
+FROM: alien.c-types => heap-size char short int longlong float double ;
+SPECIALIZED-ARRAYS: char short int longlong float double ;
 IN: compiler.cfg.intrinsics.simd
 
-MACRO: check-elements ( quots -- )
-    [ length '[ _ firstn ] ]
-    [ '[ _ spread ] ]
-    [ length 1 - \ and <repetition> [ ] like ]
-    tri 3append ;
+! compound vector ops
 
-MACRO: if-literals-match ( quots -- )
-    [ length ] [ ] [ length ] tri
-    ! n quots n
-    '[
-        ! node quot
-        [
-            dup node-input-infos
-            _ tail-slice* [ literal>> ] map
-            dup _ check-elements
-        ] dip
-        swap [
-            ! node literals quot
-            [ _ firstn ] dip call
-            drop
-        ] [ 2drop emit-primitive ] if
-    ] ;
+: ^load-neg-zero-vector ( rep -- dst )
+    {
+        { float-4-rep [ float-array{ -0.0 -0.0 -0.0 -0.0 } underlying>> ^^load-constant ] }
+        { double-2-rep [ double-array{ -0.0 -0.0 } underlying>> ^^load-constant ] }
+    } case ;
 
-: emit-vector-op ( node quot: ( rep -- ) -- )
-    { [ representation? ] } if-literals-match ; inline
-
-: [binary] ( quot -- quot' )
-    '[ [ ds-drop 2inputs ] dip @ ds-push ] ; inline
-
-: emit-binary-vector-op ( node quot -- )
-    [binary] emit-vector-op ; inline
-
-: [unary] ( quot -- quot' )
-    '[ [ ds-drop ds-pop ] dip @ ds-push ] ; inline
-
-: emit-unary-vector-op ( node quot -- )
-    [unary] emit-vector-op ; inline
-
-: [unary/param] ( quot -- quot' )
-    '[ [ -2 inc-d ds-pop ] 2dip @ ds-push ] ; inline
-
-: emit-shift-vector-imm-op ( node quot -- )
-    [unary/param]
-    { [ integer? ] [ representation? ] } if-literals-match ; inline
-
-:: emit-shift-vector-op ( node imm-quot var-quot -- )
-    node node-input-infos 2 tail-slice* first literal>> integer?
-    [ node imm-quot emit-shift-vector-imm-op ]
-    [ node var-quot emit-binary-vector-op ] if ; inline
-
-: emit-gather-vector-2 ( node -- )
-    [ ^^gather-vector-2 ] emit-binary-vector-op ;
-
-: emit-gather-vector-4 ( node -- )
-    [
-        ds-drop
-        [
-            D 3 peek-loc
-            D 2 peek-loc
-            D 1 peek-loc
-            D 0 peek-loc
-            -4 inc-d
-        ] dip
-        ^^gather-vector-4
-        ds-push
-    ] emit-vector-op ;
-
-: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
+: ^load-add-sub-vector ( rep -- dst )
+    unsign-rep {
+        { float-4-rep    [ float-array{ -0.0  0.0 -0.0  0.0 } underlying>> ^^load-constant ] }
+        { double-2-rep   [ double-array{ -0.0  0.0 } underlying>> ^^load-constant ] }
+        { char-16-rep    [ char-array{ -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 } underlying>> ^^load-constant ] }
+        { short-8-rep    [ short-array{ -1 0 -1 0 -1 0 -1 0 } underlying>> ^^load-constant ] }
+        { int-4-rep      [ int-array{ -1 0 -1 0 } underlying>> ^^load-constant ] }
+        { longlong-2-rep [ longlong-array{ -1 0 } underlying>> ^^load-constant ] }
+    } case ;
 
 : >variable-shuffle ( shuffle rep -- shuffle' )
     rep-component-type heap-size
@@ -89,325 +38,346 @@ MACRO: if-literals-match ( quots -- )
     [ iota >byte-array ] bi
     '[ _ n*v _ v+ ] map concat ;
 
-: generate-shuffle-vector-imm ( src shuffle rep -- dst )
-    dup %shuffle-vector-imm-reps member?
-    [ ^^shuffle-vector-imm ]
-    [
-        [ >variable-shuffle ^^load-constant ] keep
-        ^^shuffle-vector
-    ] if ;
+: ^load-immediate-shuffle ( shuffle rep -- dst )
+    >variable-shuffle ^^load-constant ;
 
-: emit-shuffle-vector-imm ( node -- )
-    ! Pad the permutation with zeroes if it's too short, since we
-    ! can't throw an error at this point.
-    [ [ rep-components 0 pad-tail ] keep generate-shuffle-vector-imm ] [unary/param]
-    { [ shuffle? ] [ representation? ] } if-literals-match ;
-
-: emit-shuffle-vector-var ( node -- )
-    [ ^^shuffle-vector ] [binary]
-    { [ %shuffle-vector-reps member? ] } if-literals-match ;
-
-: emit-shuffle-vector ( node -- )
-    dup node-input-infos {
-        [ length 3 = ]
-        [ first  class>> byte-array class<= ]
-        [ second class>> byte-array class<= ]
-        [ third  literal>> representation?  ]
-    } 1&& [ emit-shuffle-vector-var ] [ emit-shuffle-vector-imm ] if ;
-
-: ^^broadcast-vector ( src n rep -- dst )
-    [ rep-components swap <array> ] keep
-    generate-shuffle-vector-imm ;
-
-: emit-broadcast-vector ( node -- )
-    [ ^^broadcast-vector ] [unary/param]
-    { [ integer? ] [ representation? ] } if-literals-match ;
-
-: ^^with-vector ( src rep -- dst )
-    [ ^^scalar>vector ] keep [ 0 ] dip ^^broadcast-vector ;
-
-: ^^select-vector ( src n rep -- dst )
-    [ ^^broadcast-vector ] keep ^^vector>scalar ;
-
-: emit-select-vector ( node -- )
-    [ ^^select-vector ] [unary/param]
-    { [ integer? ] [ representation? ] } if-literals-match ; inline
-
-: emit-alien-vector-op ( node quot: ( rep -- ) -- )
-    { [ %alien-vector-reps member? ] } if-literals-match ; inline
-
-: emit-alien-vector ( node -- )
-    dup [
-        '[
-            ds-drop prepare-alien-getter
-            _ ^^alien-vector ds-push
-        ]
-        [ inline-alien-getter? ] inline-alien
-    ] with emit-alien-vector-op ;
-
-: emit-set-alien-vector ( node -- )
-    dup [
-        '[
-            ds-drop prepare-alien-setter ds-pop
-            _ ##set-alien-vector
-        ]
-        [ byte-array inline-alien-setter? ]
-        inline-alien
-    ] with emit-alien-vector-op ;
-
-: generate-not-vector ( src rep -- dst )
-    dup %not-vector-reps member?
-    [ ^^not-vector ]
-    [ [ ^^fill-vector ] [ ^^xor-vector ] bi ] if ;
-
-:: ((generate-compare-vector)) ( src1 src2 rep {cc,swap} -- dst )
-    {cc,swap} first2 :> ( cc swap? )
-    swap?
-    [ src2 src1 rep cc ^^compare-vector ]
-    [ src1 src2 rep cc ^^compare-vector ] if ;
-
-:: (generate-compare-vector) ( src1 src2 rep orig-cc -- dst )
-    rep orig-cc %compare-vector-ccs :> ( ccs not? )
-
-    ccs empty?
-    [ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ]
-    [
-        ccs unclip :> ( rest-ccs first-cc )
-        src1 src2 rep first-cc ((generate-compare-vector)) :> first-dst
-
-        rest-ccs first-dst
-        [ [ src1 src2 rep ] dip ((generate-compare-vector)) rep ^^or-vector ]
-        reduce
-
-        not? [ rep generate-not-vector ] when
-    ] if ;
-
-: sign-bit-mask ( rep -- byte-array )
-    unsign-rep {
-        { char-16-rep [ uchar-array{
-            HEX: 80 HEX: 80 HEX: 80 HEX: 80
-            HEX: 80 HEX: 80 HEX: 80 HEX: 80
-            HEX: 80 HEX: 80 HEX: 80 HEX: 80
-            HEX: 80 HEX: 80 HEX: 80 HEX: 80
-        } underlying>> ] }
-        { short-8-rep [ ushort-array{
-            HEX: 8000 HEX: 8000 HEX: 8000 HEX: 8000
-            HEX: 8000 HEX: 8000 HEX: 8000 HEX: 8000
-        } underlying>> ] }
-        { int-4-rep [ uint-array{
-            HEX: 8000,0000 HEX: 8000,0000
-            HEX: 8000,0000 HEX: 8000,0000
-        } underlying>> ] }
-        { longlong-2-rep [ ulonglong-array{
-            HEX: 8000,0000,0000,0000
-            HEX: 8000,0000,0000,0000
-        } underlying>> ] }
-    } case ;
-
-:: (generate-minmax-compare-vector) ( src1 src2 rep orig-cc -- dst )
-    orig-cc order-cc {
-        { cc<  [ src1 src2 rep ^^max-vector src1 rep cc/= (generate-compare-vector) ] }
-        { cc<= [ src1 src2 rep ^^min-vector src1 rep cc=  (generate-compare-vector) ] }
-        { cc>  [ src1 src2 rep ^^min-vector src1 rep cc/= (generate-compare-vector) ] }
-        { cc>= [ src1 src2 rep ^^max-vector src1 rep cc=  (generate-compare-vector) ] }
-    } case ;
-
-:: generate-compare-vector ( src1 src2 rep orig-cc -- dst )
-    {
-        {
-            [ rep orig-cc %compare-vector-reps member? ]
-            [ src1 src2 rep orig-cc (generate-compare-vector) ]
-        }
-        {
-            [ rep %min-vector-reps member? ]
-            [ src1 src2 rep orig-cc (generate-minmax-compare-vector) ]
-        }
-        {
-            [ rep unsign-rep orig-cc %compare-vector-reps member? ]
-            [ 
-                rep sign-bit-mask ^^load-constant :> sign-bits
-                src1 sign-bits rep ^^xor-vector
-                src2 sign-bits rep ^^xor-vector
-                rep unsign-rep orig-cc (generate-compare-vector)
-            ]
-        }
-    } cond ;
-
-:: generate-unpack-vector-head ( src rep -- dst )
-    {
-        {
-            [ rep %unpack-vector-head-reps member? ]
-            [ src rep ^^unpack-vector-head ]
-        }
-        {
-            [ rep unsigned-int-vector-rep? ]
-            [
-                rep ^^zero-vector :> zero
-                src zero rep ^^merge-vector-head
-            ]
-        }
-        {
-            [ rep widen-vector-rep %shr-vector-imm-reps member? ]
-            [
-                src src rep ^^merge-vector-head
-                rep rep-component-type
-                heap-size 8 * rep widen-vector-rep ^^shr-vector-imm
-            ]
-        }
-        [
-            rep ^^zero-vector :> zero
-            zero src rep cc> ^^compare-vector :> sign
-            src sign rep ^^merge-vector-head
-        ] 
-    } cond ;
-
-:: generate-unpack-vector-tail ( src rep -- dst )
-    {
-        {
-            [ rep %unpack-vector-tail-reps member? ]
-            [ src rep ^^unpack-vector-tail ]
-        }
-        {
-            [ rep %unpack-vector-head-reps member? ]
-            [
-                src rep ^^tail>head-vector :> tail
-                tail rep ^^unpack-vector-head
-            ]
-        }
-        {
-            [ rep unsigned-int-vector-rep? ]
-            [
-                rep ^^zero-vector :> zero
-                src zero rep ^^merge-vector-tail
-            ]
-        }
-        {
-            [ rep widen-vector-rep %shr-vector-imm-reps member? ]
-            [
-                src src rep ^^merge-vector-tail
-                rep rep-component-type
-                heap-size 8 * rep widen-vector-rep ^^shr-vector-imm
-            ]
-        }
-        [
-            rep ^^zero-vector :> zero
-            zero src rep cc> ^^compare-vector :> sign
-            src sign rep ^^merge-vector-tail
-        ] 
-    } cond ;
-
-:: generate-load-neg-zero-vector ( rep -- dst )
-    rep {
-        { float-4-rep [ float-array{ -0.0 -0.0 -0.0 -0.0 } underlying>> ^^load-constant ] }
-        { double-2-rep [ double-array{ -0.0 -0.0 } underlying>> ^^load-constant ] }
-        [ drop rep ^^zero-vector ]
-    } case ;
-
-:: generate-neg-vector ( src rep -- dst )
-    rep generate-load-neg-zero-vector
-    src rep ^^sub-vector ;
-
-:: generate-blend-vector ( mask true false rep -- dst )
-    mask true rep ^^and-vector
+:: ^blend-vector ( mask true false rep -- dst )
+    true mask rep ^^and-vector
     mask false rep ^^andn-vector
     rep ^^or-vector ;
 
-:: generate-abs-vector ( src rep -- dst )
+: ^compare-vector ( src1 src2 rep cc -- dst )
+    ... ;
+
+: ^widened-shr-vector-imm ( src shift rep -- dst )
+    widen-vector-rep ^^shr-vector-imm ;
+
+! intrinsic emitters
+
+: emit-simd-v+ ( node -- )
     {
-        {
-            [ rep unsigned-int-vector-rep? ]
-            [ src ]
-        }
-        {
-            [ rep %abs-vector-reps member? ]
-            [ src rep ^^abs-vector ]
-        }
-        {
-            [ rep float-vector-rep? ]
-            [
-                rep generate-load-neg-zero-vector
-                src rep ^^andn-vector
-            ]
-        }
-        [ 
+        [ ^^add-vector ]
+    } emit-vv-vector-op ;
+
+: emit-simd-v- ( node -- )
+    {
+        [ ^^sub-vector ]
+    } emit-vv-vector-op ;
+
+: emit-simd-vneg ( node -- )
+    {
+        { float-vector-rep [ [ ^load-neg-zero-vector ] [ ^^sub-vector ] bi ] }
+        { int-vector-rep   [ [ ^^zero-vector         ] [ ^^sub-vector ] bi ] }
+    } emit-v-vector-op ;
+
+: emit-simd-v+- ( node -- )
+    {
+        [ ^^add-sub-vector ]
+        { float-vector-rep [| src1 src2 rep |
+            rep ^load-add-sub-vector :> signs
+            src2 signs rep ^^xor-vector :> src2'
+            src1 src2' rep ^^add-vector
+        ] }
+        { int-vector-rep   [| src1 src2 rep |
+            rep ^load-add-sub-vector :> signs
+            src2  signs rep ^^xor-vector :> src2'
+            src2' signs rep ^^sub-vector :> src2''
+            src1 src2'' rep ^^add-vector
+        ] }
+    } emit-vv-vector-op ;
+
+: emit-simd-vs+ ( node -- )
+    {
+        { float-vector-rep [ ^^add-vector ] }
+        { int-vector-rep [ ^^saturated-add-vector ] }
+    } emit-vv-vector-op ;
+
+: emit-simd-vs- ( node -- )
+    {
+        { float-vector-rep [ ^^sub-vector ] }
+        { int-vector-rep [ ^^saturated-sub-vector ] }
+    } emit-vv-vector-op ;
+
+: emit-simd-vs* ( node -- )
+    {
+        { float-vector-rep [ ^^mul-vector ] }
+        { int-vector-rep [ ^^saturated-mul-vector ] }
+    } emit-vv-vector-op ;
+
+: emit-simd-v* ( node -- )
+    {
+        [ ^^mul-vector ]
+    } emit-vv-vector-op ;
+
+: emit-simd-v/ ( node -- )
+    {
+        [ ^^div-vector ]
+    } emit-vv-vector-op ;
+
+: emit-simd-vmin ( node -- )
+    {
+        [ ^^min-vector ]
+        [
+            [ cc< ^compare-vector ]
+            [ ^blend-vector ] 3bi
+        ]
+    } emit-vv-vector-op ;
+
+: emit-simd-vmax ( node -- )
+    {
+        [ ^^max-vector ]
+        [
+            [ cc> ^compare-vector ]
+            [ ^blend-vector ] 3bi
+        ]
+    } emit-vv-vector-op ;
+
+: emit-simd-v. ( node -- )
+    {
+        [ ^^dot-vector ]
+        { float-vector-rep [| src1 src2 rep |
+            
+        ] }
+        { int-vector-rep [| src1 src2 rep |
+            ...
+        ] }
+    } emit-vv-vector-op ;
+
+: emit-simd-vsqrt ( node -- )
+    {
+        [ ^^sqrt-vector ]
+    } emit-v-vector-op ;
+
+: emit-simd-sum ( node -- )
+    ... ;
+
+: emit-simd-vabs ( node -- )
+    {
+        { unsigned-int-vector-rep [ drop ] }
+        [ ^^abs-vector ]
+        { float-vector-rep [ [ ^load-neg-zero-vector ] [ swapd ^^andn-vector ] bi ] }
+        { int-vector-rep [| src rep |
             rep ^^zero-vector :> zero
             zero src rep ^^sub-vector :> -src
-            zero src rep cc> ^^compare-vector :> sign 
-            sign -src src rep generate-blend-vector
-        ]
-    } cond ;
+            zero src rep cc> ^compare-vector :> sign
+            sign -src src rep ^blend-vector
+        ] }
+    } emit-v-vector-op ;
 
-: generate-min-vector ( src1 src2 rep -- dst )
-    dup %min-vector-reps member?
-    [ ^^min-vector ] [
-        [ cc< generate-compare-vector ]
-        [ generate-blend-vector ] 3bi
-    ] if ;
+: emit-simd-vand ( node -- )
+    {
+        [ ^^and-vector ]
+    } emit-vv-vector-op ;
 
-: generate-max-vector ( src1 src2 rep -- dst )
-    dup %max-vector-reps member?
-    [ ^^max-vector ] [
-        [ cc> generate-compare-vector ]
-        [ generate-blend-vector ] 3bi
-    ] if ;
+: emit-simd-vandn ( node -- )
+    {
+        [ ^^andn-vector ]
+    } emit-vv-vector-op ;
+
+: emit-simd-vor ( node -- )
+    {
+        [ ^^or-vector ]
+    } emit-vv-vector-op ;
+
+: emit-simd-vxor ( node -- )
+    {
+        [ ^^xor-vector ]
+    } emit-vv-vector-op ;
+
+: emit-simd-vnot ( node -- )
+    {
+        [ ^^not-vector ]
+        [ [ ^^fill-vector ] [ ^^xor-vector ] bi ]
+    } emit-v-vector-op ;
+
+: emit-simd-vlshift ( node -- )
+    {
+        [ ^^shl-vector ]
+    } {
+        [ ^^shl-vector-imm ]
+    } emit-vn-or-vl-vector-op ;
+
+: emit-simd-vrshift ( node -- )
+    {
+        [ ^^shr-vector ]
+    } {
+        [ ^^shr-vector-imm ]
+    } emit-vn-or-vl-vector-op ;
+
+: emit-simd-hlshift ( node -- )
+    {
+        [ ^^horizontal-shl-vector-imm ]
+    } emit-vl-vector-op ;
+
+: emit-simd-hrshift ( node -- )
+    {
+        [ ^^horizontal-shr-vector-imm ]
+    } emit-vl-vector-op ;
+
+: emit-simd-vshuffle-elements ( node -- )
+    {
+        [ ^^shuffle-vector-imm ]
+        [ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] ]
+    } emit-vl-vector-op ;
+
+: emit-simd-vshuffle-bytes ( node -- )
+    {
+        [ ^^shuffle-vector ]
+    } emit-vv-vector-op ;
+
+: emit-simd-vmerge-head ( node -- )
+    {
+        [ ^^merge-vector-head ]
+    } emit-vv-vector-op ;
+
+: emit-simd-vmerge-tail ( node -- )
+    {
+        [ ^^merge-vector-tail ]
+    } emit-vv-vector-op ;
+
+: emit-simd-v<= ( node -- )
+    [ cc<= ^compare-vector ] (emit-vv-vector-op) ;
+: emit-simd-v< ( node -- )
+    [ cc< ^compare-vector ] (emit-vv-vector-op) ;
+: emit-simd-v= ( node -- )
+    [ cc= ^compare-vector ] (emit-vv-vector-op) ;
+: emit-simd-v> ( node -- )
+    [ cc> ^compare-vector ] (emit-vv-vector-op) ;
+: emit-simd-v>= ( node -- )
+    [ cc>= ^compare-vector ] (emit-vv-vector-op) ;
+: emit-simd-vunordered? ( node -- )
+    [ cc/<>= ^compare-vector ] (emit-vv-vector-op) ;
+
+: emit-simd-vany? ( node -- )
+    [ vcc-any ^test-vector ] (emit-vv-vector-op) ;
+: emit-simd-vall? ( node -- )
+    [ vcc-all ^test-vector ] (emit-vv-vector-op) ;
+: emit-simd-vnone? ( node -- )
+    [ vcc-none ^test-vector ] (emit-vv-vector-op) ;
+
+: emit-simd-v>float ( node -- )
+    {
+        { float-vector-rep [ drop ] }
+        { int-vector-rep [ ^^integer>float-vector ] }
+    } emit-vv-vector-op ;
+
+: emit-simd-v>integer ( node -- )
+    {
+        { float-vector-rep [ ^^float>integer-vector ] }
+        { int-vector-rep [ dup ] }
+    } emit-vv-vector-op ;
+
+: emit-simd-vpack-signed ( node -- )
+    {
+        [ ^^signed-pack-vector ]
+    } emit-vv-vector-op ;
+
+: emit-simd-vpack-unsigned ( node -- )
+    {
+        [ ^^unsigned-pack-vector ]
+    } emit-vv-vector-op ;
+
+! XXX shr vector rep is widened!
+: emit-simd-vunpack-head ( node -- )
+    {
+        [ ^^unpack-vector-head ]
+        { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-head ] bi ] }
+        { signed-int-vector-rep [| src rep |
+            src src rep ^^merge-vector-head :> merged
+            rep rep-component-type heap-size 8 * :> bits
+            merged bits rep ^widened-shr-vector-imm
+        ] }
+        { signed-int-vector-rep [| src rep |
+            rep ^^zero-vector :> zero
+            zero src rep cc> ^compare-vector :> sign
+            src sign rep ^^merge-vector-head
+        ] }
+    } emit-v-vector-op ;
+
+: emit-simd-vunpack-tail ( node -- )
+    {
+        [ ^^unpack-vector-tail ]
+        [ [ ^^tail>head-vector ] [ ^^unpack-vector-head ] bi ]
+        { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-tail ] bi ] }
+        { signed-int-vector-rep [| src rep |
+            src src rep ^^merge-vector-tail :> merged
+            rep rep-component-type heap-size 8 * :> bits
+            merged bits rep widen-vector-rep ^widened-shr-vector-imm
+        ] }
+        { signed-int-vector-rep [| src rep |
+            rep ^^zero-vector :> zero
+            zero src rep cc> ^compare-vector :> sign
+            src sign rep ^^merge-vector-tail
+        ] }
+    } emit-v-vector-op ;
+
+: emit-simd-with ( node -- )
+: emit-simd-gather-2 ( node -- )
+: emit-simd-gather-4 ( node -- )
+: emit-simd-select ( node -- )
+: emit-alien-vector ( node -- )
+: emit-set-alien-vector ( node -- )
+: emit-alien-vector-aligned ( node -- )
+: emit-set-alien-vector-aligned ( node -- )
 
 : enable-simd ( -- )
     {
-        { math.vectors.simd:assert-positive [ drop ] }
-        { math.vectors.simd:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-vs+) [ [ ^^saturated-add-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-vs-) [ [ ^^saturated-sub-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-vneg) [ [ generate-neg-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-vs*) [ [ ^^saturated-mul-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-vmin) [ [ generate-min-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-vmax) [ [ generate-max-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-v.) [ [ ^^dot-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-vabs) [ [ generate-abs-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-vbitandn) [ [ ^^andn-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-vbitnot) [ [ generate-not-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd:(simd-vand) [ [ ^^and-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-vandn) [ [ ^^andn-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-vor) [ [ ^^or-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-vxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-vnot) [ [ generate-not-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd:(simd-v<=) [ [ cc<= generate-compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-v<) [ [ cc< generate-compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-v=) [ [ cc= generate-compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-v>) [ [ cc> generate-compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-v>=) [ [ cc>= generate-compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-vunordered?) [ [ cc/<>= generate-compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-vany?) [ [ vcc-any ^^test-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd:(simd-vall?) [ [ vcc-all ^^test-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd:(simd-vnone?) [ [ vcc-none ^^test-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd:(simd-vlshift) [ [ ^^shl-vector-imm ] [ ^^shl-vector ] emit-shift-vector-op ] }
-        { math.vectors.simd:(simd-vrshift) [ [ ^^shr-vector-imm ] [ ^^shr-vector ] emit-shift-vector-op ] }
-        { math.vectors.simd:(simd-hlshift) [ [ ^^horizontal-shl-vector-imm ] emit-shift-vector-imm-op ] }
-        { math.vectors.simd:(simd-hrshift) [ [ ^^horizontal-shr-vector-imm ] emit-shift-vector-imm-op ] }
-        { math.vectors.simd:(simd-with) [ [ ^^with-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd:(simd-gather-2) [ emit-gather-vector-2 ] }
-        { math.vectors.simd:(simd-gather-4) [ emit-gather-vector-4 ] }
-        { math.vectors.simd:(simd-vshuffle-elements) [ emit-shuffle-vector ] }
-        { math.vectors.simd:(simd-vshuffle-bytes) [ emit-shuffle-vector-var ] }
-        { math.vectors.simd:(simd-vmerge-head) [ [ ^^merge-vector-head ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-vmerge-tail) [ [ ^^merge-vector-tail ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-v>float) [ [ ^^integer>float-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd:(simd-v>integer) [ [ ^^float>integer-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd:(simd-vpack-signed) [ [ ^^signed-pack-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-vpack-unsigned) [ [ ^^unsigned-pack-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd:(simd-vunpack-head) [ [ generate-unpack-vector-head ] emit-unary-vector-op ] }
-        { math.vectors.simd:(simd-vunpack-tail) [ [ generate-unpack-vector-tail ] emit-unary-vector-op ] }
-        { math.vectors.simd:(simd-select) [ emit-select-vector ] }
-        { math.vectors.simd:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd:alien-vector [ emit-alien-vector ] }
-        { math.vectors.simd:set-alien-vector [ emit-set-alien-vector ] }
+        { (simd-v+)                [ emit-simd-v+                  ] }
+        { (simd-v-)                [ emit-simd-v-                  ] }
+        { (simd-vneg)              [ emit-simd-vneg                ] }
+        { (simd-v+-)               [ emit-simd-v+-                 ] }
+        { (simd-vs+)               [ emit-simd-vs+                 ] }
+        { (simd-vs-)               [ emit-simd-vs-                 ] }
+        { (simd-vs*)               [ emit-simd-vs*                 ] }
+        { (simd-v*)                [ emit-simd-v*                  ] }
+        { (simd-v/)                [ emit-simd-v/                  ] }
+        { (simd-vmin)              [ emit-simd-vmin                ] }
+        { (simd-vmax)              [ emit-simd-vmax                ] }
+        { (simd-v.)                [ emit-simd-v.                  ] }
+        { (simd-vsqrt)             [ emit-simd-vsqrt               ] }
+        { (simd-sum)               [ emit-simd-sum                 ] }
+        { (simd-vabs)              [ emit-simd-vabs                ] }
+        { (simd-vbitand)           [ emit-simd-vand                ] }
+        { (simd-vbitandn)          [ emit-simd-vandn               ] }
+        { (simd-vbitor)            [ emit-simd-vor                 ] }
+        { (simd-vbitxor)           [ emit-simd-vxor                ] }
+        { (simd-vbitnot)           [ emit-simd-vnot                ] }
+        { (simd-vand)              [ emit-simd-vand                ] }
+        { (simd-vandn)             [ emit-simd-vandn               ] }
+        { (simd-vor)               [ emit-simd-vor                 ] }
+        { (simd-vxor)              [ emit-simd-vxor                ] }
+        { (simd-vnot)              [ emit-simd-vnot                ] }
+        { (simd-vlshift)           [ emit-simd-vlshift             ] }
+        { (simd-vrshift)           [ emit-simd-vrshift             ] }
+        { (simd-hlshift)           [ emit-simd-hlshift             ] }
+        { (simd-hrshift)           [ emit-simd-hrshift             ] }
+        { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements   ] }
+        { (simd-vshuffle-bytes)    [ emit-simd-vshuffle-bytes      ] }
+        { (simd-vmerge-head)       [ emit-simd-vmerge-head         ] }
+        { (simd-vmerge-tail)       [ emit-simd-vmerge-tail         ] }
+        { (simd-v<=)               [ emit-simd-v<=                 ] }
+        { (simd-v<)                [ emit-simd-v<                  ] }
+        { (simd-v=)                [ emit-simd-v=                  ] }
+        { (simd-v>)                [ emit-simd-v>                  ] }
+        { (simd-v>=)               [ emit-simd-v>=                 ] }
+        { (simd-vunordered?)       [ emit-simd-vunordered?         ] }
+        { (simd-vany?)             [ emit-simd-vany?               ] }
+        { (simd-vall?)             [ emit-simd-vall?               ] }
+        { (simd-vnone?)            [ emit-simd-vnone?              ] }
+        { (simd-v>float)           [ emit-simd-v>float             ] }
+        { (simd-v>integer)         [ emit-simd-v>integer           ] }
+        { (simd-vpack-signed)      [ emit-simd-vpack-signed        ] }
+        { (simd-vpack-unsigned)    [ emit-simd-vpack-unsigned      ] }
+        { (simd-vunpack-head)      [ emit-simd-vunpack-head        ] }
+        { (simd-vunpack-tail)      [ emit-simd-vunpack-tail        ] }
+        { (simd-with)              [ emit-simd-with                ] }
+        { (simd-gather-2)          [ emit-simd-gather-2            ] }
+        { (simd-gather-4)          [ emit-simd-gather-4            ] }
+        { (simd-select)            [ emit-simd-select              ] }
+        { alien-vector             [ emit-alien-vector             ] }
+        { set-alien-vector         [ emit-set-alien-vector         ] }
+        { alien-vector-aligned     [ emit-alien-vector             ] }
+        { set-alien-vector-aligned [ emit-set-alien-vector         ] }
     } enable-intrinsics ;
 
 enable-simd

From b98742be3095ba51cc21ca5f769d01dc26ad15db Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Tue, 3 Nov 2009 21:38:55 -0600
Subject: [PATCH 09/46] typos

---
 basis/math/vectors/simd/simd.factor | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor
index 139060333c..1aff80a0a9 100644
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@@ -65,8 +65,8 @@ GENERIC: new-underlying ( underlying seq -- seq' )
 : (simd-vnone?)            ( a   rep -- ? ) \ vnone? bad-simd-call ;
 : (simd-v>float)           ( a   rep -- c ) \ vconvert bad-simd-call ;
 : (simd-v>integer)         ( a   rep -- c ) \ vconvert bad-simd-call ;
-: (simd-vpack-signed)      ( a   rep -- c ) \ vconvert bad-simd-call ;
-: (simd-vpack-unsigned)    ( a   rep -- c ) \ vconvert bad-simd-call ;
+: (simd-vpack-signed)      ( a b rep -- c ) \ vconvert bad-simd-call ;
+: (simd-vpack-unsigned)    ( a b rep -- c ) \ vconvert bad-simd-call ;
 : (simd-vunpack-head)      ( a   rep -- c ) \ vconvert bad-simd-call ;
 : (simd-vunpack-tail)      ( a   rep -- c ) \ vconvert bad-simd-call ;
 : (simd-with)              (   n rep -- v ) \ simd-with bad-simd-call ;
@@ -337,3 +337,5 @@ M: simd-128 distance  v- norm ; inline
 M: simd-128 vshuffle ( u perm -- v )
     vshuffle-bytes ; inline
 
+"compiler.tree.propagation.simd" require
+"compiler.cfg.intrinsics.simd" require

From f6643a1c72a905a543fa40b403fb5cd3dce1f45a Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Wed, 4 Nov 2009 12:18:01 -0600
Subject: [PATCH 10/46] change ##horizontal-add-vector insn to better match
 what the HADD SSE instructions do (add adjacent pairs, pack results)

---
 .../cfg/instructions/instructions.factor      |  8 ++---
 basis/cpu/architecture/architecture.factor    |  4 +--
 basis/cpu/x86/x86.factor                      | 31 ++++++++++++++-----
 3 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor
index d4d84a088a..30fe8b590e 100644
--- a/basis/compiler/cfg/instructions/instructions.factor
+++ b/basis/compiler/cfg/instructions/instructions.factor
@@ -408,13 +408,13 @@ use: src1 src2
 literal: rep ;
 
 PURE-INSN: ##horizontal-add-vector
-def: dst/scalar-rep
-use: src
+def: dst
+use: src1 src2
 literal: rep ;
 
 PURE-INSN: ##horizontal-sub-vector
-def: dst/scalar-rep
-use: src
+def: dst
+use: src1 src2
 literal: rep ;
 
 PURE-INSN: ##horizontal-shl-vector-imm
diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor
index 75fbb85542..81aea67eb5 100644
--- a/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@@ -277,8 +277,8 @@ HOOK: %min-vector cpu ( dst src1 src2 rep -- )
 HOOK: %max-vector cpu ( dst src1 src2 rep -- )
 HOOK: %dot-vector cpu ( dst src1 src2 rep -- )
 HOOK: %sqrt-vector cpu ( dst src rep -- )
-HOOK: %horizontal-add-vector cpu ( dst src rep -- )
-HOOK: %horizontal-sub-vector cpu ( dst src rep -- )
+HOOK: %horizontal-add-vector cpu ( dst src1 src2 rep -- )
+HOOK: %horizontal-sub-vector cpu ( dst src1 src2 rep -- )
 HOOK: %abs-vector cpu ( dst src rep -- )
 HOOK: %and-vector cpu ( dst src1 src2 rep -- )
 HOOK: %andn-vector cpu ( dst src1 src2 rep -- )
diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor
index b0a5dc0897..68c2fb0438 100644
--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@@ -1134,14 +1134,25 @@ M: x86 %dot-vector
         { float-4-rep [
             sse4.1?
             [ HEX: ff DPPS ]
-            [ [ MULPS ] [ drop dup float-4-rep %horizontal-add-vector ] 2bi ]
-            if
+            [
+                [ MULPS ] [
+                    drop 2dup float-4-rep
+                    [ %horizontal-add-vector ]
+                    [ %horizontal-add-vector ]
+                    [ nip %vector>scalar ] 3tri
+                ] 2bi
+            ] if
         ] }
         { double-2-rep [
             sse4.1?
             [ HEX: ff DPPD ]
-            [ [ MULPD ] [ drop dup double-2-rep %horizontal-add-vector ] 2bi ]
-            if
+            [
+                [ MULPD ] [
+                    drop 2dup double-2-rep
+                    [ %horizontal-add-vector ]
+                    [ nip %vector>scalar ] 3bi
+                ] 2bi
+            ] if
         ] }
     } case ;
 
@@ -1150,15 +1161,19 @@ M: x86 %dot-vector-reps
         { sse3? { float-4-rep double-2-rep } }
     } available-reps ;
 
-M: x86 %horizontal-add-vector ( dst src rep -- )
-    {
-        { float-4-rep [ [ float-4-rep %copy ] [ HADDPS ] [ HADDPS ] 2tri ] }
-        { double-2-rep [ [ double-2-rep %copy ] [ HADDPD ] 2bi ] }
+M: x86 %horizontal-add-vector ( dst src1 src2 rep -- )
+    [ two-operand ] keep
+    unsign-rep {
+        { float-4-rep  [ HADDPS ] }
+        { double-2-rep [ HADDPD ] }
+        { int-4-rep    [ PHADDD ] }
+        { short-8-rep  [ PHADDW ] }
     } case ;
 
 M: x86 %horizontal-add-vector-reps
     {
         { sse3? { float-4-rep double-2-rep } }
+        { ssse3? { int-4-rep uint-4-rep short-8-rep ushort-8-rep } }
     } available-reps ;
 
 M: x86 %horizontal-shl-vector-imm ( dst src1 src2 rep -- )

From 4d54f27cd1add5af1b55a742f161a565a0ff9c17 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Thu, 5 Nov 2009 09:52:57 -0600
Subject: [PATCH 11/46] more intrinsic madness

---
 .../compiler/cfg/intrinsics/simd/simd.factor  | 298 ++++++++++++++----
 basis/cpu/x86/x86.factor                      |  27 +-
 basis/math/vectors/simd/simd.factor           |   3 -
 3 files changed, 245 insertions(+), 83 deletions(-)

diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor
index 208e19ccc3..c4fcdca23e 100644
--- a/basis/compiler/cfg/intrinsics/simd/simd.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd.factor
@@ -1,4 +1,4 @@
-! Copyright (C) 2009 Slava Pestov.
+! Copyright (C) 2009 Slava Pestov, Joe Groff.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors alien byte-arrays fry classes.algebra
 cpu.architecture kernel math sequences math.vectors
@@ -16,6 +16,28 @@ IN: compiler.cfg.intrinsics.simd
 
 ! compound vector ops
 
+: sign-bit-mask ( rep -- byte-array )
+    unsign-rep {
+        { char-16-rep [ uchar-array{
+            HEX: 80 HEX: 80 HEX: 80 HEX: 80
+            HEX: 80 HEX: 80 HEX: 80 HEX: 80
+            HEX: 80 HEX: 80 HEX: 80 HEX: 80
+            HEX: 80 HEX: 80 HEX: 80 HEX: 80
+        } underlying>> ] }
+        { short-8-rep [ ushort-array{
+            HEX: 8000 HEX: 8000 HEX: 8000 HEX: 8000
+            HEX: 8000 HEX: 8000 HEX: 8000 HEX: 8000
+        } underlying>> ] }
+        { int-4-rep [ uint-array{
+            HEX: 8000,0000 HEX: 8000,0000
+            HEX: 8000,0000 HEX: 8000,0000
+        } underlying>> ] }
+        { longlong-2-rep [ ulonglong-array{
+            HEX: 8000,0000,0000,0000
+            HEX: 8000,0000,0000,0000
+        } underlying>> ] }
+    } case ;
+
 : ^load-neg-zero-vector ( rep -- dst )
     {
         { float-4-rep [ float-array{ -0.0 -0.0 -0.0 -0.0 } underlying>> ^^load-constant ] }
@@ -46,11 +68,163 @@ IN: compiler.cfg.intrinsics.simd
     mask false rep ^^andn-vector
     rep ^^or-vector ;
 
-: ^compare-vector ( src1 src2 rep cc -- dst )
-    ... ;
+: ^minmax-compare-vector ( src1 src2 rep cc -- dst )
+    order-cc {
+        { cc<  [ src1 src2 rep ^^max-vector src1 rep cc/= ^^compare-vector ] }
+        { cc<= [ src1 src2 rep ^^min-vector src1 rep cc=  ^^compare-vector ] }
+        { cc>  [ src1 src2 rep ^^min-vector src1 rep cc/= ^^compare-vector ] }
+        { cc>= [ src1 src2 rep ^^max-vector src1 rep cc=  ^^compare-vector ] }
+    } case ;
 
-: ^widened-shr-vector-imm ( src shift rep -- dst )
-    widen-vector-rep ^^shr-vector-imm ;
+: ^compare-vector ( src1 src2 rep cc -- dst )
+    {
+        [ ^^compare-vector ]
+        [ ^minmax-compare-vector ]
+        { unsigned-int-vector-rep [| src1 src2 rep cc |
+            rep sign-bit-mask ^^load-constant :> sign-bits
+            src1 sign-bits rep ^^xor-vector
+            src2 sign-bits rep ^^xor-vector
+            rep unsign-rep cc ^^compare-vector
+        ] }
+    } vv-cc-vector-op ;
+
+: ^unpack-vector-head ( src rep -- dst )
+    {
+        [ ^^unpack-vector-head ]
+        { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-head ] bi ] }
+        { signed-int-vector-rep [| src rep |
+            src src rep ^^merge-vector-head :> merged
+            rep rep-component-type heap-size 8 * :> bits
+            merged bits rep ^widened-shr-vector-imm
+        ] }
+        { signed-int-vector-rep [| src rep |
+            rep ^^zero-vector :> zero
+            zero src rep cc> ^compare-vector :> sign
+            src sign rep ^^merge-vector-head
+        ] }
+    } v-vector-op ;
+
+: ^unpack-vector-tail ( src rep -- dst )
+    {
+        [ ^^unpack-vector-tail ]
+        [ [ ^^tail>head-vector ] [ ^^unpack-vector-head ] bi ]
+        { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-tail ] bi ] }
+        { signed-int-vector-rep [| src rep |
+            src src rep ^^merge-vector-tail :> merged
+            rep rep-component-type heap-size 8 * :> bits
+            merged bits rep ^widened-shr-vector-imm
+        ] }
+        { signed-int-vector-rep [| src rep |
+            rep ^^zero-vector :> zero
+            zero src rep cc> ^compare-vector :> sign
+            src sign rep ^^merge-vector-tail
+        ] }
+    } v-vector-op ;
+
+: ^(sum-2) ( src rep -- dst )
+    {
+        [ dupd ^^horizontal-add-vector ]
+        [| src rep | 
+            src src rep ^^merge-vector-head :> head
+            src src rep ^^merge-vector-tail :> tail
+            head tail rep ^^add-vector
+        ]
+    } v-vector-op ;
+
+: ^(sum-4) ( src rep -- dst )
+    {
+        [
+            [ dupd ^^horizontal-add-vector ]
+            [ dupd ^^horizontal-add-vector ] bi
+        ]
+        [| src rep | 
+            src src rep ^^merge-vector-head :> head
+            src src rep ^^merge-vector-tail :> tail
+            head tail rep ^^add-vector :> src'
+
+            rep widen-rep :> rep'
+            src' src' rep' ^^merge-vector-head :> head'
+            src' src' rep' ^^merge-vector-tail :> tail'
+            head' tail' rep ^^add-vector
+        ]
+    } v-vector-op ;
+
+: ^(sum-8) ( src rep -- dst )
+    {
+        [
+            [ dupd ^^horizontal-add-vector ]
+            [ dupd ^^horizontal-add-vector ]
+            [ dupd ^^horizontal-add-vector ] tri
+        ]
+        [| src rep | 
+            src src rep ^^merge-vector-head :> head
+            src src rep ^^merge-vector-tail :> tail
+            head tail rep ^^add-vector :> src'
+
+            rep widen-rep :> rep'
+            src' src' rep' ^^merge-vector-head :> head'
+            src' src' rep' ^^merge-vector-tail :> tail'
+            head' tail' rep ^^add-vector :> src''
+
+            rep' widen-rep :> rep''
+            src'' src'' rep'' ^^merge-vector-head :> head''
+            src'' src'' rep'' ^^merge-vector-tail :> tail''
+            head'' tail'' rep ^^add-vector
+        ]
+    } v-vector-op ;
+
+: ^(sum-16) ( src rep -- dst )
+    {
+        [
+            {
+                [ dupd ^^horizontal-add-vector ]
+                [ dupd ^^horizontal-add-vector ]
+                [ dupd ^^horizontal-add-vector ]
+                [ dupd ^^horizontal-add-vector ]
+            } cleave
+        ]
+        [| src rep | 
+            src src rep ^^merge-vector-head :> head
+            src src rep ^^merge-vector-tail :> tail
+            head tail rep ^^add-vector :> src'
+
+            rep widen-rep :> rep'
+            src' src' rep' ^^merge-vector-head :> head'
+            src' src' rep' ^^merge-vector-tail :> tail'
+            head' tail' rep ^^add-vector :> src''
+
+            rep' widen-rep :> rep''
+            src'' src'' rep'' ^^merge-vector-head :> head''
+            src'' src'' rep'' ^^merge-vector-tail :> tail''
+            head'' tail'' rep ^^add-vector :> src'''
+
+            rep'' widen-rep :> rep'''
+            src''' src''' rep''' ^^merge-vector-head :> head'''
+            src''' src''' rep''' ^^merge-vector-tail :> tail'''
+            head''' tail''' rep ^^add-vector
+        ]
+    } v-vector-op ;
+
+: ^(sum-vector) ( src rep -- dst )
+    [
+        rep-length {
+            {  2 [ ^(sum-2) ] }
+            {  4 [ ^(sum-4) ] }
+            {  8 [ ^(sum-8) ] }
+            { 16 [ ^(sum-16) ] }
+        } case
+    ] [ ^^vector>scalar ] bi ;
+
+: ^sum-vector ( src rep -- dst )
+    unsign-rep {
+        { float-vector-rep [ ^(sum-vector) ] }
+        { int-vector-rep [| src rep |
+            src rep ^unpack-vector-head :> head
+            src rep ^unpack-vector-tail :> tail
+            rep widen-rep :> wide-rep
+            head tail wide-rep ^^add-vector wide-rep ^(sum-vector)
+        ] }
+    } v-vector-op ;
 
 ! intrinsic emitters
 
@@ -135,12 +309,7 @@ IN: compiler.cfg.intrinsics.simd
 : emit-simd-v. ( node -- )
     {
         [ ^^dot-vector ]
-        { float-vector-rep [| src1 src2 rep |
-            
-        ] }
-        { int-vector-rep [| src1 src2 rep |
-            ...
-        ] }
+        [ [ ^^mul-vector ] [ ^sum-vector ] bi ]
     } emit-vv-vector-op ;
 
 : emit-simd-vsqrt ( node -- )
@@ -149,7 +318,9 @@ IN: compiler.cfg.intrinsics.simd
     } emit-v-vector-op ;
 
 : emit-simd-sum ( node -- )
-    ... ;
+    {
+        [ ^sum-vector ]
+    } emit-v-vector-op ;
 
 : emit-simd-vabs ( node -- )
     {
@@ -195,30 +366,32 @@ IN: compiler.cfg.intrinsics.simd
         [ ^^shl-vector ]
     } {
         [ ^^shl-vector-imm ]
-    } emit-vn-or-vl-vector-op ;
+    } [ integer? ] emit-vv-or-vl-vector-op ;
 
 : emit-simd-vrshift ( node -- )
     {
         [ ^^shr-vector ]
     } {
         [ ^^shr-vector-imm ]
-    } emit-vn-or-vl-vector-op ;
+    } [ integer? ] emit-vv-or-vl-vector-op ;
 
 : emit-simd-hlshift ( node -- )
     {
         [ ^^horizontal-shl-vector-imm ]
-    } emit-vl-vector-op ;
+    } [ integer? ] emit-vl-vector-op ;
 
 : emit-simd-hrshift ( node -- )
     {
         [ ^^horizontal-shr-vector-imm ]
-    } emit-vl-vector-op ;
+    } [ integer? ] emit-vl-vector-op ;
+
+: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
 
 : emit-simd-vshuffle-elements ( node -- )
     {
         [ ^^shuffle-vector-imm ]
         [ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] ]
-    } emit-vl-vector-op ;
+    } [ shuffle? ] emit-vl-vector-op ;
 
 : emit-simd-vshuffle-bytes ( node -- )
     {
@@ -236,24 +409,42 @@ IN: compiler.cfg.intrinsics.simd
     } emit-vv-vector-op ;
 
 : emit-simd-v<= ( node -- )
-    [ cc<= ^compare-vector ] (emit-vv-vector-op) ;
+    {
+        [ cc<= ^compare-vector ]
+    } emit-vv-vector-op ;
 : emit-simd-v< ( node -- )
-    [ cc< ^compare-vector ] (emit-vv-vector-op) ;
+    {
+        [ cc< ^compare-vector ]
+    } emit-vv-vector-op ;
 : emit-simd-v= ( node -- )
-    [ cc= ^compare-vector ] (emit-vv-vector-op) ;
+    {
+        [ cc=  ^compare-vector ]
+    } emit-vv-vector-op ;
 : emit-simd-v> ( node -- )
-    [ cc> ^compare-vector ] (emit-vv-vector-op) ;
+    {
+        [ cc>  ^compare-vector ]
+    } emit-vv-vector-op ;
 : emit-simd-v>= ( node -- )
-    [ cc>= ^compare-vector ] (emit-vv-vector-op) ;
+    {
+        [ cc>= ^compare-vector ]
+    } emit-vv-vector-op ;
 : emit-simd-vunordered? ( node -- )
-    [ cc/<>= ^compare-vector ] (emit-vv-vector-op) ;
+    {
+        [ cc/<>= ^compare-vector ]
+    } emit-vv-vector-op ;
 
 : emit-simd-vany? ( node -- )
-    [ vcc-any ^test-vector ] (emit-vv-vector-op) ;
+    {
+        [ vcc-any ^test-vector ]
+    } emit-vv-vector-op ;
 : emit-simd-vall? ( node -- )
-    [ vcc-all ^test-vector ] (emit-vv-vector-op) ;
+    {
+        [ vcc-all ^test-vector ]
+    } emit-vv-vector-op ;
 : emit-simd-vnone? ( node -- )
-    [ vcc-none ^test-vector ] (emit-vv-vector-op) ;
+    {
+        [ vcc-none ^test-vector ]
+    } emit-vv-vector-op ;
 
 : emit-simd-v>float ( node -- )
     {
@@ -277,48 +468,45 @@ IN: compiler.cfg.intrinsics.simd
         [ ^^unsigned-pack-vector ]
     } emit-vv-vector-op ;
 
-! XXX shr vector rep is widened!
 : emit-simd-vunpack-head ( node -- )
     {
-        [ ^^unpack-vector-head ]
-        { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-head ] bi ] }
-        { signed-int-vector-rep [| src rep |
-            src src rep ^^merge-vector-head :> merged
-            rep rep-component-type heap-size 8 * :> bits
-            merged bits rep ^widened-shr-vector-imm
-        ] }
-        { signed-int-vector-rep [| src rep |
-            rep ^^zero-vector :> zero
-            zero src rep cc> ^compare-vector :> sign
-            src sign rep ^^merge-vector-head
-        ] }
+        [ ^unpack-vector-head ]
     } emit-v-vector-op ;
 
 : emit-simd-vunpack-tail ( node -- )
     {
-        [ ^^unpack-vector-tail ]
-        [ [ ^^tail>head-vector ] [ ^^unpack-vector-head ] bi ]
-        { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-tail ] bi ] }
-        { signed-int-vector-rep [| src rep |
-            src src rep ^^merge-vector-tail :> merged
-            rep rep-component-type heap-size 8 * :> bits
-            merged bits rep widen-vector-rep ^widened-shr-vector-imm
-        ] }
-        { signed-int-vector-rep [| src rep |
-            rep ^^zero-vector :> zero
-            zero src rep cc> ^compare-vector :> sign
-            src sign rep ^^merge-vector-tail
-        ] }
+        [ ^unpack-vector-tail ]
     } emit-v-vector-op ;
 
 : emit-simd-with ( node -- )
+    {
+        [ ^^with-vector ]
+    } emit-v-vector-op ;
+
 : emit-simd-gather-2 ( node -- )
+    {
+        [ ^^gather-vector-2 ]
+    } emit-vv-vector-op ;
+
 : emit-simd-gather-4 ( node -- )
+    {
+        [ ^^gather-vector-4 ]
+    } emit-vvvv-vector-op ;
+
 : emit-simd-select ( node -- )
+    {
+        [ ^^select-vector ]
+    } [ integer? ] emit-vl-vector-op ;
+
 : emit-alien-vector ( node -- )
+    {
+        [ ^^alien-vector ]
+    } emit-alien-vector-op ;
+    
 : emit-set-alien-vector ( node -- )
-: emit-alien-vector-aligned ( node -- )
-: emit-set-alien-vector-aligned ( node -- )
+    {
+        [ ^^set-alien-vector ]
+    } emit-set-alien-vector-op ;
 
 : enable-simd ( -- )
     {
@@ -376,8 +564,6 @@ IN: compiler.cfg.intrinsics.simd
         { (simd-select)            [ emit-simd-select              ] }
         { alien-vector             [ emit-alien-vector             ] }
         { set-alien-vector         [ emit-set-alien-vector         ] }
-        { alien-vector-aligned     [ emit-alien-vector             ] }
-        { set-alien-vector-aligned [ emit-set-alien-vector         ] }
     } enable-intrinsics ;
 
 enable-simd
diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor
index 68c2fb0438..d78d8c852e 100644
--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@@ -1131,34 +1131,13 @@ M: x86 %max-vector-reps
 M: x86 %dot-vector
     [ two-operand ] keep
     {
-        { float-4-rep [
-            sse4.1?
-            [ HEX: ff DPPS ]
-            [
-                [ MULPS ] [
-                    drop 2dup float-4-rep
-                    [ %horizontal-add-vector ]
-                    [ %horizontal-add-vector ]
-                    [ nip %vector>scalar ] 3tri
-                ] 2bi
-            ] if
-        ] }
-        { double-2-rep [
-            sse4.1?
-            [ HEX: ff DPPD ]
-            [
-                [ MULPD ] [
-                    drop 2dup double-2-rep
-                    [ %horizontal-add-vector ]
-                    [ nip %vector>scalar ] 3bi
-                ] 2bi
-            ] if
-        ] }
+        { float-4-rep [ HEX: ff DPPS ] }
+        { double-2-rep [ HEX: ff DPPD ] }
     } case ;
 
 M: x86 %dot-vector-reps
     {
-        { sse3? { float-4-rep double-2-rep } }
+        { sse4.1? { float-4-rep double-2-rep } }
     } available-reps ;
 
 M: x86 %horizontal-add-vector ( dst src1 src2 rep -- )
diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor
index 1aff80a0a9..c155c797ff 100644
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@@ -77,9 +77,6 @@ GENERIC: new-underlying ( underlying seq -- seq' )
 : alien-vector     ( c-ptr n rep -- value ) \ alien-vector bad-simd-call ;
 : set-alien-vector ( c-ptr n rep -- value ) \ set-alien-vector bad-simd-call ;
 
-: alien-vector-aligned     ( c-ptr n rep -- value ) \ alien-vector-aligned bad-simd-call ;
-: set-alien-vector-aligned ( c-ptr n rep -- value ) \ set-alien-vector-aligned bad-simd-call ;
-
 ! Helper for boolean vector literals
 
 : vector-true-value ( class -- value )

From eac9bacf40bb959061a88cef4f75841d805da1fa Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Tue, 10 Nov 2009 23:35:46 -0600
Subject: [PATCH 12/46] backend for choosing available SIMD intrinsic
 implementations

---
 .../intrinsics/simd/backend/backend.factor    | 135 ++++++++++++++++++
 .../compiler/cfg/intrinsics/simd/simd.factor  |  52 +++++--
 2 files changed, 177 insertions(+), 10 deletions(-)
 create mode 100644 basis/compiler/cfg/intrinsics/simd/backend/backend.factor

diff --git a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor
new file mode 100644
index 0000000000..4fe9774282
--- /dev/null
+++ b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor
@@ -0,0 +1,135 @@
+! (c)2009 Joe Groff bsd license
+USING: accessors fry generalizations kernel locals math sequences
+splitting words ;
+IN: compiler.cfg.intrinsics.simd.backend
+
+! Selection of implementation based on available CPU instructions
+
+: can-has? ( quot -- ? )
+    [ t \ can-has? ] dip '[ @ drop \ can-has? get ] with-variable ; inline
+
+GENERIC: create-can-has-word ( word -- word' )
+
+PREDICATE: vector-op-word
+    {
+        [ name>> { [ { [ "^" head? ] [ "##" head? ] } 1|| ] [ "-vector" swap subseq? ] } 1&& ]
+        [ vocabulary>> { "compiler.cfg.intrinsics.simd" "cpu.architecture" } member? ]
+    } 1&& ;
+
+: reps-word ( word -- word' )
+    name>> "^^" ?head drop "##" ?head drop
+    "%" "-reps" surround "cpu.architecture" lookup ;
+
+:: can-has-^^-quot ( word def effect -- def' )
+    effect in>> { "rep" } split1 [ length ] bi@ 1 +
+    word reps-word
+    effect out>> length f <array> >quotation
+    '[ [ _ ndrop ] _ ndip _ execute member? \ can-has? [ and ] change @ ] ;
+
+:: can-has-^-quot ( word def effect -- def' )
+    def create-can-has ;
+
+M: object create-can-has ;
+
+M: sequence create-can-has
+    [ create-can-has-word ] map ;
+
+: (create-can-has-word) ( word -- word' created? )
+    name>> "can-has-" prepend "compiler.cfg.intrinsics.simd.backend"
+    2dup lookup
+    [ 2nip f ] [ create t ] if* ;
+
+: (create-can-has-quot) ( word -- def effect )
+    [ ] [ def>> ] [ stack-effect ] tri [
+        {
+            { [ pick "^^" head? ] [ can-has-^^-quot ] }
+            { [ pick "##" head? ] [ can-has-^^-quot ] }
+            { [ pick "^"  head? ] [ can-has-^-quot  ] }
+        } cond
+    ] keep ;
+
+M: vector-op-word create-can-has
+    [ (create-can-has-word) ] keep
+    '[ _ (create-can-has-quot) define-declared ]
+    [ nip ] if ;
+
+GENERIC# >can-has-cond 2 ( quot #pick #dup -- quotpair )
+M:: callable >can-has-cond
+    #dup quot create-can-has '[ _ ndup _ can-has? ] quot 2array ;
+    
+M:: pair >can-has-cond ( pair #pick #dup -- quotpair )
+    pair first2 :> ( class quot )
+    #pick class #dup quot create-can-has
+    '[ _ npick _ instance? [ _ ndup _ can-has? ] dip and ]
+    quot 2array ;
+
+MACRO: v-vector-op ( trials -- )
+    [ 1 2 >can-has-cond ] map '[ _ cond ] ;
+MACRO: vl-vector-op ( trials -- )
+    [ 1 3 >can-has-cond ] map '[ _ cond ] ;
+MACRO: vv-vector-op ( trials -- )
+    [ 1 3 >can-has-cond ] map '[ _ cond ] ;
+MACRO: vv-cc-vector-op ( trials -- )
+    [ 2 4 >can-has-cond ] map '[ _ cond ] ;
+MACRO: vvvv-vector-op ( trials -- )
+    [ 1 5 >can-has-cond ] map '[ _ cond ] ;
+
+! Special-case conditional instructions
+
+: can-has-^(compare-vector) ( src1 src2 rep cc -- dst )
+    [ 2drop ] 2dip %compare-vector-reps member?
+    \ can-has? [ and ] change
+    f ;
+
+! Intrinsic code emission
+
+MACRO: if-literals-match ( quots -- )
+    [ length ] [ ] [ length ] tri
+    ! n quots n
+    '[
+        ! node quot
+        [
+            dup node-input-infos
+            _ tail-slice* [ literal>> ] map
+            dup _ check-elements
+        ] dip
+        swap [
+            ! node literals quot
+            [ _ firstn ] dip call
+            drop
+        ] [ 2drop emit-primitive ] if
+    ] ;
+
+CONSTANT: [unary]       [ ds-drop  ds-pop ]
+CONSTANT: [unary/param] [ [ -2 inc-d ds-pop ] dip ]
+CONSTANT: [binary]      [ ds-drop 2inputs ]
+CONSTANT: [quaternary]
+    [
+        ds-drop 
+        D 3 peek-loc
+        D 2 peek-loc
+        D 1 peek-loc
+        D 0 peek-loc
+        -4 inc-d
+    ]
+
+:: [emit-vector-op] ( trials params-quot op-quot literal-preds -- quot ) ;
+    params-quot trials op-quot literal-preds 
+    '[ [ _ dip _ @ ds-push ] _ if-literals-match ] ;
+
+MACRO: emit-v-vector-op ( trials -- )
+    [unary] [ v-vector-op ] { [ representation? ] } [emit-vector-op] ;
+MACRO: emit-vl-vector-op ( trials literal-pred -- )
+    [ [unary/param] [ vl-vector-op ] { [ representation? ] } ] dip prefix [emit-vector-op] ;
+MACRO: emit-vv-vector-op ( trials -- )
+    [binary] [ vv-vector-op ] { [ representation? ] } [emit-vector-op] ;
+MACRO: emit-vvvv-vector-op ( trials -- )
+    [quaternary] [ vvvv-vector-op ] { [ representation? ] } [emit-vector-op] ;
+
+MACRO:: emit-vv-or-vl-vector-op ( trials literal-pred -- )
+    literal-pred trials literal-pred trials
+    '[
+        dup node-input-infos 2 tail-slice* first literal>> @
+        [ _ _ emit-vl-vector-op ]
+        [ _   emit-vv-vector-op ] if 
+    ] ;
diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor
index c4fcdca23e..1cf076af1d 100644
--- a/basis/compiler/cfg/intrinsics/simd/simd.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd.factor
@@ -9,6 +9,7 @@ compiler.cfg.comparisons
 compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
 compiler.cfg.instructions compiler.cfg.registers
 compiler.cfg.intrinsics.alien
+compiler.cfg.intrinsics.simd.backend
 specialized-arrays ;
 FROM: alien.c-types => heap-size char short int longlong float double ;
 SPECIALIZED-ARRAYS: char short int longlong float double ;
@@ -76,15 +77,37 @@ IN: compiler.cfg.intrinsics.simd
         { cc>= [ src1 src2 rep ^^max-vector src1 rep cc=  ^^compare-vector ] }
     } case ;
 
+:: ^((compare-vector)) ( src1 src2 rep {cc,swap} -- dst )
+    {cc,swap} first2 :> ( cc swap? )
+    swap?
+    [ src2 src1 rep cc ^^compare-vector ]
+    [ src1 src2 rep cc ^^compare-vector ] if ;
+
+:: ^(compare-vector) ( src1 src2 rep orig-cc -- dst )
+    rep orig-cc %compare-vector-ccs :> ( ccs not? )
+
+    ccs empty?
+    [ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ]
+    [
+        ccs unclip :> ( rest-ccs first-cc )
+        src1 src2 rep first-cc ^((compare-vector)) :> first-dst
+
+        rest-ccs first-dst
+        [ [ src1 src2 rep ] dip ^((compare-vector)) rep ^^or-vector ]
+        reduce
+
+        not? [ rep generate-not-vector ] when
+    ] if ;
+
 : ^compare-vector ( src1 src2 rep cc -- dst )
     {
-        [ ^^compare-vector ]
+        [ ^(compare-vector) ]
         [ ^minmax-compare-vector ]
         { unsigned-int-vector-rep [| src1 src2 rep cc |
             rep sign-bit-mask ^^load-constant :> sign-bits
             src1 sign-bits rep ^^xor-vector
             src2 sign-bits rep ^^xor-vector
-            rep unsign-rep cc ^^compare-vector
+            rep unsign-rep cc ^(compare-vector)
         ] }
     } vv-cc-vector-op ;
 
@@ -95,7 +118,7 @@ IN: compiler.cfg.intrinsics.simd
         { signed-int-vector-rep [| src rep |
             src src rep ^^merge-vector-head :> merged
             rep rep-component-type heap-size 8 * :> bits
-            merged bits rep ^widened-shr-vector-imm
+            merged bits rep widen-rep ^shr-vector-imm
         ] }
         { signed-int-vector-rep [| src rep |
             rep ^^zero-vector :> zero
@@ -499,14 +522,23 @@ IN: compiler.cfg.intrinsics.simd
     } [ integer? ] emit-vl-vector-op ;
 
 : emit-alien-vector ( node -- )
-    {
-        [ ^^alien-vector ]
-    } emit-alien-vector-op ;
-    
+    dup [
+        '[
+            ds-drop prepare-alien-getter
+            _ ^^alien-vector ds-push
+        ]
+        [ inline-alien-getter? ] inline-alien
+    ] with { [ %alien-vector-reps member? ] } if-literals-match ;
+
 : emit-set-alien-vector ( node -- )
-    {
-        [ ^^set-alien-vector ]
-    } emit-set-alien-vector-op ;
+    dup [
+        '[
+            ds-drop prepare-alien-setter ds-pop
+            _ ##set-alien-vector
+        ]
+        [ byte-array inline-alien-setter? ]
+        inline-alien
+    ] with { [ %alien-vector-reps member? ] } if-literals-match ;
 
 : enable-simd ( -- )
     {

From 324889e2acafec116c27bc525e9b5027d35ba921 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Wed, 11 Nov 2009 15:50:20 -0600
Subject: [PATCH 13/46] '[ ] should make an empty quot

---
 basis/fry/fry-tests.factor |  2 ++
 basis/fry/fry.factor       | 12 +++++++-----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/basis/fry/fry-tests.factor b/basis/fry/fry-tests.factor
index 10d9b282ad..f33eb276a0 100644
--- a/basis/fry/fry-tests.factor
+++ b/basis/fry/fry-tests.factor
@@ -5,6 +5,8 @@ IN: fry.tests
 
 SYMBOLS: a b c d e f g h ;
 
+[ [ ] ] [ '[ ] ] unit-test
+[ [ + ] ] [ '[ + ] ] unit-test
 [ [ 1 ] ] [ 1 '[ _ ] ] unit-test
 [ [ 1 ] ] [ [ 1 ] '[ @ ] ] unit-test
 [ [ 1 2 ] ] [ [ 1 ] [ 2 ] '[ @ @ ] ] unit-test
diff --git a/basis/fry/fry.factor b/basis/fry/fry.factor
index 931397e933..e58253692f 100644
--- a/basis/fry/fry.factor
+++ b/basis/fry/fry.factor
@@ -136,10 +136,12 @@ TUPLE: dredge-fry-state
 PRIVATE>
 
 M: callable fry ( quot -- quot' )
-    0 swap <dredge-fry>
-    [ dredge-fry ] [
-        [ prequot>> >quotation ]
-        [ quot>> >quotation shallow-fry ] bi append
-    ] bi ;
+    [ [ [ ] ] ] [
+        0 swap <dredge-fry>
+        [ dredge-fry ] [
+            [ prequot>> >quotation ]
+            [ quot>> >quotation shallow-fry ] bi append
+        ] bi
+    ] if-empty ;
 
 SYNTAX: '[ parse-quotation fry append! ;

From e323071c44d263edc9710ce44da5c3588ebfe9f1 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Wed, 11 Nov 2009 16:08:40 -0600
Subject: [PATCH 14/46] sever lingering dependencies on simd from compiler

---
 .../compiler/cfg/intrinsics/intrinsics.factor |   2 -
 .../value-numbering/rewrite/rewrite.factor    |  93 +-------------
 .../cfg/value-numbering/simd/simd.factor      | 120 ++++++++++++++++++
 .../value-numbering/simplify/simplify.factor  |  12 +-
 .../value-numbering-tests.factor              |   2 +-
 .../math/vectors/simd/mirrors/mirrors.factor  |   3 +
 basis/math/vectors/simd/simd.factor           |   7 +-
 basis/math/vectors/vectors.factor             |   3 +-
 .../specialized-arrays/mirrors/mirrors.factor |   2 -
 .../specialized-arrays.factor                 |   9 +-
 10 files changed, 135 insertions(+), 118 deletions(-)
 create mode 100644 basis/compiler/cfg/value-numbering/simd/simd.factor
 create mode 100644 basis/math/vectors/simd/mirrors/mirrors.factor

diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor
index 632c32b12f..f4bbcdb255 100644
--- a/basis/compiler/cfg/intrinsics/intrinsics.factor
+++ b/basis/compiler/cfg/intrinsics/intrinsics.factor
@@ -7,7 +7,6 @@ compiler.cfg.intrinsics.alien
 compiler.cfg.intrinsics.allot
 compiler.cfg.intrinsics.fixnum
 compiler.cfg.intrinsics.float
-compiler.cfg.intrinsics.simd
 compiler.cfg.intrinsics.slots
 compiler.cfg.intrinsics.misc
 compiler.cfg.comparisons ;
@@ -23,7 +22,6 @@ QUALIFIED: classes.tuple.private
 QUALIFIED: math.private
 QUALIFIED: math.integers.private
 QUALIFIED: math.floats.private
-QUALIFIED: math.vectors.simd.intrinsics
 QUALIFIED: math.libm
 IN: compiler.cfg.intrinsics
 
diff --git a/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor b/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor
index 4864a8bfb7..746fe0e5ea 100755
--- a/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor
+++ b/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor
@@ -2,7 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors combinators combinators.short-circuit arrays
 fry kernel layouts math namespaces sequences cpu.architecture
-math.bitwise math.order math.vectors.simd.intrinsics classes
+math.bitwise math.order classes
 vectors locals make alien.c-types io.binary grouping
 compiler.cfg
 compiler.cfg.registers
@@ -463,100 +463,9 @@ M: ##alien-signed-2 rewrite rewrite-alien-addressing ;
 M: ##alien-signed-4 rewrite rewrite-alien-addressing ;
 M: ##alien-float rewrite rewrite-alien-addressing ;
 M: ##alien-double rewrite rewrite-alien-addressing ;
-M: ##alien-vector rewrite rewrite-alien-addressing ;
 M: ##set-alien-integer-1 rewrite rewrite-alien-addressing ;
 M: ##set-alien-integer-2 rewrite rewrite-alien-addressing ;
 M: ##set-alien-integer-4 rewrite rewrite-alien-addressing ;
 M: ##set-alien-float rewrite rewrite-alien-addressing ;
 M: ##set-alien-double rewrite rewrite-alien-addressing ;
-M: ##set-alien-vector rewrite rewrite-alien-addressing ;
 
-! Some lame constant folding for SIMD intrinsics. Eventually this
-! should be redone completely.
-
-: rewrite-shuffle-vector-imm ( insn expr -- insn' )
-    2dup [ rep>> ] bi@ eq? [
-        [ [ dst>> ] [ src>> vn>vreg ] bi* ]
-        [ [ shuffle>> ] bi@ nths ]
-        [ drop rep>> ]
-        2tri \ ##shuffle-vector-imm new-insn
-    ] [ 2drop f ] if ;
-
-: (fold-shuffle-vector-imm) ( shuffle bytes -- bytes' )
-    2dup length swap length /i group nths concat ;
-
-: fold-shuffle-vector-imm ( insn expr -- insn' )
-    [ [ dst>> ] [ shuffle>> ] bi ] dip value>>
-    (fold-shuffle-vector-imm) \ ##load-constant new-insn ;
-
-M: ##shuffle-vector-imm rewrite
-    dup src>> vreg>expr {
-        { [ dup shuffle-vector-imm-expr? ] [ rewrite-shuffle-vector-imm ] }
-        { [ dup reference-expr? ] [ fold-shuffle-vector-imm ] }
-        { [ dup constant-expr? ] [ fold-shuffle-vector-imm ] }
-        [ 2drop f ]
-    } cond ;
-
-: (fold-scalar>vector) ( insn bytes -- insn' )
-    [ [ dst>> ] [ rep>> rep-components ] bi ] dip <repetition> concat
-    \ ##load-constant new-insn ;
-
-: fold-scalar>vector ( insn expr -- insn' )
-    value>> over rep>> {
-        { float-4-rep [ float>bits 4 >le (fold-scalar>vector) ] }
-        { double-2-rep [ double>bits 8 >le (fold-scalar>vector) ] }
-        [ [ untag-fixnum ] dip rep-component-type heap-size >le (fold-scalar>vector) ]
-    } case ;
-
-M: ##scalar>vector rewrite
-    dup src>> vreg>expr dup constant-expr?
-    [ fold-scalar>vector ] [ 2drop f ] if ;
-
-M: ##xor-vector rewrite
-    dup [ src1>> vreg>vn ] [ src2>> vreg>vn ] bi eq?
-    [ [ dst>> ] [ rep>> ] bi \ ##zero-vector new-insn ] [ drop f ] if ;
-
-: vector-not? ( expr -- ? )
-    {
-        [ not-vector-expr? ]
-        [ {
-            [ xor-vector-expr? ]
-            [ [ src1>> ] [ src2>> ] bi [ vn>expr fill-vector-expr? ] either? ]
-        } 1&& ]
-    } 1|| ;
-
-GENERIC: vector-not-src ( expr -- vreg )
-M: not-vector-expr vector-not-src src>> vn>vreg ;
-M: xor-vector-expr vector-not-src
-    dup src1>> vn>expr fill-vector-expr? [ src2>> ] [ src1>> ] if vn>vreg ;
-
-M: ##and-vector rewrite 
-    {
-        { [ dup src1>> vreg>expr vector-not? ] [
-            {
-                [ dst>> ]
-                [ src1>> vreg>expr vector-not-src ]
-                [ src2>> ]
-                [ rep>> ]
-            } cleave \ ##andn-vector new-insn
-        ] }
-        { [ dup src2>> vreg>expr vector-not? ] [
-            {
-                [ dst>> ]
-                [ src2>> vreg>expr vector-not-src ]
-                [ src1>> ]
-                [ rep>> ]
-            } cleave \ ##andn-vector new-insn
-        ] }
-        [ drop f ]
-    } cond ;
-
-M: ##andn-vector rewrite
-    dup src1>> vreg>expr vector-not? [
-        {
-            [ dst>> ]
-            [ src1>> vreg>expr vector-not-src ]
-            [ src2>> ]
-            [ rep>> ]
-        } cleave \ ##and-vector new-insn
-    ] [ drop f ] if ;
diff --git a/basis/compiler/cfg/value-numbering/simd/simd.factor b/basis/compiler/cfg/value-numbering/simd/simd.factor
new file mode 100644
index 0000000000..9bb4453313
--- /dev/null
+++ b/basis/compiler/cfg/value-numbering/simd/simd.factor
@@ -0,0 +1,120 @@
+! Copyright (C) 2008, 2009 Slava Pestov.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors combinators combinators.short-circuit arrays
+fry kernel layouts math namespaces sequences cpu.architecture
+math.bitwise math.order classes
+vectors locals make alien.c-types io.binary grouping
+math.vectors.simd
+compiler.cfg
+compiler.cfg.registers
+compiler.cfg.comparisons
+compiler.cfg.instructions
+compiler.cfg.value-numbering.expressions
+compiler.cfg.value-numbering.graph
+compiler.cfg.value-numbering.rewrite
+compiler.cfg.value-numbering.simplify ;
+IN: compiler.cfg.value-numbering.simd
+
+M: ##alien-vector rewrite rewrite-alien-addressing ;
+M: ##set-alien-vector rewrite rewrite-alien-addressing ;
+
+! Some lame constant folding for SIMD intrinsics. Eventually this
+! should be redone completely.
+
+: rewrite-shuffle-vector-imm ( insn expr -- insn' )
+    2dup [ rep>> ] bi@ eq? [
+        [ [ dst>> ] [ src>> vn>vreg ] bi* ]
+        [ [ shuffle>> ] bi@ nths ]
+        [ drop rep>> ]
+        2tri \ ##shuffle-vector-imm new-insn
+    ] [ 2drop f ] if ;
+
+: (fold-shuffle-vector-imm) ( shuffle bytes -- bytes' )
+    2dup length swap length /i group nths concat ;
+
+: fold-shuffle-vector-imm ( insn expr -- insn' )
+    [ [ dst>> ] [ shuffle>> ] bi ] dip value>>
+    (fold-shuffle-vector-imm) \ ##load-constant new-insn ;
+
+M: ##shuffle-vector-imm rewrite
+    dup src>> vreg>expr {
+        { [ dup shuffle-vector-imm-expr? ] [ rewrite-shuffle-vector-imm ] }
+        { [ dup reference-expr? ] [ fold-shuffle-vector-imm ] }
+        { [ dup constant-expr? ] [ fold-shuffle-vector-imm ] }
+        [ 2drop f ]
+    } cond ;
+
+: (fold-scalar>vector) ( insn bytes -- insn' )
+    [ [ dst>> ] [ rep>> rep-length ] bi ] dip <repetition> concat
+    \ ##load-constant new-insn ;
+
+: fold-scalar>vector ( insn expr -- insn' )
+    value>> over rep>> {
+        { float-4-rep [ float>bits 4 >le (fold-scalar>vector) ] }
+        { double-2-rep [ double>bits 8 >le (fold-scalar>vector) ] }
+        [ [ untag-fixnum ] dip rep-component-type heap-size >le (fold-scalar>vector) ]
+    } case ;
+
+M: ##scalar>vector rewrite
+    dup src>> vreg>expr dup constant-expr?
+    [ fold-scalar>vector ] [ 2drop f ] if ;
+
+M: ##xor-vector rewrite
+    dup [ src1>> vreg>vn ] [ src2>> vreg>vn ] bi eq?
+    [ [ dst>> ] [ rep>> ] bi \ ##zero-vector new-insn ] [ drop f ] if ;
+
+: vector-not? ( expr -- ? )
+    {
+        [ not-vector-expr? ]
+        [ {
+            [ xor-vector-expr? ]
+            [ [ src1>> ] [ src2>> ] bi [ vn>expr fill-vector-expr? ] either? ]
+        } 1&& ]
+    } 1|| ;
+
+GENERIC: vector-not-src ( expr -- vreg )
+M: not-vector-expr vector-not-src src>> vn>vreg ;
+M: xor-vector-expr vector-not-src
+    dup src1>> vn>expr fill-vector-expr? [ src2>> ] [ src1>> ] if vn>vreg ;
+
+M: ##and-vector rewrite 
+    {
+        { [ dup src1>> vreg>expr vector-not? ] [
+            {
+                [ dst>> ]
+                [ src1>> vreg>expr vector-not-src ]
+                [ src2>> ]
+                [ rep>> ]
+            } cleave \ ##andn-vector new-insn
+        ] }
+        { [ dup src2>> vreg>expr vector-not? ] [
+            {
+                [ dst>> ]
+                [ src2>> vreg>expr vector-not-src ]
+                [ src1>> ]
+                [ rep>> ]
+            } cleave \ ##andn-vector new-insn
+        ] }
+        [ drop f ]
+    } cond ;
+
+M: ##andn-vector rewrite
+    dup src1>> vreg>expr vector-not? [
+        {
+            [ dst>> ]
+            [ src1>> vreg>expr vector-not-src ]
+            [ src2>> ]
+            [ rep>> ]
+        } cleave \ ##and-vector new-insn
+    ] [ drop f ] if ;
+
+M: scalar>vector-expr simplify*
+    src>> vn>expr {
+        { [ dup vector>scalar-expr? ] [ src>> ] }
+        [ drop f ]
+    } cond ;
+
+M: shuffle-vector-imm-expr simplify*
+    [ src>> ] [ shuffle>> ] [ rep>> rep-length iota ] tri
+    sequence= [ drop f ] unless ;
+
diff --git a/basis/compiler/cfg/value-numbering/simplify/simplify.factor b/basis/compiler/cfg/value-numbering/simplify/simplify.factor
index df3dc6aab9..7a95711b01 100644
--- a/basis/compiler/cfg/value-numbering/simplify/simplify.factor
+++ b/basis/compiler/cfg/value-numbering/simplify/simplify.factor
@@ -1,7 +1,7 @@
 ! Copyright (C) 2008, 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: kernel accessors combinators classes math layouts
-sequences math.vectors.simd.intrinsics
+sequences 
 compiler.cfg.instructions
 compiler.cfg.value-numbering.graph
 compiler.cfg.value-numbering.expressions ;
@@ -130,16 +130,6 @@ M: box-displaced-alien-expr simplify*
         [ 2drop f ]
     } cond ;
 
-M: scalar>vector-expr simplify*
-    src>> vn>expr {
-        { [ dup vector>scalar-expr? ] [ src>> ] }
-        [ drop f ]
-    } cond ;
-
-M: shuffle-vector-imm-expr simplify*
-    [ src>> ] [ shuffle>> ] [ rep>> rep-components iota ] tri
-    sequence= [ drop f ] unless ;
-
 M: expr simplify* drop f ;
 
 : simplify ( expr -- vn )
diff --git a/basis/compiler/cfg/value-numbering/value-numbering-tests.factor b/basis/compiler/cfg/value-numbering/value-numbering-tests.factor
index b404c4d4a4..ac992ff98d 100644
--- a/basis/compiler/cfg/value-numbering/value-numbering-tests.factor
+++ b/basis/compiler/cfg/value-numbering/value-numbering-tests.factor
@@ -4,7 +4,7 @@ cpu.architecture tools.test kernel math combinators.short-circuit
 accessors sequences compiler.cfg.predecessors locals compiler.cfg.dce
 compiler.cfg.ssa.destruction compiler.cfg.loop-detection
 compiler.cfg.representations compiler.cfg assocs vectors arrays
-layouts literals namespaces alien ;
+layouts literals namespaces alien compiler.cfg.value-numbering.simd ;
 IN: compiler.cfg.value-numbering.tests
 
 : trim-temps ( insns -- insns )
diff --git a/basis/math/vectors/simd/mirrors/mirrors.factor b/basis/math/vectors/simd/mirrors/mirrors.factor
new file mode 100644
index 0000000000..e8a103d449
--- /dev/null
+++ b/basis/math/vectors/simd/mirrors/mirrors.factor
@@ -0,0 +1,3 @@
+USING: math.vectors.simd mirrors ;
+IN: math.vectors.simd.mirrors
+INSTANCE: simd-128          enumerated-sequence
diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor
index c155c797ff..4953abb4ea 100644
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@@ -320,7 +320,7 @@ M: simd-128 n+v [ simd-with ] keep v+ ; inline
 M: simd-128 n-v [ simd-with ] keep v- ; inline
 M: simd-128 n*v [ simd-with ] keep v* ; inline
 M: simd-128 n/v [ simd-with ] keep v/ ; inline
-M: simd-128 v+n over simd-with v+
+M: simd-128 v+n over simd-with v+ ; inline
 M: simd-128 v-n over simd-with v- ; inline
 M: simd-128 v*n over simd-with v* ; inline
 M: simd-128 v/n over simd-with v/ ; inline
@@ -336,3 +336,8 @@ M: simd-128 vshuffle ( u perm -- v )
 
 "compiler.tree.propagation.simd" require
 "compiler.cfg.intrinsics.simd" require
+"compiler.cfg.value-numbering.simd" require
+
+"mirrors" vocab [
+    "math.vectors.simd.mirrors" require
+] when
diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor
index f3dfcda18a..d524ba309f 100644
--- a/basis/math/vectors/vectors.factor
+++ b/basis/math/vectors/vectors.factor
@@ -97,7 +97,7 @@ M: object vbitor [ bitor ] 2map ;
 GENERIC: vbitxor ( u v -- w )
 M: object vbitxor [ bitxor ] 2map ;
 GENERIC: vbitnot ( u -- w )
-M: object vbitnot [ bitnot ] 2map ;
+M: object vbitnot [ bitnot ] map ;
 
 GENERIC# vbroadcast 1 ( u n -- v )
 M:: object vbroadcast ( u n -- v ) u length n u nth <repetition> u like ;
@@ -133,6 +133,7 @@ GENERIC: (vmerge-tail) ( u v -- t )
 M: object (vmerge-tail) over length 2 /i '[ _ tail-slice ] bi@ [ zip ] keep concat-as ;
 
 GENERIC: (vmerge) ( u v -- h t )
+M: object (vmerge)
     [ (vmerge-head) ] [ (vmerge-tail) ] 2bi ; inline
 
 GENERIC: vmerge ( u v -- w )
diff --git a/basis/specialized-arrays/mirrors/mirrors.factor b/basis/specialized-arrays/mirrors/mirrors.factor
index ee7953b501..eea9e83b58 100644
--- a/basis/specialized-arrays/mirrors/mirrors.factor
+++ b/basis/specialized-arrays/mirrors/mirrors.factor
@@ -4,5 +4,3 @@ USING: mirrors specialized-arrays math.vectors ;
 IN: specialized-arrays.mirrors
 
 INSTANCE: specialized-array enumerated-sequence
-INSTANCE: simd-128          enumerated-sequence
-INSTANCE: simd-256          enumerated-sequence
diff --git a/basis/specialized-arrays/specialized-arrays.factor b/basis/specialized-arrays/specialized-arrays.factor
index 711354d803..b6f7209cc6 100755
--- a/basis/specialized-arrays/specialized-arrays.factor
+++ b/basis/specialized-arrays/specialized-arrays.factor
@@ -2,8 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors alien alien.c-types alien.data alien.parser
 assocs byte-arrays classes compiler.units functors kernel lexer
-libc math math.vectors math.vectors.private
-math.vectors.specialization namespaces
+libc math math.vectors math.vectors.private namespaces
 parser prettyprint.custom sequences sequences.private strings
 summary vocabs vocabs.loader vocabs.parser vocabs.generated
 words fry combinators make ;
@@ -69,8 +68,6 @@ TUPLE: A
     [ drop \ T bad-byte-array-length ] unless
     <direct-A> ; inline
 
-M: A new-underlying drop byte-array>A ;
-
 M: A clone [ underlying>> clone ] [ length>> ] bi <direct-A> ; inline
 
 M: A length length>> ; inline
@@ -96,8 +93,6 @@ M: A resize
 
 M: A byte-length length \ T heap-size * ; inline
 
-M: A element-type drop \ T ; inline
-
 M: A direct-array-syntax drop \ A@ ;
 
 M: A pprint-delims drop \ A{ \ } ;
@@ -109,8 +104,6 @@ SYNTAX: A@ scan-object scan-object <direct-A> suffix! ;
 
 INSTANCE: A specialized-array
 
-A T c-type-boxed-class f specialize-vector-words
-
 ;FUNCTOR
 
 GENERIC: (underlying-type) ( c-type -- c-type' )

From 8a8699ac984aed542b748ffe2f7fa1791d8a38f1 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Sat, 14 Nov 2009 20:59:03 -0600
Subject: [PATCH 15/46] backend fixups

---
 .../intrinsics/simd/backend/backend.factor    |  60 ++---
 .../compiler/cfg/intrinsics/simd/simd.factor  | 220 ++++++++++--------
 .../tree/propagation/simd/simd.factor         |   3 +-
 basis/math/vectors/simd/simd-docs.factor      |  37 +--
 basis/math/vectors/simd/simd.factor           |  67 ++++--
 5 files changed, 199 insertions(+), 188 deletions(-)

diff --git a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor
index 4fe9774282..90514c6cc9 100644
--- a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor
+++ b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor
@@ -1,5 +1,10 @@
 ! (c)2009 Joe Groff bsd license
-USING: accessors fry generalizations kernel locals math sequences
+USING: accessors arrays classes combinators
+combinators.short-circuit compiler.cfg.builder.blocks
+compiler.cfg.registers compiler.cfg.stacks
+compiler.cfg.stacks.local compiler.tree.propagation.info
+cpu.architecture effects fry generalizations help.lint.checks
+kernel locals macros math namespaces quotations sequences
 splitting words ;
 IN: compiler.cfg.intrinsics.simd.backend
 
@@ -8,55 +13,51 @@ IN: compiler.cfg.intrinsics.simd.backend
 : can-has? ( quot -- ? )
     [ t \ can-has? ] dip '[ @ drop \ can-has? get ] with-variable ; inline
 
-GENERIC: create-can-has-word ( word -- word' )
+GENERIC: create-can-has ( word -- word' )
 
-PREDICATE: vector-op-word
+PREDICATE: vector-op-word < word
     {
         [ name>> { [ { [ "^" head? ] [ "##" head? ] } 1|| ] [ "-vector" swap subseq? ] } 1&& ]
-        [ vocabulary>> { "compiler.cfg.intrinsics.simd" "cpu.architecture" } member? ]
+        [ vocabulary>> { "compiler.cfg.intrinsics.simd" "compiler.cfg.hats" } member? ]
     } 1&& ;
 
 : reps-word ( word -- word' )
     name>> "^^" ?head drop "##" ?head drop
     "%" "-reps" surround "cpu.architecture" lookup ;
 
-:: can-has-^^-quot ( word def effect -- def' )
+:: can-has-^^-quot ( word def effect -- quot )
     effect in>> { "rep" } split1 [ length ] bi@ 1 +
     word reps-word
     effect out>> length f <array> >quotation
     '[ [ _ ndrop ] _ ndip _ execute member? \ can-has? [ and ] change @ ] ;
 
-:: can-has-^-quot ( word def effect -- def' )
+:: can-has-^-quot ( word def effect -- quot )
     def create-can-has ;
 
-M: object create-can-has ;
+M: object create-can-has 1quotation ;
 
-M: sequence create-can-has
-    [ create-can-has-word ] map ;
+M: array create-can-has
+    [ create-can-has ] map concat ;
+M: callable create-can-has
+    [ create-can-has ] map concat ;
 
-: (create-can-has-word) ( word -- word' created? )
-    name>> "can-has-" prepend "compiler.cfg.intrinsics.simd.backend"
-    2dup lookup
-    [ 2nip f ] [ create t ] if* ;
+: (can-has-word) ( word -- word' )
+    name>> "can-has-" prepend "compiler.cfg.intrinsics.simd.backend" lookup ;
 
-: (create-can-has-quot) ( word -- def effect )
-    [ ] [ def>> ] [ stack-effect ] tri [
-        {
-            { [ pick "^^" head? ] [ can-has-^^-quot ] }
-            { [ pick "##" head? ] [ can-has-^^-quot ] }
-            { [ pick "^"  head? ] [ can-has-^-quot  ] }
-        } cond
-    ] keep ;
+: (can-has-quot) ( word -- quot )
+    [ ] [ def>> ] [ stack-effect ] tri {
+        { [ pick name>> "^^" head? ] [ can-has-^^-quot ] }
+        { [ pick name>> "##" head? ] [ can-has-^^-quot ] }
+        { [ pick name>> "^"  head? ] [ can-has-^-quot  ] }
+    } cond ;
 
 M: vector-op-word create-can-has
-    [ (create-can-has-word) ] keep
-    '[ _ (create-can-has-quot) define-declared ]
-    [ nip ] if ;
+    dup (can-has-word) [ 1quotation ] [ (can-has-quot) ] ?if ;
 
 GENERIC# >can-has-cond 2 ( quot #pick #dup -- quotpair )
-M:: callable >can-has-cond
+M:: callable >can-has-cond ( quot #pick #dup -- quotpair )
     #dup quot create-can-has '[ _ ndup _ can-has? ] quot 2array ;
-    
+
 M:: pair >can-has-cond ( pair #pick #dup -- quotpair )
     pair first2 :> ( class quot )
     #pick class #dup quot create-can-has
@@ -113,7 +114,7 @@ CONSTANT: [quaternary]
         -4 inc-d
     ]
 
-:: [emit-vector-op] ( trials params-quot op-quot literal-preds -- quot ) ;
+:: [emit-vector-op] ( trials params-quot op-quot literal-preds -- quot )
     params-quot trials op-quot literal-preds 
     '[ [ _ dip _ @ ds-push ] _ if-literals-match ] ;
 
@@ -126,10 +127,11 @@ MACRO: emit-vv-vector-op ( trials -- )
 MACRO: emit-vvvv-vector-op ( trials -- )
     [quaternary] [ vvvv-vector-op ] { [ representation? ] } [emit-vector-op] ;
 
-MACRO:: emit-vv-or-vl-vector-op ( trials literal-pred -- )
-    literal-pred trials literal-pred trials
+MACRO:: emit-vv-or-vl-vector-op ( var-trials imm-trials literal-pred -- )
+    literal-pred imm-trials literal-pred var-trials
     '[
         dup node-input-infos 2 tail-slice* first literal>> @
         [ _ _ emit-vl-vector-op ]
         [ _   emit-vv-vector-op ] if 
     ] ;
+
diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor
index 1cf076af1d..512df6c129 100644
--- a/basis/compiler/cfg/intrinsics/simd/simd.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd.factor
@@ -1,18 +1,20 @@
 ! Copyright (C) 2009 Slava Pestov, Joe Groff.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: accessors alien byte-arrays fry classes.algebra
-cpu.architecture kernel math sequences math.vectors
-math.vectors.simd macros generalizations combinators
-combinators.short-circuit arrays locals
-compiler.tree.propagation.info compiler.cfg.builder.blocks
+USING: accessors alien alien.c-types byte-arrays fry
+classes.algebra cpu.architecture kernel math sequences
+math.vectors math.vectors.simd math.vectors.simd.private
+macros generalizations combinators combinators.short-circuit
+arrays locals compiler.tree.propagation.info
+compiler.cfg.builder.blocks
 compiler.cfg.comparisons
 compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
 compiler.cfg.instructions compiler.cfg.registers
+compiler.cfg.intrinsics
 compiler.cfg.intrinsics.alien
 compiler.cfg.intrinsics.simd.backend
 specialized-arrays ;
 FROM: alien.c-types => heap-size char short int longlong float double ;
-SPECIALIZED-ARRAYS: char short int longlong float double ;
+SPECIALIZED-ARRAYS: char uchar short ushort int uint longlong ulonglong float double ;
 IN: compiler.cfg.intrinsics.simd
 
 ! compound vector ops
@@ -69,8 +71,14 @@ IN: compiler.cfg.intrinsics.simd
     mask false rep ^^andn-vector
     rep ^^or-vector ;
 
-: ^minmax-compare-vector ( src1 src2 rep cc -- dst )
-    order-cc {
+: ^not-vector ( src rep -- dst )
+    {
+        [ ^^not-vector ]
+        [ [ ^^fill-vector ] [ ^^xor-vector ] bi ]
+    } v-vector-op ;
+
+:: ^minmax-compare-vector ( src1 src2 rep cc -- dst )
+    cc order-cc {
         { cc<  [ src1 src2 rep ^^max-vector src1 rep cc/= ^^compare-vector ] }
         { cc<= [ src1 src2 rep ^^min-vector src1 rep cc=  ^^compare-vector ] }
         { cc>  [ src1 src2 rep ^^min-vector src1 rep cc/= ^^compare-vector ] }
@@ -96,7 +104,7 @@ IN: compiler.cfg.intrinsics.simd
         [ [ src1 src2 rep ] dip ^((compare-vector)) rep ^^or-vector ]
         reduce
 
-        not? [ rep generate-not-vector ] when
+        not? [ rep ^not-vector ] when
     ] if ;
 
 : ^compare-vector ( src1 src2 rep cc -- dst )
@@ -118,7 +126,7 @@ IN: compiler.cfg.intrinsics.simd
         { signed-int-vector-rep [| src rep |
             src src rep ^^merge-vector-head :> merged
             rep rep-component-type heap-size 8 * :> bits
-            merged bits rep widen-rep ^shr-vector-imm
+            merged bits rep widen-vector-rep ^^shr-vector-imm
         ] }
         { signed-int-vector-rep [| src rep |
             rep ^^zero-vector :> zero
@@ -135,7 +143,7 @@ IN: compiler.cfg.intrinsics.simd
         { signed-int-vector-rep [| src rep |
             src src rep ^^merge-vector-tail :> merged
             rep rep-component-type heap-size 8 * :> bits
-            merged bits rep ^widened-shr-vector-imm
+            merged bits rep widen-vector-rep ^^shr-vector-imm
         ] }
         { signed-int-vector-rep [| src rep |
             rep ^^zero-vector :> zero
@@ -144,7 +152,7 @@ IN: compiler.cfg.intrinsics.simd
         ] }
     } v-vector-op ;
 
-: ^(sum-2) ( src rep -- dst )
+: ^(sum-vector-2) ( src rep -- dst )
     {
         [ dupd ^^horizontal-add-vector ]
         [| src rep | 
@@ -154,7 +162,7 @@ IN: compiler.cfg.intrinsics.simd
         ]
     } v-vector-op ;
 
-: ^(sum-4) ( src rep -- dst )
+: ^(sum-vector-4) ( src rep -- dst )
     {
         [
             [ dupd ^^horizontal-add-vector ]
@@ -165,14 +173,14 @@ IN: compiler.cfg.intrinsics.simd
             src src rep ^^merge-vector-tail :> tail
             head tail rep ^^add-vector :> src'
 
-            rep widen-rep :> rep'
+            rep widen-vector-rep :> rep'
             src' src' rep' ^^merge-vector-head :> head'
             src' src' rep' ^^merge-vector-tail :> tail'
             head' tail' rep ^^add-vector
         ]
     } v-vector-op ;
 
-: ^(sum-8) ( src rep -- dst )
+: ^(sum-vector-8) ( src rep -- dst )
     {
         [
             [ dupd ^^horizontal-add-vector ]
@@ -184,19 +192,19 @@ IN: compiler.cfg.intrinsics.simd
             src src rep ^^merge-vector-tail :> tail
             head tail rep ^^add-vector :> src'
 
-            rep widen-rep :> rep'
+            rep widen-vector-rep :> rep'
             src' src' rep' ^^merge-vector-head :> head'
             src' src' rep' ^^merge-vector-tail :> tail'
             head' tail' rep ^^add-vector :> src''
 
-            rep' widen-rep :> rep''
+            rep' widen-vector-rep :> rep''
             src'' src'' rep'' ^^merge-vector-head :> head''
             src'' src'' rep'' ^^merge-vector-tail :> tail''
             head'' tail'' rep ^^add-vector
         ]
     } v-vector-op ;
 
-: ^(sum-16) ( src rep -- dst )
+: ^(sum-vector-16) ( src rep -- dst )
     {
         [
             {
@@ -211,17 +219,17 @@ IN: compiler.cfg.intrinsics.simd
             src src rep ^^merge-vector-tail :> tail
             head tail rep ^^add-vector :> src'
 
-            rep widen-rep :> rep'
+            rep widen-vector-rep :> rep'
             src' src' rep' ^^merge-vector-head :> head'
             src' src' rep' ^^merge-vector-tail :> tail'
             head' tail' rep ^^add-vector :> src''
 
-            rep' widen-rep :> rep''
+            rep' widen-vector-rep :> rep''
             src'' src'' rep'' ^^merge-vector-head :> head''
             src'' src'' rep'' ^^merge-vector-tail :> tail''
             head'' tail'' rep ^^add-vector :> src'''
 
-            rep'' widen-rep :> rep'''
+            rep'' widen-vector-rep :> rep'''
             src''' src''' rep''' ^^merge-vector-head :> head'''
             src''' src''' rep''' ^^merge-vector-tail :> tail'''
             head''' tail''' rep ^^add-vector
@@ -230,11 +238,11 @@ IN: compiler.cfg.intrinsics.simd
 
 : ^(sum-vector) ( src rep -- dst )
     [
-        rep-length {
-            {  2 [ ^(sum-2) ] }
-            {  4 [ ^(sum-4) ] }
-            {  8 [ ^(sum-8) ] }
-            { 16 [ ^(sum-16) ] }
+        dup rep-length {
+            {  2 [ ^(sum-vector-2) ] }
+            {  4 [ ^(sum-vector-4) ] }
+            {  8 [ ^(sum-vector-8) ] }
+            { 16 [ ^(sum-vector-16) ] }
         } case
     ] [ ^^vector>scalar ] bi ;
 
@@ -244,11 +252,29 @@ IN: compiler.cfg.intrinsics.simd
         { int-vector-rep [| src rep |
             src rep ^unpack-vector-head :> head
             src rep ^unpack-vector-tail :> tail
-            rep widen-rep :> wide-rep
+            rep widen-vector-rep :> wide-rep
             head tail wide-rep ^^add-vector wide-rep ^(sum-vector)
         ] }
     } v-vector-op ;
 
+: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
+
+: ^shuffle-vector-imm ( src1 src2 rep -- dst )
+    {
+        [ ^^shuffle-vector-imm ]
+        [ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] bi ]
+    } vl-vector-op ;
+
+: ^broadcast-vector ( src n rep -- dst )
+    [ rep-length swap <array> ] keep
+    ^shuffle-vector-imm ;
+
+: ^with-vector ( src rep -- dst )
+    [ ^^scalar>vector ] keep [ 0 ] dip ^broadcast-vector ;
+
+: ^select-vector ( src n rep -- dst )
+    [ ^broadcast-vector ] keep ^^vector>scalar ;
+
 ! intrinsic emitters
 
 : emit-simd-v+ ( node -- )
@@ -380,8 +406,7 @@ IN: compiler.cfg.intrinsics.simd
 
 : emit-simd-vnot ( node -- )
     {
-        [ ^^not-vector ]
-        [ [ ^^fill-vector ] [ ^^xor-vector ] bi ]
+        [ ^not-vector ]
     } emit-v-vector-op ;
 
 : emit-simd-vlshift ( node -- )
@@ -408,12 +433,9 @@ IN: compiler.cfg.intrinsics.simd
         [ ^^horizontal-shr-vector-imm ]
     } [ integer? ] emit-vl-vector-op ;
 
-: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
-
 : emit-simd-vshuffle-elements ( node -- )
     {
-        [ ^^shuffle-vector-imm ]
-        [ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] ]
+        [ ^shuffle-vector-imm ]
     } [ shuffle? ] emit-vl-vector-op ;
 
 : emit-simd-vshuffle-bytes ( node -- )
@@ -458,28 +480,28 @@ IN: compiler.cfg.intrinsics.simd
 
 : emit-simd-vany? ( node -- )
     {
-        [ vcc-any ^test-vector ]
+        [ vcc-any ^^test-vector ]
     } emit-vv-vector-op ;
 : emit-simd-vall? ( node -- )
     {
-        [ vcc-all ^test-vector ]
+        [ vcc-all ^^test-vector ]
     } emit-vv-vector-op ;
 : emit-simd-vnone? ( node -- )
     {
-        [ vcc-none ^test-vector ]
+        [ vcc-none ^^test-vector ]
     } emit-vv-vector-op ;
 
 : emit-simd-v>float ( node -- )
     {
         { float-vector-rep [ drop ] }
         { int-vector-rep [ ^^integer>float-vector ] }
-    } emit-vv-vector-op ;
+    } emit-v-vector-op ;
 
 : emit-simd-v>integer ( node -- )
     {
         { float-vector-rep [ ^^float>integer-vector ] }
         { int-vector-rep [ dup ] }
-    } emit-vv-vector-op ;
+    } emit-v-vector-op ;
 
 : emit-simd-vpack-signed ( node -- )
     {
@@ -503,7 +525,7 @@ IN: compiler.cfg.intrinsics.simd
 
 : emit-simd-with ( node -- )
     {
-        [ ^^with-vector ]
+        [ ^with-vector ]
     } emit-v-vector-op ;
 
 : emit-simd-gather-2 ( node -- )
@@ -518,7 +540,7 @@ IN: compiler.cfg.intrinsics.simd
 
 : emit-simd-select ( node -- )
     {
-        [ ^^select-vector ]
+        [ ^select-vector ]
     } [ integer? ] emit-vl-vector-op ;
 
 : emit-alien-vector ( node -- )
@@ -540,62 +562,62 @@ IN: compiler.cfg.intrinsics.simd
         inline-alien
     ] with { [ %alien-vector-reps member? ] } if-literals-match ;
 
-: enable-simd ( -- )
-    {
-        { (simd-v+)                [ emit-simd-v+                  ] }
-        { (simd-v-)                [ emit-simd-v-                  ] }
-        { (simd-vneg)              [ emit-simd-vneg                ] }
-        { (simd-v+-)               [ emit-simd-v+-                 ] }
-        { (simd-vs+)               [ emit-simd-vs+                 ] }
-        { (simd-vs-)               [ emit-simd-vs-                 ] }
-        { (simd-vs*)               [ emit-simd-vs*                 ] }
-        { (simd-v*)                [ emit-simd-v*                  ] }
-        { (simd-v/)                [ emit-simd-v/                  ] }
-        { (simd-vmin)              [ emit-simd-vmin                ] }
-        { (simd-vmax)              [ emit-simd-vmax                ] }
-        { (simd-v.)                [ emit-simd-v.                  ] }
-        { (simd-vsqrt)             [ emit-simd-vsqrt               ] }
-        { (simd-sum)               [ emit-simd-sum                 ] }
-        { (simd-vabs)              [ emit-simd-vabs                ] }
-        { (simd-vbitand)           [ emit-simd-vand                ] }
-        { (simd-vbitandn)          [ emit-simd-vandn               ] }
-        { (simd-vbitor)            [ emit-simd-vor                 ] }
-        { (simd-vbitxor)           [ emit-simd-vxor                ] }
-        { (simd-vbitnot)           [ emit-simd-vnot                ] }
-        { (simd-vand)              [ emit-simd-vand                ] }
-        { (simd-vandn)             [ emit-simd-vandn               ] }
-        { (simd-vor)               [ emit-simd-vor                 ] }
-        { (simd-vxor)              [ emit-simd-vxor                ] }
-        { (simd-vnot)              [ emit-simd-vnot                ] }
-        { (simd-vlshift)           [ emit-simd-vlshift             ] }
-        { (simd-vrshift)           [ emit-simd-vrshift             ] }
-        { (simd-hlshift)           [ emit-simd-hlshift             ] }
-        { (simd-hrshift)           [ emit-simd-hrshift             ] }
-        { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements   ] }
-        { (simd-vshuffle-bytes)    [ emit-simd-vshuffle-bytes      ] }
-        { (simd-vmerge-head)       [ emit-simd-vmerge-head         ] }
-        { (simd-vmerge-tail)       [ emit-simd-vmerge-tail         ] }
-        { (simd-v<=)               [ emit-simd-v<=                 ] }
-        { (simd-v<)                [ emit-simd-v<                  ] }
-        { (simd-v=)                [ emit-simd-v=                  ] }
-        { (simd-v>)                [ emit-simd-v>                  ] }
-        { (simd-v>=)               [ emit-simd-v>=                 ] }
-        { (simd-vunordered?)       [ emit-simd-vunordered?         ] }
-        { (simd-vany?)             [ emit-simd-vany?               ] }
-        { (simd-vall?)             [ emit-simd-vall?               ] }
-        { (simd-vnone?)            [ emit-simd-vnone?              ] }
-        { (simd-v>float)           [ emit-simd-v>float             ] }
-        { (simd-v>integer)         [ emit-simd-v>integer           ] }
-        { (simd-vpack-signed)      [ emit-simd-vpack-signed        ] }
-        { (simd-vpack-unsigned)    [ emit-simd-vpack-unsigned      ] }
-        { (simd-vunpack-head)      [ emit-simd-vunpack-head        ] }
-        { (simd-vunpack-tail)      [ emit-simd-vunpack-tail        ] }
-        { (simd-with)              [ emit-simd-with                ] }
-        { (simd-gather-2)          [ emit-simd-gather-2            ] }
-        { (simd-gather-4)          [ emit-simd-gather-4            ] }
-        { (simd-select)            [ emit-simd-select              ] }
-        { alien-vector             [ emit-alien-vector             ] }
-        { set-alien-vector         [ emit-set-alien-vector         ] }
-    } enable-intrinsics ;
-
-enable-simd
+! : enable-simd ( -- )
+!     {
+!         { (simd-v+)                [ emit-simd-v+                  ] }
+!         { (simd-v-)                [ emit-simd-v-                  ] }
+!         { (simd-vneg)              [ emit-simd-vneg                ] }
+!         { (simd-v+-)               [ emit-simd-v+-                 ] }
+!         { (simd-vs+)               [ emit-simd-vs+                 ] }
+!         { (simd-vs-)               [ emit-simd-vs-                 ] }
+!         { (simd-vs*)               [ emit-simd-vs*                 ] }
+!         { (simd-v*)                [ emit-simd-v*                  ] }
+!         { (simd-v/)                [ emit-simd-v/                  ] }
+!         { (simd-vmin)              [ emit-simd-vmin                ] }
+!         { (simd-vmax)              [ emit-simd-vmax                ] }
+!         { (simd-v.)                [ emit-simd-v.                  ] }
+!         { (simd-vsqrt)             [ emit-simd-vsqrt               ] }
+!         { (simd-sum)               [ emit-simd-sum                 ] }
+!         { (simd-vabs)              [ emit-simd-vabs                ] }
+!         { (simd-vbitand)           [ emit-simd-vand                ] }
+!         { (simd-vbitandn)          [ emit-simd-vandn               ] }
+!         { (simd-vbitor)            [ emit-simd-vor                 ] }
+!         { (simd-vbitxor)           [ emit-simd-vxor                ] }
+!         { (simd-vbitnot)           [ emit-simd-vnot                ] }
+!         { (simd-vand)              [ emit-simd-vand                ] }
+!         { (simd-vandn)             [ emit-simd-vandn               ] }
+!         { (simd-vor)               [ emit-simd-vor                 ] }
+!         { (simd-vxor)              [ emit-simd-vxor                ] }
+!         { (simd-vnot)              [ emit-simd-vnot                ] }
+!         { (simd-vlshift)           [ emit-simd-vlshift             ] }
+!         { (simd-vrshift)           [ emit-simd-vrshift             ] }
+!         { (simd-hlshift)           [ emit-simd-hlshift             ] }
+!         { (simd-hrshift)           [ emit-simd-hrshift             ] }
+!         { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements   ] }
+!         { (simd-vshuffle-bytes)    [ emit-simd-vshuffle-bytes      ] }
+!         { (simd-vmerge-head)       [ emit-simd-vmerge-head         ] }
+!         { (simd-vmerge-tail)       [ emit-simd-vmerge-tail         ] }
+!         { (simd-v<=)               [ emit-simd-v<=                 ] }
+!         { (simd-v<)                [ emit-simd-v<                  ] }
+!         { (simd-v=)                [ emit-simd-v=                  ] }
+!         { (simd-v>)                [ emit-simd-v>                  ] }
+!         { (simd-v>=)               [ emit-simd-v>=                 ] }
+!         { (simd-vunordered?)       [ emit-simd-vunordered?         ] }
+!         { (simd-vany?)             [ emit-simd-vany?               ] }
+!         { (simd-vall?)             [ emit-simd-vall?               ] }
+!         { (simd-vnone?)            [ emit-simd-vnone?              ] }
+!         { (simd-v>float)           [ emit-simd-v>float             ] }
+!         { (simd-v>integer)         [ emit-simd-v>integer           ] }
+!         { (simd-vpack-signed)      [ emit-simd-vpack-signed        ] }
+!         { (simd-vpack-unsigned)    [ emit-simd-vpack-unsigned      ] }
+!         { (simd-vunpack-head)      [ emit-simd-vunpack-head        ] }
+!         { (simd-vunpack-tail)      [ emit-simd-vunpack-tail        ] }
+!         { (simd-with)              [ emit-simd-with                ] }
+!         { (simd-gather-2)          [ emit-simd-gather-2            ] }
+!         { (simd-gather-4)          [ emit-simd-gather-4            ] }
+!         { (simd-select)            [ emit-simd-select              ] }
+!         { alien-vector             [ emit-alien-vector             ] }
+!         { set-alien-vector         [ emit-set-alien-vector         ] }
+!     } enable-intrinsics ;
+! 
+! enable-simd
diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor
index 1eac88598b..b71a34e938 100644
--- a/basis/compiler/tree/propagation/simd/simd.factor
+++ b/basis/compiler/tree/propagation/simd/simd.factor
@@ -2,7 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors byte-arrays combinators fry sequences
 compiler.tree.propagation.info cpu.architecture kernel words math
-math.intervals math.vectors.simd ;
+math.intervals math.vectors.simd math.vectors.simd.private ;
 IN: compiler.tree.propagation.simd
 
 {
@@ -51,7 +51,6 @@ IN: compiler.tree.propagation.simd
     (simd-gather-2)
     (simd-gather-4)
     alien-vector
-    alien-vector-aligned
 } [ { byte-array } "default-output-classes" set-word-prop ] each
 
 : scalar-output-class ( rep -- class )
diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor
index 2fbe823965..d600b0bc24 100644
--- a/basis/math/vectors/simd/simd-docs.factor
+++ b/basis/math/vectors/simd/simd-docs.factor
@@ -1,6 +1,6 @@
 USING: classes.tuple.private cpu.architecture help.markup
 help.syntax kernel.private math math.vectors
-math.vectors.simd.intrinsics sequences ;
+sequences ;
 IN: math.vectors.simd
 
 ARTICLE: "math.vectors.simd.intro" "Introduction to SIMD support"
@@ -23,7 +23,7 @@ $nl
 $nl
 "SSE2 introduces double-precision SIMD (" { $snippet "double-2" } " and " { $snippet "double-4" } ") and integer SIMD (all types). Integer SIMD is missing a few features, in particular the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "."
 $nl
-"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "."
+"SSE3 introduces horizontal adds (summing all components of a single vector register), which are useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "."
 $nl
 "SSSE3 introduces " { $link vabs } " for " { $snippet "char-16" } ", " { $snippet "short-8" } " and " { $snippet "int-4" } "."
 $nl
@@ -36,26 +36,7 @@ $nl
 ARTICLE: "math.vectors.simd.types" "SIMD vector types"
 "Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type and " { $snippet "count" } " is a vector dimension."
 $nl
-"To use a SIMD vector type, a parsing word is used to generate the relevant code and bring it into the vocabulary search path; this is the same idea as with " { $link "specialized-arrays" } ":"
-{ $subsections
-    POSTPONE: SIMD:
-    POSTPONE: SIMDS:
-}
-"The following scalar types are supported:"
-{ $code
-    "char"
-    "uchar"
-    "short"
-    "ushort"
-    "int"
-    "uint"
-    "longlong"
-    "ulonglong"
-    "float"
-    "double"
-}
-
-"The following vector types are generated from the above scalar types:"
+"The following vector types are available:"
 { $code
     "char-16"
     "uchar-16"
@@ -218,16 +199,4 @@ ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
     "math.vectors.simd.intrinsics"
 } ;
 
-HELP: SIMD:
-{ $syntax "SIMD: type" }
-{ $values { "type" "a scalar C type" } }
-{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of " { $snippet "type" } " into the vocabulary search path. The allowed scalar types, and the auto-generated type/length vector combinations that result, are listed in " { $link "math.vectors.simd.types" } ". Generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
-
-HELP: SIMDS:
-{ $syntax "SIMDS: type type type ... ;" }
-{ $values { "type" "a scalar C type" } }
-{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of each " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
-
-{ POSTPONE: SIMD: POSTPONE: SIMDS: } related-words
-
 ABOUT: "math.vectors.simd"
diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor
index 4953abb4ea..10305c673a 100644
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@@ -1,5 +1,9 @@
-! (c)2009 Slava Pestov, Joe Groff bsd license
-USING: math.vectors math.vectors.private ;
+USING: accessors alien.c-types byte-arrays classes combinators
+cpu.architecture fry functors generalizations generic
+generic.parser kernel lexer literals macros math math.functions
+math.vectors math.vectors.private namespaces parser
+prettyprint.custom quotations sequences sequences.private vocabs
+vocabs.loader ;
 QUALIFIED-WITH: alien.c-types c
 IN: math.vectors.simd
 
@@ -8,8 +12,11 @@ DEFER: simd-with
 DEFER: simd-boa
 DEFER: simd-cast
 
-<PRIVATE
+ERROR: bad-simd-call word ;
+ERROR: bad-simd-length got expected ;
 
+<<
+<PRIVATE
 ! Primitive SIMD constructors
 
 GENERIC: new-underlying ( underlying seq -- seq' )
@@ -18,6 +25,10 @@ GENERIC: new-underlying ( underlying seq -- seq' )
     dip new-underlying ; inline
 : change-underlying ( seq quot -- seq' )
     '[ underlying>> @ ] keep new-underlying ; inline
+PRIVATE>
+>>
+
+<PRIVATE
 
 ! SIMD intrinsics
 
@@ -34,18 +45,18 @@ GENERIC: new-underlying ( underlying seq -- seq' )
 : (simd-vmax)              ( a b rep -- c ) \ vmax bad-simd-call ;
 : (simd-v.)                ( a b rep -- n ) \ v. bad-simd-call ;
 : (simd-vsqrt)             ( a   rep -- c ) \ vsqrt bad-simd-call ;
-: (simd-sum)               ( a b rep -- n ) \ sum bad-simd-call ;
+: (simd-sum)               ( a   rep -- n ) \ sum bad-simd-call ;
 : (simd-vabs)              ( a   rep -- c ) \ vabs bad-simd-call ;
 : (simd-vbitand)           ( a b rep -- c ) \ vbitand bad-simd-call ;
 : (simd-vbitandn)          ( a b rep -- c ) \ vbitandn bad-simd-call ;
 : (simd-vbitor)            ( a b rep -- c ) \ vbitor bad-simd-call ;
 : (simd-vbitxor)           ( a b rep -- c ) \ vbitxor bad-simd-call ;
-: (simd-vbitnot)           ( a b rep -- c ) \ vbitnot bad-simd-call ;
+: (simd-vbitnot)           ( a   rep -- c ) \ vbitnot bad-simd-call ;
 : (simd-vand)              ( a b rep -- c ) \ vand bad-simd-call ;
 : (simd-vandn)             ( a b rep -- c ) \ vandn bad-simd-call ;
 : (simd-vor)               ( a b rep -- c ) \ vor bad-simd-call ;
 : (simd-vxor)              ( a b rep -- c ) \ vxor bad-simd-call ;
-: (simd-vnot)              ( a b rep -- c ) \ vnot bad-simd-call ;
+: (simd-vnot)              ( a   rep -- c ) \ vnot bad-simd-call ;
 : (simd-vlshift)           ( a n rep -- c ) \ vlshift bad-simd-call ;
 : (simd-vrshift)           ( a n rep -- c ) \ vrshift bad-simd-call ;
 : (simd-hlshift)           ( a n rep -- c ) \ hlshift bad-simd-call ;
@@ -74,9 +85,13 @@ GENERIC: new-underlying ( underlying seq -- seq' )
 : (simd-gather-4)          ( m n o p rep -- v ) \ simd-boa bad-simd-call ;
 : (simd-select)            ( a n rep -- n ) \ nth bad-simd-call ;
 
+PRIVATE>
+
 : alien-vector     ( c-ptr n rep -- value ) \ alien-vector bad-simd-call ;
 : set-alien-vector ( c-ptr n rep -- value ) \ set-alien-vector bad-simd-call ;
 
+<PRIVATE
+
 ! Helper for boolean vector literals
 
 : vector-true-value ( class -- value )
@@ -102,10 +117,11 @@ TUPLE: simd-128
 GENERIC: simd-element-type ( obj -- c-type )
 GENERIC: simd-rep ( simd -- rep )
 
+<<
 : rep-length ( rep -- n )
     16 swap rep-component-type heap-size /i ; foldable
 
-<< <PRIVATE
+<PRIVATE
 
 ! SIMD concrete type functor
 
@@ -161,9 +177,11 @@ c:<c-type>
 ;FUNCTOR
 
 SYNTAX: SIMD-128:
-    scan scan-word define-simd-128 ;
+    scan define-simd-128 ;
 
-PRIVATE> >>
+PRIVATE>
+
+>>
 
 SIMD-128: char-16
 SIMD-128: uchar-16
@@ -176,16 +194,14 @@ SIMD-128: ulonglong-2
 SIMD-128: float-4
 SIMD-128: double-2
 
-ERROR: bad-simd-call word ;
-ERROR: bad-simd-length got expected ;
-
 : assert-positive ( x -- y ) ;
 
 ! SIMD vectors as sequences
 
+M: simd-128 hashcode* underlying>> hashcode* ; inline
 M: simd-128 clone [ clone ] change-underlying ; inline
 M: simd-128 length simd-rep rep-length ; inline
-M: simd-128 nth-unsafe tuck simd-rep (simd-select) ; inline
+M: simd-128 nth-unsafe [ nip ] 2keep simd-rep (simd-select) ; inline
 M: simd-128 c:byte-length drop 16 ; inline
 
 M: simd-128 new-sequence
@@ -193,16 +209,13 @@ M: simd-128 new-sequence
     [ nip [ 16 (byte-array) ] make-underlying ]
     [ length bad-simd-length ] if ; inline
 
-M: simd-128 equal?
-    [ v= vall? ] [ 2drop f ] if-vectors-match ; inline
-
 M: simd-128 >pprint-sequence ;
 M: simd-128 pprint* pprint-object ;
 
 INSTANCE: simd-128 sequence
 
 ! Unboxers for SIMD operations
-
+<<
 <PRIVATE
 
 : if-both-vectors ( a b t f -- )
@@ -221,6 +234,9 @@ INSTANCE: simd-128 sequence
 : simd-v->v-op ( a quot: ( (a) rep -- (c) ) -- c )
     [ simd-unbox ] dip 2curry make-underlying ; inline
 
+: simd-vn->v-op ( a n quot: ( (a) n rep -- (c) ) -- c )
+    [ simd-unbox ] [ swap ] [ 3curry ] tri* make-underlying ; inline
+
 : simd-v->n-op ( a quot: ( (a) rep -- n ) -- n )
     [ [ underlying>> ] [ simd-rep ] bi ] dip call ; inline
 
@@ -241,7 +257,7 @@ INSTANCE: simd-128 sequence
     [ '[ _ ((simd-vv->n-op)) ] ] dip if-both-vectors-match ; inline
 
 : (simd-method-fallback) ( accum word -- accum )
-    [ current-method get \ (call-next-method) [ ] 2sequence suffix! ]
+    [ current-method get literalize \ (call-next-method) [ ] 2sequence suffix! ]
     dip suffix! ; 
 
 SYNTAX: simd-vv->v-op
@@ -252,6 +268,10 @@ SYNTAX: simd-vv->n-op
     \ (simd-vv->n-op) (simd-method-fallback) ; 
 
 PRIVATE>
+>>
+
+M: simd-128 equal?
+    [ v= vall? ] [ 2drop f ] if-both-vectors-match ; inline
 
 ! SIMD constructors
 
@@ -283,26 +303,26 @@ M: simd-128 vmin               [ (simd-vmin)               ] simd-vv->v-op ; inl
 M: simd-128 vmax               [ (simd-vmax)               ] simd-vv->v-op ; inline
 M: simd-128 v.                 [ (simd-v.)                 ] simd-vv->n-op ; inline
 M: simd-128 vsqrt              [ (simd-vsqrt)              ] simd-v->v-op  ; inline
-M: simd-128 sum                [ (simd-sum)                ] simd-vv->n-op ; inline
+M: simd-128 sum                [ (simd-sum)                ] simd-v->n-op  ; inline
 M: simd-128 vabs               [ (simd-vabs)               ] simd-v->v-op  ; inline
 M: simd-128 vbitand            [ (simd-vbitand)            ] simd-vv->v-op ; inline
 M: simd-128 vbitandn           [ (simd-vbitandn)           ] simd-vv->v-op ; inline
 M: simd-128 vbitor             [ (simd-vbitor)             ] simd-vv->v-op ; inline
 M: simd-128 vbitxor            [ (simd-vbitxor)            ] simd-vv->v-op ; inline
-M: simd-128 vbitnot            [ (simd-vbitnot)            ] simd-vv->v-op ; inline
+M: simd-128 vbitnot            [ (simd-vbitnot)            ] simd-v->v-op  ; inline
 M: simd-128 vand               [ (simd-vand)               ] simd-vv->v-op ; inline
 M: simd-128 vandn              [ (simd-vandn)              ] simd-vv->v-op ; inline
 M: simd-128 vor                [ (simd-vor)                ] simd-vv->v-op ; inline
 M: simd-128 vxor               [ (simd-vxor)               ] simd-vv->v-op ; inline
-M: simd-128 vnot               [ (simd-vnot)               ] simd-vv->v-op ; inline
+M: simd-128 vnot               [ (simd-vnot)               ] simd-v->v-op  ; inline
 M: simd-128 vlshift            [ (simd-vlshift)            ] simd-vn->v-op ; inline
 M: simd-128 vrshift            [ (simd-vrshift)            ] simd-vn->v-op ; inline
 M: simd-128 hlshift            [ (simd-hlshift)            ] simd-vn->v-op ; inline
 M: simd-128 hrshift            [ (simd-hrshift)            ] simd-vn->v-op ; inline
 M: simd-128 vshuffle-elements  [ (simd-vshuffle-elements)  ] simd-vn->v-op ; inline
 M: simd-128 vshuffle-bytes     [ (simd-vshuffle-bytes)     ] simd-vv->v-op ; inline
-M: simd-128 vmerge-head        [ (simd-vmerge-head)        ] simd-vv->v-op ; inline
-M: simd-128 vmerge-tail        [ (simd-vmerge-tail)        ] simd-vv->v-op ; inline
+M: simd-128 (vmerge-head)      [ (simd-vmerge-head)        ] simd-vv->v-op ; inline
+M: simd-128 (vmerge-tail)      [ (simd-vmerge-tail)        ] simd-vv->v-op ; inline
 M: simd-128 v<=                [ (simd-v<=)                ] simd-vv->v-op ; inline
 M: simd-128 v<                 [ (simd-v<)                 ] simd-vv->v-op ; inline
 M: simd-128 v=                 [ (simd-v=)                 ] simd-vv->v-op ; inline
@@ -326,7 +346,6 @@ M: simd-128 v*n over simd-with v* ; inline
 M: simd-128 v/n over simd-with v/ ; inline
 M: simd-128 norm-sq dup v. assert-positive ; inline
 M: simd-128 norm      norm-sq sqrt ; inline
-M: simd-128 normalize dup norm v/n ; inline
 M: simd-128 distance  v- norm ; inline
 
 ! misc

From 243e5b43cebc7794220cb88ba4487fb75960a290 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Sat, 14 Nov 2009 22:25:00 -0600
Subject: [PATCH 16/46] fixes for stronger stack checker

---
 basis/math/vectors/simd/simd.factor | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor
index 10305c673a..c02c713b48 100644
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@@ -1,9 +1,9 @@
-USING: accessors alien.c-types byte-arrays classes combinators
-cpu.architecture fry functors generalizations generic
+USING: accessors alien.c-types arrays byte-arrays classes combinators
+cpu.architecture effects fry functors generalizations generic
 generic.parser kernel lexer literals macros math math.functions
 math.vectors math.vectors.private namespaces parser
 prettyprint.custom quotations sequences sequences.private vocabs
-vocabs.loader ;
+vocabs.loader words ;
 QUALIFIED-WITH: alien.c-types c
 IN: math.vectors.simd
 
@@ -140,6 +140,8 @@ N     [ A-rep rep-length ]
 
 SET-NTH [ ELT dup c:c-setter c:array-accessor ]
 
+BOA-EFFECT [ N "n" <repetition> >array { "v" } <effect> ]
+
 WHERE
 
 TUPLE: A < simd-128 ;
@@ -159,10 +161,11 @@ M: A like drop dup \ A instance? [ >A ] unless ; inline
 
 : A-with ( n -- v ) \ A new simd-with ; inline
 : A-cast ( v -- v' ) \ A new simd-cast ; inline
-: A-boa ( ...n -- v ) \ A new simd-boa ; inline
 
-M: A pprint-delims drop \ A{ \ } ;
-SYNTAX: A{ \ } [ >A ] parse-literal ;
+\ A-boa { \ A simd-boa } >quotation BOA-EFFECT define-inline
+
+! M: A pprint-delims drop \ A{ \ } ;
+! SYNTAX: A{ \ } [ >A ] parse-literal ;
 
 c:<c-type>
     byte-array >>class
@@ -209,8 +212,8 @@ M: simd-128 new-sequence
     [ nip [ 16 (byte-array) ] make-underlying ]
     [ length bad-simd-length ] if ; inline
 
-M: simd-128 >pprint-sequence ;
-M: simd-128 pprint* pprint-object ;
+! M: simd-128 >pprint-sequence ;
+! M: simd-128 pprint* pprint-object ;
 
 INSTANCE: simd-128 sequence
 
@@ -278,11 +281,11 @@ M: simd-128 equal?
 : simd-with ( n seq -- v )
     [ (simd-with) ] simd-construct-op ; inline
 
-MACRO: simd-boa ( seq -- )
-    dup length {
-        { 2 [ '[ _ dup [ (simd-gather-2) ] simd-construct-op ] ] }
-        { 4 [ '[ _ dup [ (simd-gather-4) ] simd-construct-op ] ] }
-        [ '[ _ _ nsequence ] ]
+MACRO: simd-boa ( class -- )
+    new dup length {
+        { 2 [ '[ _ [ (simd-gather-2) ] simd-construct-op ] ] }
+        { 4 [ '[ _ [ (simd-gather-4) ] simd-construct-op ] ] }
+        [ swap '[ _ _ nsequence ] ]
     } case ;
 
 : simd-cast ( v seq -- v' )

From d56afe9c3d45c11a076e93732b1c5d65b82f0914 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Sat, 14 Nov 2009 23:43:22 -0600
Subject: [PATCH 17/46] compilation fixes

---
 .../intrinsics/simd/backend/backend.factor    | 25 +++++++++++++------
 .../compiler/cfg/intrinsics/simd/simd.factor  |  8 +++---
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor
index 90514c6cc9..8f9fa801e2 100644
--- a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor
+++ b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor
@@ -13,6 +13,9 @@ IN: compiler.cfg.intrinsics.simd.backend
 : can-has? ( quot -- ? )
     [ t \ can-has? ] dip '[ @ drop \ can-has? get ] with-variable ; inline
 
+: can-has-rep? ( rep reps -- )
+    member? \ can-has? [ and ] change ; inline
+
 GENERIC: create-can-has ( word -- word' )
 
 PREDICATE: vector-op-word < word
@@ -27,19 +30,22 @@ PREDICATE: vector-op-word < word
 
 :: can-has-^^-quot ( word def effect -- quot )
     effect in>> { "rep" } split1 [ length ] bi@ 1 +
-    word reps-word
+    word reps-word 1quotation
     effect out>> length f <array> >quotation
-    '[ [ _ ndrop ] _ ndip _ execute member? \ can-has? [ and ] change @ ] ;
+    '[ [ _ ndrop ] _ ndip @ can-has-rep? @ ] ;
 
 :: can-has-^-quot ( word def effect -- quot )
-    def create-can-has ;
+    def create-can-has first ;
+
+: map-concat-like ( seq quot -- seq' )
+    '[ _ map ] [ concat-as ] bi ; inline
 
 M: object create-can-has 1quotation ;
 
 M: array create-can-has
-    [ create-can-has ] map concat ;
+    [ create-can-has ] map-concat-like 1quotation ;
 M: callable create-can-has
-    [ create-can-has ] map concat ;
+    [ create-can-has ] map-concat-like 1quotation ;
 
 : (can-has-word) ( word -- word' )
     name>> "can-has-" prepend "compiler.cfg.intrinsics.simd.backend" lookup ;
@@ -56,12 +62,12 @@ M: vector-op-word create-can-has
 
 GENERIC# >can-has-cond 2 ( quot #pick #dup -- quotpair )
 M:: callable >can-has-cond ( quot #pick #dup -- quotpair )
-    #dup quot create-can-has '[ _ ndup _ can-has? ] quot 2array ;
+    #dup quot create-can-has '[ _ ndup @ can-has? ] quot 2array ;
 
 M:: pair >can-has-cond ( pair #pick #dup -- quotpair )
     pair first2 :> ( class quot )
     #pick class #dup quot create-can-has
-    '[ _ npick _ instance? [ _ ndup _ can-has? ] dip and ]
+    '[ _ npick _ instance? [ _ ndup @ can-has? ] dip and ]
     quot 2array ;
 
 MACRO: v-vector-op ( trials -- )
@@ -82,6 +88,11 @@ MACRO: vvvv-vector-op ( trials -- )
     \ can-has? [ and ] change
     f ;
 
+: can-has-^^test-vector ( src rep vcc -- dst )
+    [ drop ] 2dip drop %test-vector-reps member?
+    \ can-has? [ and ] change
+    f ;
+
 ! Intrinsic code emission
 
 MACRO: if-literals-match ( quots -- )
diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor
index 512df6c129..5130ff36b7 100644
--- a/basis/compiler/cfg/intrinsics/simd/simd.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd.factor
@@ -481,15 +481,15 @@ IN: compiler.cfg.intrinsics.simd
 : emit-simd-vany? ( node -- )
     {
         [ vcc-any ^^test-vector ]
-    } emit-vv-vector-op ;
+    } emit-v-vector-op ;
 : emit-simd-vall? ( node -- )
     {
         [ vcc-all ^^test-vector ]
-    } emit-vv-vector-op ;
+    } emit-v-vector-op ;
 : emit-simd-vnone? ( node -- )
     {
         [ vcc-none ^^test-vector ]
-    } emit-vv-vector-op ;
+    } emit-v-vector-op ;
 
 : emit-simd-v>float ( node -- )
     {
@@ -500,7 +500,7 @@ IN: compiler.cfg.intrinsics.simd
 : emit-simd-v>integer ( node -- )
     {
         { float-vector-rep [ ^^float>integer-vector ] }
-        { int-vector-rep [ dup ] }
+        { int-vector-rep [ drop ] }
     } emit-v-vector-op ;
 
 : emit-simd-vpack-signed ( node -- )

From cd2cf91b95f35e9bb3f99e7534454a6761b520cd Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Tue, 17 Nov 2009 06:53:22 -0600
Subject: [PATCH 18/46] start on tests for simd intrinsics

---
 .../cfg/intrinsics/simd/simd-tests.factor     | 112 ++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 basis/compiler/cfg/intrinsics/simd/simd-tests.factor

diff --git a/basis/compiler/cfg/intrinsics/simd/simd-tests.factor b/basis/compiler/cfg/intrinsics/simd/simd-tests.factor
new file mode 100644
index 0000000000..c7d999f029
--- /dev/null
+++ b/basis/compiler/cfg/intrinsics/simd/simd-tests.factor
@@ -0,0 +1,112 @@
+! (c)2009 Joe Groff bsd license
+USING: assocs biassocs byte-arrays byte-arrays.hex classes
+compiler.cfg.instructions compiler.cfg.intrinsics.simd
+compiler.cfg.registers compiler.cfg.stacks.local compiler.tree
+compiler.tree.propagation.info cpu.architecture fry kernel
+locals make namespaces sequences system tools.test words ;
+IN: compiler.cfg.intrinsics.simd.tests
+
+:: 1test-node ( rep    -- node  ) 
+    T{ #call
+        { in-d  { 1 2 3 4 } }
+        { out-d { 5 } }
+        { info H{
+            { 1 T{ value-info { class byte-array } } }
+            { 2 T{ value-info { class byte-array } } }
+            { 3 T{ value-info { class byte-array } } }
+            { 4 T{ value-info { class word } { literal? t } { literal rep } } }
+            { 5 T{ value-info { class byte-array } } }
+        } }
+    } ;
+:: 2test-node ( rep cc -- node )
+    T{ #call
+        { in-d  { 1 2 3 4 5 } }
+        { out-d { 6 } }
+        { info H{
+            { 1 T{ value-info { class byte-array } } }
+            { 2 T{ value-info { class byte-array } } }
+            { 3 T{ value-info { class byte-array } } }
+            { 4 T{ value-info { class word } { literal? t } { literal rep } } }
+            { 5 T{ value-info { class word } { literal? t } { literal cc  } } }
+            { 6 T{ value-info { class byte-array } } }
+        } }
+    } ;
+
+: test-compiler-env ( -- x )
+    H{ } clone
+        T{ current-height { d 0 } { r 0 } { emit-d 0 } { emit-r 0 } } \ current-height pick set-at
+        H{ } clone \ local-peek-set pick set-at
+        H{ } clone \ replace-mapping pick set-at
+        H{ } <biassoc> \ locs>vregs pick set-at ;
+
+: make-classes ( quot -- seq )
+    { } make [ class ] map ; inline
+
+: 1test-emit ( cpu rep quot -- node )
+    [
+        [ new cpu ] 2dip '[
+            test-compiler-env [ _ 1test-node @ ] bind
+        ] with-variable
+    ] make-classes ; inline
+
+: 2test-emit ( cpu rep cc quot -- node )
+    [
+        [ new cpu ] 3dip '[
+            test-compiler-env [ _ _ 2test-node @ ] bind
+        ] with-variable
+    ] make-classes ; inline
+
+TUPLE: scalar-cpu ;
+
+TUPLE: simple-ops-cpu ;
+M: simple-ops-cpu %zero-vector-reps { int-4-rep float-4-rep } ;
+M: simple-ops-cpu %add-vector-reps  { int-4-rep float-4-rep } ;
+M: simple-ops-cpu %sub-vector-reps  { int-4-rep float-4-rep } ;
+M: simple-ops-cpu %mul-vector-reps  { int-4-rep float-4-rep } ;
+M: simple-ops-cpu %div-vector-reps  {           float-4-rep } ;
+
+! v+
+[ { ##add-vector } ]
+[ simple-ops-cpu float-4-rep [ emit-simd-v+ ] 1test-emit ]
+unit-test
+
+! v-
+[ { ##sub-vector } ]
+[ simple-ops-cpu float-4-rep [ emit-simd-v- ] 1test-emit ]
+unit-test
+
+! vneg
+[ { ##load-constant ##sub-vector } ]
+[ simple-ops-cpu float-4-rep [ emit-simd-vneg ] 1test-emit ]
+unit-test
+
+[ { ##zero-vector ##sub-vector } ]
+[ simple-ops-cpu int-4-rep [ emit-simd-vneg ] 1test-emit ]
+unit-test
+
+! v*
+[ { ##mul-vector } ]
+[ simple-ops-cpu float-4-rep [ emit-simd-v* ] 1test-emit ]
+unit-test
+
+! v/
+[ { ##div-vector } ]
+[ simple-ops-cpu float-4-rep [ emit-simd-v/ ] 1test-emit ]
+unit-test
+
+TUPLE: addsub-cpu < simple-ops-cpu ;
+M: addsub-cpu %add-sub-vector-reps { int-4-rep float-4-rep } ;
+
+! v+-
+[ { ##add-sub-vector } ]
+[ addsub-cpu float-4-rep [ emit-simd-v+- ] 1test-emit ]
+unit-test
+
+[ { ##load-constant ##xor-vector ##add-vector } ]
+[ simple-ops-cpu float-4-rep [ emit-simd-v+- ] 1test-emit ]
+unit-test
+
+[ { ##load-constant ##xor-vector ##sub-vector ##add-vector } ]
+[ simple-ops-cpu int-4-rep [ emit-simd-v+- ] 1test-emit ]
+unit-test
+

From f545c5d3e5c812a968b0a5bd4c8514567b50ae63 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Wed, 18 Nov 2009 12:36:41 -0800
Subject: [PATCH 19/46] properly handle -vector-op and case words in
 simd.backend

---
 .../intrinsics/simd/backend/backend.factor    | 64 +++++++++++++++++--
 .../cfg/intrinsics/simd/simd-tests.factor     |  9 ++-
 2 files changed, 64 insertions(+), 9 deletions(-)

diff --git a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor
index 8f9fa801e2..f2ba9af41d 100644
--- a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor
+++ b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor
@@ -1,11 +1,11 @@
 ! (c)2009 Joe Groff bsd license
-USING: accessors arrays classes combinators
+USING: accessors arrays assocs classes combinators
 combinators.short-circuit compiler.cfg.builder.blocks
 compiler.cfg.registers compiler.cfg.stacks
 compiler.cfg.stacks.local compiler.tree.propagation.info
 cpu.architecture effects fry generalizations help.lint.checks
 kernel locals macros math namespaces quotations sequences
-splitting words ;
+splitting stack-checker words ;
 IN: compiler.cfg.intrinsics.simd.backend
 
 ! Selection of implementation based on available CPU instructions
@@ -18,20 +18,25 @@ IN: compiler.cfg.intrinsics.simd.backend
 
 GENERIC: create-can-has ( word -- word' )
 
-PREDICATE: vector-op-word < word
+PREDICATE: hat-word < word
     {
-        [ name>> { [ { [ "^" head? ] [ "##" head? ] } 1|| ] [ "-vector" swap subseq? ] } 1&& ]
+        [ name>> { [ "^" head? ] [ "##" head? ] } 1|| ]
         [ vocabulary>> { "compiler.cfg.intrinsics.simd" "compiler.cfg.hats" } member? ]
     } 1&& ;
 
+PREDICATE: vector-op-word < hat-word
+    name>> "-vector" swap subseq? ;
+
 : reps-word ( word -- word' )
     name>> "^^" ?head drop "##" ?head drop
     "%" "-reps" surround "cpu.architecture" lookup ;
 
+SYMBOL: blub
+
 :: can-has-^^-quot ( word def effect -- quot )
     effect in>> { "rep" } split1 [ length ] bi@ 1 +
     word reps-word 1quotation
-    effect out>> length f <array> >quotation
+    effect out>> length blub <array> >quotation
     '[ [ _ ndrop ] _ ndip @ can-has-rep? @ ] ;
 
 :: can-has-^-quot ( word def effect -- quot )
@@ -57,6 +62,17 @@ M: callable create-can-has
         { [ pick name>> "^"  head? ] [ can-has-^-quot  ] }
     } cond ;
 
+: (can-has-nop-quot) ( word -- quot )
+    stack-effect in>> length '[ _ ndrop blub ] ;
+
+DEFER: can-has-words
+
+M: word create-can-has
+    can-has-words ?at drop 1quotation ;
+
+M: hat-word create-can-has
+    (can-has-nop-quot) ;
+
 M: vector-op-word create-can-has
     dup (can-has-word) [ 1quotation ] [ (can-has-quot) ] ?if ;
 
@@ -86,12 +102,46 @@ MACRO: vvvv-vector-op ( trials -- )
 : can-has-^(compare-vector) ( src1 src2 rep cc -- dst )
     [ 2drop ] 2dip %compare-vector-reps member?
     \ can-has? [ and ] change
-    f ;
+    blub ;
 
 : can-has-^^test-vector ( src rep vcc -- dst )
     [ drop ] 2dip drop %test-vector-reps member?
     \ can-has? [ and ] change
-    f ;
+    blub ;
+
+MACRO: can-has-case ( cases -- )
+    dup first second infer in>> length 1 +
+    '[ _ ndrop f ] suffix '[ _ case ] ;
+
+GENERIC# >can-has-trial 1 ( obj #pick -- quot )
+
+M: callable >can-has-trial
+    drop '[ _ can-has? ] ;
+M: pair >can-has-trial
+    swap first2 dup infer in>> length
+    '[ _ npick _ instance? [ _ can-has? ] [ _ ndrop blub ] if ] ; 
+
+MACRO: can-has-vector-op ( trials #pick #dup -- )
+    [ '[ _ >can-has-trial ] map ] dip '[ _ _ n|| \ can-has? [ and ] change blub ] ;
+
+: can-has-v-vector-op ( trials -- ? )
+    1 2 can-has-vector-op ; inline
+: can-has-vv-vector-op ( trials -- ? )
+    1 3 can-has-vector-op ; inline
+: can-has-vv-cc-vector-op ( trials -- ? )
+    2 4 can-has-vector-op ; inline
+: can-has-vvvv-vector-op ( trials -- ? )
+    1 5 can-has-vector-op ; inline
+
+CONSTANT: can-has-words
+    H{
+        { case can-has-case }
+        { v-vector-op     can-has-v-vector-op  }
+        { vl-vector-op    can-has-vv-vector-op }
+        { vv-vector-op    can-has-vv-vector-op }
+        { vv-cc-vector-op can-has-vv-cc-vector-op }
+        { vvvv-vector-op  can-has-vvvv-vector-op }
+    }
 
 ! Intrinsic code emission
 
diff --git a/basis/compiler/cfg/intrinsics/simd/simd-tests.factor b/basis/compiler/cfg/intrinsics/simd/simd-tests.factor
index c7d999f029..fadabbe604 100644
--- a/basis/compiler/cfg/intrinsics/simd/simd-tests.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd-tests.factor
@@ -44,14 +44,14 @@ IN: compiler.cfg.intrinsics.simd.tests
 
 : 1test-emit ( cpu rep quot -- node )
     [
-        [ new cpu ] 2dip '[
+        [ new \ cpu ] 2dip '[
             test-compiler-env [ _ 1test-node @ ] bind
         ] with-variable
     ] make-classes ; inline
 
 : 2test-emit ( cpu rep cc quot -- node )
     [
-        [ new cpu ] 3dip '[
+        [ new \ cpu ] 3dip '[
             test-compiler-env [ _ _ 2test-node @ ] bind
         ] with-variable
     ] make-classes ; inline
@@ -64,6 +64,11 @@ M: simple-ops-cpu %add-vector-reps  { int-4-rep float-4-rep } ;
 M: simple-ops-cpu %sub-vector-reps  { int-4-rep float-4-rep } ;
 M: simple-ops-cpu %mul-vector-reps  { int-4-rep float-4-rep } ;
 M: simple-ops-cpu %div-vector-reps  {           float-4-rep } ;
+M: simple-ops-cpu %not-vector-reps  { int-4-rep float-4-rep } ;
+M: simple-ops-cpu %andn-vector-reps { int-4-rep float-4-rep } ;
+M: simple-ops-cpu %and-vector-reps  { int-4-rep float-4-rep } ;
+M: simple-ops-cpu %or-vector-reps   { int-4-rep float-4-rep } ;
+M: simple-ops-cpu %xor-vector-reps  { int-4-rep float-4-rep } ;
 
 ! v+
 [ { ##add-vector } ]

From 658387505525b6a4fb9a741f72e0f73a1dbca2de Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Wed, 18 Nov 2009 14:39:39 -0800
Subject: [PATCH 20/46] tests for all simd intrinsics

---
 .../cfg/intrinsics/simd/simd-tests.factor     | 459 ++++++++++++++++--
 basis/cpu/architecture/architecture.factor    |   5 +
 2 files changed, 430 insertions(+), 34 deletions(-)

diff --git a/basis/compiler/cfg/intrinsics/simd/simd-tests.factor b/basis/compiler/cfg/intrinsics/simd/simd-tests.factor
index fadabbe604..27b3b38b01 100644
--- a/basis/compiler/cfg/intrinsics/simd/simd-tests.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd-tests.factor
@@ -1,12 +1,13 @@
 ! (c)2009 Joe Groff bsd license
-USING: assocs biassocs byte-arrays byte-arrays.hex classes
-compiler.cfg.instructions compiler.cfg.intrinsics.simd
-compiler.cfg.registers compiler.cfg.stacks.local compiler.tree
+USING: arrays assocs biassocs byte-arrays byte-arrays.hex
+classes compiler.cfg.comparisons compiler.cfg.instructions
+compiler.cfg.intrinsics.simd compiler.cfg.registers
+compiler.cfg.stacks.local compiler.tree
 compiler.tree.propagation.info cpu.architecture fry kernel
 locals make namespaces sequences system tools.test words ;
 IN: compiler.cfg.intrinsics.simd.tests
 
-:: 1test-node ( rep    -- node  ) 
+:: test-node ( rep -- node ) 
     T{ #call
         { in-d  { 1 2 3 4 } }
         { out-d { 5 } }
@@ -18,17 +19,18 @@ IN: compiler.cfg.intrinsics.simd.tests
             { 5 T{ value-info { class byte-array } } }
         } }
     } ;
-:: 2test-node ( rep cc -- node )
+
+:: test-node-literal ( lit rep -- node )
+    lit class :> lit-class
     T{ #call
-        { in-d  { 1 2 3 4 5 } }
-        { out-d { 6 } }
+        { in-d  { 1 2 3 4 } }
+        { out-d { 5 } }
         { info H{
             { 1 T{ value-info { class byte-array } } }
             { 2 T{ value-info { class byte-array } } }
-            { 3 T{ value-info { class byte-array } } }
+            { 3 T{ value-info { class lit-class } { literal? t } { literal lit } } }
             { 4 T{ value-info { class word } { literal? t } { literal rep } } }
-            { 5 T{ value-info { class word } { literal? t } { literal cc  } } }
-            { 6 T{ value-info { class byte-array } } }
+            { 5 T{ value-info { class byte-array } } }
         } }
     } ;
 
@@ -37,66 +39,85 @@ IN: compiler.cfg.intrinsics.simd.tests
         T{ current-height { d 0 } { r 0 } { emit-d 0 } { emit-r 0 } } \ current-height pick set-at
         H{ } clone \ local-peek-set pick set-at
         H{ } clone \ replace-mapping pick set-at
-        H{ } <biassoc> \ locs>vregs pick set-at ;
+        H{ } <biassoc> \ locs>vregs pick set-at
+        H{ } clone \ peek-sets pick set-at
+        H{ } clone \ replace-sets pick set-at
+        H{ } clone \ kill-sets pick set-at ;
 
 : make-classes ( quot -- seq )
     { } make [ class ] map ; inline
 
-: 1test-emit ( cpu rep quot -- node )
+: test-emit ( cpu rep quot -- node )
     [
         [ new \ cpu ] 2dip '[
-            test-compiler-env [ _ 1test-node @ ] bind
+            test-compiler-env [ _ test-node @ ] bind
         ] with-variable
     ] make-classes ; inline
 
-: 2test-emit ( cpu rep cc quot -- node )
+: test-emit-literal ( cpu lit rep quot -- node )
     [
         [ new \ cpu ] 3dip '[
-            test-compiler-env [ _ _ 2test-node @ ] bind
+            test-compiler-env [ _ _ test-node-literal @ ] bind
         ] with-variable
     ] make-classes ; inline
 
+CONSTANT: signed-reps
+    { char-16-rep short-8-rep int-4-rep longlong-2-rep float-4-rep double-2-rep }
+CONSTANT: all-reps
+    {
+        char-16-rep short-8-rep int-4-rep longlong-2-rep float-4-rep double-2-rep
+        uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep
+    }
+
 TUPLE: scalar-cpu ;
 
 TUPLE: simple-ops-cpu ;
-M: simple-ops-cpu %zero-vector-reps { int-4-rep float-4-rep } ;
-M: simple-ops-cpu %add-vector-reps  { int-4-rep float-4-rep } ;
-M: simple-ops-cpu %sub-vector-reps  { int-4-rep float-4-rep } ;
-M: simple-ops-cpu %mul-vector-reps  { int-4-rep float-4-rep } ;
-M: simple-ops-cpu %div-vector-reps  {           float-4-rep } ;
-M: simple-ops-cpu %not-vector-reps  { int-4-rep float-4-rep } ;
-M: simple-ops-cpu %andn-vector-reps { int-4-rep float-4-rep } ;
-M: simple-ops-cpu %and-vector-reps  { int-4-rep float-4-rep } ;
-M: simple-ops-cpu %or-vector-reps   { int-4-rep float-4-rep } ;
-M: simple-ops-cpu %xor-vector-reps  { int-4-rep float-4-rep } ;
+M: simple-ops-cpu %zero-vector-reps  all-reps ;
+M: simple-ops-cpu %fill-vector-reps  all-reps ;
+M: simple-ops-cpu %add-vector-reps   all-reps ;
+M: simple-ops-cpu %sub-vector-reps   all-reps ;
+M: simple-ops-cpu %mul-vector-reps   all-reps ;
+M: simple-ops-cpu %div-vector-reps   all-reps ;
+M: simple-ops-cpu %andn-vector-reps  all-reps ;
+M: simple-ops-cpu %and-vector-reps   all-reps ;
+M: simple-ops-cpu %or-vector-reps    all-reps ;
+M: simple-ops-cpu %xor-vector-reps   all-reps ;
+M: simple-ops-cpu %merge-vector-reps all-reps ;
+M: simple-ops-cpu %sqrt-vector-reps  all-reps ;
+M: simple-ops-cpu %test-vector-reps  all-reps ;
+M: simple-ops-cpu %signed-pack-vector-reps   all-reps ;
+M: simple-ops-cpu %unsigned-pack-vector-reps all-reps ;
+M: simple-ops-cpu %gather-vector-2-reps { longlong-2-rep ulonglong-2-rep double-2-rep } ;
+M: simple-ops-cpu %gather-vector-4-reps { int-4-rep uint-4-rep float-4-rep } ;
+M: simple-ops-cpu %alien-vector-reps all-reps ;
 
 ! v+
 [ { ##add-vector } ]
-[ simple-ops-cpu float-4-rep [ emit-simd-v+ ] 1test-emit ]
+[ simple-ops-cpu float-4-rep [ emit-simd-v+ ] test-emit ]
 unit-test
 
 ! v-
 [ { ##sub-vector } ]
-[ simple-ops-cpu float-4-rep [ emit-simd-v- ] 1test-emit ]
+[ simple-ops-cpu float-4-rep [ emit-simd-v- ] test-emit ]
 unit-test
 
 ! vneg
 [ { ##load-constant ##sub-vector } ]
-[ simple-ops-cpu float-4-rep [ emit-simd-vneg ] 1test-emit ]
+[ simple-ops-cpu float-4-rep [ emit-simd-vneg ] test-emit ]
 unit-test
 
 [ { ##zero-vector ##sub-vector } ]
-[ simple-ops-cpu int-4-rep [ emit-simd-vneg ] 1test-emit ]
+[ simple-ops-cpu int-4-rep [ emit-simd-vneg ] test-emit ]
 unit-test
 
 ! v*
 [ { ##mul-vector } ]
-[ simple-ops-cpu float-4-rep [ emit-simd-v* ] 1test-emit ]
+[ simple-ops-cpu float-4-rep [ emit-simd-v* ] test-emit ]
 unit-test
 
 ! v/
 [ { ##div-vector } ]
-[ simple-ops-cpu float-4-rep [ emit-simd-v/ ] 1test-emit ]
+[ simple-ops-cpu float-4-rep [ emit-simd-v/ ] test-emit ]
 unit-test
 
 TUPLE: addsub-cpu < simple-ops-cpu ;
@@ -104,14 +125,384 @@ M: addsub-cpu %add-sub-vector-reps { int-4-rep float-4-rep } ;
 
 ! v+-
 [ { ##add-sub-vector } ]
-[ addsub-cpu float-4-rep [ emit-simd-v+- ] 1test-emit ]
+[ addsub-cpu float-4-rep [ emit-simd-v+- ] test-emit ]
 unit-test
 
 [ { ##load-constant ##xor-vector ##add-vector } ]
-[ simple-ops-cpu float-4-rep [ emit-simd-v+- ] 1test-emit ]
+[ simple-ops-cpu float-4-rep [ emit-simd-v+- ] test-emit ]
 unit-test
 
 [ { ##load-constant ##xor-vector ##sub-vector ##add-vector } ]
-[ simple-ops-cpu int-4-rep [ emit-simd-v+- ] 1test-emit ]
+[ simple-ops-cpu int-4-rep [ emit-simd-v+- ] test-emit ]
 unit-test
 
+TUPLE: saturating-cpu < simple-ops-cpu ;
+M: saturating-cpu %saturated-add-vector-reps { int-4-rep } ;
+M: saturating-cpu %saturated-sub-vector-reps { int-4-rep } ;
+M: saturating-cpu %saturated-mul-vector-reps { int-4-rep } ;
+
+! vs+
+[ { ##add-vector } ]
+[ simple-ops-cpu float-4-rep [ emit-simd-vs+ ] test-emit ]
+unit-test
+
+[ { ##add-vector } ]
+[ saturating-cpu float-4-rep [ emit-simd-vs+ ] test-emit ]
+unit-test
+
+[ { ##saturated-add-vector } ]
+[ saturating-cpu int-4-rep [ emit-simd-vs+ ] test-emit ]
+unit-test
+
+! vs-
+[ { ##sub-vector } ]
+[ simple-ops-cpu float-4-rep [ emit-simd-vs- ] test-emit ]
+unit-test
+
+[ { ##sub-vector } ]
+[ saturating-cpu float-4-rep [ emit-simd-vs- ] test-emit ]
+unit-test
+
+[ { ##saturated-sub-vector } ]
+[ saturating-cpu int-4-rep [ emit-simd-vs- ] test-emit ]
+unit-test
+
+! vs*
+[ { ##mul-vector } ]
+[ simple-ops-cpu float-4-rep [ emit-simd-vs* ] test-emit ]
+unit-test
+
+[ { ##mul-vector } ]
+[ saturating-cpu float-4-rep [ emit-simd-vs* ] test-emit ]
+unit-test
+
+[ { ##saturated-mul-vector } ]
+[ saturating-cpu int-4-rep [ emit-simd-vs* ] test-emit ]
+unit-test
+
+TUPLE: minmax-cpu < simple-ops-cpu ;
+M: minmax-cpu %min-vector-reps signed-reps ;
+M: minmax-cpu %max-vector-reps signed-reps ;
+M: minmax-cpu %compare-vector-reps { cc= cc/= } member? [ signed-reps ] [ { } ] if ;
+M: minmax-cpu %compare-vector-ccs nip f 2array 1array f ;
+
+TUPLE: compare-cpu < simple-ops-cpu ;
+M: compare-cpu %compare-vector-reps drop signed-reps ;
+M: compare-cpu %compare-vector-ccs nip f 2array 1array f ;
+
+! vmin
+[ { ##min-vector } ]
+[ minmax-cpu float-4-rep [ emit-simd-vmin ] test-emit ]
+unit-test
+
+[ { ##compare-vector ##and-vector ##andn-vector ##or-vector } ]
+[ compare-cpu float-4-rep [ emit-simd-vmin ] test-emit ]
+unit-test
+
+! vmax
+[ { ##max-vector } ]
+[ minmax-cpu float-4-rep [ emit-simd-vmax ] test-emit ]
+unit-test
+
+[ { ##compare-vector ##and-vector ##andn-vector ##or-vector } ]
+[ compare-cpu float-4-rep [ emit-simd-vmax ] test-emit ]
+unit-test
+
+TUPLE: dot-cpu < simple-ops-cpu ;
+M: dot-cpu %dot-vector-reps { float-4-rep } ;
+
+TUPLE: horizontal-cpu < simple-ops-cpu ;
+M: horizontal-cpu %horizontal-add-vector-reps signed-reps ;
+M: horizontal-cpu %unpack-vector-head-reps signed-reps ;
+M: horizontal-cpu %unpack-vector-tail-reps signed-reps ;
+
+! v.
+[ { ##dot-vector } ]
+[ dot-cpu float-4-rep [ emit-simd-v. ] test-emit ]
+unit-test
+
+[ { ##mul-vector ##horizontal-add-vector ##horizontal-add-vector ##vector>scalar } ]
+[ horizontal-cpu float-4-rep [ emit-simd-v. ] test-emit ]
+unit-test
+
+[ {
+    ##mul-vector
+    ##merge-vector-head ##merge-vector-tail ##add-vector 
+    ##merge-vector-head ##merge-vector-tail ##add-vector 
+    ##vector>scalar
+} ]
+[ simple-ops-cpu float-4-rep [ emit-simd-v. ] test-emit ]
+unit-test
+
+! vsqrt
+[ { ##sqrt-vector } ]
+[ simple-ops-cpu float-4-rep [ emit-simd-vsqrt ] test-emit ]
+unit-test
+
+! sum
+[ { ##horizontal-add-vector ##vector>scalar } ]
+[ horizontal-cpu double-2-rep [ emit-simd-sum ] test-emit ]
+unit-test
+
+[ { ##horizontal-add-vector ##horizontal-add-vector ##vector>scalar } ]
+[ horizontal-cpu float-4-rep [ emit-simd-sum ] test-emit ]
+unit-test
+
+[ {
+    ##unpack-vector-head ##unpack-vector-tail ##add-vector
+    ##horizontal-add-vector
+    ##vector>scalar
+} ]
+[ horizontal-cpu int-4-rep [ emit-simd-sum ] test-emit ]
+unit-test
+
+[ {
+    ##unpack-vector-head ##unpack-vector-tail ##add-vector
+    ##horizontal-add-vector ##horizontal-add-vector
+    ##vector>scalar
+} ]
+[ horizontal-cpu short-8-rep [ emit-simd-sum ] test-emit ]
+unit-test
+
+[ {
+    ##unpack-vector-head ##unpack-vector-tail ##add-vector
+    ##horizontal-add-vector ##horizontal-add-vector ##horizontal-add-vector
+    ##vector>scalar
+} ]
+[ horizontal-cpu char-16-rep [ emit-simd-sum ] test-emit ]
+unit-test
+
+TUPLE: abs-cpu < simple-ops-cpu ;
+M: abs-cpu %abs-vector-reps signed-reps ;
+
+! vabs
+[ { } ]
+[ simple-ops-cpu uint-4-rep [ emit-simd-vabs ] test-emit ]
+unit-test
+
+[ { ##abs-vector } ]
+[ abs-cpu float-4-rep [ emit-simd-vabs ] test-emit ]
+unit-test
+
+[ { ##load-constant ##andn-vector } ]
+[ simple-ops-cpu float-4-rep [ emit-simd-vabs ] test-emit ]
+unit-test
+
+[ { ##zero-vector ##sub-vector ##compare-vector ##and-vector ##andn-vector ##or-vector } ]
+[ compare-cpu int-4-rep [ emit-simd-vabs ] test-emit ]
+unit-test
+
+! vand
+[ { ##and-vector } ]
+[ simple-ops-cpu float-4-rep [ emit-simd-vand ] test-emit ]
+unit-test
+
+! vandn
+[ { ##andn-vector } ]
+[ simple-ops-cpu float-4-rep [ emit-simd-vandn ] test-emit ]
+unit-test
+
+! vor
+[ { ##or-vector } ]
+[ simple-ops-cpu float-4-rep [ emit-simd-vor ] test-emit ]
+unit-test
+
+! vxor
+[ { ##xor-vector } ]
+[ simple-ops-cpu float-4-rep [ emit-simd-vxor ] test-emit ]
+unit-test
+
+TUPLE: not-cpu < simple-ops-cpu ;
+M: not-cpu %not-vector-reps signed-reps ;
+
+! vnot
+[ { ##not-vector } ]
+[ not-cpu float-4-rep [ emit-simd-vnot ] test-emit ]
+unit-test
+
+[ { ##fill-vector ##xor-vector } ]
+[ simple-ops-cpu float-4-rep [ emit-simd-vnot ] test-emit ]
+unit-test
+
+TUPLE: shift-cpu < simple-ops-cpu ;
+M: shift-cpu %shl-vector-reps signed-reps ;
+M: shift-cpu %shr-vector-reps signed-reps ;
+
+TUPLE: shift-imm-cpu < simple-ops-cpu ;
+M: shift-imm-cpu %shl-vector-imm-reps signed-reps ;
+M: shift-imm-cpu %shr-vector-imm-reps signed-reps ;
+
+TUPLE: horizontal-shift-cpu < simple-ops-cpu ;
+M: horizontal-shift-cpu %horizontal-shl-vector-imm-reps signed-reps ;
+M: horizontal-shift-cpu %horizontal-shr-vector-imm-reps signed-reps ;
+
+! vlshift
+[ { ##shl-vector-imm } ]
+[ shift-imm-cpu 2 int-4-rep [ emit-simd-vlshift ] test-emit-literal ]
+unit-test
+
+[ { ##shl-vector } ]
+[ shift-cpu int-4-rep [ emit-simd-vlshift ] test-emit ]
+unit-test
+
+! vrshift
+[ { ##shr-vector-imm } ]
+[ shift-imm-cpu 2 int-4-rep [ emit-simd-vrshift ] test-emit-literal ]
+unit-test
+
+[ { ##shr-vector } ]
+[ shift-cpu int-4-rep [ emit-simd-vrshift ] test-emit ]
+unit-test
+
+! hlshift
+[ { ##horizontal-shl-vector-imm } ]
+[ horizontal-shift-cpu 2 int-4-rep [ emit-simd-hlshift ] test-emit-literal ]
+unit-test
+
+! hrshift
+[ { ##horizontal-shr-vector-imm } ]
+[ horizontal-shift-cpu 2 int-4-rep [ emit-simd-hrshift ] test-emit-literal ]
+unit-test
+
+TUPLE: shuffle-imm-cpu < simple-ops-cpu ;
+M: shuffle-imm-cpu %shuffle-vector-imm-reps signed-reps ;
+
+TUPLE: shuffle-cpu < simple-ops-cpu ;
+M: shuffle-cpu %shuffle-vector-reps signed-reps ;
+
+! vshuffle-elements
+[ { ##load-constant ##shuffle-vector } ]
+[ shuffle-cpu { 0 1 2 3 } int-4-rep [ emit-simd-vshuffle-elements ] test-emit-literal ]
+unit-test
+
+[ { ##shuffle-vector-imm } ]
+[ shuffle-imm-cpu { 0 1 2 3 } int-4-rep [ emit-simd-vshuffle-elements ] test-emit-literal ]
+unit-test
+
+! vshuffle-bytes
+[ { ##shuffle-vector } ]
+[ shuffle-cpu int-4-rep [ emit-simd-vshuffle-bytes ] test-emit ]
+unit-test
+
+! vmerge-head
+[ { ##merge-vector-head } ]
+[ simple-ops-cpu float-4-rep [ emit-simd-vmerge-head ] test-emit ]
+unit-test
+
+! vmerge-tail
+[ { ##merge-vector-tail } ]
+[ simple-ops-cpu float-4-rep [ emit-simd-vmerge-tail ] test-emit ]
+unit-test
+
+! v<= etc.
+[ { ##compare-vector } ]
+[ compare-cpu int-4-rep [ emit-simd-v<= ] test-emit ]
+unit-test
+
+[ { ##min-vector ##compare-vector } ]
+[ minmax-cpu int-4-rep [ emit-simd-v<= ] test-emit ]
+unit-test
+
+[ { ##load-constant ##xor-vector ##xor-vector ##compare-vector } ]
+[ compare-cpu uint-4-rep [ emit-simd-v<= ] test-emit ]
+unit-test
+
+! vany? etc.
+[ { ##test-vector } ]
+[ simple-ops-cpu int-4-rep [ emit-simd-vany? ] test-emit ]
+unit-test
+
+TUPLE: convert-cpu < simple-ops-cpu ;
+M: convert-cpu %integer>float-vector-reps { int-4-rep } ;
+M: convert-cpu %float>integer-vector-reps { float-4-rep } ;
+
+! v>float
+[ { } ]
+[ convert-cpu float-4-rep [ emit-simd-v>float ] test-emit ]
+unit-test
+
+[ { ##integer>float-vector } ]
+[ convert-cpu int-4-rep [ emit-simd-v>float ] test-emit ]
+unit-test
+
+! v>integer
+[ { } ]
+[ convert-cpu int-4-rep [ emit-simd-v>integer ] test-emit ]
+unit-test
+
+[ { ##float>integer-vector } ]
+[ convert-cpu float-4-rep [ emit-simd-v>integer ] test-emit ]
+unit-test
+
+! vpack-signed
+[ { ##signed-pack-vector } ]
+[ simple-ops-cpu int-4-rep [ emit-simd-vpack-signed ] test-emit ]
+unit-test
+
+! vpack-unsigned
+[ { ##unsigned-pack-vector } ]
+[ simple-ops-cpu int-4-rep [ emit-simd-vpack-unsigned ] test-emit ]
+unit-test
+
+TUPLE: unpack-head-cpu < simple-ops-cpu ;
+M: unpack-head-cpu %unpack-vector-head-reps all-reps ;
+TUPLE: unpack-cpu < unpack-head-cpu ;
+M: unpack-cpu %unpack-vector-tail-reps all-reps ;
+
+! vunpack-head
+[ { ##unpack-vector-head } ]
+[ unpack-head-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ]
+unit-test
+
+[ { ##zero-vector ##merge-vector-head } ]
+[ simple-ops-cpu uint-4-rep [ emit-simd-vunpack-head ] test-emit ]
+unit-test
+
+[ { ##merge-vector-head ##shr-vector-imm } ]
+[ shift-imm-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ]
+unit-test
+
+[ { ##zero-vector ##compare-vector ##merge-vector-head } ]
+[ compare-cpu int-4-rep [ emit-simd-vunpack-head ] test-emit ]
+unit-test
+
+! vunpack-tail
+[ { ##unpack-vector-tail } ]
+[ unpack-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
+unit-test
+
+[ { ##tail>head-vector ##unpack-vector-head } ]
+[ unpack-head-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
+unit-test
+
+[ { ##zero-vector ##merge-vector-tail } ]
+[ simple-ops-cpu uint-4-rep [ emit-simd-vunpack-tail ] test-emit ]
+unit-test
+
+[ { ##merge-vector-tail ##shr-vector-imm } ]
+[ shift-imm-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
+unit-test
+
+[ { ##zero-vector ##compare-vector ##merge-vector-tail } ]
+[ compare-cpu int-4-rep [ emit-simd-vunpack-tail ] test-emit ]
+unit-test
+
+! with
+[ { ##scalar>vector ##shuffle-vector-imm } ]
+[ shuffle-imm-cpu int-4-rep [ emit-simd-with ] test-emit ]
+unit-test
+
+! gather-2
+[ { ##gather-vector-2 } ]
+[ simple-ops-cpu longlong-2-rep [ emit-simd-gather-2 ] test-emit ]
+unit-test
+
+! gather-4
+[ { ##gather-vector-4 } ]
+[ simple-ops-cpu int-4-rep [ emit-simd-gather-4 ] test-emit ]
+unit-test
+
+! select
+[ { ##shuffle-vector-imm ##vector>scalar } ]
+[ shuffle-imm-cpu int-4-rep [ emit-simd-select ] test-emit ]
+unit-test
diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor
index b5bb0157af..f4d150033b 100644
--- a/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@@ -115,6 +115,7 @@ scalar-rep ;
         { uchar-16-rep    ushort-8-rep    }
         { ushort-8-rep    uint-4-rep      }
         { uint-4-rep      ulonglong-2-rep }
+        { float-4-rep     double-2-rep    }
     } at ;
 
 ! Register classes
@@ -385,6 +386,10 @@ M: object %shr-vector-imm-reps { } ;
 M: object %horizontal-shl-vector-imm-reps { } ;
 M: object %horizontal-shr-vector-imm-reps { } ;
 
+ALIAS: %merge-vector-head-reps %merge-vector-reps
+ALIAS: %merge-vector-tail-reps %merge-vector-reps
+ALIAS: %tail>head-vector-reps %unpack-vector-head-reps
+
 HOOK: %unbox-alien cpu ( dst src -- )
 HOOK: %unbox-any-c-ptr cpu ( dst src -- )
 HOOK: %box-alien cpu ( dst src temp -- )

From a3e4ecfc7d93ec4fae18a9adf75b39f1b5f58bd6 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Wed, 18 Nov 2009 20:32:05 -0800
Subject: [PATCH 21/46] enable simd intrinsics and fix first-pass compiler
 errors

---
 .../compiler/cfg/intrinsics/simd/simd.factor  | 118 ++++++-------
 basis/math/vectors/simd/simd-tests.factor     | 161 ++++++++----------
 basis/math/vectors/simd/simd.factor           |  37 ++--
 3 files changed, 156 insertions(+), 160 deletions(-)

diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor
index 5130ff36b7..54f105ca02 100644
--- a/basis/compiler/cfg/intrinsics/simd/simd.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd.factor
@@ -562,62 +562,62 @@ IN: compiler.cfg.intrinsics.simd
         inline-alien
     ] with { [ %alien-vector-reps member? ] } if-literals-match ;
 
-! : enable-simd ( -- )
-!     {
-!         { (simd-v+)                [ emit-simd-v+                  ] }
-!         { (simd-v-)                [ emit-simd-v-                  ] }
-!         { (simd-vneg)              [ emit-simd-vneg                ] }
-!         { (simd-v+-)               [ emit-simd-v+-                 ] }
-!         { (simd-vs+)               [ emit-simd-vs+                 ] }
-!         { (simd-vs-)               [ emit-simd-vs-                 ] }
-!         { (simd-vs*)               [ emit-simd-vs*                 ] }
-!         { (simd-v*)                [ emit-simd-v*                  ] }
-!         { (simd-v/)                [ emit-simd-v/                  ] }
-!         { (simd-vmin)              [ emit-simd-vmin                ] }
-!         { (simd-vmax)              [ emit-simd-vmax                ] }
-!         { (simd-v.)                [ emit-simd-v.                  ] }
-!         { (simd-vsqrt)             [ emit-simd-vsqrt               ] }
-!         { (simd-sum)               [ emit-simd-sum                 ] }
-!         { (simd-vabs)              [ emit-simd-vabs                ] }
-!         { (simd-vbitand)           [ emit-simd-vand                ] }
-!         { (simd-vbitandn)          [ emit-simd-vandn               ] }
-!         { (simd-vbitor)            [ emit-simd-vor                 ] }
-!         { (simd-vbitxor)           [ emit-simd-vxor                ] }
-!         { (simd-vbitnot)           [ emit-simd-vnot                ] }
-!         { (simd-vand)              [ emit-simd-vand                ] }
-!         { (simd-vandn)             [ emit-simd-vandn               ] }
-!         { (simd-vor)               [ emit-simd-vor                 ] }
-!         { (simd-vxor)              [ emit-simd-vxor                ] }
-!         { (simd-vnot)              [ emit-simd-vnot                ] }
-!         { (simd-vlshift)           [ emit-simd-vlshift             ] }
-!         { (simd-vrshift)           [ emit-simd-vrshift             ] }
-!         { (simd-hlshift)           [ emit-simd-hlshift             ] }
-!         { (simd-hrshift)           [ emit-simd-hrshift             ] }
-!         { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements   ] }
-!         { (simd-vshuffle-bytes)    [ emit-simd-vshuffle-bytes      ] }
-!         { (simd-vmerge-head)       [ emit-simd-vmerge-head         ] }
-!         { (simd-vmerge-tail)       [ emit-simd-vmerge-tail         ] }
-!         { (simd-v<=)               [ emit-simd-v<=                 ] }
-!         { (simd-v<)                [ emit-simd-v<                  ] }
-!         { (simd-v=)                [ emit-simd-v=                  ] }
-!         { (simd-v>)                [ emit-simd-v>                  ] }
-!         { (simd-v>=)               [ emit-simd-v>=                 ] }
-!         { (simd-vunordered?)       [ emit-simd-vunordered?         ] }
-!         { (simd-vany?)             [ emit-simd-vany?               ] }
-!         { (simd-vall?)             [ emit-simd-vall?               ] }
-!         { (simd-vnone?)            [ emit-simd-vnone?              ] }
-!         { (simd-v>float)           [ emit-simd-v>float             ] }
-!         { (simd-v>integer)         [ emit-simd-v>integer           ] }
-!         { (simd-vpack-signed)      [ emit-simd-vpack-signed        ] }
-!         { (simd-vpack-unsigned)    [ emit-simd-vpack-unsigned      ] }
-!         { (simd-vunpack-head)      [ emit-simd-vunpack-head        ] }
-!         { (simd-vunpack-tail)      [ emit-simd-vunpack-tail        ] }
-!         { (simd-with)              [ emit-simd-with                ] }
-!         { (simd-gather-2)          [ emit-simd-gather-2            ] }
-!         { (simd-gather-4)          [ emit-simd-gather-4            ] }
-!         { (simd-select)            [ emit-simd-select              ] }
-!         { alien-vector             [ emit-alien-vector             ] }
-!         { set-alien-vector         [ emit-set-alien-vector         ] }
-!     } enable-intrinsics ;
-! 
-! enable-simd
+: enable-simd ( -- )
+    {
+        { (simd-v+)                [ emit-simd-v+                  ] }
+        { (simd-v-)                [ emit-simd-v-                  ] }
+        { (simd-vneg)              [ emit-simd-vneg                ] }
+        { (simd-v+-)               [ emit-simd-v+-                 ] }
+        { (simd-vs+)               [ emit-simd-vs+                 ] }
+        { (simd-vs-)               [ emit-simd-vs-                 ] }
+        { (simd-vs*)               [ emit-simd-vs*                 ] }
+        { (simd-v*)                [ emit-simd-v*                  ] }
+        { (simd-v/)                [ emit-simd-v/                  ] }
+        { (simd-vmin)              [ emit-simd-vmin                ] }
+        { (simd-vmax)              [ emit-simd-vmax                ] }
+        { (simd-v.)                [ emit-simd-v.                  ] }
+        { (simd-vsqrt)             [ emit-simd-vsqrt               ] }
+        { (simd-sum)               [ emit-simd-sum                 ] }
+        { (simd-vabs)              [ emit-simd-vabs                ] }
+        { (simd-vbitand)           [ emit-simd-vand                ] }
+        { (simd-vbitandn)          [ emit-simd-vandn               ] }
+        { (simd-vbitor)            [ emit-simd-vor                 ] }
+        { (simd-vbitxor)           [ emit-simd-vxor                ] }
+        { (simd-vbitnot)           [ emit-simd-vnot                ] }
+        { (simd-vand)              [ emit-simd-vand                ] }
+        { (simd-vandn)             [ emit-simd-vandn               ] }
+        { (simd-vor)               [ emit-simd-vor                 ] }
+        { (simd-vxor)              [ emit-simd-vxor                ] }
+        { (simd-vnot)              [ emit-simd-vnot                ] }
+        { (simd-vlshift)           [ emit-simd-vlshift             ] }
+        { (simd-vrshift)           [ emit-simd-vrshift             ] }
+        { (simd-hlshift)           [ emit-simd-hlshift             ] }
+        { (simd-hrshift)           [ emit-simd-hrshift             ] }
+        { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements   ] }
+        { (simd-vshuffle-bytes)    [ emit-simd-vshuffle-bytes      ] }
+        { (simd-vmerge-head)       [ emit-simd-vmerge-head         ] }
+        { (simd-vmerge-tail)       [ emit-simd-vmerge-tail         ] }
+        { (simd-v<=)               [ emit-simd-v<=                 ] }
+        { (simd-v<)                [ emit-simd-v<                  ] }
+        { (simd-v=)                [ emit-simd-v=                  ] }
+        { (simd-v>)                [ emit-simd-v>                  ] }
+        { (simd-v>=)               [ emit-simd-v>=                 ] }
+        { (simd-vunordered?)       [ emit-simd-vunordered?         ] }
+        { (simd-vany?)             [ emit-simd-vany?               ] }
+        { (simd-vall?)             [ emit-simd-vall?               ] }
+        { (simd-vnone?)            [ emit-simd-vnone?              ] }
+        { (simd-v>float)           [ emit-simd-v>float             ] }
+        { (simd-v>integer)         [ emit-simd-v>integer           ] }
+        { (simd-vpack-signed)      [ emit-simd-vpack-signed        ] }
+        { (simd-vpack-unsigned)    [ emit-simd-vpack-unsigned      ] }
+        { (simd-vunpack-head)      [ emit-simd-vunpack-head        ] }
+        { (simd-vunpack-tail)      [ emit-simd-vunpack-tail        ] }
+        { (simd-with)              [ emit-simd-with                ] }
+        { (simd-gather-2)          [ emit-simd-gather-2            ] }
+        { (simd-gather-4)          [ emit-simd-gather-4            ] }
+        { (simd-select)            [ emit-simd-select              ] }
+        { alien-vector             [ emit-alien-vector             ] }
+        { set-alien-vector         [ emit-set-alien-vector         ] }
+    } enable-intrinsics ;
+
+enable-simd
diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor
index 46cced3cb7..1fb947921c 100644
--- a/basis/math/vectors/simd/simd-tests.factor
+++ b/basis/math/vectors/simd/simd-tests.factor
@@ -3,22 +3,13 @@ effects fry io kernel kernel.private math math.functions
 math.private math.vectors math.vectors.simd
 math.vectors.simd.private prettyprint random sequences system
 tools.test vocabs assocs compiler.cfg.debugger words
-locals math.vectors.specialization combinators cpu.architecture
-math.vectors.conversion.backend
-math.vectors.simd.intrinsics namespaces byte-arrays alien
+locals combinators cpu.architecture namespaces byte-arrays alien
 specialized-arrays classes.struct eval classes.algebra sets
 quotations math.constants compiler.units ;
 QUALIFIED-WITH: alien.c-types c
 SPECIALIZED-ARRAY: c:float
-SIMD: c:char
-SIMDS: c:uchar c:short c:ushort c:int c:uint c:longlong c:ulonglong c:float c:double ;
 IN: math.vectors.simd.tests
 
-! Make sure the functor doesn't generate bogus vocabularies
-2 [ [ "USE: math.vectors.simd SIMD: rubinius" eval( -- ) ] must-fail ] times
-
-[ f ] [ "math.vectors.simd.instances.rubinius" vocab ] unit-test
-
 ! Test type propagation
 [ V{ float } ] [ [ { float-4 } declare norm-sq ] final-classes ] unit-test
 
@@ -38,10 +29,6 @@ IN: math.vectors.simd.tests
 
 [ V{ integer } ] [ [ { longlong-2 } declare second ] final-classes ] unit-test
 
-[ V{ int-8 } ] [ [ { int-8 int-8 } declare v+ ] final-classes ] unit-test
-
-[ t ] [ [ { int-8 } declare second ] final-classes first integer class<= ] unit-test
-
 ! Test puns; only on x86
 cpu x86? [
     [ double-2{ 4 1024 } ] [
@@ -55,26 +42,76 @@ CONSTANT: simd-classes
     {
         char-16
         uchar-16
-        char-32
-        uchar-32
         short-8
         ushort-8
-        short-16
-        ushort-16
         int-4
         uint-4
-        int-8
-        uint-8
         longlong-2
         ulonglong-2
-        longlong-4
-        ulonglong-4
         float-4
-        float-8
         double-2
-        double-4
     }
 
+SYMBOLS: -> +vector+ +any-vector+ +scalar+ +boolean+ +nonnegative+ +literal+ ;
+
+CONSTANT: vector-words
+    H{
+        { [v-] { +vector+ +vector+ -> +vector+ } }
+        { distance { +vector+ +vector+ -> +nonnegative+ } }
+        { n*v { +scalar+ +vector+ -> +vector+ } }
+        { n+v { +scalar+ +vector+ -> +vector+ } }
+        { n-v { +scalar+ +vector+ -> +vector+ } }
+        { n/v { +scalar+ +vector+ -> +vector+ } }
+        { norm { +vector+ -> +nonnegative+ } }
+        { norm-sq { +vector+ -> +nonnegative+ } }
+        { normalize { +vector+ -> +vector+ } }
+        { v* { +vector+ +vector+ -> +vector+ } }
+        { vs* { +vector+ +vector+ -> +vector+ } }
+        { v*n { +vector+ +scalar+ -> +vector+ } }
+        { v+ { +vector+ +vector+ -> +vector+ } }
+        { vs+ { +vector+ +vector+ -> +vector+ } }
+        { v+- { +vector+ +vector+ -> +vector+ } }
+        { v+n { +vector+ +scalar+ -> +vector+ } }
+        { v- { +vector+ +vector+ -> +vector+ } }
+        { vneg { +vector+ -> +vector+ } }
+        { vs- { +vector+ +vector+ -> +vector+ } }
+        { v-n { +vector+ +scalar+ -> +vector+ } }
+        { v. { +vector+ +vector+ -> +scalar+ } }
+        { v/ { +vector+ +vector+ -> +vector+ } }
+        { v/n { +vector+ +scalar+ -> +vector+ } }
+        { vceiling { +vector+ -> +vector+ } }
+        { vfloor { +vector+ -> +vector+ } }
+        { vmax { +vector+ +vector+ -> +vector+ } }
+        { vmin { +vector+ +vector+ -> +vector+ } }
+        { vneg { +vector+ -> +vector+ } }
+        { vtruncate { +vector+ -> +vector+ } }
+        { sum { +vector+ -> +scalar+ } }
+        { vabs { +vector+ -> +vector+ } }
+        { vsqrt { +vector+ -> +vector+ } }
+        { vbitand { +vector+ +vector+ -> +vector+ } }
+        { vbitandn { +vector+ +vector+ -> +vector+ } }
+        { vbitor { +vector+ +vector+ -> +vector+ } }
+        { vbitxor { +vector+ +vector+ -> +vector+ } }
+        { vbitnot { +vector+ -> +vector+ } }
+        { vand { +vector+ +vector+ -> +vector+ } }
+        { vandn { +vector+ +vector+ -> +vector+ } }
+        { vor { +vector+ +vector+ -> +vector+ } }
+        { vxor { +vector+ +vector+ -> +vector+ } }
+        { vnot { +vector+ -> +vector+ } }
+        { vlshift { +vector+ +scalar+ -> +vector+ } }
+        { vrshift { +vector+ +scalar+ -> +vector+ } }
+        { (vmerge-head) { +vector+ +vector+ -> +vector+ } }
+        { (vmerge-tail) { +vector+ +vector+ -> +vector+ } }
+        { v<= { +vector+ +vector+ -> +vector+ } }
+        { v< { +vector+ +vector+ -> +vector+ } }
+        { v= { +vector+ +vector+ -> +vector+ } }
+        { v> { +vector+ +vector+ -> +vector+ } }
+        { v>= { +vector+ +vector+ -> +vector+ } }
+        { vunordered? { +vector+ +vector+ -> +vector+ } }
+    }
+
+: vector-word-inputs ( schema -- seq ) { -> } split first ;
+
 : with-ctors ( -- seq )
     simd-classes [ [ name>> "-with" append ] [ vocabulary>> ] bi lookup ] map ;
 
@@ -166,26 +203,15 @@ CONSTANT: simd-classes
 : remove-boolean-words ( alist -- alist' )
     boolean-ops unique assoc-diff ;
 
-: remove-special-words ( alist -- alist' )
-    ! These have their own tests later
-    {
-        hlshift hrshift vshuffle-bytes vshuffle-elements vbroadcast
-        vany? vall? vnone?
-        (v>float) (v>integer)
-        (vpack-signed) (vpack-unsigned)
-        (vunpack-head) (vunpack-tail)
-    } unique assoc-diff ;
-
 : ops-to-check ( elt-class -- alist )
     [ vector-words >alist ] dip
     float = [ remove-integer-words ] [ remove-float-words ] if
-    remove-boolean-words
-    remove-special-words ;
+    remove-boolean-words ;
 
 : check-vector-ops ( class elt-class compare-quot -- )
     [
         [ nip ops-to-check ] 2keep
-        '[ first2 inputs _ _ check-vector-op ]
+        '[ first2 vector-word-inputs _ _ check-vector-op ]
     ] dip check-optimizer ; inline
 
 : (approx=) ( x y -- ? )
@@ -427,27 +453,6 @@ TUPLE: inconsistent-vector-test bool branch ;
 [ t f f ]
 [ int-4{ f f f f } { int-4 } test-vector-tests ] unit-test
 
-[ f t t ]
-[ float-8{ t t t t t t t t } { float-8 } test-vector-tests ] unit-test
-[ f t f ]
-[ float-8{ f t t t t f t t } { float-8 } test-vector-tests ] unit-test
-[ t f f ]
-[ float-8{ f f f f f f f f } { float-8 } test-vector-tests ] unit-test
-
-[ f t t ]
-[ double-4{ t t t t } { double-4 } test-vector-tests ] unit-test
-[ f t f ]
-[ double-4{ f t t f } { double-4 } test-vector-tests ] unit-test
-[ t f f ]
-[ double-4{ f f f f } { double-4 } test-vector-tests ] unit-test
-
-[ f t t ]
-[ int-8{ t t t t t t t t } { int-8 } test-vector-tests ] unit-test
-[ f t f ]
-[ int-8{ f t t t t f f f } { int-8 } test-vector-tests ] unit-test
-[ t f f ]
-[ int-8{ f f f f f f f f } { int-8 } test-vector-tests ] unit-test
-
 "== Checking element access" print
 
 ! Test element access -- it should box bignums for int-4 on x86
@@ -467,14 +472,6 @@ TUPLE: inconsistent-vector-test bool branch ;
 [ { } ] [ longlong-2{ 1 2 } test-accesses ] unit-test
 [ { } ] [ ulonglong-2{ 1 2 } test-accesses ] unit-test
 
-[ { } ] [ float-8{ 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 } test-accesses ] unit-test
-[ { } ] [ int-8{ 1 2 3 4 5 6 7 8 } test-accesses ] unit-test
-[ { } ] [ uint-8{ 1 2 3 4 5 6 7 8 } test-accesses ] unit-test
-
-[ { } ] [ double-4{ 1.0 2.0 3.0 4.0 } test-accesses ] unit-test
-[ { } ] [ longlong-4{ 1 2 3 4 } test-accesses ] unit-test
-[ { } ] [ ulonglong-4{ 1 2 3 4 } test-accesses ] unit-test
-
 "== Checking broadcast" print
 : test-broadcast ( seq -- failures )
     [ length >array ] keep
@@ -488,14 +485,6 @@ TUPLE: inconsistent-vector-test bool branch ;
 [ { } ] [ longlong-2{ 1 2 } test-broadcast ] unit-test
 [ { } ] [ ulonglong-2{ 1 2 } test-broadcast ] unit-test
 
-[ { } ] [ float-8{ 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 } test-broadcast ] unit-test
-[ { } ] [ int-8{ 1 2 3 4 5 6 7 8 } test-broadcast ] unit-test
-[ { } ] [ uint-8{ 1 2 3 4 5 6 7 8 } test-broadcast ] unit-test
-
-[ { } ] [ double-4{ 1.0 2.0 3.0 4.0 } test-broadcast ] unit-test
-[ { } ] [ longlong-4{ 1 2 3 4 } test-broadcast ] unit-test
-[ { } ] [ ulonglong-4{ 1 2 3 4 } test-broadcast ] unit-test
-
 ! Make sure we use the fallback in the correct situations
 [ int-4{ 3 3 3 3 } ] [ int-4{ 12 34 3 17 } 2 [ { int-4 fixnum } declare vbroadcast ] compile-call ] unit-test
 
@@ -529,37 +518,37 @@ TUPLE: inconsistent-vector-test bool branch ;
 STRUCT: simd-struct
 { x float-4 }
 { y longlong-2 }
-{ z double-4 }
-{ w int-8 } ;
+{ z double-2 }
+{ w int-4 } ;
 
 [ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
 
 [
     float-4{ 1 2 3 4 }
     longlong-2{ 2 1 }
-    double-4{ 4 3 2 1 }
-    int-8{ 1 2 3 4 5 6 7 8 }
+    double-2{ 4 3 }
+    int-4{ 1 2 3 4 }
 ] [
     simd-struct <struct>
     float-4{ 1 2 3 4 } >>x
     longlong-2{ 2 1 } >>y
-    double-4{ 4 3 2 1 } >>z
-    int-8{ 1 2 3 4 5 6 7 8 } >>w
+    double-2{ 4 3 } >>z
+    int-4{ 1 2 3 4 } >>w
     { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
 ] unit-test
 
 [
     float-4{ 1 2 3 4 }
     longlong-2{ 2 1 }
-    double-4{ 4 3 2 1 }
-    int-8{ 1 2 3 4 5 6 7 8 }
+    double-2{ 4 3 }
+    int-4{ 1 2 3 4 }
 ] [
     [
         simd-struct <struct>
         float-4{ 1 2 3 4 } >>x
         longlong-2{ 2 1 } >>y
-        double-4{ 4 3 2 1 } >>z
-        int-8{ 1 2 3 4 5 6 7 8 } >>w
+        double-2{ 4 3 } >>z
+        int-4{ 1 2 3 4 } >>w
         { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
     ] compile-call
 ] unit-test
@@ -570,8 +559,8 @@ STRUCT: simd-struct
 
 ! CSSA bug
 [ 8000000 ] [
-    int-8{ 1000 1000 1000 1000 1000 1000 1000 1000 }
-    [ { int-8 } declare dup [ * ] [ + ] 2map-reduce ] compile-call
+    int-4{ 1000 1000 1000 1000 }
+    [ { int-4 } declare dup [ * ] [ + ] 2map-reduce ] compile-call
 ] unit-test
 
 ! Coalescing was too aggressive
diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor
index c02c713b48..532e1a2232 100644
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@@ -123,6 +123,8 @@ GENERIC: simd-rep ( simd -- rep )
 
 <PRIVATE
 
+DEFER: simd-construct-op
+
 ! SIMD concrete type functor
 
 FUNCTOR: define-simd-128 ( T -- )
@@ -149,7 +151,6 @@ TUPLE: A < simd-128 ;
 M: A new-underlying    drop \ A boa ; inline
 M: A simd-rep          drop A-rep ; inline
 M: A simd-element-type drop ELT ; inline
-M: A length            drop N ; inline
 
 M: A set-nth-unsafe
     [ ELT boolean>element ] 2dip
@@ -162,10 +163,14 @@ M: A like drop dup \ A instance? [ >A ] unless ; inline
 : A-with ( n -- v ) \ A new simd-with ; inline
 : A-cast ( v -- v' ) \ A new simd-cast ; inline
 
-\ A-boa { \ A simd-boa } >quotation BOA-EFFECT define-inline
+\ A-boa \ A new N {
+    { 2 [ '[ _ [ (simd-gather-2) ] simd-construct-op ] ] }
+    { 4 [ '[ _ [ (simd-gather-4) ] simd-construct-op ] ] }
+    [ swap '[ _ _ nsequence ] ]
+} case BOA-EFFECT define-inline
 
-! M: A pprint-delims drop \ A{ \ } ;
-! SYNTAX: A{ \ } [ >A ] parse-literal ;
+M: A pprint-delims drop \ A{ \ } ;
+SYNTAX: A{ \ } [ >A ] parse-literal ;
 
 c:<c-type>
     byte-array >>class
@@ -186,17 +191,6 @@ PRIVATE>
 
 >>
 
-SIMD-128: char-16
-SIMD-128: uchar-16
-SIMD-128: short-8
-SIMD-128: ushort-8
-SIMD-128: int-4
-SIMD-128: uint-4
-SIMD-128: longlong-2
-SIMD-128: ulonglong-2
-SIMD-128: float-4
-SIMD-128: double-2
-
 : assert-positive ( x -- y ) ;
 
 ! SIMD vectors as sequences
@@ -351,6 +345,19 @@ M: simd-128 norm-sq dup v. assert-positive ; inline
 M: simd-128 norm      norm-sq sqrt ; inline
 M: simd-128 distance  v- norm ; inline
 
+! SIMD instances
+
+SIMD-128: char-16
+SIMD-128: uchar-16
+SIMD-128: short-8
+SIMD-128: ushort-8
+SIMD-128: int-4
+SIMD-128: uint-4
+SIMD-128: longlong-2
+SIMD-128: ulonglong-2
+SIMD-128: float-4
+SIMD-128: double-2
+
 ! misc
 
 M: simd-128 vshuffle ( u perm -- v )

From 59d85f7ad62f3dd1154d4b0e37a2ae8d10fbef11 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Wed, 18 Nov 2009 21:29:51 -0800
Subject: [PATCH 22/46] fix primitive emit fallback for simd intrinsics

---
 .../intrinsics/simd/backend/backend.factor    |  8 +++-
 .../cfg/intrinsics/simd/simd-tests.factor     | 46 +++++++++++++++++--
 2 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor
index f2ba9af41d..1a229672b9 100644
--- a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor
+++ b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor
@@ -3,7 +3,7 @@ USING: accessors arrays assocs classes combinators
 combinators.short-circuit compiler.cfg.builder.blocks
 compiler.cfg.registers compiler.cfg.stacks
 compiler.cfg.stacks.local compiler.tree.propagation.info
-cpu.architecture effects fry generalizations help.lint.checks
+cpu.architecture effects fry generalizations
 kernel locals macros math namespaces quotations sequences
 splitting stack-checker words ;
 IN: compiler.cfg.intrinsics.simd.backend
@@ -145,6 +145,12 @@ CONSTANT: can-has-words
 
 ! Intrinsic code emission
 
+MACRO: check-elements ( quots -- )
+    [ length '[ _ firstn ] ]
+    [ '[ _ spread ] ]
+    [ length 1 - \ and <repetition> [ ] like ]
+    tri 3append ;
+
 MACRO: if-literals-match ( quots -- )
     [ length ] [ ] [ length ] tri
     ! n quots n
diff --git a/basis/compiler/cfg/intrinsics/simd/simd-tests.factor b/basis/compiler/cfg/intrinsics/simd/simd-tests.factor
index 27b3b38b01..2d38e541a4 100644
--- a/basis/compiler/cfg/intrinsics/simd/simd-tests.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd-tests.factor
@@ -1,9 +1,9 @@
 ! (c)2009 Joe Groff bsd license
 USING: arrays assocs biassocs byte-arrays byte-arrays.hex
-classes compiler.cfg.comparisons compiler.cfg.instructions
+classes compiler.cfg compiler.cfg.comparisons compiler.cfg.instructions
 compiler.cfg.intrinsics.simd compiler.cfg.registers
-compiler.cfg.stacks.local compiler.tree
-compiler.tree.propagation.info cpu.architecture fry kernel
+compiler.cfg.stacks.height compiler.cfg.stacks.local compiler.tree
+compiler.tree.propagation.info cpu.architecture fry hashtables kernel
 locals make namespaces sequences system tools.test words ;
 IN: compiler.cfg.intrinsics.simd.tests
 
@@ -34,8 +34,25 @@ IN: compiler.cfg.intrinsics.simd.tests
         } }
     } ;
 
+: test-node-nonliteral-rep ( -- node )
+    T{ #call
+        { in-d  { 1 2 3 4 } }
+        { out-d { 5 } }
+        { info H{
+            { 1 T{ value-info { class byte-array } } }
+            { 2 T{ value-info { class byte-array } } }
+            { 3 T{ value-info { class byte-array } } }
+            { 4 T{ value-info { class object } } }
+            { 5 T{ value-info { class byte-array } } }
+        } }
+    } ;
+
 : test-compiler-env ( -- x )
     H{ } clone
+        T{ basic-block { id 0 } }
+            [ \ basic-block pick set-at ]
+            [ 0 swap associate \ ds-heights pick set-at ]
+            [ 0 swap associate \ rs-heights pick set-at ] tri
         T{ current-height { d 0 } { r 0 } { emit-d 0 } { emit-r 0 } } \ current-height pick set-at
         H{ } clone \ local-peek-set pick set-at
         H{ } clone \ replace-mapping pick set-at
@@ -61,6 +78,13 @@ IN: compiler.cfg.intrinsics.simd.tests
         ] with-variable
     ] make-classes ; inline
 
+: test-emit-nonliteral-rep ( cpu quot -- node )
+    [
+        [ new \ cpu ] dip '[
+            test-compiler-env [ test-node-nonliteral-rep @ ] bind
+        ] with-variable
+    ] make-classes ; inline
+
 CONSTANT: signed-reps
     { char-16-rep short-8-rep int-4-rep longlong-2-rep float-4-rep double-2-rep }
 CONSTANT: all-reps
@@ -504,5 +528,19 @@ unit-test
 
 ! select
 [ { ##shuffle-vector-imm ##vector>scalar } ]
-[ shuffle-imm-cpu int-4-rep [ emit-simd-select ] test-emit ]
+[ shuffle-imm-cpu 1 int-4-rep [ emit-simd-select ] test-emit-literal ]
 unit-test
+
+! test with nonliteral/invalid reps
+[ { ##inc-d ##branch } ]
+[ simple-ops-cpu [ emit-simd-v+ ] test-emit-nonliteral-rep ]
+unit-test
+
+[ { ##branch } ]
+[ simple-ops-cpu f [ emit-simd-v+ ] test-emit ]
+unit-test
+
+[ { ##branch } ]
+[ simple-ops-cpu 3 [ emit-simd-v+ ] test-emit ]
+unit-test
+

From c07f3212f9f718a09ebf39037ce07af815bc0882 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Wed, 18 Nov 2009 22:18:42 -0800
Subject: [PATCH 23/46] fix effect of set-alien-vector

---
 basis/math/vectors/simd/simd.factor | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor
index 532e1a2232..84dfce065e 100644
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@@ -87,8 +87,8 @@ PRIVATE>
 
 PRIVATE>
 
-: alien-vector     ( c-ptr n rep -- value ) \ alien-vector bad-simd-call ;
-: set-alien-vector ( c-ptr n rep -- value ) \ set-alien-vector bad-simd-call ;
+: alien-vector     (       c-ptr n rep -- value ) \ alien-vector bad-simd-call ;
+: set-alien-vector ( value c-ptr n rep --       ) \ set-alien-vector bad-simd-call ;
 
 <PRIVATE
 

From 736be2716243efdd0442803e642783effbd1d27f Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Thu, 19 Nov 2009 11:29:15 -0800
Subject: [PATCH 24/46] move methods back into simd functor so the intrinsics
 can compile

---
 basis/math/vectors/simd/simd.factor | 264 +++++++++++++---------------
 1 file changed, 127 insertions(+), 137 deletions(-)

diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor
index 84dfce065e..5d96df8b7f 100644
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@@ -118,13 +118,61 @@ GENERIC: simd-element-type ( obj -- c-type )
 GENERIC: simd-rep ( simd -- rep )
 
 <<
+: assert-positive ( x -- y ) ;
+
 : rep-length ( rep -- n )
     16 swap rep-component-type heap-size /i ; foldable
+>>
 
+<<
 <PRIVATE
 
 DEFER: simd-construct-op
 
+! Unboxers for SIMD operations
+: if-both-vectors ( a b rep t f -- )
+    [ 2over [ simd-128? ] both? ] 2dip if ; inline
+
+: if-both-vectors-match ( a b rep t f -- )
+    [ 3dup [ drop [ simd-128? ] both? ] [ '[ simd-rep _ eq? ] both? ] 3bi and ]
+    2dip if ; inline
+
+: simd-construct-op ( exemplar quot: ( rep -- v ) -- v )
+    [ dup simd-rep ] dip curry make-underlying ; inline
+
+: simd-unbox ( a -- a (a) )
+    [ ] [ underlying>> ] bi ; inline
+
+: v->v-op ( a rep quot: ( (a) rep -- (c) ) fallback-quot -- c )
+    drop [ simd-unbox ] 2dip 2curry make-underlying ; inline
+
+: vn->v-op ( a n rep quot: ( (a) n rep -- (c) ) fallback-quot -- c )
+    drop [ simd-unbox ] 3dip 3curry make-underlying ; inline
+
+: v->n-op ( a rep quot: ( (a) rep -- n ) fallback-quot -- n )
+    drop [ underlying>> ] 2dip call ; inline
+
+: (vv->v-op) ( a b rep quot: ( (a) (b) rep -- (c) ) -- c )
+    [ [ simd-unbox ] [ underlying>> ] bi* ] 2dip 3curry make-underlying ; inline
+
+: (vv->n-op) ( a b rep quot: ( (a) (b) rep -- n ) -- n )
+    [ [ underlying>> ] bi@ ] 2dip 3curry call ; inline
+    
+: vv->v-op ( a b rep quot: ( (a) (b) rep -- (c) ) fallback-quot -- c )
+    [ '[ _ (vv->v-op) ] ] [ '[ drop @ ] ] bi* if-both-vectors-match ; inline
+
+: vv'->v-op ( a b rep quot: ( (a) (b) rep -- (c) ) fallback-quot -- c )
+    [ '[ _ (vv->v-op) ] ] [ '[ drop @ ] ] bi* if-both-vectors ; inline
+
+: vv->n-op ( a b rep quot: ( (a) (b) rep -- n ) fallback-quot -- n )
+    [ '[ _ (vv->n-op) ] ] [ '[ drop @ ] ] bi* if-both-vectors-match ; inline
+
+PRIVATE>
+>>
+
+<<
+<PRIVATE
+
 ! SIMD concrete type functor
 
 FUNCTOR: define-simd-128 ( T -- )
@@ -163,6 +211,85 @@ M: A like drop dup \ A instance? [ >A ] unless ; inline
 : A-with ( n -- v ) \ A new simd-with ; inline
 : A-cast ( v -- v' ) \ A new simd-cast ; inline
 
+! SIMD vectors as sequences
+
+M: A hashcode* underlying>> hashcode* ; inline
+M: A clone [ clone ] change-underlying ; inline
+M: A length drop N ; inline
+M: A nth-unsafe swap \ A-rep (simd-select) ; inline
+M: A c:byte-length drop 16 ; inline
+
+M: A new-sequence
+    2dup length =
+    [ nip [ 16 (byte-array) ] make-underlying ]
+    [ length bad-simd-length ] if ; inline
+
+M: A equal?
+    \ A [ drop v= vall? ] [ 3drop f ] if-both-vectors-match ; inline
+
+! SIMD primitive operations
+
+M: A v+                \ A [ (simd-v+)                ] [ call-next-method ] vv->v-op ; inline
+M: A v-                \ A [ (simd-v-)                ] [ call-next-method ] vv->v-op ; inline
+M: A vneg              \ A [ (simd-vneg)              ] [ call-next-method ] v->v-op  ; inline
+M: A v+-               \ A [ (simd-v+-)               ] [ call-next-method ] vv->v-op ; inline
+M: A vs+               \ A [ (simd-vs+)               ] [ call-next-method ] vv->v-op ; inline
+M: A vs-               \ A [ (simd-vs-)               ] [ call-next-method ] vv->v-op ; inline
+M: A vs*               \ A [ (simd-vs*)               ] [ call-next-method ] vv->v-op ; inline
+M: A v*                \ A [ (simd-v*)                ] [ call-next-method ] vv->v-op ; inline
+M: A v/                \ A [ (simd-v/)                ] [ call-next-method ] vv->v-op ; inline
+M: A vmin              \ A [ (simd-vmin)              ] [ call-next-method ] vv->v-op ; inline
+M: A vmax              \ A [ (simd-vmax)              ] [ call-next-method ] vv->v-op ; inline
+M: A v.                \ A [ (simd-v.)                ] [ call-next-method ] vv->n-op ; inline
+M: A vsqrt             \ A [ (simd-vsqrt)             ] [ call-next-method ] v->v-op  ; inline
+M: A sum               \ A [ (simd-sum)               ] [ call-next-method ] v->n-op  ; inline
+M: A vabs              \ A [ (simd-vabs)              ] [ call-next-method ] v->v-op  ; inline
+M: A vbitand           \ A [ (simd-vbitand)           ] [ call-next-method ] vv->v-op ; inline
+M: A vbitandn          \ A [ (simd-vbitandn)          ] [ call-next-method ] vv->v-op ; inline
+M: A vbitor            \ A [ (simd-vbitor)            ] [ call-next-method ] vv->v-op ; inline
+M: A vbitxor           \ A [ (simd-vbitxor)           ] [ call-next-method ] vv->v-op ; inline
+M: A vbitnot           \ A [ (simd-vbitnot)           ] [ call-next-method ] v->v-op  ; inline
+M: A vand              \ A [ (simd-vand)              ] [ call-next-method ] vv->v-op ; inline
+M: A vandn             \ A [ (simd-vandn)             ] [ call-next-method ] vv->v-op ; inline
+M: A vor               \ A [ (simd-vor)               ] [ call-next-method ] vv->v-op ; inline
+M: A vxor              \ A [ (simd-vxor)              ] [ call-next-method ] vv->v-op ; inline
+M: A vnot              \ A [ (simd-vnot)              ] [ call-next-method ] v->v-op  ; inline
+M: A vlshift           \ A [ (simd-vlshift)           ] [ call-next-method ] vn->v-op ; inline
+M: A vrshift           \ A [ (simd-vrshift)           ] [ call-next-method ] vn->v-op ; inline
+M: A hlshift           \ A [ (simd-hlshift)           ] [ call-next-method ] vn->v-op ; inline
+M: A hrshift           \ A [ (simd-hrshift)           ] [ call-next-method ] vn->v-op ; inline
+M: A vshuffle-elements \ A [ (simd-vshuffle-elements) ] [ call-next-method ] vn->v-op ; inline
+M: A vshuffle-bytes    \ A [ (simd-vshuffle-bytes)    ] [ call-next-method ] vv->v-op ; inline
+M: A (vmerge-head)     \ A [ (simd-vmerge-head)       ] [ call-next-method ] vv->v-op ; inline
+M: A (vmerge-tail)     \ A [ (simd-vmerge-tail)       ] [ call-next-method ] vv->v-op ; inline
+M: A v<=               \ A [ (simd-v<=)               ] [ call-next-method ] vv->v-op ; inline
+M: A v<                \ A [ (simd-v<)                ] [ call-next-method ] vv->v-op ; inline
+M: A v=                \ A [ (simd-v=)                ] [ call-next-method ] vv->v-op ; inline
+M: A v>                \ A [ (simd-v>)                ] [ call-next-method ] vv->v-op ; inline
+M: A v>=               \ A [ (simd-v>=)               ] [ call-next-method ] vv->v-op ; inline
+M: A vunordered?       \ A [ (simd-vunordered?)       ] [ call-next-method ] vv->v-op ; inline
+M: A vany?             \ A [ (simd-vany?)             ] [ call-next-method ] v->n-op  ; inline
+M: A vall?             \ A [ (simd-vall?)             ] [ call-next-method ] v->n-op  ; inline
+M: A vnone?            \ A [ (simd-vnone?)            ] [ call-next-method ] v->n-op  ; inline
+
+! SIMD high-level specializations
+
+M: A vbroadcast [ swap nth ] keep simd-with ; inline
+M: A n+v [ simd-with ] keep v+ ; inline
+M: A n-v [ simd-with ] keep v- ; inline
+M: A n*v [ simd-with ] keep v* ; inline
+M: A n/v [ simd-with ] keep v/ ; inline
+M: A v+n over simd-with v+ ; inline
+M: A v-n over simd-with v- ; inline
+M: A v*n over simd-with v* ; inline
+M: A v/n over simd-with v/ ; inline
+M: A norm-sq dup v. assert-positive ; inline
+M: A norm      norm-sq sqrt ; inline
+M: A distance  v- norm ; inline
+
+! M: simd-128 >pprint-sequence ;
+! M: simd-128 pprint* pprint-object ;
+
 \ A-boa \ A new N {
     { 2 [ '[ _ [ (simd-gather-2) ] simd-construct-op ] ] }
     { 4 [ '[ _ [ (simd-gather-4) ] simd-construct-op ] ] }
@@ -191,85 +318,8 @@ PRIVATE>
 
 >>
 
-: assert-positive ( x -- y ) ;
-
-! SIMD vectors as sequences
-
-M: simd-128 hashcode* underlying>> hashcode* ; inline
-M: simd-128 clone [ clone ] change-underlying ; inline
-M: simd-128 length simd-rep rep-length ; inline
-M: simd-128 nth-unsafe [ nip ] 2keep simd-rep (simd-select) ; inline
-M: simd-128 c:byte-length drop 16 ; inline
-
-M: simd-128 new-sequence
-    2dup length =
-    [ nip [ 16 (byte-array) ] make-underlying ]
-    [ length bad-simd-length ] if ; inline
-
-! M: simd-128 >pprint-sequence ;
-! M: simd-128 pprint* pprint-object ;
-
 INSTANCE: simd-128 sequence
 
-! Unboxers for SIMD operations
-<<
-<PRIVATE
-
-: if-both-vectors ( a b t f -- )
-    [ 2dup [ simd-128? ] both? ] 2dip if ; inline
-
-: if-both-vectors-match ( a b t f -- )
-    [ 2dup [ [ simd-128? ] both? ] [ [ simd-rep ] bi@ eq? ] 2bi and ]
-    2dip if ; inline
-
-: simd-construct-op ( exemplar quot: ( rep -- v ) -- v )
-    [ dup simd-rep ] dip curry make-underlying ; inline
-
-: simd-unbox ( a -- a (a) a-rep )
-    [ ] [ underlying>> ] [ simd-rep ] tri ; inline
-
-: simd-v->v-op ( a quot: ( (a) rep -- (c) ) -- c )
-    [ simd-unbox ] dip 2curry make-underlying ; inline
-
-: simd-vn->v-op ( a n quot: ( (a) n rep -- (c) ) -- c )
-    [ simd-unbox ] [ swap ] [ 3curry ] tri* make-underlying ; inline
-
-: simd-v->n-op ( a quot: ( (a) rep -- n ) -- n )
-    [ [ underlying>> ] [ simd-rep ] bi ] dip call ; inline
-
-: ((simd-vv->v-op)) ( a b quot: ( (a) (b) rep -- (c) ) -- c )
-    [ simd-unbox ] [ underlying>> swap ] [ 3curry ] tri* make-underlying ; inline
-
-: ((simd-vv->n-op)) ( a b quot: ( (a) (b) rep -- n ) -- n )
-    [ [ underlying>> ] [ simd-rep ] bi ]
-    [ underlying>> swap ] [ ] tri* call ; inline
-    
-: (simd-vv->v-op) ( a b quot: ( (a) (b) rep -- (c) ) fallback-quot -- c )
-    [ '[ _ ((simd-vv->v-op)) ] ] dip if-both-vectors-match ; inline
-
-: (simd-vv'->v-op) ( a b quot: ( (a) (b) rep -- (c) ) fallback-quot -- c )
-    [ '[ _ ((simd-vv->v-op)) ] ] dip if-both-vectors ; inline
-
-: (simd-vv->n-op) ( a b quot: ( (a) (b) rep -- n ) fallback-quot -- n )
-    [ '[ _ ((simd-vv->n-op)) ] ] dip if-both-vectors-match ; inline
-
-: (simd-method-fallback) ( accum word -- accum )
-    [ current-method get literalize \ (call-next-method) [ ] 2sequence suffix! ]
-    dip suffix! ; 
-
-SYNTAX: simd-vv->v-op
-    \ (simd-vv->v-op) (simd-method-fallback) ; 
-SYNTAX: simd-vv'->v-op
-    \ (simd-vv'->v-op) (simd-method-fallback) ;
-SYNTAX: simd-vv->n-op
-    \ (simd-vv->n-op) (simd-method-fallback) ; 
-
-PRIVATE>
->>
-
-M: simd-128 equal?
-    [ v= vall? ] [ 2drop f ] if-both-vectors-match ; inline
-
 ! SIMD constructors
 
 : simd-with ( n seq -- v )
@@ -285,66 +335,6 @@ MACRO: simd-boa ( class -- )
 : simd-cast ( v seq -- v' )
     [ underlying>> ] dip new-underlying ; inline
 
-! SIMD primitive operations
-
-M: simd-128 v+                 [ (simd-v+)                 ] simd-vv->v-op ; inline
-M: simd-128 v-                 [ (simd-v-)                 ] simd-vv->v-op ; inline
-M: simd-128 vneg               [ (simd-vneg)               ] simd-v->v-op  ; inline
-M: simd-128 v+-                [ (simd-v+-)                ] simd-vv->v-op ; inline
-M: simd-128 vs+                [ (simd-vs+)                ] simd-vv->v-op ; inline
-M: simd-128 vs-                [ (simd-vs-)                ] simd-vv->v-op ; inline
-M: simd-128 vs*                [ (simd-vs*)                ] simd-vv->v-op ; inline
-M: simd-128 v*                 [ (simd-v*)                 ] simd-vv->v-op ; inline
-M: simd-128 v/                 [ (simd-v/)                 ] simd-vv->v-op ; inline
-M: simd-128 vmin               [ (simd-vmin)               ] simd-vv->v-op ; inline
-M: simd-128 vmax               [ (simd-vmax)               ] simd-vv->v-op ; inline
-M: simd-128 v.                 [ (simd-v.)                 ] simd-vv->n-op ; inline
-M: simd-128 vsqrt              [ (simd-vsqrt)              ] simd-v->v-op  ; inline
-M: simd-128 sum                [ (simd-sum)                ] simd-v->n-op  ; inline
-M: simd-128 vabs               [ (simd-vabs)               ] simd-v->v-op  ; inline
-M: simd-128 vbitand            [ (simd-vbitand)            ] simd-vv->v-op ; inline
-M: simd-128 vbitandn           [ (simd-vbitandn)           ] simd-vv->v-op ; inline
-M: simd-128 vbitor             [ (simd-vbitor)             ] simd-vv->v-op ; inline
-M: simd-128 vbitxor            [ (simd-vbitxor)            ] simd-vv->v-op ; inline
-M: simd-128 vbitnot            [ (simd-vbitnot)            ] simd-v->v-op  ; inline
-M: simd-128 vand               [ (simd-vand)               ] simd-vv->v-op ; inline
-M: simd-128 vandn              [ (simd-vandn)              ] simd-vv->v-op ; inline
-M: simd-128 vor                [ (simd-vor)                ] simd-vv->v-op ; inline
-M: simd-128 vxor               [ (simd-vxor)               ] simd-vv->v-op ; inline
-M: simd-128 vnot               [ (simd-vnot)               ] simd-v->v-op  ; inline
-M: simd-128 vlshift            [ (simd-vlshift)            ] simd-vn->v-op ; inline
-M: simd-128 vrshift            [ (simd-vrshift)            ] simd-vn->v-op ; inline
-M: simd-128 hlshift            [ (simd-hlshift)            ] simd-vn->v-op ; inline
-M: simd-128 hrshift            [ (simd-hrshift)            ] simd-vn->v-op ; inline
-M: simd-128 vshuffle-elements  [ (simd-vshuffle-elements)  ] simd-vn->v-op ; inline
-M: simd-128 vshuffle-bytes     [ (simd-vshuffle-bytes)     ] simd-vv->v-op ; inline
-M: simd-128 (vmerge-head)      [ (simd-vmerge-head)        ] simd-vv->v-op ; inline
-M: simd-128 (vmerge-tail)      [ (simd-vmerge-tail)        ] simd-vv->v-op ; inline
-M: simd-128 v<=                [ (simd-v<=)                ] simd-vv->v-op ; inline
-M: simd-128 v<                 [ (simd-v<)                 ] simd-vv->v-op ; inline
-M: simd-128 v=                 [ (simd-v=)                 ] simd-vv->v-op ; inline
-M: simd-128 v>                 [ (simd-v>)                 ] simd-vv->v-op ; inline
-M: simd-128 v>=                [ (simd-v>=)                ] simd-vv->v-op ; inline
-M: simd-128 vunordered?        [ (simd-vunordered?)        ] simd-vv->v-op ; inline
-M: simd-128 vany?              [ (simd-vany?)              ] simd-v->n-op  ; inline
-M: simd-128 vall?              [ (simd-vall?)              ] simd-v->n-op  ; inline
-M: simd-128 vnone?             [ (simd-vnone?)             ] simd-v->n-op  ; inline
-
-! SIMD high-level specializations
-
-M: simd-128 vbroadcast [ swap nth ] keep simd-with ; inline
-M: simd-128 n+v [ simd-with ] keep v+ ; inline
-M: simd-128 n-v [ simd-with ] keep v- ; inline
-M: simd-128 n*v [ simd-with ] keep v* ; inline
-M: simd-128 n/v [ simd-with ] keep v/ ; inline
-M: simd-128 v+n over simd-with v+ ; inline
-M: simd-128 v-n over simd-with v- ; inline
-M: simd-128 v*n over simd-with v* ; inline
-M: simd-128 v/n over simd-with v/ ; inline
-M: simd-128 norm-sq dup v. assert-positive ; inline
-M: simd-128 norm      norm-sq sqrt ; inline
-M: simd-128 distance  v- norm ; inline
-
 ! SIMD instances
 
 SIMD-128: char-16

From 152b0d2df59a74a115a18444cc8b79fcb949c1a3 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Thu, 19 Nov 2009 11:53:46 -0800
Subject: [PATCH 25/46] break simd intrinsics into a separate vocab so they can
 be intrinsified before the simd methods compile

---
 .../compiler/cfg/intrinsics/simd/simd.factor  |   2 +-
 .../cfg/value-numbering/simd/simd.factor      |   2 +-
 .../tree/propagation/simd/simd.factor         |   2 +-
 basis/cpu/architecture/architecture.factor    |   5 +-
 basis/math/vectors/simd/simd.factor           | 101 ++++--------------
 5 files changed, 27 insertions(+), 85 deletions(-)

diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor
index 54f105ca02..5f4b71a846 100644
--- a/basis/compiler/cfg/intrinsics/simd/simd.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd.factor
@@ -2,7 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors alien alien.c-types byte-arrays fry
 classes.algebra cpu.architecture kernel math sequences
-math.vectors math.vectors.simd math.vectors.simd.private
+math.vectors math.vectors.simd.intrinsics
 macros generalizations combinators combinators.short-circuit
 arrays locals compiler.tree.propagation.info
 compiler.cfg.builder.blocks
diff --git a/basis/compiler/cfg/value-numbering/simd/simd.factor b/basis/compiler/cfg/value-numbering/simd/simd.factor
index 9bb4453313..16d38bc5bb 100644
--- a/basis/compiler/cfg/value-numbering/simd/simd.factor
+++ b/basis/compiler/cfg/value-numbering/simd/simd.factor
@@ -4,7 +4,7 @@ USING: accessors combinators combinators.short-circuit arrays
 fry kernel layouts math namespaces sequences cpu.architecture
 math.bitwise math.order classes
 vectors locals make alien.c-types io.binary grouping
-math.vectors.simd
+math.vectors.simd.intrinsics
 compiler.cfg
 compiler.cfg.registers
 compiler.cfg.comparisons
diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor
index b71a34e938..679cc09273 100644
--- a/basis/compiler/tree/propagation/simd/simd.factor
+++ b/basis/compiler/tree/propagation/simd/simd.factor
@@ -2,7 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors byte-arrays combinators fry sequences
 compiler.tree.propagation.info cpu.architecture kernel words math
-math.intervals math.vectors.simd math.vectors.simd.private ;
+math.intervals math.vectors.simd.intrinsics ;
 IN: compiler.tree.propagation.simd
 
 {
diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor
index f4d150033b..86d68caa66 100644
--- a/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@@ -1,6 +1,6 @@
 ! Copyright (C) 2006, 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays assocs generic kernel kernel.private
+USING: accessors alien.c-types arrays assocs generic kernel kernel.private
 math memory namespaces make sequences layouts system hashtables
 classes alien byte-arrays combinators words sets fry ;
 IN: cpu.architecture
@@ -160,6 +160,9 @@ M: ulonglong-scalar-rep rep-size drop 8 ;
 
 GENERIC: rep-component-type ( rep -- n )
 
+: rep-length ( rep -- n )
+    16 swap rep-component-type heap-size /i ; foldable
+
 ! Methods defined in alien.c-types
 
 GENERIC: scalar-rep-of ( rep -- rep' )
diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor
index 5d96df8b7f..9265df7343 100644
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@@ -1,18 +1,12 @@
 USING: accessors alien.c-types arrays byte-arrays classes combinators
 cpu.architecture effects fry functors generalizations generic
 generic.parser kernel lexer literals macros math math.functions
-math.vectors math.vectors.private namespaces parser
+math.vectors math.vectors.private math.vectors.simd.intrinsics namespaces parser
 prettyprint.custom quotations sequences sequences.private vocabs
 vocabs.loader words ;
 QUALIFIED-WITH: alien.c-types c
 IN: math.vectors.simd
 
-DEFER: vconvert
-DEFER: simd-with
-DEFER: simd-boa
-DEFER: simd-cast
-
-ERROR: bad-simd-call word ;
 ERROR: bad-simd-length got expected ;
 
 <<
@@ -30,68 +24,6 @@ PRIVATE>
 
 <PRIVATE
 
-! SIMD intrinsics
-
-: (simd-v+)                ( a b rep -- c ) \ v+ bad-simd-call ;
-: (simd-v-)                ( a b rep -- c ) \ v- bad-simd-call ;
-: (simd-vneg)              ( a   rep -- c ) \ vneg bad-simd-call ;
-: (simd-v+-)               ( a b rep -- c ) \ v+- bad-simd-call ;
-: (simd-vs+)               ( a b rep -- c ) \ vs+ bad-simd-call ;
-: (simd-vs-)               ( a b rep -- c ) \ vs- bad-simd-call ;
-: (simd-vs*)               ( a b rep -- c ) \ vs* bad-simd-call ;
-: (simd-v*)                ( a b rep -- c ) \ v* bad-simd-call ;
-: (simd-v/)                ( a b rep -- c ) \ v/ bad-simd-call ;
-: (simd-vmin)              ( a b rep -- c ) \ vmin bad-simd-call ;
-: (simd-vmax)              ( a b rep -- c ) \ vmax bad-simd-call ;
-: (simd-v.)                ( a b rep -- n ) \ v. bad-simd-call ;
-: (simd-vsqrt)             ( a   rep -- c ) \ vsqrt bad-simd-call ;
-: (simd-sum)               ( a   rep -- n ) \ sum bad-simd-call ;
-: (simd-vabs)              ( a   rep -- c ) \ vabs bad-simd-call ;
-: (simd-vbitand)           ( a b rep -- c ) \ vbitand bad-simd-call ;
-: (simd-vbitandn)          ( a b rep -- c ) \ vbitandn bad-simd-call ;
-: (simd-vbitor)            ( a b rep -- c ) \ vbitor bad-simd-call ;
-: (simd-vbitxor)           ( a b rep -- c ) \ vbitxor bad-simd-call ;
-: (simd-vbitnot)           ( a   rep -- c ) \ vbitnot bad-simd-call ;
-: (simd-vand)              ( a b rep -- c ) \ vand bad-simd-call ;
-: (simd-vandn)             ( a b rep -- c ) \ vandn bad-simd-call ;
-: (simd-vor)               ( a b rep -- c ) \ vor bad-simd-call ;
-: (simd-vxor)              ( a b rep -- c ) \ vxor bad-simd-call ;
-: (simd-vnot)              ( a   rep -- c ) \ vnot bad-simd-call ;
-: (simd-vlshift)           ( a n rep -- c ) \ vlshift bad-simd-call ;
-: (simd-vrshift)           ( a n rep -- c ) \ vrshift bad-simd-call ;
-: (simd-hlshift)           ( a n rep -- c ) \ hlshift bad-simd-call ;
-: (simd-hrshift)           ( a n rep -- c ) \ hrshift bad-simd-call ;
-: (simd-vshuffle-elements) ( a n rep -- c ) \ vshuffle-elements bad-simd-call ;
-: (simd-vshuffle-bytes)    ( a b rep -- c ) \ vshuffle-bytes bad-simd-call ;
-: (simd-vmerge-head)       ( a b rep -- c ) \ (vmerge-head) bad-simd-call ;
-: (simd-vmerge-tail)       ( a b rep -- c ) \ (vmerge-tail) bad-simd-call ;
-: (simd-v<=)               ( a b rep -- c ) \ v<= bad-simd-call ;
-: (simd-v<)                ( a b rep -- c ) \ v< bad-simd-call ;
-: (simd-v=)                ( a b rep -- c ) \ v= bad-simd-call ;
-: (simd-v>)                ( a b rep -- c ) \ v> bad-simd-call ;
-: (simd-v>=)               ( a b rep -- c ) \ v>= bad-simd-call ;
-: (simd-vunordered?)       ( a b rep -- c ) \ vunordered? bad-simd-call ;
-: (simd-vany?)             ( a   rep -- ? ) \ vany? bad-simd-call ;
-: (simd-vall?)             ( a   rep -- ? ) \ vall? bad-simd-call ;
-: (simd-vnone?)            ( a   rep -- ? ) \ vnone? bad-simd-call ;
-: (simd-v>float)           ( a   rep -- c ) \ vconvert bad-simd-call ;
-: (simd-v>integer)         ( a   rep -- c ) \ vconvert bad-simd-call ;
-: (simd-vpack-signed)      ( a b rep -- c ) \ vconvert bad-simd-call ;
-: (simd-vpack-unsigned)    ( a b rep -- c ) \ vconvert bad-simd-call ;
-: (simd-vunpack-head)      ( a   rep -- c ) \ vconvert bad-simd-call ;
-: (simd-vunpack-tail)      ( a   rep -- c ) \ vconvert bad-simd-call ;
-: (simd-with)              (   n rep -- v ) \ simd-with bad-simd-call ;
-: (simd-gather-2)          ( m n rep -- v ) \ simd-boa bad-simd-call ;
-: (simd-gather-4)          ( m n o p rep -- v ) \ simd-boa bad-simd-call ;
-: (simd-select)            ( a n rep -- n ) \ nth bad-simd-call ;
-
-PRIVATE>
-
-: alien-vector     (       c-ptr n rep -- value ) \ alien-vector bad-simd-call ;
-: set-alien-vector ( value c-ptr n rep --       ) \ set-alien-vector bad-simd-call ;
-
-<PRIVATE
-
 ! Helper for boolean vector literals
 
 : vector-true-value ( class -- value )
@@ -117,13 +49,6 @@ TUPLE: simd-128
 GENERIC: simd-element-type ( obj -- c-type )
 GENERIC: simd-rep ( simd -- rep )
 
-<<
-: assert-positive ( x -- y ) ;
-
-: rep-length ( rep -- n )
-    16 swap rep-component-type heap-size /i ; foldable
->>
-
 <<
 <PRIVATE
 
@@ -216,7 +141,25 @@ M: A like drop dup \ A instance? [ >A ] unless ; inline
 M: A hashcode* underlying>> hashcode* ; inline
 M: A clone [ clone ] change-underlying ; inline
 M: A length drop N ; inline
-M: A nth-unsafe swap \ A-rep (simd-select) ; inline
+M: A nth-unsafe
+    swap {
+        {  0 [  0 \ A-rep (simd-select) ] }
+        {  1 [  1 \ A-rep (simd-select) ] }
+        {  2 [  2 \ A-rep (simd-select) ] }
+        {  3 [  3 \ A-rep (simd-select) ] }
+        {  4 [  4 \ A-rep (simd-select) ] }
+        {  5 [  5 \ A-rep (simd-select) ] }
+        {  6 [  6 \ A-rep (simd-select) ] }
+        {  7 [  7 \ A-rep (simd-select) ] }
+        {  8 [  8 \ A-rep (simd-select) ] }
+        {  9 [  9 \ A-rep (simd-select) ] }
+        { 10 [ 10 \ A-rep (simd-select) ] }
+        { 11 [ 11 \ A-rep (simd-select) ] }
+        { 12 [ 12 \ A-rep (simd-select) ] }
+        { 13 [ 13 \ A-rep (simd-select) ] }
+        { 14 [ 14 \ A-rep (simd-select) ] }
+        { 15 [ 15 \ A-rep (simd-select) ] }
+    } case ; inline 
 M: A c:byte-length drop 16 ; inline
 
 M: A new-sequence
@@ -353,10 +296,6 @@ SIMD-128: double-2
 M: simd-128 vshuffle ( u perm -- v )
     vshuffle-bytes ; inline
 
-"compiler.tree.propagation.simd" require
-"compiler.cfg.intrinsics.simd" require
-"compiler.cfg.value-numbering.simd" require
-
 "mirrors" vocab [
     "math.vectors.simd.mirrors" require
 ] when

From 7f546daead278cf3b293dd24b57c9321bd45996b Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Thu, 19 Nov 2009 12:15:24 -0800
Subject: [PATCH 26/46] fill in longlong cases for %scalar>integer (won't work
 on 32bit yet!)

---
 basis/cpu/x86/x86.factor | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor
index a55489dcee..b752935f9c 100644
--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@@ -1323,8 +1323,15 @@ M: x86 %shr-vector-imm-reps %shr-vector-reps ;
 
 M: x86 %integer>scalar drop MOVD ;
 
+! XXX the longlong versions won't work on x86.32
 M:: x86 %scalar>integer ( dst src rep -- )
     rep {
+        { longlong-scalar-rep [
+            dst src MOVD
+        ] }
+        { ulonglong-scalar-rep [
+            dst src MOVD
+        ] }
         { int-scalar-rep [
             dst 32-bit-version-of src MOVD
             dst dst 32-bit-version-of

From 6794d1465245b3d6323b559504e9aad18601d21b Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Thu, 19 Nov 2009 12:17:14 -0800
Subject: [PATCH 27/46] typo

---
 basis/math/vectors/simd/simd.factor | 84 ++++++++++++++---------------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor
index 9265df7343..0e40e787df 100644
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@@ -172,48 +172,48 @@ M: A equal?
 
 ! SIMD primitive operations
 
-M: A v+                \ A [ (simd-v+)                ] [ call-next-method ] vv->v-op ; inline
-M: A v-                \ A [ (simd-v-)                ] [ call-next-method ] vv->v-op ; inline
-M: A vneg              \ A [ (simd-vneg)              ] [ call-next-method ] v->v-op  ; inline
-M: A v+-               \ A [ (simd-v+-)               ] [ call-next-method ] vv->v-op ; inline
-M: A vs+               \ A [ (simd-vs+)               ] [ call-next-method ] vv->v-op ; inline
-M: A vs-               \ A [ (simd-vs-)               ] [ call-next-method ] vv->v-op ; inline
-M: A vs*               \ A [ (simd-vs*)               ] [ call-next-method ] vv->v-op ; inline
-M: A v*                \ A [ (simd-v*)                ] [ call-next-method ] vv->v-op ; inline
-M: A v/                \ A [ (simd-v/)                ] [ call-next-method ] vv->v-op ; inline
-M: A vmin              \ A [ (simd-vmin)              ] [ call-next-method ] vv->v-op ; inline
-M: A vmax              \ A [ (simd-vmax)              ] [ call-next-method ] vv->v-op ; inline
-M: A v.                \ A [ (simd-v.)                ] [ call-next-method ] vv->n-op ; inline
-M: A vsqrt             \ A [ (simd-vsqrt)             ] [ call-next-method ] v->v-op  ; inline
-M: A sum               \ A [ (simd-sum)               ] [ call-next-method ] v->n-op  ; inline
-M: A vabs              \ A [ (simd-vabs)              ] [ call-next-method ] v->v-op  ; inline
-M: A vbitand           \ A [ (simd-vbitand)           ] [ call-next-method ] vv->v-op ; inline
-M: A vbitandn          \ A [ (simd-vbitandn)          ] [ call-next-method ] vv->v-op ; inline
-M: A vbitor            \ A [ (simd-vbitor)            ] [ call-next-method ] vv->v-op ; inline
-M: A vbitxor           \ A [ (simd-vbitxor)           ] [ call-next-method ] vv->v-op ; inline
-M: A vbitnot           \ A [ (simd-vbitnot)           ] [ call-next-method ] v->v-op  ; inline
-M: A vand              \ A [ (simd-vand)              ] [ call-next-method ] vv->v-op ; inline
-M: A vandn             \ A [ (simd-vandn)             ] [ call-next-method ] vv->v-op ; inline
-M: A vor               \ A [ (simd-vor)               ] [ call-next-method ] vv->v-op ; inline
-M: A vxor              \ A [ (simd-vxor)              ] [ call-next-method ] vv->v-op ; inline
-M: A vnot              \ A [ (simd-vnot)              ] [ call-next-method ] v->v-op  ; inline
-M: A vlshift           \ A [ (simd-vlshift)           ] [ call-next-method ] vn->v-op ; inline
-M: A vrshift           \ A [ (simd-vrshift)           ] [ call-next-method ] vn->v-op ; inline
-M: A hlshift           \ A [ (simd-hlshift)           ] [ call-next-method ] vn->v-op ; inline
-M: A hrshift           \ A [ (simd-hrshift)           ] [ call-next-method ] vn->v-op ; inline
-M: A vshuffle-elements \ A [ (simd-vshuffle-elements) ] [ call-next-method ] vn->v-op ; inline
-M: A vshuffle-bytes    \ A [ (simd-vshuffle-bytes)    ] [ call-next-method ] vv->v-op ; inline
-M: A (vmerge-head)     \ A [ (simd-vmerge-head)       ] [ call-next-method ] vv->v-op ; inline
-M: A (vmerge-tail)     \ A [ (simd-vmerge-tail)       ] [ call-next-method ] vv->v-op ; inline
-M: A v<=               \ A [ (simd-v<=)               ] [ call-next-method ] vv->v-op ; inline
-M: A v<                \ A [ (simd-v<)                ] [ call-next-method ] vv->v-op ; inline
-M: A v=                \ A [ (simd-v=)                ] [ call-next-method ] vv->v-op ; inline
-M: A v>                \ A [ (simd-v>)                ] [ call-next-method ] vv->v-op ; inline
-M: A v>=               \ A [ (simd-v>=)               ] [ call-next-method ] vv->v-op ; inline
-M: A vunordered?       \ A [ (simd-vunordered?)       ] [ call-next-method ] vv->v-op ; inline
-M: A vany?             \ A [ (simd-vany?)             ] [ call-next-method ] v->n-op  ; inline
-M: A vall?             \ A [ (simd-vall?)             ] [ call-next-method ] v->n-op  ; inline
-M: A vnone?            \ A [ (simd-vnone?)            ] [ call-next-method ] v->n-op  ; inline
+M: A v+                \ A-rep [ (simd-v+)                ] [ call-next-method ] vv->v-op ; inline
+M: A v-                \ A-rep [ (simd-v-)                ] [ call-next-method ] vv->v-op ; inline
+M: A vneg              \ A-rep [ (simd-vneg)              ] [ call-next-method ] v->v-op  ; inline
+M: A v+-               \ A-rep [ (simd-v+-)               ] [ call-next-method ] vv->v-op ; inline
+M: A vs+               \ A-rep [ (simd-vs+)               ] [ call-next-method ] vv->v-op ; inline
+M: A vs-               \ A-rep [ (simd-vs-)               ] [ call-next-method ] vv->v-op ; inline
+M: A vs*               \ A-rep [ (simd-vs*)               ] [ call-next-method ] vv->v-op ; inline
+M: A v*                \ A-rep [ (simd-v*)                ] [ call-next-method ] vv->v-op ; inline
+M: A v/                \ A-rep [ (simd-v/)                ] [ call-next-method ] vv->v-op ; inline
+M: A vmin              \ A-rep [ (simd-vmin)              ] [ call-next-method ] vv->v-op ; inline
+M: A vmax              \ A-rep [ (simd-vmax)              ] [ call-next-method ] vv->v-op ; inline
+M: A v.                \ A-rep [ (simd-v.)                ] [ call-next-method ] vv->n-op ; inline
+M: A vsqrt             \ A-rep [ (simd-vsqrt)             ] [ call-next-method ] v->v-op  ; inline
+M: A sum               \ A-rep [ (simd-sum)               ] [ call-next-method ] v->n-op  ; inline
+M: A vabs              \ A-rep [ (simd-vabs)              ] [ call-next-method ] v->v-op  ; inline
+M: A vbitand           \ A-rep [ (simd-vbitand)           ] [ call-next-method ] vv->v-op ; inline
+M: A vbitandn          \ A-rep [ (simd-vbitandn)          ] [ call-next-method ] vv->v-op ; inline
+M: A vbitor            \ A-rep [ (simd-vbitor)            ] [ call-next-method ] vv->v-op ; inline
+M: A vbitxor           \ A-rep [ (simd-vbitxor)           ] [ call-next-method ] vv->v-op ; inline
+M: A vbitnot           \ A-rep [ (simd-vbitnot)           ] [ call-next-method ] v->v-op  ; inline
+M: A vand              \ A-rep [ (simd-vand)              ] [ call-next-method ] vv->v-op ; inline
+M: A vandn             \ A-rep [ (simd-vandn)             ] [ call-next-method ] vv->v-op ; inline
+M: A vor               \ A-rep [ (simd-vor)               ] [ call-next-method ] vv->v-op ; inline
+M: A vxor              \ A-rep [ (simd-vxor)              ] [ call-next-method ] vv->v-op ; inline
+M: A vnot              \ A-rep [ (simd-vnot)              ] [ call-next-method ] v->v-op  ; inline
+M: A vlshift           \ A-rep [ (simd-vlshift)           ] [ call-next-method ] vn->v-op ; inline
+M: A vrshift           \ A-rep [ (simd-vrshift)           ] [ call-next-method ] vn->v-op ; inline
+M: A hlshift           \ A-rep [ (simd-hlshift)           ] [ call-next-method ] vn->v-op ; inline
+M: A hrshift           \ A-rep [ (simd-hrshift)           ] [ call-next-method ] vn->v-op ; inline
+M: A vshuffle-elements \ A-rep [ (simd-vshuffle-elements) ] [ call-next-method ] vn->v-op ; inline
+M: A vshuffle-bytes    \ A-rep [ (simd-vshuffle-bytes)    ] [ call-next-method ] vv->v-op ; inline
+M: A (vmerge-head)     \ A-rep [ (simd-vmerge-head)       ] [ call-next-method ] vv->v-op ; inline
+M: A (vmerge-tail)     \ A-rep [ (simd-vmerge-tail)       ] [ call-next-method ] vv->v-op ; inline
+M: A v<=               \ A-rep [ (simd-v<=)               ] [ call-next-method ] vv->v-op ; inline
+M: A v<                \ A-rep [ (simd-v<)                ] [ call-next-method ] vv->v-op ; inline
+M: A v=                \ A-rep [ (simd-v=)                ] [ call-next-method ] vv->v-op ; inline
+M: A v>                \ A-rep [ (simd-v>)                ] [ call-next-method ] vv->v-op ; inline
+M: A v>=               \ A-rep [ (simd-v>=)               ] [ call-next-method ] vv->v-op ; inline
+M: A vunordered?       \ A-rep [ (simd-vunordered?)       ] [ call-next-method ] vv->v-op ; inline
+M: A vany?             \ A-rep [ (simd-vany?)             ] [ call-next-method ] v->n-op  ; inline
+M: A vall?             \ A-rep [ (simd-vall?)             ] [ call-next-method ] v->n-op  ; inline
+M: A vnone?            \ A-rep [ (simd-vnone?)            ] [ call-next-method ] v->n-op  ; inline
 
 ! SIMD high-level specializations
 

From d94ffe6d786eaff0c2497e303826ded907b80e53 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Mon, 23 Nov 2009 21:24:55 -0800
Subject: [PATCH 28/46] scalar fallbacks for simd intrinsics

---
 .../tree/propagation/simd/simd.factor         | 133 +++++-----
 basis/cpu/architecture/architecture.factor    |  15 +-
 .../vectors/simd/intrinsics/intrinsics.factor | 232 ++++++++++++++++++
 basis/math/vectors/simd/simd-docs.factor      |   3 +-
 basis/math/vectors/simd/simd.factor           |   3 +
 5 files changed, 327 insertions(+), 59 deletions(-)
 create mode 100644 basis/math/vectors/simd/intrinsics/intrinsics.factor

diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor
index 679cc09273..93eb2a8ecc 100644
--- a/basis/compiler/tree/propagation/simd/simd.factor
+++ b/basis/compiler/tree/propagation/simd/simd.factor
@@ -1,57 +1,76 @@
 ! Copyright (C) 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: accessors byte-arrays combinators fry sequences
-compiler.tree.propagation.info cpu.architecture kernel words math
+USING: accessors byte-arrays combinators continuations fry sequences
+compiler.tree.propagation.info cpu.architecture kernel words make math
 math.intervals math.vectors.simd.intrinsics ;
 IN: compiler.tree.propagation.simd
 
-{
-    (simd-v+)
-    (simd-v-)
-    (simd-vneg)
-    (simd-vabs)
-    (simd-v+-)
-    (simd-v*)
-    (simd-v/)
-    (simd-vmin)
-    (simd-vmax)
-    (simd-sum)
-    (simd-vsqrt)
-    (simd-vbitand)
-    (simd-vbitandn)
-    (simd-vbitor)
-    (simd-vbitxor)
-    (simd-vbitnot)
-    (simd-vand)
-    (simd-vandn)
-    (simd-vor)
-    (simd-vxor)
-    (simd-vnot)
-    (simd-vlshift)
-    (simd-vrshift)
-    (simd-hlshift)
-    (simd-hrshift)
-    (simd-vshuffle-bytes)
-    (simd-vshuffle-elements)
-    (simd-vmerge-head)
-    (simd-vmerge-tail)
-    (simd-v>float)
-    (simd-v>integer)
-    (simd-vpack-signed)
-    (simd-vpack-unsigned)
-    (simd-vunpack-head)
-    (simd-vunpack-tail)
-    (simd-v<=)
-    (simd-v<)
-    (simd-v=)
-    (simd-v>)
-    (simd-v>=)
-    (simd-vunordered?)
-    (simd-with)
-    (simd-gather-2)
-    (simd-gather-4)
-    alien-vector
-} [ { byte-array } "default-output-classes" set-word-prop ] each
+CONSTANT: vector>vector-intrinsics
+    {
+        (simd-v+)
+        (simd-v-)
+        (simd-vneg)
+        (simd-v+-)
+        (simd-vs+)
+        (simd-vs-)
+        (simd-vs*)
+        (simd-v*)
+        (simd-v/)
+        (simd-vmin)
+        (simd-vmax)
+        (simd-vsqrt)
+        (simd-vabs)
+        (simd-vbitand)
+        (simd-vbitandn)
+        (simd-vbitor)
+        (simd-vbitxor)
+        (simd-vbitnot)
+        (simd-vand)
+        (simd-vandn)
+        (simd-vor)
+        (simd-vxor)
+        (simd-vnot)
+        (simd-vlshift)
+        (simd-vrshift)
+        (simd-hlshift)
+        (simd-hrshift)
+        (simd-vshuffle-elements)
+        (simd-vshuffle-bytes)
+        (simd-vmerge-head)
+        (simd-vmerge-tail)
+        (simd-v<=)
+        (simd-v<)
+        (simd-v=)
+        (simd-v>)
+        (simd-v>=)
+        (simd-vunordered?)
+        (simd-v>float)
+        (simd-v>integer)
+        (simd-vpack-signed)
+        (simd-vpack-unsigned)
+        (simd-vunpack-head)
+        (simd-vunpack-tail)
+        (simd-with)
+        (simd-gather-2)
+        (simd-gather-4)
+        alien-vector
+    }
+
+CONSTANT: vector-other-intrinsics
+    {
+        (simd-v.)
+        (simd-sum)
+        (simd-vany?)
+        (simd-vall?)
+        (simd-vnone?)
+        (simd-select)
+        set-alien-vector
+    }
+
+: vector-intrinsics ( -- x )
+    vector>vector-intrinsics vector-other-intrinsics append ;
+
+vector>vector-intrinsics [ { byte-array } "default-output-classes" set-word-prop ] each
 
 : scalar-output-class ( rep -- class )
     dup literal?>> [
@@ -79,12 +98,16 @@ IN: compiler.tree.propagation.simd
     real [0,inf] <class/interval-info> value-info-intersect
 ] "outputs" set-word-prop
 
-! If SIMD is not available, inline alien-vector and set-alien-vector
-! to get a speedup
+: try-intrinsic ( node intrinsic-quot -- ? )
+    '[ [ _ call( node -- ) ] { } make drop t ] [ 2drop f ] recover ;
+
 : inline-unless-intrinsic ( word -- )
-    dup '[ drop _ dup "intrinsic" word-prop [ drop f ] [ def>> ] if ]
+    dup '[
+        _ swap over "intrinsic" word-prop
+        ! word node intrinsic
+        [ try-intrinsic [ drop f ] [ def>> ] if ]
+        [ def>> ] if*
+    ]
     "custom-inlining" set-word-prop ;
 
-\ alien-vector inline-unless-intrinsic
-
-\ set-alien-vector inline-unless-intrinsic
+vector-intrinsics [ inline-unless-intrinsic ] each
diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor
index 86d68caa66..6631cec189 100644
--- a/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@@ -105,7 +105,7 @@ scalar-rep ;
         { ushort-scalar-rep    short-scalar-rep }
         { uint-scalar-rep      int-scalar-rep }
         { ulonglong-scalar-rep longlong-scalar-rep }
-    } ?at drop ;
+    } ?at drop ; foldable
 
 : widen-vector-rep ( rep -- rep' )
     {
@@ -116,7 +116,18 @@ scalar-rep ;
         { ushort-8-rep    uint-4-rep      }
         { uint-4-rep      ulonglong-2-rep }
         { float-4-rep     double-2-rep    }
-    } at ;
+    } at ; foldable
+
+: narrow-vector-rep ( rep -- rep' )
+    {
+        { short-8-rep     char-16-rep     }
+        { int-4-rep       short-8-rep     }
+        { longlong-2-rep  int-4-rep       }
+        { ushort-8-rep    uchar-16-rep    }
+        { uint-4-rep      ushort-8-rep    }
+        { ulonglong-2-rep uint-4-rep      }
+        { double-2-rep    float-4-rep     }
+    } at ; foldable
 
 ! Register classes
 SINGLETONS: int-regs float-regs ;
diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor
new file mode 100644
index 0000000000..a236db00c9
--- /dev/null
+++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor
@@ -0,0 +1,232 @@
+! (c)2009 Slava Pestov, Joe Groff bsd license
+USING: accessors alien alien.c-types alien.data combinators
+cords cpu.architecture fry generalizations kernel libc locals
+math math.libm math.order math.ranges math.vectors sequences
+sequences.private specialized-arrays vocabs.loader ;
+QUALIFIED-WITH: alien.c-types c
+SPECIALIZED-ARRAYS:
+    c:char c:short c:int c:longlong
+    c:uchar c:ushort c:uint c:ulonglong
+    c:float c:double ;
+IN: math.vectors.simd.intrinsics
+
+: assert-positive ( x -- y ) ;
+
+<PRIVATE
+
+: >bitwise-vector-rep ( rep -- rep' )
+    {
+        { float-4-rep    [ uint-4-rep      ] }
+        { double-2-rep   [ ulonglong-2-rep ] }
+        [ ]
+    } case ; foldable
+
+: >uint-vector-rep ( rep -- rep' )
+    {
+        { longlong-2-rep [ ulonglong-2-rep ] }
+        { int-4-rep      [ uint-4-rep      ] }
+        { short-8-rep    [ ushort-8-rep    ] }
+        { char-16-rep    [ uchar-16-rep    ] }
+        [ ]
+    } case ; foldable
+
+: >int-vector-rep ( rep -- rep' )
+    {
+        { float-4-rep  [ int-4-rep      ] }
+        { double-2-rep [ longlong-2-rep ] }
+    } case ; foldable
+
+: >float-vector-rep ( rep -- rep' )
+    {
+        { int-4-rep      [ float-4-rep  ] }
+        { longlong-2-rep [ double-2-rep ] }
+    } case ; foldable
+
+: [byte>rep-array] ( rep -- class )
+    {
+        { char-16-rep      [ [ byte-array>char-array      ] ] }
+        { uchar-16-rep     [ [ byte-array>uchar-array     ] ] }
+        { short-8-rep      [ [ byte-array>short-array     ] ] }
+        { ushort-8-rep     [ [ byte-array>ushort-array    ] ] }
+        { int-4-rep        [ [ byte-array>int-array       ] ] }
+        { uint-4-rep       [ [ byte-array>uint-array      ] ] }
+        { longlong-2-rep   [ [ byte-array>longlong-array  ] ] }
+        { ulonglong-2-rep  [ [ byte-array>ulonglong-array ] ] }
+        { float-4-rep      [ [ byte-array>float-array     ] ] }
+        { double-2-rep     [ [ byte-array>double-array    ] ] }
+    } case ; foldable
+
+: [>rep-array] ( rep -- class )
+    {
+        { char-16-rep      [ [ >char-array      ] ] }
+        { uchar-16-rep     [ [ >uchar-array     ] ] }
+        { short-8-rep      [ [ >short-array     ] ] }
+        { ushort-8-rep     [ [ >ushort-array    ] ] }
+        { int-4-rep        [ [ >int-array       ] ] }
+        { uint-4-rep       [ [ >uint-array      ] ] }
+        { longlong-2-rep   [ [ >longlong-array  ] ] }
+        { ulonglong-2-rep  [ [ >ulonglong-array ] ] }
+        { float-4-rep      [ [ >float-array     ] ] }
+        { double-2-rep     [ [ >double-array    ] ] }
+    } case ; foldable
+
+: [<rep-array>] ( rep -- class )
+    {
+        { char-16-rep      [ [ 16 (char-array)      ] ] }
+        { uchar-16-rep     [ [ 16 (uchar-array)     ] ] }
+        { short-8-rep      [ [  8 (short-array)     ] ] }
+        { ushort-8-rep     [ [  8 (ushort-array)    ] ] }
+        { int-4-rep        [ [  4 (int-array)       ] ] }
+        { uint-4-rep       [ [  4 (uint-array)      ] ] }
+        { longlong-2-rep   [ [  2 (longlong-array)  ] ] }
+        { ulonglong-2-rep  [ [  2 (ulonglong-array) ] ] }
+        { float-4-rep      [ [  4 (float-array)     ] ] }
+        { double-2-rep     [ [  2 (double-array)    ] ] }
+    } case ; foldable
+
+: rep-tf-values ( rep -- t f )
+    float-vector-rep? [ -1 bits>double 0.0 ] [ -1 0 ] if ;
+
+: >rep-array ( a rep -- a' )
+    [byte>rep-array] call( a -- a' ) ; inline
+: 2>rep-array ( a b rep -- a' b' )
+    [byte>rep-array] '[ _ call( a -- a' ) ] bi@ ; inline
+: <rep-array> ( rep -- a' )
+    [<rep-array>] call( -- a' ) ; inline
+
+: components-map ( a rep quot -- c )
+    [ >rep-array ] dip map underlying>> ; inline
+: components-2map ( a b rep quot -- c )
+    [ 2>rep-array ] dip 2map underlying>> ; inline
+: components-reduce ( a rep quot -- x )
+    [ >rep-array [ ] ] dip map-reduce ; inline
+
+: bitwise-components-map ( a rep quot -- c )
+    [ >bitwise-vector-rep >rep-array ] dip map underlying>> ; inline
+: bitwise-components-2map ( a b rep quot -- c )
+    [ >bitwise-vector-rep 2>rep-array ] dip 2map underlying>> ; inline
+: bitwise-components-reduce ( a rep quot -- x )
+    [ >bitwise-vector-rep >rep-array [ ] ] dip map-reduce ; inline
+
+:: (vshuffle) ( a elts rep -- c )
+    a rep >rep-array :> a'
+    rep <rep-array> :> c'
+    elts [| from to |
+        from a' nth-unsafe
+        rep rep-length 1 - bitand
+        to c' set-nth-unsafe
+    ] each-index
+    c' underlying>> ; inline
+
+PRIVATE>
+
+: (simd-v+)                ( a b rep -- c ) [ + ] components-2map ;
+: (simd-v-)                ( a b rep -- c ) [ - ] components-2map ;
+: (simd-vneg)              ( a   rep -- c ) [ neg ] components-map ;
+:: (simd-v+-)              ( a b rep -- c ) 
+    a b rep 2>rep-array :> ( a' b' )
+    rep <rep-array> :> c'
+    0  rep length 1 -  2 <range> [| n |
+        n     a' nth-unsafe n     b' nth-unsafe -
+        n     c' set-nth-unsafe
+
+        n 1 + a' nth-unsafe n 1 + b' nth-unsafe +
+        n 1 + c' set-nth-unsafe
+    ] each
+    c' underlying>> ;
+: (simd-vs+)               ( a b rep -- c ) dup '[ + _ c-type-clamp ] components-2map ;
+: (simd-vs-)               ( a b rep -- c ) dup '[ - _ c-type-clamp ] components-2map ;
+: (simd-vs*)               ( a b rep -- c ) dup '[ - _ c-type-clamp ] components-2map ;
+: (simd-v*)                ( a b rep -- c ) [ * ] components-2map ;
+: (simd-v/)                ( a b rep -- c ) [ / ] components-2map ;
+: (simd-vmin)              ( a b rep -- c ) [ min ] components-2map ;
+: (simd-vmax)              ( a b rep -- c ) [ max ] components-2map ;
+: (simd-v.)                ( a b rep -- n )
+    [ 2>rep-array [ [ first ] bi@ * ] 2keep ] keep
+    1 swap rep-length [a,b) [ '[ _ swap nth-unsafe ] bi@ * + ] with with each ;
+: (simd-vsqrt)             ( a   rep -- c ) [ fsqrt ] components-map ;
+: (simd-sum)               ( a   rep -- n ) [ + ] components-reduce ;
+: (simd-vabs)              ( a   rep -- c ) [ abs ] components-map ;
+: (simd-vbitand)           ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
+: (simd-vbitandn)          ( a b rep -- c ) [ [ not ] dip bitand ] bitwise-components-2map ;
+: (simd-vbitor)            ( a b rep -- c ) [ bitor ] bitwise-components-2map ;
+: (simd-vbitxor)           ( a b rep -- c ) [ bitxor ] bitwise-components-2map ;
+: (simd-vbitnot)           ( a   rep -- c ) [ bitnot ] bitwise-components-map ;
+: (simd-vand)              ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
+: (simd-vandn)             ( a b rep -- c ) [ [ not ] dip bitand ] bitwise-components-2map ;
+: (simd-vor)               ( a b rep -- c ) [ bitor ] bitwise-components-2map ;
+: (simd-vxor)              ( a b rep -- c ) [ bitxor ] bitwise-components-2map ;
+: (simd-vnot)              ( a   rep -- c ) [ bitnot ] bitwise-components-map ;
+: (simd-vlshift)           ( a n rep -- c ) swap '[ _ shift ] bitwise-components-map ;
+: (simd-vrshift)           ( a n rep -- c ) swap '[ _ neg shift ] bitwise-components-map ;
+: (simd-hlshift)           ( a n rep -- c )
+    drop tail-slice 16 0 pad-tail ;
+: (simd-hrshift)           ( a n rep -- c )
+    drop head-slice 16 0 pad-head ;
+: (simd-vshuffle-elements) ( a n rep -- c ) [ rep-length 0 pad-tail ] keep (vshuffle) ;
+: (simd-vshuffle-bytes)    ( a b rep -- c ) drop uchar-16-rep (vshuffle) ;
+:: (simd-vmerge-head)      ( a b rep -- c )
+    a b rep 2>rep-array :> ( a' b' )
+    rep <rep-array> :> c'
+    rep rep-length 2 /i iota [| n |
+        n a' nth-unsafe n 2 *     c' set-nth-unsafe
+        n b' nth-unsafe n 2 * 1 + c' set-nth-unsafe
+    ] each
+    c' underlying>> ;
+:: (simd-vmerge-tail)      ( a b rep -- c )
+    a b rep 2>rep-array :> ( a' b' )
+    rep <rep-array> :> c'
+    rep rep-length 2 /i :> len
+    len iota [| n |
+        n len + a' nth-unsafe n 2 *     c' set-nth-unsafe
+        n len + b' nth-unsafe n 2 * 1 + c' set-nth-unsafe
+    ] each
+    c' underlying>> ;
+: (simd-v<=)               ( a b rep -- c )
+    dup rep-tf-values '[ <= _ _ ? ] components-2map ; 
+: (simd-v<)                ( a b rep -- c )
+    dup rep-tf-values '[ <  _ _ ? ] components-2map ;
+: (simd-v=)                ( a b rep -- c )
+    dup rep-tf-values '[ =  _ _ ? ] components-2map ;
+: (simd-v>)                ( a b rep -- c )
+    dup rep-tf-values '[ >  _ _ ? ] components-2map ;
+: (simd-v>=)               ( a b rep -- c )
+    dup rep-tf-values '[ >= _ _ ? ] components-2map ;
+: (simd-vunordered?)       ( a b rep -- c )
+    dup rep-tf-values '[ unordered? _ _ ? ] components-2map ;
+: (simd-vany?)             ( a   rep -- ? ) [ bitor  ] bitwise-components-reduce zero? not ;
+: (simd-vall?)             ( a   rep -- ? ) [ bitand ] bitwise-components-reduce zero? not ;
+: (simd-vnone?)            ( a   rep -- ? ) [ bitor  ] bitwise-components-reduce zero?     ;
+: (simd-v>float)           ( a   rep -- c )
+    [ >rep-array ] [ >float-vector-rep [>rep-array] ] bi call( i -- f ) ;
+: (simd-v>integer)         ( a   rep -- c )
+    [ >rep-array ] [ >int-vector-rep [>rep-array] ] bi call( i -- f ) ;
+: (simd-vpack-signed)      ( a b rep -- c )
+    [ 2>rep-array cord-append ]
+    [ narrow-vector-rep [ [<rep-array>] ] [ rep-component-type ] bi ] bi
+    '[ _ c-type-clamp ] swap map-as ;
+: (simd-vpack-unsigned)    ( a b rep -- c )
+    [ 2>rep-array cord-append ]
+    [ narrow-vector-rep >uint-vector-rep [ [<rep-array>] ] [ rep-component-type ] bi ] bi
+    '[ _ c-type-clamp ] swap map-as ;
+: (simd-vunpack-head)      ( a   rep -- c ) 
+    [ >rep-array ] [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ] bi
+    [ head-slice ] dip call( a' -- c' ) underlying>> ;
+: (simd-vunpack-tail)      ( a   rep -- c )
+    [ >rep-array ] [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ] bi
+    [ tail-slice ] dip call( a' -- c' ) underlying>> ;
+: (simd-with)              (   n rep -- v )
+    [ rep-length iota swap '[ _ ] ] [ <rep-array> ] bi replicate-as ;
+: (simd-gather-2)          ( m n rep -- v ) <rep-array> [ 2 set-firstn ] keep underlying>> ;
+: (simd-gather-4)          ( m n o p rep -- v ) <rep-array> [ 4 set-firstn ] keep underlying>> ;
+: (simd-select)            ( a n rep -- x ) [ swap ] dip >rep-array nth-unsafe ;
+
+: alien-vector     (       c-ptr n rep -- value )
+    [ swap <displaced-alien> ] dip rep-size memory>byte-array ;
+: set-alien-vector ( value c-ptr n rep --       )
+    [ swap <displaced-alien> swap ] dip rep-size memcpy ;
+
+"compiler.cfg.intrinsics.simd" require
+"compiler.tree.propagation.simd" require
+"compiler.cfg.value-numbering.simd" require
+
diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor
index d600b0bc24..8aeea4267d 100644
--- a/basis/math/vectors/simd/simd-docs.factor
+++ b/basis/math/vectors/simd/simd-docs.factor
@@ -1,5 +1,5 @@
 USING: classes.tuple.private cpu.architecture help.markup
-help.syntax kernel.private math math.vectors
+help.syntax kernel.private math math.vectors math.vectors.simd.intrinsics
 sequences ;
 IN: math.vectors.simd
 
@@ -163,7 +163,6 @@ ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives"
 { $list
     "They operate on raw byte arrays, with a separate “representation” parameter passed in to determine the type of the operands and result."
     "They are unsafe; passing values which are not byte arrays, or byte arrays with the wrong size, will dereference invalid memory and possibly crash Factor."
-    { "They do not have software fallbacks; if the current CPU does not have SIMD support, a " { $link bad-simd-call } " error will be thrown." }
 }
 "The compiler converts " { $link "math-vectors" } " into SIMD primitives automatically in cases where it is safe; this means that the input types are known to be SIMD vectors, and the CPU supports SIMD."
 $nl
diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor
index 0e40e787df..5289f3f393 100644
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@@ -95,6 +95,9 @@ DEFER: simd-construct-op
 PRIVATE>
 >>
 
+DEFER: simd-with
+DEFER: simd-cast
+
 <<
 <PRIVATE
 

From 65d8060075c64883865fe98bf9159ade8c19a7ab Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Tue, 24 Nov 2009 11:37:28 -0800
Subject: [PATCH 29/46] fix simd intrinsic compilation

---
 basis/compiler/cfg/builder/builder.factor        |  6 ++++++
 .../cfg/intrinsics/simd/backend/backend.factor   |  4 +++-
 basis/compiler/cfg/intrinsics/simd/simd.factor   | 16 ++++++++--------
 basis/compiler/tree/propagation/simd/simd.factor | 16 ++++++++++++----
 4 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/basis/compiler/cfg/builder/builder.factor b/basis/compiler/cfg/builder/builder.factor
index cf6215c5cd..f1b3447fc7 100755
--- a/basis/compiler/cfg/builder/builder.factor
+++ b/basis/compiler/cfg/builder/builder.factor
@@ -45,6 +45,12 @@ SYMBOL: loops
         end-stack-analysis
     ] with-scope ; inline
 
+: with-dummy-cfg-builder ( node quot -- )
+    [
+        [ V{ } clone procedures ] 2dip
+        '[ _ t t [ _ call( node -- ) ] with-cfg-builder ] with-variable
+    ] { } make drop ;
+
 GENERIC: emit-node ( node -- )
 
 : emit-nodes ( nodes -- )
diff --git a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor
index 1a229672b9..cf61a560d2 100644
--- a/basis/compiler/cfg/intrinsics/simd/backend/backend.factor
+++ b/basis/compiler/cfg/intrinsics/simd/backend/backend.factor
@@ -151,6 +151,8 @@ MACRO: check-elements ( quots -- )
     [ length 1 - \ and <repetition> [ ] like ]
     tri 3append ;
 
+ERROR: bad-simd-intrinsic node ;
+
 MACRO: if-literals-match ( quots -- )
     [ length ] [ ] [ length ] tri
     ! n quots n
@@ -165,7 +167,7 @@ MACRO: if-literals-match ( quots -- )
             ! node literals quot
             [ _ firstn ] dip call
             drop
-        ] [ 2drop emit-primitive ] if
+        ] [ 2drop bad-simd-intrinsic ] if
     ] ;
 
 CONSTANT: [unary]       [ ds-drop  ds-pop ]
diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor
index 5f4b71a846..a96a0b7cb3 100644
--- a/basis/compiler/cfg/intrinsics/simd/simd.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd.factor
@@ -77,14 +77,6 @@ IN: compiler.cfg.intrinsics.simd
         [ [ ^^fill-vector ] [ ^^xor-vector ] bi ]
     } v-vector-op ;
 
-:: ^minmax-compare-vector ( src1 src2 rep cc -- dst )
-    cc order-cc {
-        { cc<  [ src1 src2 rep ^^max-vector src1 rep cc/= ^^compare-vector ] }
-        { cc<= [ src1 src2 rep ^^min-vector src1 rep cc=  ^^compare-vector ] }
-        { cc>  [ src1 src2 rep ^^min-vector src1 rep cc/= ^^compare-vector ] }
-        { cc>= [ src1 src2 rep ^^max-vector src1 rep cc=  ^^compare-vector ] }
-    } case ;
-
 :: ^((compare-vector)) ( src1 src2 rep {cc,swap} -- dst )
     {cc,swap} first2 :> ( cc swap? )
     swap?
@@ -107,6 +99,14 @@ IN: compiler.cfg.intrinsics.simd
         not? [ rep ^not-vector ] when
     ] if ;
 
+:: ^minmax-compare-vector ( src1 src2 rep cc -- dst )
+    cc order-cc {
+        { cc<  [ src1 src2 rep ^^max-vector src1 rep cc/= ^(compare-vector) ] }
+        { cc<= [ src1 src2 rep ^^min-vector src1 rep cc=  ^(compare-vector) ] }
+        { cc>  [ src1 src2 rep ^^min-vector src1 rep cc/= ^(compare-vector) ] }
+        { cc>= [ src1 src2 rep ^^max-vector src1 rep cc=  ^(compare-vector) ] }
+    } case ;
+
 : ^compare-vector ( src1 src2 rep cc -- dst )
     {
         [ ^(compare-vector) ]
diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor
index 93eb2a8ecc..6002b15c1c 100644
--- a/basis/compiler/tree/propagation/simd/simd.factor
+++ b/basis/compiler/tree/propagation/simd/simd.factor
@@ -1,8 +1,9 @@
 ! Copyright (C) 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: accessors byte-arrays combinators continuations fry sequences
-compiler.tree.propagation.info cpu.architecture kernel words make math
-math.intervals math.vectors.simd.intrinsics ;
+USING: accessors assocs byte-arrays combinators compiler.cfg.builder
+continuations fry sequences compiler.tree.propagation.info
+cpu.architecture kernel words make math math.intervals
+math.vectors.simd.intrinsics ;
 IN: compiler.tree.propagation.simd
 
 CONSTANT: vector>vector-intrinsics
@@ -98,8 +99,15 @@ vector>vector-intrinsics [ { byte-array } "default-output-classes" set-word-prop
     real [0,inf] <class/interval-info> value-info-intersect
 ] "outputs" set-word-prop
 
+: clone-with-value-infos ( node -- node' )
+    clone dup in-d>> [ dup value-info ] H{ } map>assoc >>info ;
+
 : try-intrinsic ( node intrinsic-quot -- ? )
-    '[ [ _ call( node -- ) ] { } make drop t ] [ 2drop f ] recover ;
+    '[
+        _ clone-with-value-infos
+        _ with-dummy-cfg-builder
+        t
+    ] [ drop f ] recover ;
 
 : inline-unless-intrinsic ( word -- )
     dup '[

From 9c388bf7813cf2f291c0eea22a24cd796e2ce8b0 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Tue, 24 Nov 2009 12:50:27 -0800
Subject: [PATCH 30/46] update compiler.cfg.intrinsics.simd tests

---
 .../cfg/intrinsics/simd/simd-tests.factor      | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/basis/compiler/cfg/intrinsics/simd/simd-tests.factor b/basis/compiler/cfg/intrinsics/simd/simd-tests.factor
index 2d38e541a4..c2e233902e 100644
--- a/basis/compiler/cfg/intrinsics/simd/simd-tests.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd-tests.factor
@@ -1,10 +1,11 @@
 ! (c)2009 Joe Groff bsd license
 USING: arrays assocs biassocs byte-arrays byte-arrays.hex
 classes compiler.cfg compiler.cfg.comparisons compiler.cfg.instructions
-compiler.cfg.intrinsics.simd compiler.cfg.registers
-compiler.cfg.stacks.height compiler.cfg.stacks.local compiler.tree
-compiler.tree.propagation.info cpu.architecture fry hashtables kernel
-locals make namespaces sequences system tools.test words ;
+compiler.cfg.intrinsics.simd compiler.cfg.intrinsics.simd.backend
+compiler.cfg.registers compiler.cfg.stacks.height
+compiler.cfg.stacks.local compiler.tree compiler.tree.propagation.info
+cpu.architecture fry hashtables kernel locals make namespaces sequences
+system tools.test words ;
 IN: compiler.cfg.intrinsics.simd.tests
 
 :: test-node ( rep -- node ) 
@@ -532,15 +533,12 @@ unit-test
 unit-test
 
 ! test with nonliteral/invalid reps
-[ { ##inc-d ##branch } ]
 [ simple-ops-cpu [ emit-simd-v+ ] test-emit-nonliteral-rep ]
-unit-test
+[ bad-simd-intrinsic? ] must-fail-with
 
-[ { ##branch } ]
 [ simple-ops-cpu f [ emit-simd-v+ ] test-emit ]
-unit-test
+[ bad-simd-intrinsic? ] must-fail-with
 
-[ { ##branch } ]
 [ simple-ops-cpu 3 [ emit-simd-v+ ] test-emit ]
-unit-test
+[ bad-simd-intrinsic? ] must-fail-with
 

From c98eb8494337e128263b7f8cdbf0fa0db2011623 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Tue, 24 Nov 2009 18:30:12 -0800
Subject: [PATCH 31/46] make math.vectors.simd tests pass again

---
 .../compiler/cfg/intrinsics/simd/simd.factor  |  9 +-
 .../vectors/simd/intrinsics/intrinsics.factor | 28 +++---
 basis/math/vectors/simd/simd-tests.factor     |  9 +-
 basis/math/vectors/simd/simd.factor           | 91 +++++++------------
 basis/math/vectors/vectors.factor             |  6 --
 5 files changed, 57 insertions(+), 86 deletions(-)

diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor
index a96a0b7cb3..109ac6ce8e 100644
--- a/basis/compiler/cfg/intrinsics/simd/simd.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd.factor
@@ -253,14 +253,15 @@ IN: compiler.cfg.intrinsics.simd
             src rep ^unpack-vector-head :> head
             src rep ^unpack-vector-tail :> tail
             rep widen-vector-rep :> wide-rep
-            head tail wide-rep ^^add-vector wide-rep ^(sum-vector)
+            head tail wide-rep ^^add-vector wide-rep
+            ^(sum-vector)
         ] }
     } v-vector-op ;
 
 : shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
 
-: ^shuffle-vector-imm ( src1 src2 rep -- dst )
-    {
+: ^shuffle-vector-imm ( src1 shuffle rep -- dst )
+    [ rep-length 0 pad-tail ] keep {
         [ ^^shuffle-vector-imm ]
         [ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] bi ]
     } vl-vector-op ;
@@ -358,7 +359,7 @@ IN: compiler.cfg.intrinsics.simd
 : emit-simd-v. ( node -- )
     {
         [ ^^dot-vector ]
-        [ [ ^^mul-vector ] [ ^sum-vector ] bi ]
+        { float-vector-rep [ [ ^^mul-vector ] [ ^sum-vector ] bi ] }
     } emit-vv-vector-op ;
 
 : emit-simd-vsqrt ( node -- )
diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor
index a236db00c9..187c6db586 100644
--- a/basis/math/vectors/simd/intrinsics/intrinsics.factor
+++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor
@@ -112,8 +112,8 @@ IN: math.vectors.simd.intrinsics
     a rep >rep-array :> a'
     rep <rep-array> :> c'
     elts [| from to |
-        from a' nth-unsafe
-        rep rep-length 1 - bitand
+        from rep rep-length 1 - bitand
+           a' nth-unsafe
         to c' set-nth-unsafe
     ] each-index
     c' underlying>> ; inline
@@ -134,9 +134,12 @@ PRIVATE>
         n 1 + c' set-nth-unsafe
     ] each
     c' underlying>> ;
-: (simd-vs+)               ( a b rep -- c ) dup '[ + _ c-type-clamp ] components-2map ;
-: (simd-vs-)               ( a b rep -- c ) dup '[ - _ c-type-clamp ] components-2map ;
-: (simd-vs*)               ( a b rep -- c ) dup '[ - _ c-type-clamp ] components-2map ;
+: (simd-vs+)               ( a b rep -- c )
+    dup rep-component-type '[ + _ c-type-clamp ] components-2map ;
+: (simd-vs-)               ( a b rep -- c )
+    dup rep-component-type '[ - _ c-type-clamp ] components-2map ;
+: (simd-vs*)               ( a b rep -- c )
+    dup rep-component-type '[ * _ c-type-clamp ] components-2map ;
 : (simd-v*)                ( a b rep -- c ) [ * ] components-2map ;
 : (simd-v/)                ( a b rep -- c ) [ / ] components-2map ;
 : (simd-vmin)              ( a b rep -- c ) [ min ] components-2map ;
@@ -160,9 +163,9 @@ PRIVATE>
 : (simd-vlshift)           ( a n rep -- c ) swap '[ _ shift ] bitwise-components-map ;
 : (simd-vrshift)           ( a n rep -- c ) swap '[ _ neg shift ] bitwise-components-map ;
 : (simd-hlshift)           ( a n rep -- c )
-    drop tail-slice 16 0 pad-tail ;
+    drop head-slice* 16 0 pad-head ;
 : (simd-hrshift)           ( a n rep -- c )
-    drop head-slice 16 0 pad-head ;
+    drop tail-slice 16 0 pad-tail ;
 : (simd-vshuffle-elements) ( a n rep -- c ) [ rep-length 0 pad-tail ] keep (vshuffle) ;
 : (simd-vshuffle-bytes)    ( a b rep -- c ) drop uchar-16-rep (vshuffle) ;
 :: (simd-vmerge-head)      ( a b rep -- c )
@@ -198,17 +201,17 @@ PRIVATE>
 : (simd-vall?)             ( a   rep -- ? ) [ bitand ] bitwise-components-reduce zero? not ;
 : (simd-vnone?)            ( a   rep -- ? ) [ bitor  ] bitwise-components-reduce zero?     ;
 : (simd-v>float)           ( a   rep -- c )
-    [ >rep-array ] [ >float-vector-rep [>rep-array] ] bi call( i -- f ) ;
+    [ >rep-array ] [ >float-vector-rep [>rep-array] ] bi call( i -- f ) underlying>> ;
 : (simd-v>integer)         ( a   rep -- c )
-    [ >rep-array ] [ >int-vector-rep [>rep-array] ] bi call( i -- f ) ;
+    [ >rep-array ] [ >int-vector-rep [>rep-array] ] bi call( i -- f ) underlying>> ;
 : (simd-vpack-signed)      ( a b rep -- c )
     [ 2>rep-array cord-append ]
     [ narrow-vector-rep [ [<rep-array>] ] [ rep-component-type ] bi ] bi
-    '[ _ c-type-clamp ] swap map-as ;
+    '[ _ c-type-clamp ] swap map-as underlying>> ;
 : (simd-vpack-unsigned)    ( a b rep -- c )
     [ 2>rep-array cord-append ]
     [ narrow-vector-rep >uint-vector-rep [ [<rep-array>] ] [ rep-component-type ] bi ] bi
-    '[ _ c-type-clamp ] swap map-as ;
+    '[ _ c-type-clamp ] swap map-as underlying>> ;
 : (simd-vunpack-head)      ( a   rep -- c ) 
     [ >rep-array ] [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ] bi
     [ head-slice ] dip call( a' -- c' ) underlying>> ;
@@ -216,7 +219,8 @@ PRIVATE>
     [ >rep-array ] [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ] bi
     [ tail-slice ] dip call( a' -- c' ) underlying>> ;
 : (simd-with)              (   n rep -- v )
-    [ rep-length iota swap '[ _ ] ] [ <rep-array> ] bi replicate-as ;
+    [ rep-length iota swap '[ _ ] ] [ <rep-array> ] bi replicate-as 
+    underlying>> ;
 : (simd-gather-2)          ( m n rep -- v ) <rep-array> [ 2 set-firstn ] keep underlying>> ;
 : (simd-gather-4)          ( m n o p rep -- v ) <rep-array> [ 4 set-firstn ] keep underlying>> ;
 : (simd-select)            ( a n rep -- x ) [ swap ] dip >rep-array nth-unsafe ;
diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor
index 1fb947921c..b590589345 100644
--- a/basis/math/vectors/simd/simd-tests.factor
+++ b/basis/math/vectors/simd/simd-tests.factor
@@ -5,7 +5,8 @@ math.vectors.simd.private prettyprint random sequences system
 tools.test vocabs assocs compiler.cfg.debugger words
 locals combinators cpu.architecture namespaces byte-arrays alien
 specialized-arrays classes.struct eval classes.algebra sets
-quotations math.constants compiler.units ;
+quotations math.constants compiler.units splitting ;
+FROM: math.vectors.simd.intrinsics => alien-vector set-alien-vector ;
 QUALIFIED-WITH: alien.c-types c
 SPECIALIZED-ARRAY: c:float
 IN: math.vectors.simd.tests
@@ -261,8 +262,8 @@ simd-classes&reps [
 
 : check-boolean-ops ( class elt-class compare-quot -- seq )
     [
-        [ boolean-ops [ dup word-schema ] { } map>assoc ] 2dip
-        '[ first2 inputs _ _ check-boolean-op ]
+        [ boolean-ops [ dup vector-words at ] { } map>assoc ] 2dip
+        '[ first2 vector-word-inputs _ _ check-boolean-op ]
     ] dip check-optimizer ; inline
 
 simd-classes&reps [
@@ -558,7 +559,7 @@ STRUCT: simd-struct
 [ ] [ char-16 new 1array stack. ] unit-test
 
 ! CSSA bug
-[ 8000000 ] [
+[ 4000000 ] [
     int-4{ 1000 1000 1000 1000 }
     [ { int-4 } declare dup [ * ] [ + ] 2map-reduce ] compile-call
 ] unit-test
diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor
index 5289f3f393..bde69b5dbd 100644
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@@ -49,6 +49,9 @@ TUPLE: simd-128
 GENERIC: simd-element-type ( obj -- c-type )
 GENERIC: simd-rep ( simd -- rep )
 
+M: object simd-element-type drop f ;
+M: object simd-rep drop f ;
+
 <<
 <PRIVATE
 
@@ -62,9 +65,6 @@ DEFER: simd-construct-op
     [ 3dup [ drop [ simd-128? ] both? ] [ '[ simd-rep _ eq? ] both? ] 3bi and ]
     2dip if ; inline
 
-: simd-construct-op ( exemplar quot: ( rep -- v ) -- v )
-    [ dup simd-rep ] dip curry make-underlying ; inline
-
 : simd-unbox ( a -- a (a) )
     [ ] [ underlying>> ] bi ; inline
 
@@ -74,6 +74,9 @@ DEFER: simd-construct-op
 : vn->v-op ( a n rep quot: ( (a) n rep -- (c) ) fallback-quot -- c )
     drop [ simd-unbox ] 3dip 3curry make-underlying ; inline
 
+: vn->n-op ( a n rep quot: ( (a) n rep -- n ) fallback-quot -- n )
+    drop [ underlying>> ] 3dip call ; inline
+
 : v->n-op ( a rep quot: ( (a) rep -- n ) fallback-quot -- n )
     drop [ underlying>> ] 2dip call ; inline
 
@@ -95,9 +98,6 @@ DEFER: simd-construct-op
 PRIVATE>
 >>
 
-DEFER: simd-with
-DEFER: simd-cast
-
 <<
 <PRIVATE
 
@@ -113,8 +113,9 @@ A-with DEFINES       ${T}-with
 A-cast DEFINES       ${T}-cast
 A{     DEFINES       ${T}{
 
-ELT   [ A-rep rep-component-type ]
-N     [ A-rep rep-length ]
+ELT     [ A-rep rep-component-type ]
+N       [ A-rep rep-length ]
+COERCER [ ELT c-type-class "coercer" word-prop [ ] or ]
 
 SET-NTH [ ELT dup c:c-setter c:array-accessor ]
 
@@ -136,8 +137,8 @@ M: A set-nth-unsafe
 
 M: A like drop dup \ A instance? [ >A ] unless ; inline
 
-: A-with ( n -- v ) \ A new simd-with ; inline
-: A-cast ( v -- v' ) \ A new simd-cast ; inline
+: A-with ( n -- v ) COERCER call \ A-rep (simd-with) \ A boa ; inline
+: A-cast ( v -- v' ) underlying>> \ A boa ; inline
 
 ! SIMD vectors as sequences
 
@@ -145,24 +146,7 @@ M: A hashcode* underlying>> hashcode* ; inline
 M: A clone [ clone ] change-underlying ; inline
 M: A length drop N ; inline
 M: A nth-unsafe
-    swap {
-        {  0 [  0 \ A-rep (simd-select) ] }
-        {  1 [  1 \ A-rep (simd-select) ] }
-        {  2 [  2 \ A-rep (simd-select) ] }
-        {  3 [  3 \ A-rep (simd-select) ] }
-        {  4 [  4 \ A-rep (simd-select) ] }
-        {  5 [  5 \ A-rep (simd-select) ] }
-        {  6 [  6 \ A-rep (simd-select) ] }
-        {  7 [  7 \ A-rep (simd-select) ] }
-        {  8 [  8 \ A-rep (simd-select) ] }
-        {  9 [  9 \ A-rep (simd-select) ] }
-        { 10 [ 10 \ A-rep (simd-select) ] }
-        { 11 [ 11 \ A-rep (simd-select) ] }
-        { 12 [ 12 \ A-rep (simd-select) ] }
-        { 13 [ 13 \ A-rep (simd-select) ] }
-        { 14 [ 14 \ A-rep (simd-select) ] }
-        { 15 [ 15 \ A-rep (simd-select) ] }
-    } case ; inline 
+    swap \ A-rep [ (simd-select) ] [ call-next-method ] vn->n-op ; inline
 M: A c:byte-length drop 16 ; inline
 
 M: A new-sequence
@@ -171,7 +155,7 @@ M: A new-sequence
     [ length bad-simd-length ] if ; inline
 
 M: A equal?
-    \ A [ drop v= vall? ] [ 3drop f ] if-both-vectors-match ; inline
+    \ A-rep [ drop v= vall? ] [ 3drop f ] if-both-vectors-match ; inline
 
 ! SIMD primitive operations
 
@@ -205,7 +189,7 @@ M: A vrshift           \ A-rep [ (simd-vrshift)           ] [ call-next-method ]
 M: A hlshift           \ A-rep [ (simd-hlshift)           ] [ call-next-method ] vn->v-op ; inline
 M: A hrshift           \ A-rep [ (simd-hrshift)           ] [ call-next-method ] vn->v-op ; inline
 M: A vshuffle-elements \ A-rep [ (simd-vshuffle-elements) ] [ call-next-method ] vn->v-op ; inline
-M: A vshuffle-bytes    \ A-rep [ (simd-vshuffle-bytes)    ] [ call-next-method ] vv->v-op ; inline
+M: A vshuffle-bytes    \ A-rep [ (simd-vshuffle-bytes)    ] [ call-next-method ] vv'->v-op ; inline
 M: A (vmerge-head)     \ A-rep [ (simd-vmerge-head)       ] [ call-next-method ] vv->v-op ; inline
 M: A (vmerge-tail)     \ A-rep [ (simd-vmerge-tail)       ] [ call-next-method ] vv->v-op ; inline
 M: A v<=               \ A-rep [ (simd-v<=)               ] [ call-next-method ] vv->v-op ; inline
@@ -220,15 +204,15 @@ M: A vnone?            \ A-rep [ (simd-vnone?)            ] [ call-next-method ]
 
 ! SIMD high-level specializations
 
-M: A vbroadcast [ swap nth ] keep simd-with ; inline
-M: A n+v [ simd-with ] keep v+ ; inline
-M: A n-v [ simd-with ] keep v- ; inline
-M: A n*v [ simd-with ] keep v* ; inline
-M: A n/v [ simd-with ] keep v/ ; inline
-M: A v+n over simd-with v+ ; inline
-M: A v-n over simd-with v- ; inline
-M: A v*n over simd-with v* ; inline
-M: A v/n over simd-with v/ ; inline
+M: A vbroadcast swap nth A-with ; inline
+M: A n+v [ A-with ] dip v+ ; inline
+M: A n-v [ A-with ] dip v- ; inline
+M: A n*v [ A-with ] dip v* ; inline
+M: A n/v [ A-with ] dip v/ ; inline
+M: A v+n A-with v+ ; inline
+M: A v-n A-with v- ; inline
+M: A v*n A-with v* ; inline
+M: A v/n A-with v/ ; inline
 M: A norm-sq dup v. assert-positive ; inline
 M: A norm      norm-sq sqrt ; inline
 M: A distance  v- norm ; inline
@@ -236,11 +220,13 @@ M: A distance  v- norm ; inline
 ! M: simd-128 >pprint-sequence ;
 ! M: simd-128 pprint* pprint-object ;
 
-\ A-boa \ A new N {
-    { 2 [ '[ _ [ (simd-gather-2) ] simd-construct-op ] ] }
-    { 4 [ '[ _ [ (simd-gather-4) ] simd-construct-op ] ] }
-    [ swap '[ _ _ nsequence ] ]
-} case BOA-EFFECT define-inline
+\ A-boa
+[ COERCER N napply ] N {
+    { 2 [ [ A-rep (simd-gather-2) A boa ] ] }
+    { 4 [ [ A-rep (simd-gather-4) A boa ] ] }
+    [ \ A new '[ _ _ nsequence ] ]
+} case compose
+BOA-EFFECT define-inline
 
 M: A pprint-delims drop \ A{ \ } ;
 SYNTAX: A{ \ } [ >A ] parse-literal ;
@@ -248,7 +234,7 @@ SYNTAX: A{ \ } [ >A ] parse-literal ;
 c:<c-type>
     byte-array >>class
     A >>boxed-class
-    [ A-rep alien-vector \ A boa ] >>getter
+    [ A-rep alien-vector A boa ] >>getter
     [ [ underlying>> ] 2dip A-rep set-alien-vector ] >>setter
     16 >>size
     16 >>align
@@ -266,21 +252,6 @@ PRIVATE>
 
 INSTANCE: simd-128 sequence
 
-! SIMD constructors
-
-: simd-with ( n seq -- v )
-    [ (simd-with) ] simd-construct-op ; inline
-
-MACRO: simd-boa ( class -- )
-    new dup length {
-        { 2 [ '[ _ [ (simd-gather-2) ] simd-construct-op ] ] }
-        { 4 [ '[ _ [ (simd-gather-4) ] simd-construct-op ] ] }
-        [ swap '[ _ _ nsequence ] ]
-    } case ;
-
-: simd-cast ( v seq -- v' )
-    [ underlying>> ] dip new-underlying ; inline
-
 ! SIMD instances
 
 SIMD-128: char-16
diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor
index d524ba309f..c0b129e6d2 100644
--- a/basis/math/vectors/vectors.factor
+++ b/basis/math/vectors/vectors.factor
@@ -108,10 +108,6 @@ M: object vshuffle-elements
     swap [ '[ _ nth ] ] keep map-as ;
 
 GENERIC# vshuffle-bytes 1 ( u perm -- v )
-M: object vshuffle-bytes
-    underlying>> [
-        swap [ '[ 15 bitand _ nth ] ] keep map-as
-    ] curry change-underlying ;
 
 GENERIC: vshuffle ( u perm -- v )
 M: array vshuffle ( u perm -- v )
@@ -123,9 +119,7 @@ GENERIC# vrshift 1 ( u n -- w )
 M: object vrshift neg '[ _ shift ] map ;
 
 GENERIC# hlshift 1 ( u n -- w )
-M: object hlshift '[ _ <byte-array> prepend 16 head ] change-underlying ;
 GENERIC# hrshift 1 ( u n -- w )
-M: object hrshift '[ _ <byte-array> append 16 tail* ] change-underlying ;
 
 GENERIC: (vmerge-head) ( u v -- h )
 M: object (vmerge-head) over length 2 /i '[ _ head-slice ] bi@ [ zip ] keep concat-as ;

From d344023b1c3f4b20bbe7d318d68f591b893c7d9e Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Tue, 24 Nov 2009 20:53:40 -0800
Subject: [PATCH 32/46] update math.vectors.conversion

---
 .../vectors/conversion/backend/backend.factor | 21 ---------
 .../conversion/conversion-tests.factor        | 46 +++++--------------
 .../math/vectors/conversion/conversion.factor | 30 ++++++++----
 .../vectors/simd/intrinsics/intrinsics.factor |  8 ++--
 basis/math/vectors/simd/simd.factor           |  4 +-
 5 files changed, 37 insertions(+), 72 deletions(-)
 delete mode 100644 basis/math/vectors/conversion/backend/backend.factor

diff --git a/basis/math/vectors/conversion/backend/backend.factor b/basis/math/vectors/conversion/backend/backend.factor
deleted file mode 100644
index d47fab1b0e..0000000000
--- a/basis/math/vectors/conversion/backend/backend.factor
+++ /dev/null
@@ -1,21 +0,0 @@
-! (c)Joe Groff bsd license
-USING: accessors alien.c-types arrays assocs classes combinators
-cords fry kernel math math.vectors sequences ;
-IN: math.vectors.conversion.backend
-
-: saturate-map-as ( v quot result -- w )
-    [ element-type '[ @ _ c-type-clamp ] ] keep map-as ; inline
-
-: (v>float) ( i to-type -- f )
-    [ >float ] swap new map-as ;
-: (v>integer) ( f to-type -- i )
-    [ >integer ] swap new map-as ;
-: (vpack-signed) ( a b to-type -- ab )
-    [ cord-append [ ] ] dip new saturate-map-as ;
-: (vpack-unsigned) ( a b to-type -- ab )
-    [ cord-append [ ] ] dip new saturate-map-as ;
-: (vunpack-head) ( ab to-type -- a )
-    [ dup length 2 /i head-slice ] dip new like ;
-: (vunpack-tail) ( ab to-type -- b )
-    [ dup length 2 /i tail-slice ] dip new like ;
-
diff --git a/basis/math/vectors/conversion/conversion-tests.factor b/basis/math/vectors/conversion/conversion-tests.factor
index 0f48b47756..c91bdb369e 100644
--- a/basis/math/vectors/conversion/conversion-tests.factor
+++ b/basis/math/vectors/conversion/conversion-tests.factor
@@ -3,16 +3,6 @@ USING: accessors arrays compiler continuations generalizations
 kernel kernel.private locals math.vectors.conversion math.vectors.simd
 sequences stack-checker tools.test ;
 FROM: alien.c-types => char uchar short ushort int uint longlong ulonglong float double ;
-SIMD: uchar
-SIMD: char
-SIMD: ushort
-SIMD: short
-SIMD: uint
-SIMD: int
-SIMD: ulonglong
-SIMD: longlong
-SIMD: float
-SIMD: double
 IN: math.vectors.conversion.tests
 
 ERROR: optimized-vconvert-inconsistent
@@ -59,12 +49,12 @@ MACRO:: test-vconvert ( from-type to-type -- )
 [ double-2{ -5.0 1.0 } ]
 [ longlong-2{ -5 1 } longlong-2 double-2 test-vconvert ] unit-test
 
-[ longlong-4{ -5 1 2 6 } ]
-[ double-4{ -5.0 1.0 2.3 6.7 } double-4 longlong-4 test-vconvert ] unit-test
+[ longlong-2{ -5 1 } ]
+[ double-2{ -5.0 1.0 } double-2 longlong-2 test-vconvert ] unit-test
 
 ! TODO we should be able to do double->int pack
-! [ int-8{ -5 1 2 6 12 34 -56 78 } ]
-[ double-4{ -5.0 1.0 2.0 6.0 } double-4{ 12.0 34.0 -56.0 78.0 } double-4 int-8 test-vconvert ]
+! [ int-4{ -5 1 12 34 } ]
+[ double-2{ -5.0 1.0 } double-2{ 12.0 34.0 } double-2 int-4 test-vconvert ]
 [ error>> bad-vconvert? ] must-fail-with
 
 [ float-4{ -1.25 2.0 3.0 -4.0 } ]
@@ -76,10 +66,10 @@ MACRO:: test-vconvert ( from-type to-type -- )
 [ short-8{ -1 2 3 -32768 5 32767 -7 32767 } ]
 [ int-4{ -1 2 3 -40000 } int-4{ 5 60000 -7 80000 } int-4 short-8 test-vconvert ] unit-test
 
-[ short-16{ -1 2 3 -32768 3 2 1 0 5 32767 -7 32767 7 6 5 4 } ]
+[ short-8{ -1 2 3 -32768 5 32767 -7 32767 } ]
 [
-    int-8{ -1 2 3 -40000 3 2 1 0 }
-    int-8{ 5 60000 -7 80000 7 6 5 4 } int-8 short-16 test-vconvert
+    int-4{ -1 2 3 -40000 }
+    int-4{ 5 60000 -7 80000 } int-4 short-8 test-vconvert
 ] unit-test
 
 [ ushort-8{ 0 2 3 0 5 60000 0 65535 } ]
@@ -97,15 +87,6 @@ MACRO:: test-vconvert ( from-type to-type -- )
     uchar-16 ushort-8 test-vconvert
 ] unit-test
 
-! TODO we should be able to do 256->128 pack
-! [ float-4{ -1.25 2.0 3.0 -4.0 } ]
-[ double-4{ -1.25 2.0 3.0 -4.0 } double-4 float-4 test-vconvert ]
-[ error>> bad-vconvert? ] must-fail-with
-
-! [ int-4{ -1 2 3 -4 } ]
-[ longlong-4{ -1 2 3 -4 } longlong-4 int-4 test-vconvert ]
-[ error>> bad-vconvert? ] must-fail-with
-
 [ double-2{ -1.25 2.0 } double-2{ 3.0 -4.0 } ]
 [ float-4{ -1.25 2.0 3.0 -4.0 } float-4 double-2 test-vconvert ] unit-test
 
@@ -121,8 +102,8 @@ MACRO:: test-vconvert ( from-type to-type -- )
 [ ulonglong-2{ 1 2 } ulonglong-2{ 3 4 } ]
 [ uint-4{ 1 2 3 4 } uint-4 ulonglong-2 test-vconvert ] unit-test
 
-[ longlong-4{ 1 2 3 4 } longlong-4{ 3 4 5 6 } ]
-[ uint-8{ 1 2 3 4 3 4 5 6 } uint-8 longlong-4 test-vconvert ] unit-test
+[ longlong-2{ 1 2 } longlong-2{ 3 4 } ]
+[ uint-4{ 1 2 3 4 } uint-4 longlong-2 test-vconvert ] unit-test
 
 [ int-4{ 1 2 -3 -4 } int-4{ 5 -6 7 -8 } ]
 [ short-8{ 1 2 -3 -4 5 -6 7 -8 } short-8 int-4 test-vconvert ] unit-test
@@ -130,13 +111,8 @@ MACRO:: test-vconvert ( from-type to-type -- )
 [ uint-4{ 1 2 3 4 } uint-4{ 5 6 7 8 } ]
 [ ushort-8{ 1 2 3 4 5 6 7 8 } ushort-8 uint-4 test-vconvert ] unit-test
 
-[ longlong-4{ 1 2 3 4 } longlong-4{ 3 4 5 6 } ]
-[ uint-8{ 1 2 3 4 3 4 5 6 } uint-8 longlong-4 test-vconvert ] unit-test
-
-! TODO we should be able to do 128->256 unpack
-! [ longlong-4{ 1 2 3 4 } ]
-[ uint-4{ 1 2 3 4 } uint-4 longlong-4 test-vconvert ]
-[ error>> bad-vconvert? ] must-fail-with
+[ longlong-2{ 1 2 } longlong-2{ 3 4 } ]
+[ uint-4{ 1 2 3 4 } uint-4 longlong-2 test-vconvert ] unit-test
 
 ! TODO we should be able to do multi-tier pack/unpack
 ! [ longlong-2{ 1 2 } longlong-2{ 3 4 } longlong-2{ 5 6 } longlong-2{ 7 8 } ]
diff --git a/basis/math/vectors/conversion/conversion.factor b/basis/math/vectors/conversion/conversion.factor
index fd58b11dc8..50bb9c8726 100644
--- a/basis/math/vectors/conversion/conversion.factor
+++ b/basis/math/vectors/conversion/conversion.factor
@@ -1,7 +1,7 @@
 ! (c)Joe Groff bsd license
 USING: accessors alien.c-types arrays assocs classes combinators
 combinators.short-circuit cords fry kernel locals math
-math.vectors math.vectors.conversion.backend sequences ;
+math.vectors math.vectors.simd math.vectors.simd.intrinsics sequences ;
 FROM: alien.c-types => char uchar short ushort int uint longlong ulonglong float double ;
 IN: math.vectors.conversion
 
@@ -30,11 +30,11 @@ ERROR: bad-vconvert-input value expected-type ;
         }
         {
             [ from-element float-type? ]
-            [ [ to-type (v>integer) ] ]
+            [ from-type new simd-rep '[ underlying>> _ (simd-v>integer) to-type boa ] ]
         }
         {
             [ to-element   float-type? ]
-            [ [ to-type (v>float)   ] ]
+            [ from-type new simd-rep '[ underlying>> _ (simd-v>float)   to-type boa ] ]
         }
     } cond
     [ from-type check-vconvert-type ] prepose ;
@@ -47,10 +47,18 @@ ERROR: bad-vconvert-input value expected-type ;
     } 0|| [ from-type to-type bad-vconvert ] when ;
 
 :: [[vpack-unsigned]] ( from-type to-type -- quot )
-    [ [ from-type check-vconvert-type ] bi@ to-type (vpack-unsigned) ] ;
+    from-type new simd-rep
+    '[
+        [ from-type check-vconvert-type underlying>> ] bi@
+        _ (simd-vpack-unsigned) to-type boa
+    ] ;
 
 :: [[vpack-signed]] ( from-type to-type -- quot )
-    [ [ from-type check-vconvert-type ] bi@ to-type (vpack-signed) ] ;
+    from-type new simd-rep
+    '[
+        [ from-type check-vconvert-type underlying>> ] bi@
+        _ (simd-vpack-signed)   to-type boa
+    ] ;
 
 :: [vpack] ( from-element to-element from-size to-size from-type to-type -- quot )
     from-size to-size /i log2 :> steps
@@ -68,9 +76,11 @@ ERROR: bad-vconvert-input value expected-type ;
     } 0|| [ from-type to-type bad-vconvert ] when ;
 
 :: [[vunpack]] ( from-type to-type -- quot )
-    [
-        from-type check-vconvert-type
-        [ to-type (vunpack-head) ] [ to-type (vunpack-tail) ] bi
+    from-type new simd-rep
+    '[
+        from-type check-vconvert-type underlying>> _
+        [ (simd-vunpack-head) to-type boa ]
+        [ (simd-vunpack-tail) to-type boa ] 2bi
     ] ;
 
 :: [vunpack] ( from-element to-element from-size to-size from-type to-type -- quot )
@@ -81,8 +91,8 @@ ERROR: bad-vconvert-input value expected-type ;
 PRIVATE>
 
 MACRO:: vconvert ( from-type to-type -- )
-    from-type new [ element-type ] [ byte-length ] bi :> ( from-element from-length )
-    to-type   new [ element-type ] [ byte-length ] bi :> ( to-element   to-length   )
+    from-type new [ simd-element-type ] [ byte-length ] bi :> ( from-element from-length )
+    to-type   new [ simd-element-type ] [ byte-length ] bi :> ( to-element   to-length   )
     from-element heap-size :> from-size
     to-element   heap-size :> to-size   
 
diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor
index 187c6db586..d2e0305fa3 100644
--- a/basis/math/vectors/simd/intrinsics/intrinsics.factor
+++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor
@@ -201,16 +201,16 @@ PRIVATE>
 : (simd-vall?)             ( a   rep -- ? ) [ bitand ] bitwise-components-reduce zero? not ;
 : (simd-vnone?)            ( a   rep -- ? ) [ bitor  ] bitwise-components-reduce zero?     ;
 : (simd-v>float)           ( a   rep -- c )
-    [ >rep-array ] [ >float-vector-rep [>rep-array] ] bi call( i -- f ) underlying>> ;
+    [ >rep-array [ >float ] ] [ >float-vector-rep <rep-array> ] bi map-as underlying>> ;
 : (simd-v>integer)         ( a   rep -- c )
-    [ >rep-array ] [ >int-vector-rep [>rep-array] ] bi call( i -- f ) underlying>> ;
+    [ >rep-array [ >integer ] ] [ >int-vector-rep <rep-array> ] bi map-as underlying>> ;
 : (simd-vpack-signed)      ( a b rep -- c )
     [ 2>rep-array cord-append ]
-    [ narrow-vector-rep [ [<rep-array>] ] [ rep-component-type ] bi ] bi
+    [ narrow-vector-rep [ <rep-array> ] [ rep-component-type ] bi ] bi
     '[ _ c-type-clamp ] swap map-as underlying>> ;
 : (simd-vpack-unsigned)    ( a b rep -- c )
     [ 2>rep-array cord-append ]
-    [ narrow-vector-rep >uint-vector-rep [ [<rep-array>] ] [ rep-component-type ] bi ] bi
+    [ narrow-vector-rep >uint-vector-rep [ <rep-array> ] [ rep-component-type ] bi ] bi
     '[ _ c-type-clamp ] swap map-as underlying>> ;
 : (simd-vunpack-head)      ( a   rep -- c ) 
     [ >rep-array ] [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ] bi
diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor
index bde69b5dbd..e89edd3de3 100644
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@@ -217,8 +217,8 @@ M: A norm-sq dup v. assert-positive ; inline
 M: A norm      norm-sq sqrt ; inline
 M: A distance  v- norm ; inline
 
-! M: simd-128 >pprint-sequence ;
-! M: simd-128 pprint* pprint-object ;
+M: A >pprint-sequence ;
+M: A pprint* pprint-object ;
 
 \ A-boa
 [ COERCER N napply ] N {

From 3f5b3c2c1bfa9b6b9394a5d53a11984895f988f4 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Tue, 24 Nov 2009 22:20:43 -0800
Subject: [PATCH 33/46] fix math.vectors.conversion help-lint

---
 .../vectors/conversion/conversion-docs.factor     | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/basis/math/vectors/conversion/conversion-docs.factor b/basis/math/vectors/conversion/conversion-docs.factor
index 9fe5ac4c17..7f2a349c52 100644
--- a/basis/math/vectors/conversion/conversion-docs.factor
+++ b/basis/math/vectors/conversion/conversion-docs.factor
@@ -22,7 +22,7 @@ HELP: vconvert
 }
 { $description "Converts SIMD vectors of " { $snippet "from-type" } " to " { $snippet "to-type" } ". The number of inputs and outputs depends on the relationship of the two types:"
 { $list
-{ "If " { $snippet "to-type" } " is a floating-point vector type with the same byte length and element count as the integer vector type " { $snippet "from-type" } " (for example, from " { $snippet "int-8" } " to " { $snippet "float-8" } " or from " { $snippet "longlong-2" } " to " { $snippet "double-2" } "), " { $snippet "vconvert" } " takes one vector of " { $snippet "from-type" } " and converts its elements to floating-point, outputting one vector of " { $snippet "to-type" } "." }
+{ "If " { $snippet "to-type" } " is a floating-point vector type with the same byte length and element count as the integer vector type " { $snippet "from-type" } " (for example, from " { $snippet "int-4" } " to " { $snippet "float-4" } " or from " { $snippet "longlong-2" } " to " { $snippet "double-2" } "), " { $snippet "vconvert" } " takes one vector of " { $snippet "from-type" } " and converts its elements to floating-point, outputting one vector of " { $snippet "to-type" } "." }
 { "Likewise, if " { $snippet "to-type" } " is an integer vector type with the same byte length and element count as the floating-point vector type " { $snippet "from-type" } ", " { $snippet "vconvert" } " takes one vector of " { $snippet "from-type" } " and truncates its elements to integers, outputting one vector of " { $snippet "to-type" } "." }
 { "If " { $snippet "to-type" } " is a vector type with the same byte length as and twice the element count of the vector type " { $snippet "from-type" } " (for example, from " { $snippet "int-4" } " to " { $snippet "ushort-8" } ", from " { $snippet "double-2" } " to " { $snippet "float-4" } ", or from " { $snippet "short-8" } " to " { $snippet "char-16" } "), " { $snippet "vconvert" } " takes two vectors of " { $snippet "from-type" } " and packs them into one vector of " { $snippet "to-type" } ", saturating values too large or small to be representable as elements of " { $snippet "to-type" } "." }
 { "If " { $snippet "to-type" } " is a vector type with the same byte length as and half the element count of the vector type " { $snippet "from-type" } " (for example, from " { $snippet "ushort-8" } " to " { $snippet "int-4" } ", from " { $snippet "float-4" } " to " { $snippet "double-2" } ", or from " { $snippet "char-16" } " to " { $snippet "short-8" } "), " { $snippet "vconvert" } " takes one vector of " { $snippet "from-type" } " and unpacks it into two vectors of " { $snippet "to-type" } "." }
@@ -39,26 +39,23 @@ HELP: vconvert
 "Conversion between integer and float vectors:"
 { $example """USING: alien.c-types math.vectors.conversion math.vectors.simd
 prettyprint ;
-SIMDS: int float longlong double ;
 
-int-8{ 0 1 2 3 4 5 6 7 } int-8 float-8 vconvert .
+int-4{ 0 1 2 3 } int-4 float-4 vconvert .
 double-2{ 1.25 3.75 } double-2 longlong-2 vconvert ."""
-"""float-8{ 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 }
+"""float-4{ 0.0 1.0 2.0 3.0 }
 longlong-2{ 1 3 }""" }
 "Packing conversions:"
 { $example """USING: alien.c-types math.vectors.conversion math.vectors.simd
 prettyprint ;
-SIMDS: ushort int float double ;
 
 int-4{ -8 70000 6000 50 } int-4{ 4 3 2 -1 } int-4 ushort-8 vconvert .
-double-4{ 0.0 1.5 1.0e100 2.0 }
-double-4{ -1.0e100 0.0 1.0 2.0 } double-4 float-8 vconvert ."""
+double-2{ 0.0 1.0e100 }
+double-2{ -1.0e100 0.0 } double-2 float-4 vconvert ."""
 """ushort-8{ 0 65535 6000 50 4 3 2 0 }
-float-8{ 0.0 1.5 1/0. 2.0 -1/0. 0.0 1.0 2.0 }""" }
+float-4{ 0.0 1/0. -1/0. 0.0 }""" }
 "Unpacking conversions:"
 { $example """USING: alien.c-types kernel math.vectors.conversion
 math.vectors.simd prettyprint ;
-SIMDS: uchar short ;
 
 uchar-16{ 8 70 60 50 4 30 200 1 9 10 110 102 133 143 115 0 }
 uchar-16 short-8 vconvert [ . ] bi@"""

From 9128f1f160e4098e19dfeeeac1c03f0125d57c58 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Tue, 24 Nov 2009 22:23:22 -0800
Subject: [PATCH 34/46] remove mentions of 256-bit vectors from
 math.vectors.simd

---
 basis/math/vectors/simd/simd-docs.factor | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor
index 8aeea4267d..98a7b9273c 100644
--- a/basis/math/vectors/simd/simd-docs.factor
+++ b/basis/math/vectors/simd/simd-docs.factor
@@ -19,9 +19,9 @@ $nl
 ARTICLE: "math.vectors.simd.support" "Supported SIMD instruction sets and operations"
 "At present, the SIMD support makes use of a subset of SSE up to SSE4.1. The subset used depends on the current CPU type."
 $nl
-"SSE1 only supports single-precision SIMD (" { $snippet "float-4" } " and " { $snippet "float-8" } ")."
+"SSE1 only supports single-precision SIMD (" { $snippet "float-4" } ")."
 $nl
-"SSE2 introduces double-precision SIMD (" { $snippet "double-2" } " and " { $snippet "double-4" } ") and integer SIMD (all types). Integer SIMD is missing a few features, in particular the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "."
+"SSE2 introduces double-precision SIMD (" { $snippet "double-2" } ") and integer SIMD (all types). Integer SIMD is missing a few features; in particular, the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "."
 $nl
 "SSE3 introduces horizontal adds (summing all components of a single vector register), which are useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "."
 $nl
@@ -40,24 +40,14 @@ $nl
 { $code
     "char-16"
     "uchar-16"
-    "char-32"
-    "uchar-32"
     "short-8"
     "ushort-8"
-    "short-16"
-    "ushort-16"
     "int-4"
     "uint-4"
-    "int-8"
-    "uint-8"
     "longlong-2"
     "ulonglong-2"
-    "longlong-4"
-    "ulonglong-4"
     "float-4"
-    "float-8"
     "double-2"
-    "double-4"
 } ;
 
 ARTICLE: "math.vectors.simd.words" "SIMD vector words"
@@ -88,8 +78,8 @@ SIMD: double
 SYMBOLS: x y ;
 
 [
-    double-4{ 1.5 2.0 3.7 0.4 } x set
-    double-4{ 1.5 2.0 3.7 0.4 } y set
+    float-4{ 1.5 2.0 3.7 0.4 } x set
+    float-4{ 1.5 2.0 3.7 0.4 } y set
     x get y get v+
 ] optimizer-report.""" }
 "The following word benefits from SIMD optimization, because it begins with an unsafe declaration:"
@@ -183,7 +173,7 @@ $nl
 ARTICLE: "math.vectors.simd.accuracy" "Numerical accuracy of SIMD primitives"
 "No guarantees are made that " { $vocab-link "math.vectors.simd" } " words will give identical results on different SSE versions, or between the hardware intrinsics and the software fallbacks."
 $nl
-"In particular, horizontal operations on " { $snippet "float-4" } " and " { $snippet "float-8" } " are affected by this. They are computed with lower precision in intrinsics than the software fallback. Horizontal operations include anything involving adding together the components of a vector, such as " { $link sum } " or " { $link normalize } "." ;
+"In particular, horizontal operations on " { $snippet "float-4" } " vectors are affected by this. They are computed with lower precision in intrinsics than the software fallback. Horizontal operations include anything involving adding together the components of a vector, such as " { $link sum } " or " { $link normalize } "." ;
 
 ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
 "The " { $vocab-link "math.vectors.simd" } " vocabulary extends the " { $vocab-link "math.vectors" } " vocabulary to support efficient vector arithmetic on small, fixed-size vectors."

From 50f7dff4228860f041e372fc61ebef9c6b97052d Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Tue, 24 Nov 2009 22:44:12 -0800
Subject: [PATCH 35/46] change name of 'unsign-rep' to more sensible
 'signed-rep'

---
 basis/compiler/cfg/intrinsics/simd/simd.factor |  8 ++++----
 basis/cpu/architecture/architecture.factor     |  2 +-
 basis/cpu/x86/x86.factor                       | 18 +++++++++---------
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor
index 109ac6ce8e..845902c2e6 100644
--- a/basis/compiler/cfg/intrinsics/simd/simd.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd.factor
@@ -20,7 +20,7 @@ IN: compiler.cfg.intrinsics.simd
 ! compound vector ops
 
 : sign-bit-mask ( rep -- byte-array )
-    unsign-rep {
+    signed-rep {
         { char-16-rep [ uchar-array{
             HEX: 80 HEX: 80 HEX: 80 HEX: 80
             HEX: 80 HEX: 80 HEX: 80 HEX: 80
@@ -48,7 +48,7 @@ IN: compiler.cfg.intrinsics.simd
     } case ;
 
 : ^load-add-sub-vector ( rep -- dst )
-    unsign-rep {
+    signed-rep {
         { float-4-rep    [ float-array{ -0.0  0.0 -0.0  0.0 } underlying>> ^^load-constant ] }
         { double-2-rep   [ double-array{ -0.0  0.0 } underlying>> ^^load-constant ] }
         { char-16-rep    [ char-array{ -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 } underlying>> ^^load-constant ] }
@@ -115,7 +115,7 @@ IN: compiler.cfg.intrinsics.simd
             rep sign-bit-mask ^^load-constant :> sign-bits
             src1 sign-bits rep ^^xor-vector
             src2 sign-bits rep ^^xor-vector
-            rep unsign-rep cc ^(compare-vector)
+            rep signed-rep cc ^(compare-vector)
         ] }
     } vv-cc-vector-op ;
 
@@ -247,7 +247,7 @@ IN: compiler.cfg.intrinsics.simd
     ] [ ^^vector>scalar ] bi ;
 
 : ^sum-vector ( src rep -- dst )
-    unsign-rep {
+    signed-rep {
         { float-vector-rep [ ^(sum-vector) ] }
         { int-vector-rep [| src rep |
             src rep ^unpack-vector-head :> head
diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor
index 6631cec189..9158379f70 100644
--- a/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@@ -95,7 +95,7 @@ double-rep
 vector-rep
 scalar-rep ;
 
-: unsign-rep ( rep -- rep' )
+: signed-rep ( rep -- rep' )
     {
         { uint-4-rep           int-4-rep }
         { ulonglong-2-rep      longlong-2-rep }
diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor
index b752935f9c..b1735b88f2 100644
--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@@ -650,7 +650,7 @@ M: x86 %fill-vector-reps
     } available-reps ;
 
 ! M:: x86 %broadcast-vector ( dst src rep -- )
-!     rep unsign-rep {
+!     rep signed-rep {
 !         { float-4-rep [
 !             dst src float-4-rep %copy
 !             dst dst { 0 0 0 0 } SHUFPS
@@ -687,7 +687,7 @@ M: x86 %fill-vector-reps
 !     } available-reps ;
 
 M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
-    rep unsign-rep {
+    rep signed-rep {
         { float-4-rep [
             dst src1 float-4-rep %copy
             dst src2 UNPCKLPS
@@ -710,7 +710,7 @@ M: x86 %gather-vector-4-reps
     } available-reps ;
 
 M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
-    rep unsign-rep {
+    rep signed-rep {
         { double-2-rep [
             dst src1 double-2-rep %copy
             dst src2 MOVLHPS
@@ -763,7 +763,7 @@ M: x86 %gather-vector-2-reps
 
 M:: x86 %shuffle-vector-imm ( dst src shuffle rep -- )
     dst src rep %copy
-    dst shuffle rep unsign-rep {
+    dst shuffle rep signed-rep {
         { double-2-rep [ >float-4-shuffle float-4-shuffle ] }
         { float-4-rep [ float-4-shuffle ] }
         { int-4-rep [ int-4-shuffle ] }
@@ -786,7 +786,7 @@ M: x86 %shuffle-vector-reps
 
 M: x86 %merge-vector-head
     [ two-operand ] keep
-    unsign-rep {
+    signed-rep {
         { double-2-rep   [ MOVLHPS ] }
         { float-4-rep    [ UNPCKLPS ] }
         { longlong-2-rep [ PUNPCKLQDQ ] }
@@ -797,7 +797,7 @@ M: x86 %merge-vector-head
 
 M: x86 %merge-vector-tail
     [ two-operand ] keep
-    unsign-rep {
+    signed-rep {
         { double-2-rep   [ UNPCKHPD ] }
         { float-4-rep    [ UNPCKHPS ] }
         { longlong-2-rep [ PUNPCKHQDQ ] }
@@ -826,7 +826,7 @@ M: x86 %signed-pack-vector-reps
 
 M: x86 %unsigned-pack-vector
     [ two-operand ] keep
-    unsign-rep {
+    signed-rep {
         { int-4-rep   [ PACKUSDW ] }
         { short-8-rep [ PACKUSWB ] }
     } case ;
@@ -896,7 +896,7 @@ M: x86 %float>integer-vector-reps
     } case ;
 
 :: (%compare-int-vector) ( dst src rep int64 int32 int16 int8 -- )
-    rep unsign-rep :> rep'
+    rep signed-rep :> rep'
     dst src rep' {
         { longlong-2-rep [ int64 call ] }
         { int-4-rep      [ int32 call ] }
@@ -1173,7 +1173,7 @@ M: x86 %dot-vector-reps
 
 M: x86 %horizontal-add-vector ( dst src1 src2 rep -- )
     [ two-operand ] keep
-    unsign-rep {
+    signed-rep {
         { float-4-rep  [ HADDPS ] }
         { double-2-rep [ HADDPD ] }
         { int-4-rep    [ PHADDD ] }

From 46f0aa64215131739b08c64ac3aa88f844a6c6a2 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Tue, 24 Nov 2009 22:56:42 -0800
Subject: [PATCH 36/46] move cords to sequences.cords

---
 basis/math/vectors/conversion/conversion.factor      | 6 ++++--
 basis/math/vectors/simd/intrinsics/intrinsics.factor | 6 +++---
 basis/{ => sequences}/cords/authors.txt              | 0
 basis/{ => sequences}/cords/cords-tests.factor       | 4 ++--
 basis/{ => sequences}/cords/cords.factor             | 2 +-
 basis/{ => sequences}/cords/summary.txt              | 0
 basis/{ => sequences}/cords/tags.txt                 | 0
 7 files changed, 10 insertions(+), 8 deletions(-)
 rename basis/{ => sequences}/cords/authors.txt (100%)
 rename basis/{ => sequences}/cords/cords-tests.factor (62%)
 rename basis/{ => sequences}/cords/cords.factor (98%)
 rename basis/{ => sequences}/cords/summary.txt (100%)
 rename basis/{ => sequences}/cords/tags.txt (100%)

diff --git a/basis/math/vectors/conversion/conversion.factor b/basis/math/vectors/conversion/conversion.factor
index 50bb9c8726..6148962ee0 100644
--- a/basis/math/vectors/conversion/conversion.factor
+++ b/basis/math/vectors/conversion/conversion.factor
@@ -1,8 +1,10 @@
 ! (c)Joe Groff bsd license
 USING: accessors alien.c-types arrays assocs classes combinators
-combinators.short-circuit cords fry kernel locals math
+combinators.short-circuit fry kernel locals math
 math.vectors math.vectors.simd math.vectors.simd.intrinsics sequences ;
-FROM: alien.c-types => char uchar short ushort int uint longlong ulonglong float double ;
+FROM: alien.c-types =>
+    char uchar short ushort int uint longlong ulonglong
+    float double ;
 IN: math.vectors.conversion
 
 ERROR: bad-vconvert from-type to-type ;
diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor
index d2e0305fa3..30db6d5e13 100644
--- a/basis/math/vectors/simd/intrinsics/intrinsics.factor
+++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor
@@ -1,8 +1,8 @@
 ! (c)2009 Slava Pestov, Joe Groff bsd license
 USING: accessors alien alien.c-types alien.data combinators
-cords cpu.architecture fry generalizations kernel libc locals
-math math.libm math.order math.ranges math.vectors sequences
-sequences.private specialized-arrays vocabs.loader ;
+sequences.cords cpu.architecture fry generalizations kernel
+libc locals math math.libm math.order math.ranges math.vectors
+sequences sequences.private specialized-arrays vocabs.loader ;
 QUALIFIED-WITH: alien.c-types c
 SPECIALIZED-ARRAYS:
     c:char c:short c:int c:longlong
diff --git a/basis/cords/authors.txt b/basis/sequences/cords/authors.txt
similarity index 100%
rename from basis/cords/authors.txt
rename to basis/sequences/cords/authors.txt
diff --git a/basis/cords/cords-tests.factor b/basis/sequences/cords/cords-tests.factor
similarity index 62%
rename from basis/cords/cords-tests.factor
rename to basis/sequences/cords/cords-tests.factor
index 898e4e51c8..2999365926 100644
--- a/basis/cords/cords-tests.factor
+++ b/basis/sequences/cords/cords-tests.factor
@@ -1,5 +1,5 @@
-USING: cords strings tools.test kernel sequences ;
-IN: cords.tests
+USING: sequences.cords strings tools.test kernel sequences ;
+IN: sequences.cords.tests
 
 [ "hello world" ] [ "hello" " world" cord-append dup like ] unit-test
 [ "hello world" ] [ { "he" "llo" " world" } cord-concat dup like ] unit-test
diff --git a/basis/cords/cords.factor b/basis/sequences/cords/cords.factor
similarity index 98%
rename from basis/cords/cords.factor
rename to basis/sequences/cords/cords.factor
index ad17da9652..4b88432313 100644
--- a/basis/cords/cords.factor
+++ b/basis/sequences/cords/cords.factor
@@ -2,7 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors assocs sequences sorting binary-search math
 math.order arrays combinators kernel ;
-IN: cords
+IN: sequences.cords
 
 <PRIVATE
 
diff --git a/basis/cords/summary.txt b/basis/sequences/cords/summary.txt
similarity index 100%
rename from basis/cords/summary.txt
rename to basis/sequences/cords/summary.txt
diff --git a/basis/cords/tags.txt b/basis/sequences/cords/tags.txt
similarity index 100%
rename from basis/cords/tags.txt
rename to basis/sequences/cords/tags.txt

From d95c6eb4c831a2e078aa2dcade26adbd8b8f384b Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Tue, 24 Nov 2009 23:18:01 -0800
Subject: [PATCH 37/46] streamline cords, add a functor for making specialized
 cords

---
 basis/sequences/cords/cords-tests.factor |  1 -
 basis/sequences/cords/cords.factor       | 87 ++++++++----------------
 2 files changed, 29 insertions(+), 59 deletions(-)

diff --git a/basis/sequences/cords/cords-tests.factor b/basis/sequences/cords/cords-tests.factor
index 2999365926..fb9c440733 100644
--- a/basis/sequences/cords/cords-tests.factor
+++ b/basis/sequences/cords/cords-tests.factor
@@ -2,4 +2,3 @@ USING: sequences.cords strings tools.test kernel sequences ;
 IN: sequences.cords.tests
 
 [ "hello world" ] [ "hello" " world" cord-append dup like ] unit-test
-[ "hello world" ] [ { "he" "llo" " world" } cord-concat dup like ] unit-test
diff --git a/basis/sequences/cords/cords.factor b/basis/sequences/cords/cords.factor
index 4b88432313..f183e4fd2d 100644
--- a/basis/sequences/cords/cords.factor
+++ b/basis/sequences/cords/cords.factor
@@ -1,72 +1,43 @@
 ! Copyright (C) 2008 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors assocs sequences sorting binary-search math
-math.order arrays combinators kernel ;
+math.order arrays classes combinators kernel functors ;
 IN: sequences.cords
 
-<PRIVATE
+MIXIN: cord
 
-TUPLE: simple-cord
-    { first read-only } { second read-only } ;
+TUPLE: generic-cord
+    { head read-only } { tail read-only } ;
+INSTANCE: generic-cord cord
 
-M: simple-cord length
-    [ first>> length ] [ second>> length ] bi + ; inline
+M: cord length
+    [ head>> length ] [ tail>> length ] bi + ; inline
 
-M: simple-cord virtual-exemplar first>> ; inline
+M: cord virtual-exemplar head>> ; inline
 
-M: simple-cord virtual@
-    2dup first>> length <
-    [ first>> ] [ [ first>> length - ] [ second>> ] bi ] if ; inline
-
-TUPLE: multi-cord
-    { count read-only } { seqs read-only } ;
-
-M: multi-cord length count>> ; inline
-
-M: multi-cord virtual@
-    dupd
-    seqs>> [ first <=> ] with search nip
-    [ first - ] [ second ] bi ; inline
-
-M: multi-cord virtual-exemplar
-    seqs>> [ f ] [ first second ] if-empty ; inline
-
-: <cord> ( seqs -- cord )
-    dup length 2 = [
-        first2 simple-cord boa
-    ] [
-        [ 0 [ length + ] accumulate ] keep zip multi-cord boa
-    ] if ; inline
-
-PRIVATE>
-
-UNION: cord simple-cord multi-cord ;
+M: cord virtual@
+    2dup head>> length <
+    [ head>> ] [ [ head>> length - ] [ tail>> ] bi ] if ; inline
 
 INSTANCE: cord virtual-sequence
 
-INSTANCE: multi-cord virtual-sequence
+GENERIC: cord-append ( seq1 seq2 -- cord )
 
-: cord-append ( seq1 seq2 -- cord )
-    {
-        { [ over empty? ] [ nip ] }
-        { [ dup empty? ] [ drop ] }
-        { [ 2dup [ cord? ] both? ] [ [ seqs>> values ] bi@ append <cord> ] }
-        { [ over cord? ] [ [ seqs>> values ] dip suffix <cord> ] }
-        { [ dup cord? ] [ seqs>> values swap prefix <cord> ] }
-        [ 2array <cord> ]
-    } cond ; inline
+M: object cord-append
+    generic-cord boa ; inline
 
-: cord-concat ( seqs -- cord )
-    {
-        { [ dup empty? ] [ drop f ] }
-        { [ dup length 1 = ] [ first ] }
-        [
-            [
-                {
-                    { [ dup cord? ] [ seqs>> values ] }
-                    { [ dup empty? ] [ drop { } ] }
-                    [ 1array ]
-                } cond
-            ] map concat <cord>
-        ]
-    } cond ; inline
+FUNCTOR: define-specialized-cord ( T C -- )
+
+T-cord DEFINES-CLASS ${C}
+
+WHERE
+
+TUPLE: T-cord
+    { head T read-only } { tail T read-only } ;
+INSTANCE: T-cord cord
+
+M: T cord-append
+    2dup [ T instance? ] both?
+    [ T-cord boa ] [ generic-cord boa ] if ; inline
+
+;FUNCTOR

From 5f32a6d7265896f497650c80d1afdbdedbc06b3b Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Wed, 25 Nov 2009 15:44:57 -0800
Subject: [PATCH 38/46] vector op specializations on cords

---
 basis/sequences/cords/cords.factor | 74 +++++++++++++++++++++++++++++-
 1 file changed, 72 insertions(+), 2 deletions(-)

diff --git a/basis/sequences/cords/cords.factor b/basis/sequences/cords/cords.factor
index f183e4fd2d..e59858677b 100644
--- a/basis/sequences/cords/cords.factor
+++ b/basis/sequences/cords/cords.factor
@@ -1,7 +1,8 @@
 ! Copyright (C) 2008 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: accessors assocs sequences sorting binary-search math
-math.order arrays classes combinators kernel functors ;
+USING: accessors assocs sequences sorting binary-search fry math
+math.order arrays classes combinators kernel functors math.functions
+math.vectors ;
 IN: sequences.cords
 
 MIXIN: cord
@@ -41,3 +42,72 @@ M: T cord-append
     [ T-cord boa ] [ generic-cord boa ] if ; inline
 
 ;FUNCTOR
+
+: cord-map ( cord quot -- cord' )
+    [ [ head>> ] dip call ]
+    [ [ tail>> ] dip call ] 2bi cord-append ; inline
+
+: cord-2map ( cord cord quot -- cord' )
+    [ [ [ head>> ] bi@ ] dip call ]
+    [ [ [ tail>> ] bi@ ] dip call ] 3bi cord-append ; inline
+
+: cord-both ( cord quot -- h t )
+    [ [ head>> ] [ tail>> ] bi ] dip bi@ ; inline
+
+: cord-2both ( cord cord quot -- h t )
+    [ [ [ head>> ] bi@ ] dip call ]
+    [ [ [ tail>> ] bi@ ] dip call ] 3bi ; inline
+
+M: cord v+                [ v+                ] cord-2map ; inline
+M: cord v-                [ v-                ] cord-2map ; inline
+M: cord vneg              [ vneg              ] cord-map  ; inline
+M: cord v+-               [ v+-               ] cord-2map ; inline
+M: cord vs+               [ vs+               ] cord-2map ; inline
+M: cord vs-               [ vs-               ] cord-2map ; inline
+M: cord vs*               [ vs*               ] cord-2map ; inline
+M: cord v*                [ v*                ] cord-2map ; inline
+M: cord v/                [ v/                ] cord-2map ; inline
+M: cord vmin              [ vmin              ] cord-2map ; inline
+M: cord vmax              [ vmax              ] cord-2map ; inline
+M: cord v.                [ v.                ] cord-2both + ; inline
+M: cord vsqrt             [ vsqrt             ] cord-map  ; inline
+M: cord sum               [ sum               ] cord-both + ; inline
+M: cord vabs              [ vabs              ] cord-map  ; inline
+M: cord vbitand           [ vbitand           ] cord-2map ; inline
+M: cord vbitandn          [ vbitandn          ] cord-2map ; inline
+M: cord vbitor            [ vbitor            ] cord-2map ; inline
+M: cord vbitxor           [ vbitxor           ] cord-2map ; inline
+M: cord vbitnot           [ vbitnot           ] cord-map  ; inline
+M: cord vand              [ vand              ] cord-2map ; inline
+M: cord vandn             [ vandn             ] cord-2map ; inline
+M: cord vor               [ vor               ] cord-2map ; inline
+M: cord vxor              [ vxor              ] cord-2map ; inline
+M: cord vnot              [ vnot              ] cord-map  ; inline
+M: cord vlshift           '[ _ vlshift        ] cord-map  ; inline
+M: cord vrshift           '[ _ vrshift        ] cord-map  ; inline
+M: cord (vmerge-head)     [ head>> ] bi@ (vmerge) cord-append ; inline
+M: cord (vmerge-tail)     [ tail>> ] bi@ (vmerge) cord-append ; inline
+M: cord v<=               [ v<=               ] cord-2map ; inline
+M: cord v<                [ v<                ] cord-2map ; inline
+M: cord v=                [ v=                ] cord-2map ; inline
+M: cord v>                [ v>                ] cord-2map ; inline
+M: cord v>=               [ v>=               ] cord-2map ; inline
+M: cord vunordered?       [ vunordered?       ] cord-2map ; inline
+M: cord vany?             [ vany?             ] cord-both or  ; inline
+M: cord vall?             [ vall?             ] cord-both and ; inline
+M: cord vnone?            [ vnone?            ] cord-both and ; inline
+
+M: cord n+v [ n+v ] with cord-map ; inline
+M: cord n-v [ n-v ] with cord-map ; inline
+M: cord n*v [ n*v ] with cord-map ; inline
+M: cord n/v [ n/v ] with cord-map ; inline
+M: cord v+n '[ _ v+n ] cord-map ; inline
+M: cord v-n '[ _ v-n ] cord-map ; inline
+M: cord v*n '[ _ v*n ] cord-map ; inline
+M: cord v/n '[ _ v/n ] cord-map ; inline
+
+M: cord norm-sq  dup v. ; inline
+M: cord norm     norm-sq sqrt ; inline
+M: cord distance v- norm ; inline
+
+

From 578a0e3fd5c99e96c73d0c1ea78f653e3e2d7dec Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Wed, 25 Nov 2009 16:36:34 -0800
Subject: [PATCH 39/46] math.vectors.simd.cords vocab that defines vector cords

---
 basis/math/vectors/simd/cords/cords.factor | 87 ++++++++++++++++++++++
 1 file changed, 87 insertions(+)
 create mode 100644 basis/math/vectors/simd/cords/cords.factor

diff --git a/basis/math/vectors/simd/cords/cords.factor b/basis/math/vectors/simd/cords/cords.factor
new file mode 100644
index 0000000000..e099f6e830
--- /dev/null
+++ b/basis/math/vectors/simd/cords/cords.factor
@@ -0,0 +1,87 @@
+USING: accessors alien.c-types arrays byte-arrays
+cpu.architecture effects functors generalizations kernel lexer
+math math.vectors.simd math.vectors.simd.intrinsics parser
+prettyprint.custom quotations sequences sequences.cords words ;
+IN: math.vectors.simd.cords
+
+<<
+<PRIVATE
+
+FUNCTOR: (define-simd-128-cord) ( A/2 A -- )
+
+A-rep    IS            ${A/2}-rep
+>A/2     IS            >${A/2}
+A/2-boa  IS            ${A/2}-boa
+A/2-with IS            ${A/2}-with
+A/2-cast IS            ${A/2}-cast
+
+>A     DEFINES       >${A}
+A-boa  DEFINES       ${A}-boa
+A-with DEFINES       ${A}-with
+A-cast DEFINES       ${A}-cast
+A{     DEFINES       ${A}{
+
+N       [ A-rep rep-length ]
+BOA-EFFECT [ N 2 * "n" <repetition> >array { "v" } <effect> ]
+
+WHERE
+
+: >A ( seq -- A )
+    [ N head >A/2 ]
+    [ N tail >A/2 ] bi cord-append ;
+
+\ A-boa
+{ N ndip A/2-boa cord-append } { A/2-boa } >quotation prefix >quotation
+BOA-EFFECT define-inline
+
+: A-with ( n -- v )
+    [ A/2-with ] [ A/2-with ] bi cord-append ;
+
+: A-cast ( v -- v' )
+    [ A/2-cast ] cord-map ;
+
+M: A >pprint-sequence ;
+M: A pprint* pprint-object ;
+
+M: A pprint-delims drop \ A{ \ } ;
+SYNTAX: A{ \ } [ >A ] parse-literal ;
+
+<c-type>
+    byte-array >>class
+    A >>boxed-class
+    [
+        [      A-rep alien-vector A/2 boa ]
+        [ 16 + A-rep alien-vector A/2 boa ] 2bi cord-append
+    ] >>getter
+    [
+        [ [ head>> underlying>> ] 2dip      A-rep set-alien-vector ]
+        [ [ tail>> underlying>> ] 2dip 16 + A-rep set-alien-vector ] 3bi
+    ] >>setter
+    32 >>size
+    16 >>align
+    A-rep >>rep
+\ A typedef
+
+;FUNCTOR
+
+: define-simd-128-cord ( A/2 T -- )
+    [ define-specialized-cord ]
+    [ create-in (define-simd-128-cord) ] 2bi ;
+
+SYNTAX: SIMD-128-CORD:
+    scan-word scan define-simd-128-cord ;
+
+PRIVATE>
+>>
+
+SIMD-128-CORD: char-16     char-32
+SIMD-128-CORD: uchar-16    uchar-32
+SIMD-128-CORD: short-8     short-16
+SIMD-128-CORD: ushort-8    ushort-16
+SIMD-128-CORD: int-4       int-8
+SIMD-128-CORD: uint-4      uint-8
+SIMD-128-CORD: longlong-2  longlong-4
+SIMD-128-CORD: ulonglong-2 ulonglong-4
+SIMD-128-CORD: float-4     float-8
+SIMD-128-CORD: double-2    double-4
+

From cb8f757c72edce832656238378e6cb2368b8cd35 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Wed, 25 Nov 2009 18:02:27 -0800
Subject: [PATCH 40/46] update benchmark.nbody-simd and
 benchmark.raytracer-simd

---
 extra/benchmark/nbody-simd/nbody-simd.factor         | 5 ++---
 extra/benchmark/raytracer-simd/raytracer-simd.factor | 5 ++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/extra/benchmark/nbody-simd/nbody-simd.factor b/extra/benchmark/nbody-simd/nbody-simd.factor
index 6648c52639..2797558a4b 100644
--- a/extra/benchmark/nbody-simd/nbody-simd.factor
+++ b/extra/benchmark/nbody-simd/nbody-simd.factor
@@ -2,9 +2,8 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors alien.c-types fry kernel locals math
 math.constants math.functions math.vectors math.vectors.simd
-prettyprint combinators.smart sequences hints classes.struct
-specialized-arrays ;
-SIMD: double
+math.vectors.simd.cords prettyprint combinators.smart sequences
+hints classes.struct specialized-arrays ;
 IN: benchmark.nbody-simd
 
 : solar-mass ( -- x ) 4 pi sq * ; inline
diff --git a/extra/benchmark/raytracer-simd/raytracer-simd.factor b/extra/benchmark/raytracer-simd/raytracer-simd.factor
index 5a3c232b5a..45407e5ad2 100644
--- a/extra/benchmark/raytracer-simd/raytracer-simd.factor
+++ b/extra/benchmark/raytracer-simd/raytracer-simd.factor
@@ -3,10 +3,9 @@
 
 USING: arrays accessors io io.files io.files.temp
 io.encodings.binary kernel math math.constants math.functions
-math.vectors math.vectors.simd math.parser make sequences
-sequences.private words hints classes.struct ;
+math.vectors math.vectors.simd math.vectors.simd.cords math.parser
+make sequences sequences.private words hints classes.struct ;
 QUALIFIED-WITH: alien.c-types c
-SIMD: c:double
 IN: benchmark.raytracer-simd
 
 ! parameters

From c916c7c85670f73ac68bb1d75e359d3c17862cee Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Wed, 25 Nov 2009 20:06:11 -0800
Subject: [PATCH 41/46] don't try to rewrite redundant test-vector-exprs

---
 .../cfg/value-numbering/rewrite/rewrite.factor         | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor b/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor
index 746fe0e5ea..0fa0314c3e 100755
--- a/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor
+++ b/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor
@@ -42,6 +42,14 @@ M: insn rewrite drop f ;
     ] [ drop f ] if ; inline
 
 : general-compare-expr? ( insn -- ? )
+    {
+        [ compare-expr? ]
+        [ compare-imm-expr? ]
+        [ compare-float-unordered-expr? ]
+        [ compare-float-ordered-expr? ]
+    } 1|| ;
+
+: general-or-vector-compare-expr? ( insn -- ? )
     {
         [ compare-expr? ]
         [ compare-imm-expr? ]
@@ -52,7 +60,7 @@ M: insn rewrite drop f ;
 
 : rewrite-boolean-comparison? ( insn -- ? )
     dup ##branch-t? [
-        src1>> vreg>expr general-compare-expr?
+        src1>> vreg>expr general-or-vector-compare-expr?
     ] [ drop f ] if ; inline
  
 : >compare-expr< ( expr -- in1 in2 cc )

From 4027002db7474eb37633c5e8eccb8eff85876ac0 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Wed, 25 Nov 2009 20:21:54 -0800
Subject: [PATCH 42/46] remove SIMD:s

---
 basis/io/mmap/mmap-docs.factor                               | 1 -
 basis/math/bitwise/bitwise-tests.factor                      | 1 -
 basis/math/vectors/simd/simd-docs.factor                     | 4 ----
 extra/benchmark/3d-matrix-vector/3d-matrix-vector.factor     | 1 -
 extra/benchmark/simd-1/simd-1.factor                         | 1 -
 extra/benchmark/terrain-generation/terrain-generation.factor | 1 -
 extra/gpu/demos/bunny/bunny.factor                           | 1 -
 extra/grid-meshes/grid-meshes.factor                         | 1 -
 extra/math/matrices/simd/simd-tests.factor                   | 1 -
 extra/math/matrices/simd/simd.factor                         | 1 -
 extra/terrain/terrain.factor                                 | 1 -
 11 files changed, 14 deletions(-)

diff --git a/basis/io/mmap/mmap-docs.factor b/basis/io/mmap/mmap-docs.factor
index 33ba6850a5..3eabfc4e7f 100644
--- a/basis/io/mmap/mmap-docs.factor
+++ b/basis/io/mmap/mmap-docs.factor
@@ -87,7 +87,6 @@ ARTICLE: "io.mmap.examples" "Memory-mapped file examples"
 "Normalize a file containing packed quadrupes of floats:"
 { $code
     "USING: kernel io.mmap math.vectors math.vectors.simd" "sequences specialized-arrays ;"
-    "SIMD: float"
     "SPECIALIZED-ARRAY: float-4"
     ""
     "\"mydata.dat\" float-4 ["
diff --git a/basis/math/bitwise/bitwise-tests.factor b/basis/math/bitwise/bitwise-tests.factor
index d10e4ccc87..a5919d3ec3 100644
--- a/basis/math/bitwise/bitwise-tests.factor
+++ b/basis/math/bitwise/bitwise-tests.factor
@@ -41,7 +41,6 @@ CONSTANT: b 2
 [ 0 ] [ BIN: 0 bit-count ] unit-test
 [ 1 ] [ BIN: 1 bit-count ] unit-test
 
-SIMD: uint
 SPECIALIZED-ARRAY: uint
 SPECIALIZED-ARRAY: uint-4
 
diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor
index 98a7b9273c..540838bdd5 100644
--- a/basis/math/vectors/simd/simd-docs.factor
+++ b/basis/math/vectors/simd/simd-docs.factor
@@ -74,7 +74,6 @@ $nl
 { $code
 """USING: compiler.tree.debugger math.vectors
 math.vectors.simd ;
-SIMD: double
 SYMBOLS: x y ;
 
 [
@@ -86,7 +85,6 @@ SYMBOLS: x y ;
 { $code
 """USING: compiler.tree.debugger kernel.private
 math.vectors math.vectors.simd ;
-SIMD: float
 IN: simd-demo
 
 : interpolate ( v a b -- w )
@@ -100,7 +98,6 @@ $nl
 { $code
 """USING: compiler.tree.debugger hints
 math.vectors math.vectors.simd ;
-SIMD: float
 IN: simd-demo
 
 : interpolate ( v a b -- w )
@@ -116,7 +113,6 @@ $nl
 "In the " { $snippet "interpolate" } " word, there is still a call to the " { $link <tuple-boa> } " primitive, because the return value at the end is being boxed on the heap. In the next example, no memory allocation occurs at all because the SIMD vectors are stored inside a struct class (see " { $link "classes.struct" } "); also note the use of inlining:"
 { $code
 """USING: compiler.tree.debugger math.vectors math.vectors.simd ;
-SIMD: float
 IN: simd-demo
 
 STRUCT: actor
diff --git a/extra/benchmark/3d-matrix-vector/3d-matrix-vector.factor b/extra/benchmark/3d-matrix-vector/3d-matrix-vector.factor
index 1b57bb902f..563bf4558c 100644
--- a/extra/benchmark/3d-matrix-vector/3d-matrix-vector.factor
+++ b/extra/benchmark/3d-matrix-vector/3d-matrix-vector.factor
@@ -1,7 +1,6 @@
 USING: kernel locals math math.matrices.simd math.order math.vectors
 math.vectors.simd prettyprint sequences typed ;
 QUALIFIED-WITH: alien.c-types c
-SIMD: c:float
 IN: benchmark.3d-matrix-vector
 
 : v2min ( xy -- xx )
diff --git a/extra/benchmark/simd-1/simd-1.factor b/extra/benchmark/simd-1/simd-1.factor
index ff0cb98a00..e20b82c3c4 100644
--- a/extra/benchmark/simd-1/simd-1.factor
+++ b/extra/benchmark/simd-1/simd-1.factor
@@ -3,7 +3,6 @@
 USING: kernel io math math.functions math.parser math.vectors
 math.vectors.simd sequences specialized-arrays ;
 QUALIFIED-WITH: alien.c-types c
-SIMD: c:float
 SPECIALIZED-ARRAY: float-4
 IN: benchmark.simd-1
 
diff --git a/extra/benchmark/terrain-generation/terrain-generation.factor b/extra/benchmark/terrain-generation/terrain-generation.factor
index b158dba5dd..41c1152cbd 100644
--- a/extra/benchmark/terrain-generation/terrain-generation.factor
+++ b/extra/benchmark/terrain-generation/terrain-generation.factor
@@ -1,7 +1,6 @@
 ! (c)Joe Groff bsd license
 USING: io kernel math.vectors.simd terrain.generation threads ;
 FROM: alien.c-types => float ;
-SIMD: float
 IN: benchmark.terrain-generation
 
 : terrain-generation-benchmark ( -- )
diff --git a/extra/gpu/demos/bunny/bunny.factor b/extra/gpu/demos/bunny/bunny.factor
index 09853263ce..ea91e226a8 100755
--- a/extra/gpu/demos/bunny/bunny.factor
+++ b/extra/gpu/demos/bunny/bunny.factor
@@ -11,7 +11,6 @@ specialized-vectors ;
 FROM: alien.c-types => float ;
 SPECIALIZED-ARRAY: float
 SPECIALIZED-VECTOR: uint
-SIMD: float
 IN: gpu.demos.bunny
 
 GLSL-SHADER-FILE: bunny-vertex-shader vertex-shader "bunny.v.glsl"
diff --git a/extra/grid-meshes/grid-meshes.factor b/extra/grid-meshes/grid-meshes.factor
index ebde0b2641..47f649868e 100644
--- a/extra/grid-meshes/grid-meshes.factor
+++ b/extra/grid-meshes/grid-meshes.factor
@@ -3,7 +3,6 @@ USING: accessors alien.data.map arrays destructors fry grouping
 kernel math math.ranges math.vectors.simd opengl opengl.gl sequences
 sequences.product specialized-arrays ;
 FROM: alien.c-types => float ;
-SIMD: float
 SPECIALIZED-ARRAY: float-4
 IN: grid-meshes
 
diff --git a/extra/math/matrices/simd/simd-tests.factor b/extra/math/matrices/simd/simd-tests.factor
index 25482c8e1e..b27abcae67 100644
--- a/extra/math/matrices/simd/simd-tests.factor
+++ b/extra/math/matrices/simd/simd-tests.factor
@@ -3,7 +3,6 @@ USING: classes.struct math.matrices.simd math.vectors.simd math
 literals math.constants math.functions specialized-arrays tools.test ;
 QUALIFIED-WITH: alien.c-types c
 FROM: math.matrices => m~ ;
-SIMD: c:float
 SPECIALIZED-ARRAY: float-4
 IN: math.matrices.simd.tests
 
diff --git a/extra/math/matrices/simd/simd.factor b/extra/math/matrices/simd/simd.factor
index 97290964eb..4e1fd0e96c 100644
--- a/extra/math/matrices/simd/simd.factor
+++ b/extra/math/matrices/simd/simd.factor
@@ -4,7 +4,6 @@ math math.combinatorics math.functions math.matrices.simd math.vectors
 math.vectors.simd sequences sequences.private specialized-arrays
 typed ;
 QUALIFIED-WITH: alien.c-types c
-SIMD: c:float
 SPECIALIZED-ARRAY: float-4
 IN: math.matrices.simd
 
diff --git a/extra/terrain/terrain.factor b/extra/terrain/terrain.factor
index f1da877c3e..55d54d3be1 100644
--- a/extra/terrain/terrain.factor
+++ b/extra/terrain/terrain.factor
@@ -11,7 +11,6 @@ math.matrices.simd noise ui.gestures combinators.short-circuit
 destructors grid-meshes math.vectors.simd ;
 QUALIFIED-WITH: alien.c-types c
 SPECIALIZED-ARRAY: c:float
-SIMD: c:float
 IN: terrain
 
 CONSTANT: FOV $[ 2.0 sqrt 1 + ]

From 7da80f65e736b9d3f54e491eb49c55acd289052d Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Wed, 25 Nov 2009 20:24:09 -0800
Subject: [PATCH 43/46] trick math.vectors.simd into making nicer quotations

---
 basis/math/vectors/simd/simd.factor | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor
index e89edd3de3..7213286e15 100644
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@@ -234,8 +234,8 @@ SYNTAX: A{ \ } [ >A ] parse-literal ;
 c:<c-type>
     byte-array >>class
     A >>boxed-class
-    [ A-rep alien-vector A boa ] >>getter
-    [ [ underlying>> ] 2dip A-rep set-alien-vector ] >>setter
+    { A-rep alien-vector A boa } >quotation >>getter
+    { [ underlying>> ] 2dip A-rep set-alien-vector } >quotation >>setter
     16 >>size
     16 >>align
     A-rep >>rep

From 0795c60b83bf657b4c103a68546961d865e4db64 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Thu, 26 Nov 2009 11:15:35 -0800
Subject: [PATCH 44/46] "norm" doesn't need to be generic, "norm-sq sqrt"
 always works

---
 basis/math/vectors/simd/simd.factor | 3 +--
 basis/math/vectors/vectors.factor   | 5 ++---
 basis/sequences/cords/cords.factor  | 3 +--
 3 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor
index 7213286e15..036ff22f78 100644
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@@ -214,8 +214,7 @@ M: A v-n A-with v- ; inline
 M: A v*n A-with v* ; inline
 M: A v/n A-with v/ ; inline
 M: A norm-sq dup v. assert-positive ; inline
-M: A norm      norm-sq sqrt ; inline
-M: A distance  v- norm ; inline
+M: A distance v- norm ; inline
 
 M: A >pprint-sequence ;
 M: A pprint* pprint-object ;
diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor
index c0b129e6d2..a69a99c64b 100644
--- a/basis/math/vectors/vectors.factor
+++ b/basis/math/vectors/vectors.factor
@@ -199,8 +199,7 @@ M: object v. [ conjugate * ] [ + ] 2map-reduce ;
 GENERIC: norm-sq ( v -- x )
 M: object norm-sq [ absq ] [ + ] map-reduce ;
 
-GENERIC: norm ( v -- x )
-M: object norm norm-sq sqrt ;
+: norm ( v -- x ) norm-sq sqrt ; inline
 
 : normalize ( u -- v ) dup norm v/n ; inline
 
@@ -240,7 +239,7 @@ PRIVATE>
 
 HINTS: M\ object vneg { array } ;
 HINTS: M\ object norm-sq { array } ;
-HINTS: M\ object norm { array } ;
+HINTS: norm { array } ;
 HINTS: M\ object distance { array array } ;
 
 HINTS: M\ object n*v { object array } ;
diff --git a/basis/sequences/cords/cords.factor b/basis/sequences/cords/cords.factor
index e59858677b..fca005fa6e 100644
--- a/basis/sequences/cords/cords.factor
+++ b/basis/sequences/cords/cords.factor
@@ -106,8 +106,7 @@ M: cord v-n '[ _ v-n ] cord-map ; inline
 M: cord v*n '[ _ v*n ] cord-map ; inline
 M: cord v/n '[ _ v/n ] cord-map ; inline
 
-M: cord norm-sq  dup v. ; inline
-M: cord norm     norm-sq sqrt ; inline
+M: cord norm-sq [ norm-sq ] cord-both + ; inline
 M: cord distance v- norm ; inline
 
 

From ac5d3d56525c25260faf3ff1e9ab6d83a68b4943 Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Thu, 26 Nov 2009 11:15:46 -0800
Subject: [PATCH 45/46] remove SIMDS:

---
 basis/math/vectors/vectors-docs.factor     | 1 -
 basis/random/sfmt/sfmt.factor              | 1 -
 extra/alien/data/map/map-tests.factor      | 1 -
 extra/noise/noise.factor                   | 1 -
 extra/terrain/generation/generation.factor | 1 -
 5 files changed, 5 deletions(-)

diff --git a/basis/math/vectors/vectors-docs.factor b/basis/math/vectors/vectors-docs.factor
index b831ac7dbe..6ef7f9ca50 100644
--- a/basis/math/vectors/vectors-docs.factor
+++ b/basis/math/vectors/vectors-docs.factor
@@ -436,7 +436,6 @@ HELP: vshuffle
     { $example
         "USING: alien.c-types combinators math.vectors math.vectors.simd"
         "namespaces prettyprint prettyprint.config ;"
-        "SIMDS: int uchar ;"
         "IN: scratchpad"
         ""
         ": endian-swap ( size -- vector )"
diff --git a/basis/random/sfmt/sfmt.factor b/basis/random/sfmt/sfmt.factor
index 55606217c9..146db91172 100644
--- a/basis/random/sfmt/sfmt.factor
+++ b/basis/random/sfmt/sfmt.factor
@@ -4,7 +4,6 @@ USING: accessors alien.c-types kernel locals math math.ranges
 math.bitwise math.vectors math.vectors.simd random
 sequences specialized-arrays sequences.private classes.struct
 combinators.short-circuit fry ;
-SIMDS: uchar uint ;
 SPECIALIZED-ARRAY: uint
 SPECIALIZED-ARRAY: uint-4
 IN: random.sfmt
diff --git a/extra/alien/data/map/map-tests.factor b/extra/alien/data/map/map-tests.factor
index 7a492ab0c5..b97a356e6e 100644
--- a/extra/alien/data/map/map-tests.factor
+++ b/extra/alien/data/map/map-tests.factor
@@ -3,7 +3,6 @@ USING: alien.data.map fry generalizations kernel locals math.vectors
 math.vectors.conversion math math.vectors.simd sequences
 specialized-arrays tools.test ;
 FROM: alien.c-types => uchar short int float ;
-SIMDS: float int short uchar ;
 SPECIALIZED-ARRAYS: int float float-4 uchar-16 ;
 IN: alien.data.map.tests
 
diff --git a/extra/noise/noise.factor b/extra/noise/noise.factor
index 91e040d35f..a27cc186a0 100644
--- a/extra/noise/noise.factor
+++ b/extra/noise/noise.factor
@@ -4,7 +4,6 @@ math.libm math.matrices.simd math.vectors math.vectors.conversion math.vectors.s
 memoize random random.mersenne-twister sequences sequences.private specialized-arrays
 typed ;
 QUALIFIED-WITH: alien.c-types c
-SIMDS: c:float c:int c:short c:ushort c:uchar ;
 SPECIALIZED-ARRAYS: c:float c:uchar float-4 uchar-16 ;
 IN: noise
 
diff --git a/extra/terrain/generation/generation.factor b/extra/terrain/generation/generation.factor
index 86f532bada..3ed4af3b1d 100644
--- a/extra/terrain/generation/generation.factor
+++ b/extra/terrain/generation/generation.factor
@@ -3,7 +3,6 @@ combinators.smart fry grouping images kernel math
 math.matrices.simd math.order math.vectors noise random
 sequences math.vectors.simd typed ;
 FROM: alien.c-types => float uchar ;
-SIMDS: float uchar ;
 IN: terrain.generation
 
 CONSTANT: terrain-segment-size { 512 512 }

From 66d0cafa94963c9a8464e4cdb9fba7f7f659a5dc Mon Sep 17 00:00:00 2001
From: Joe Groff <arcata@gmail.com>
Date: Thu, 26 Nov 2009 13:28:40 -0800
Subject: [PATCH 46/46] fix buggy simd intrinsics

---
 basis/alien/c-types/c-types.factor                   | 4 +++-
 basis/compiler/cfg/intrinsics/simd/simd.factor       | 6 +++---
 basis/compiler/tree/propagation/simd/simd.factor     | 5 +++--
 basis/math/vectors/simd/intrinsics/intrinsics.factor | 6 +++---
 basis/math/vectors/simd/simd-tests.factor            | 5 +++--
 5 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/basis/alien/c-types/c-types.factor b/basis/alien/c-types/c-types.factor
index 027fe046b6..0ee2373b41 100755
--- a/basis/alien/c-types/c-types.factor
+++ b/basis/alien/c-types/c-types.factor
@@ -553,4 +553,6 @@ M: double-2-rep rep-component-type drop double ;
         { [ dup { uchar ushort uint ulong ulonglong } member-eq? ] [ unsigned-interval ] }
     } cond ; foldable
 
-: c-type-clamp ( value c-type -- value' ) c-type-interval clamp ; inline
+: c-type-clamp ( value c-type -- value' )
+    dup { float double } member-eq?
+    [ drop ] [ c-type-interval clamp ] if ; inline
diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor
index 845902c2e6..a64c657556 100644
--- a/basis/compiler/cfg/intrinsics/simd/simd.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd.factor
@@ -247,7 +247,7 @@ IN: compiler.cfg.intrinsics.simd
     ] [ ^^vector>scalar ] bi ;
 
 : ^sum-vector ( src rep -- dst )
-    signed-rep {
+    {
         { float-vector-rep [ ^(sum-vector) ] }
         { int-vector-rep [| src rep |
             src rep ^unpack-vector-head :> head
@@ -290,8 +290,8 @@ IN: compiler.cfg.intrinsics.simd
 
 : emit-simd-vneg ( node -- )
     {
-        { float-vector-rep [ [ ^load-neg-zero-vector ] [ ^^sub-vector ] bi ] }
-        { int-vector-rep   [ [ ^^zero-vector         ] [ ^^sub-vector ] bi ] }
+        { float-vector-rep [ [ ^load-neg-zero-vector swap ] [ ^^sub-vector ] bi ] }
+        { int-vector-rep   [ [ ^^zero-vector         swap ] [ ^^sub-vector ] bi ] }
     } emit-v-vector-op ;
 
 : emit-simd-v+- ( node -- )
diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor
index 6002b15c1c..9aab173d7c 100644
--- a/basis/compiler/tree/propagation/simd/simd.factor
+++ b/basis/compiler/tree/propagation/simd/simd.factor
@@ -3,7 +3,7 @@
 USING: accessors assocs byte-arrays combinators compiler.cfg.builder
 continuations fry sequences compiler.tree.propagation.info
 cpu.architecture kernel words make math math.intervals
-math.vectors.simd.intrinsics ;
+math.vectors.simd.intrinsics namespaces ;
 IN: compiler.tree.propagation.simd
 
 CONSTANT: vector>vector-intrinsics
@@ -112,9 +112,10 @@ vector>vector-intrinsics [ { byte-array } "default-output-classes" set-word-prop
 : inline-unless-intrinsic ( word -- )
     dup '[
         _ swap over "intrinsic" word-prop
+        "always-inline-simd-intrinsics" get not swap and
         ! word node intrinsic
         [ try-intrinsic [ drop f ] [ def>> ] if ]
-        [ def>> ] if*
+        [ drop def>> ] if*
     ]
     "custom-inlining" set-word-prop ;
 
diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor
index 30db6d5e13..eb0e7b1dc8 100644
--- a/basis/math/vectors/simd/intrinsics/intrinsics.factor
+++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor
@@ -126,7 +126,7 @@ PRIVATE>
 :: (simd-v+-)              ( a b rep -- c ) 
     a b rep 2>rep-array :> ( a' b' )
     rep <rep-array> :> c'
-    0  rep length 1 -  2 <range> [| n |
+    0  rep rep-length 1 -  2 <range> [| n |
         n     a' nth-unsafe n     b' nth-unsafe -
         n     c' set-nth-unsafe
 
@@ -151,12 +151,12 @@ PRIVATE>
 : (simd-sum)               ( a   rep -- n ) [ + ] components-reduce ;
 : (simd-vabs)              ( a   rep -- c ) [ abs ] components-map ;
 : (simd-vbitand)           ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
-: (simd-vbitandn)          ( a b rep -- c ) [ [ not ] dip bitand ] bitwise-components-2map ;
+: (simd-vbitandn)          ( a b rep -- c ) [ [ bitnot ] dip bitand ] bitwise-components-2map ;
 : (simd-vbitor)            ( a b rep -- c ) [ bitor ] bitwise-components-2map ;
 : (simd-vbitxor)           ( a b rep -- c ) [ bitxor ] bitwise-components-2map ;
 : (simd-vbitnot)           ( a   rep -- c ) [ bitnot ] bitwise-components-map ;
 : (simd-vand)              ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
-: (simd-vandn)             ( a b rep -- c ) [ [ not ] dip bitand ] bitwise-components-2map ;
+: (simd-vandn)             ( a b rep -- c ) [ [ bitnot ] dip bitand ] bitwise-components-2map ;
 : (simd-vor)               ( a b rep -- c ) [ bitor ] bitwise-components-2map ;
 : (simd-vxor)              ( a b rep -- c ) [ bitxor ] bitwise-components-2map ;
 : (simd-vnot)              ( a   rep -- c ) [ bitnot ] bitwise-components-map ;
diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor
index b590589345..98ed68a906 100644
--- a/basis/math/vectors/simd/simd-tests.factor
+++ b/basis/math/vectors/simd/simd-tests.factor
@@ -120,7 +120,7 @@ CONSTANT: vector-words
     simd-classes [ [ name>> "-boa" append ] [ vocabulary>> ] bi lookup ] map ;
 
 : check-optimizer ( seq quot eq-quot -- failures )
-    '[
+    dup '[
         @
         [ dup [ class ] { } map-as ] dip '[ _ declare @ ]
         {
@@ -128,8 +128,9 @@ CONSTANT: vector-words
             [ "print-checks" get [ [ . ] bi@ ] [ 2drop ] if ]
             [ [ [ call ] dip call ] call( quot quot -- result ) ]
             [ [ [ call ] dip compile-call ] call( quot quot -- result ) ]
+            [ [ t "always-inline-simd-intrinsics" [ [ call ] dip compile-call ] with-variable ] call( quot quot -- result ) ]
         } 2cleave
-        @ not
+        [ drop @ ] [ nip @ ] 3bi and not
     ] filter ; inline
 
 "== Checking -new constructors" print