More integer SIMD work

- move generated vocab support from specialized-arrays to vocabs.generated - add fuzz testing to math.vectors.simd - add alien type support for integer SIMD vectors - SIMD: parsing word generates a SIMD type, instead of pre-generating them all in math.vectors.simd
2009-09-20 16:48:17 -05:00 · 2009-09-20 16:48:17 -05:00 · dfb43bd2ca
parent ea2bcd69c7
commit dfb43bd2ca
15 changed files with 346 additions and 501 deletions
--- a/basis/compiler/tree/propagation/simd/simd.factor
+++ b/basis/compiler/tree/propagation/simd/simd.factor
@ -24,6 +24,7 @@ IN: compiler.tree.propagation.simd
        literal>> scalar-rep-of {
            { float-rep [ float ] }
            { double-rep [ float ] }
            { int-rep [ integer ] }
        } case
    ] [ drop real ] if
    <class-info>
--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@ -417,8 +417,7 @@ M: x86 %horizontal-add-vector ( dst src rep -- )
 M: x86 %horizontal-add-vector-reps
    {
-        { sse? { float-4-rep } }
+        { sse3? { float-4-rep double-2-rep } }
        { sse2? { double-2-rep short-8-rep uchar-16-rep } }
    } available-reps ;
 M: x86 %unbox-alien ( dst src -- )
--- a/basis/math/vectors/simd/alien/alien-tests.factor
+++ b/basis/math/vectors/simd/alien/alien-tests.factor
@ -1,70 +0,0 @@
 USING: cpu.architecture math.vectors.simd
 math.vectors.simd.intrinsics accessors math.vectors.simd.alien
 kernel classes.struct tools.test compiler sequences byte-arrays
 alien math kernel.private specialized-arrays combinators ;
 SPECIALIZED-ARRAY: float
 IN: math.vectors.simd.alien.tests
 ! Vector alien intrinsics
 [ float-4{ 1 2 3 4 } ] [
    [
        float-4{ 1 2 3 4 }
        underlying>> 0 float-4-rep alien-vector
    ] compile-call float-4 boa
 ] unit-test
 [ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [
    16 [ 1 ] B{ } replicate-as 16 <byte-array>
    [
        0 [
            { byte-array c-ptr fixnum } declare
            float-4-rep set-alien-vector
        ] compile-call
    ] keep
 ] unit-test
 [ float-array{ 1 2 3 4 } ] [
    [
        float-array{ 1 2 3 4 } underlying>>
        float-array{ 4 3 2 1 } clone
        [ underlying>> 0 float-4-rep set-alien-vector ] keep
    ] compile-call
 ] unit-test
 STRUCT: simd-struct
 { x float-4 }
 { y double-2 }
 { z double-4 }
 { w float-8 } ;
 [ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
 [
    float-4{ 1 2 3 4 }
    double-2{ 2 1 }
    double-4{ 4 3 2 1 }
    float-8{ 1 2 3 4 5 6 7 8 }
 ] [
    simd-struct <struct>
    float-4{ 1 2 3 4 } >>x
    double-2{ 2 1 } >>y
    double-4{ 4 3 2 1 } >>z
    float-8{ 1 2 3 4 5 6 7 8 } >>w
    { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
 ] unit-test
 [
    float-4{ 1 2 3 4 }
    double-2{ 2 1 }
    double-4{ 4 3 2 1 }
    float-8{ 1 2 3 4 5 6 7 8 }
 ] [
    [
        simd-struct <struct>
        float-4{ 1 2 3 4 } >>x
        double-2{ 2 1 } >>y
        double-4{ 4 3 2 1 } >>z
        float-8{ 1 2 3 4 5 6 7 8 } >>w
        { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
    ] compile-call
 ] unit-test
--- a/basis/math/vectors/simd/alien/alien.factor
+++ b/basis/math/vectors/simd/alien/alien.factor
@ -1,42 +0,0 @@
 ! Copyright (C) 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: alien accessors alien.c-types byte-arrays compiler.units
 cpu.architecture locals kernel math math.vectors.simd
 math.vectors.simd.intrinsics ;
 IN: math.vectors.simd.alien
 :: define-simd-128-type ( class rep -- )
    <c-type>
        byte-array >>class
        class >>boxed-class
        [ rep alien-vector class boa ] >>getter
        [ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
        16 >>size
        8 >>align
        rep >>rep
    class name>> typedef ;
 :: define-simd-256-type ( class rep -- )
    <c-type>
        class >>class
        class >>boxed-class
        [
            [ rep alien-vector ]
            [ 16 + >fixnum rep alien-vector ] 2bi
            class boa
        ] >>getter
        [
            [ [ underlying1>> ] 2dip rep set-alien-vector ]
            [ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ]
            3bi
        ] >>setter
        32 >>size
        8 >>align
        rep >>rep
    class name>> typedef ;
 [
    float-4 float-4-rep define-simd-128-type
    double-2 double-2-rep define-simd-128-type
    float-8 float-4-rep define-simd-256-type
    double-4 double-2-rep define-simd-256-type
 ] with-compilation-unit
--- a/basis/math/vectors/simd/functor/functor.factor
+++ b/basis/math/vectors/simd/functor/functor.factor
@ -5,6 +5,7 @@ effects fry functors generalizations kernel literals locals
 math math.functions math.vectors math.vectors.simd.intrinsics
 math.vectors.specialization parser prettyprint.custom sequences
 sequences.private strings words definitions macros cpu.architecture ;
 QUALIFIED-WITH: math m
 IN: math.vectors.simd.functor
 ERROR: bad-length got expected ;
@ -51,11 +52,10 @@ MACRO: simd-boa ( rep class -- simd-array )
    '[ nip _ swap supported-simd-op? ] assoc-filter
    '[ drop _ key? ] assoc-filter ;
-:: high-level-ops ( ctor -- assoc )
+:: high-level-ops ( ctor elt-class -- assoc )
    ! Some SIMD operations are defined in terms of others.
    {
        { vneg [ [ dup v- ] keep v- ] }
        { v. [ v* sum ] }
        { n+v [ [ ctor execute ] dip v+ ] }
        { v+n [ ctor execute v+ ] }
        { n-v [ [ ctor execute ] dip v- ] }
@ -67,16 +67,36 @@ MACRO: simd-boa ( rep class -- simd-array )
        { norm-sq [ dup v. assert-positive ] }
        { norm [ norm-sq sqrt ] }
        { normalize [ dup norm v/n ] }
-        { distance [ v- norm ] }
+    }
-    } ;
+    ! To compute dot product and distance with integer vectors, we
    ! have to do things less efficiently, with integer overflow checks,
    ! in the general case.
    elt-class m:float = [
        {
            { distance [ v- norm ] }
            { v. [ v* sum ] }
        } append
    ] when ;
 :: simd-vector-words ( class ctor rep assoc -- )
    rep rep-component-type c-type-boxed-class :> elt-class
    class
-    rep rep-component-type c-type-boxed-class
+    elt-class
    assoc rep supported-simd-ops
-    ctor high-level-ops assoc-union
+    ctor elt-class high-level-ops assoc-union
    specialize-vector-words ;
 :: define-simd-128-type ( class rep -- )
    <c-type>
        byte-array >>class
        class >>boxed-class
        [ rep alien-vector class boa ] >>getter
        [ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
        16 >>size
        8 >>align
        rep >>rep
    class typedef ;
 FUNCTOR: define-simd-128 ( T -- )
 N            [ 16 T heap-size /i ]
@ -159,11 +179,35 @@ INSTANCE: A sequence
    { sum [ [ (simd-sum) ] \ A-v->n-op execute ] }
 } simd-vector-words
 \ A \ A-rep define-simd-128-type
 PRIVATE>
 ;FUNCTOR
 ! Synthesize 256-bit vectors from a pair of 128-bit vectors
 SLOT: underlying1
 SLOT: underlying2
 :: define-simd-256-type ( class rep -- )
    <c-type>
        class >>class
        class >>boxed-class
        [
            [ rep alien-vector ]
            [ 16 + >fixnum rep alien-vector ] 2bi
            class boa
        ] >>getter
        [
            [ [ underlying1>> ] 2dip rep set-alien-vector ]
            [ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ]
            3bi
        ] >>setter
        32 >>size
        8 >>align
        rep >>rep
    class typedef ;
 FUNCTOR: define-simd-256 ( T -- )
 N            [ 32 T heap-size /i ]
@ -235,7 +279,7 @@ M: A pprint* pprint-object ;
 : A-boa ( ... -- simd-array )
    [ A/2-boa ] N/2 ndip A/2-boa [ underlying>> ] bi@
-    \ A boa ;
+    \ A boa ; inline
 \ A-rep 2 boa-effect \ A-boa set-stack-effect
@ -260,4 +304,6 @@ INSTANCE: A sequence
    { sum [ [ (simd-v+) ] [ (simd-sum) ] \ A-v->n-op execute ] }
 } simd-vector-words
 \ A \ A-rep define-simd-256-type
 ;FUNCTOR
--- a/basis/math/vectors/simd/simd-docs.factor
+++ b/basis/math/vectors/simd/simd-docs.factor
@ -17,23 +17,45 @@ $nl
 "There should never be any reason to use " { $link "math.vectors.simd.intrinsics" } " directly, but they too have a straightforward, but lower-level, interface." ;
 ARTICLE: "math.vectors.simd.support" "Supported SIMD instruction sets and operations"
-"At present, the SIMD support makes use of SSE2 and a few SSE3 instructions on x86 CPUs."
+"At present, the SIMD support makes use of SSE, SSE2 and a few SSE3 instructions on x86 CPUs."
 $nl
-"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } ". If SSE3 is not available, software fallbacks are used for " { $link sum } " and related words, decreasing performance."
+"SSE1 only supports single-precision SIMD (" { $snippet "float-4" } " and " { $snippet "float-8" } ")."
 $nl
-"On PowerPC, or older x86 chips without SSE2, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance."
+"SSE2 introduces double-precision and integer SIMD."
 $nl
 "SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } ". If SSE3 is not available, software fallbacks are used for " { $link sum } " and related words."
 $nl
 "On PowerPC, or older x86 chips without SSE, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance."
 $nl
 "The primities in the " { $vocab-link "math.vectors.simd.intrinsics" } " vocabulary do not have software fallbacks, but they should not be called directly in any case." ;
 ARTICLE: "math.vectors.simd.types" "SIMD vector types"
-"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type such as " { $snippet "float" } " or " { $snippet "double" } ", and " { $snippet "count" } " is a vector dimension, such as 2, 4, or 8."
+"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type and " { $snippet "count" } " is a vector dimension."
 $nl
-"The following vector types are defined:"
+"To use a SIMD vector type, a parsing word is used to generate the relevant code and bring it into the vocabulary search path; this is the same idea as with " { $link "specialized-arrays" } ":"
-{ $subsection float-4 }
+{ $subsection POSTPONE: SIMD: }
-{ $subsection double-2 }
+"The following vector types are supported:"
-{ $subsection float-8 }
+{ $code
-{ $subsection double-4 }
+    "char-16"
-"For each vector type, several words are defined:"
+    "uchar-16"
    "char-32"
    "uchar-32"
    "short-8"
    "ushort-8"
    "short-16"
    "ushort-16"
    "int-4"
    "uint-4"
    "int-8"
    "uint-8"
    "float-4"
    "float-8"
    "double-2"
    "double-4"
 } ;
 ARTICLE: "math.vectors.simd.words" "SIMD vector words"
 "For each SIMD vector type, several words are defined:"
 { $table
    { "Word" "Stack effect" "Description" }
    { { $snippet "type-with" } { $snippet "( x -- simd-array )" } "creates a new instance where all components are set to a single scalar" }
@ -41,8 +63,6 @@ $nl
    { { $snippet ">type" } { $snippet "( seq -- simd-array )" } "creates a new instance initialized with the elements of an existing sequence, which must have the correct length" }
    { { $snippet "type{" } { $snippet "type{ elements... }" } "parsing word defining literal syntax for an SIMD vector; the correct number of elements must be given" }
 }
 "The " { $link float-4 } " and " { $link double-2 } " types correspond to 128-bit vector registers. The " { $link float-8 } " and " { $link double-4 } " types are not directly supported in hardware, and instead unbox to a pair of 128-bit vector registers."
 $nl
 "To actually perform vector arithmetic on SIMD vectors, use " { $link "math-vectors" } " words."
 { $see-also "c-types-specs" } ;
@ -68,6 +88,8 @@ SYMBOLS: x y ;
 { $code
 <" USING: compiler.tree.debugger kernel.private
 math.vectors math.vectors.simd ;
 SIMD: float-4
 IN: simd-demo
 : interpolate ( v a b -- w )
    { float-4 float-4 float-4 } declare
@ -80,6 +102,8 @@ $nl
 { $code
 <" USING: compiler.tree.debugger hints
 math.vectors math.vectors.simd ;
 SIMD: float-4
 IN: simd-demo
 : interpolate ( v a b -- w )
    [ v* ] [ [ 1.0 ] dip n-v v* ] bi-curry* bi v+ ;
@ -94,6 +118,7 @@ $nl
 "In the " { $snippet "interpolate" } " word, there is still a call to the " { $link <tuple-boa> } " primitive, because the return value at the end is being boxed on the heap. In the next example, no memory allocation occurs at all because the SIMD vectors are stored inside a struct class (see " { $link "classes.struct" } "); also note the use of inlining:"
 { $code
 <" USING: compiler.tree.debugger math.vectors math.vectors.simd ;
 SIMD: float-4
 IN: simd-demo
 STRUCT: actor
@ -151,21 +176,23 @@ $nl
 "For the most part, the above primitives correspond directly to vector arithmetic words. They take a representation parameter, which is one of the singleton members of the " { $link vector-rep } " union in the " { $vocab-link "cpu.architecture" } " vocabulary." ;
 ARTICLE: "math.vectors.simd.alien" "SIMD data in struct classes"
-"Struct classes may contain fields which store SIMD data; use one of the following C type names:"
+"Struct classes may contain fields which store SIMD data; for each SIMD vector type listed in " { $snippet "math.vectors.simd.types" } " there is a C type with the same name."
-{ $code
+$nl
-<" float-4
+"Only SIMD struct fields are allowed at the moment; passing SIMD data as function parameters is not yet supported." ;
 double-2
 float-8
 double-4"> }
 "Passing SIMD data as function parameters is not yet supported." ;
 ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
 "The " { $vocab-link "math.vectors.simd" } " vocabulary extends the " { $vocab-link "math.vectors" } " vocabulary to support efficient vector arithmetic on small, fixed-size vectors."
 { $subsection "math.vectors.simd.intro" }
 { $subsection "math.vectors.simd.types" }
 { $subsection "math.vectors.simd.words" }
 { $subsection "math.vectors.simd.support" }
 { $subsection "math.vectors.simd.efficiency" }
 { $subsection "math.vectors.simd.alien" }
 { $subsection "math.vectors.simd.intrinsics" } ;
 HELP: SIMD:
 { $syntax "SIMD: type-length" }
 { $values { "type" "a scalar C type" } { "length" "a vector dimension" } }
 { $description "Brings a SIMD array for holding " { $snippet "length" } " values of " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
 ABOUT: "math.vectors.simd"
--- a/basis/math/vectors/simd/simd-tests.factor
+++ b/basis/math/vectors/simd/simd-tests.factor
@ -1,8 +1,30 @@
 USING: accessors arrays classes compiler compiler.tree.debugger
 effects fry io kernel kernel.private math math.functions
 math.private math.vectors math.vectors.simd
 math.vectors.simd.private prettyprint random sequences system
 tools.test vocabs assocs compiler.cfg.debugger words
 locals math.vectors.specialization combinators cpu.architecture
 math.vectors.simd.intrinsics namespaces byte-arrays alien
 specialized-arrays classes.struct ;
 FROM: alien.c-types => c-type-boxed-class ;
 SPECIALIZED-ARRAY: float
 SIMD: char-16
 SIMD: uchar-16
 SIMD: char-32
 SIMD: uchar-32
 SIMD: short-8
 SIMD: ushort-8
 SIMD: short-16
 SIMD: ushort-16
 SIMD: int-4
 SIMD: uint-4
 SIMD: int-8
 SIMD: uint-8
 SIMD: float-4
 SIMD: float-8
 SIMD: double-2
 SIMD: double-4
 IN: math.vectors.simd.tests
 USING: math math.vectors.simd math.vectors.simd.private
 math.vectors math.functions math.private kernel.private compiler
 sequences tools.test compiler.tree.debugger accessors kernel
 system ;
 [ float-4{ 0 0 0 0 } ] [ float-4 new ] unit-test
@ -12,344 +34,6 @@ system ;
 [ V{ float } ] [ [ { float-4 } declare norm ] final-classes ] unit-test
 [ float-4{ 12 12 12 12 } ] [
    12 [ float-4-with ] compile-call
 ] unit-test
 [ float-4{ 1 2 3 4 } ] [
    1 2 3 4 [ float-4-boa ] compile-call
 ] unit-test
 [ float-4{ 11 22 33 44 } ] [
    float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
    [ { float-4 float-4 } declare v+ ] compile-call
 ] unit-test
 [ float-4{ -9 -18 -27 -36 } ] [
    float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
    [ { float-4 float-4 } declare v- ] compile-call
 ] unit-test
 [ float-4{ 10 40 90 160 } ] [
    float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
    [ { float-4 float-4 } declare v* ] compile-call
 ] unit-test
 [ float-4{ 10 100 1000 10000 } ] [
    float-4{ 100 2000 30000 400000 } float-4{ 10 20 30 40 }
    [ { float-4 float-4 } declare v/ ] compile-call
 ] unit-test
 [ float-4{ -10 -20 -30 -40 } ] [
    float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 }
    [ { float-4 float-4 } declare vmin ] compile-call
 ] unit-test
 [ float-4{ 10 20 30 40 } ] [
    float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 }
    [ { float-4 float-4 } declare vmax ] compile-call
 ] unit-test
 [ 10.0 ] [
    float-4{ 1 2 3 4 }
    [ { float-4 } declare sum ] compile-call
 ] unit-test
 [ 13.0 ] [
    float-4{ 1 2 3 4 }
    [ { float-4 } declare sum 3.0 + ] compile-call
 ] unit-test
 [ 8.0 ] [
    float-4{ 1 2 3 4 } float-4{ 2 0 2 0 }
    [ { float-4 float-4 } declare v. ] compile-call
 ] unit-test
 [ float-4{ 5 10 15 20 } ] [
    5.0 float-4{ 1 2 3 4 }
    [ { float float-4 } declare n*v ] compile-call
 ] unit-test
 [ float-4{ 5 10 15 20 } ] [
    float-4{ 1 2 3 4 } 5.0
    [ { float float-4 } declare v*n ] compile-call
 ] unit-test
 [ float-4{ 10 5 2 5 } ] [
    10.0 float-4{ 1 2 5 2 }
    [ { float float-4 } declare n/v ] compile-call
 ] unit-test
 [ float-4{ 0.5 1 1.5 2 } ] [
    float-4{ 1 2 3 4 } 2
    [ { float float-4 } declare v/n ] compile-call
 ] unit-test
 [ float-4{ 1 0 0 0 } ] [
    float-4{ 10 0 0 0 }
    [ { float-4 } declare normalize ] compile-call
 ] unit-test
 [ 30.0 ] [
    float-4{ 1 2 3 4 }
    [ { float-4 } declare norm-sq ] compile-call
 ] unit-test
 [ t ] [
    float-4{ 1 0 0 0 }
    float-4{ 0 1 0 0 }
    [ { float-4 float-4 } declare distance ] compile-call
    2 sqrt 1.0e-6 ~
 ] unit-test
 [ double-2{ 12 12 } ] [
    12 [ double-2-with ] compile-call
 ] unit-test
 [ double-2{ 1 2 } ] [
    1 2 [ double-2-boa ] compile-call
 ] unit-test
 [ double-2{ 11 22 } ] [
    double-2{ 1 2 } double-2{ 10 20 }
    [ { double-2 double-2 } declare v+ ] compile-call
 ] unit-test
 [ double-2{ -9 -18 } ] [
    double-2{ 1 2 } double-2{ 10 20 }
    [ { double-2 double-2 } declare v- ] compile-call
 ] unit-test
 [ double-2{ 10 40 } ] [
    double-2{ 1 2 } double-2{ 10 20 }
    [ { double-2 double-2 } declare v* ] compile-call
 ] unit-test
 [ double-2{ 10 100 } ] [
    double-2{ 100 2000 } double-2{ 10 20 }
    [ { double-2 double-2 } declare v/ ] compile-call
 ] unit-test
 [ double-2{ -10 -20 } ] [
    double-2{ -10 20 } double-2{ 10 -20 }
    [ { double-2 double-2 } declare vmin ] compile-call
 ] unit-test
 [ double-2{ 10 20 } ] [
    double-2{ -10 20 } double-2{ 10 -20 }
    [ { double-2 double-2 } declare vmax ] compile-call
 ] unit-test
 [ 3.0 ] [
    double-2{ 1 2 }
    [ { double-2 } declare sum ] compile-call
 ] unit-test
 [ 7.0 ] [
    double-2{ 1 2 }
    [ { double-2 } declare sum 4.0 + ] compile-call
 ] unit-test
 [ 16.0 ] [
    double-2{ 1 2 } double-2{ 2 7 }
    [ { double-2 double-2 } declare v. ] compile-call
 ] unit-test
 [ double-2{ 5 10 } ] [
    5.0 double-2{ 1 2 }
    [ { float double-2 } declare n*v ] compile-call
 ] unit-test
 [ double-2{ 5 10 } ] [
    double-2{ 1 2 } 5.0
    [ { float double-2 } declare v*n ] compile-call
 ] unit-test
 [ double-2{ 10 5 } ] [
    10.0 double-2{ 1 2 }
    [ { float double-2 } declare n/v ] compile-call
 ] unit-test
 [ double-2{ 0.5 1 } ] [
    double-2{ 1 2 } 2
    [ { float double-2 } declare v/n ] compile-call
 ] unit-test
 [ double-2{ 0 0 } ] [ double-2 new ] unit-test
 [ double-2{ 1 0 } ] [
    double-2{ 10 0 }
    [ { double-2 } declare normalize ] compile-call
 ] unit-test
 [ 5.0 ] [
    double-2{ 1 2 }
    [ { double-2 } declare norm-sq ] compile-call
 ] unit-test
 [ t ] [
    double-2{ 1 0 }
    double-2{ 0 1 }
    [ { double-2 double-2 } declare distance ] compile-call
    2 sqrt 1.0e-6 ~
 ] unit-test
 [ double-4{ 0 0 0 0 } ] [ double-4 new ] unit-test
 [ double-4{ 1 2 3 4 } ] [
    1 2 3 4 double-4-boa
 ] unit-test
 [ double-4{ 1 1 1 1 } ] [
    1 double-4-with
 ] unit-test
 [ double-4{ 0 1 2 3 } ] [
    1 double-4-with [ * ] map-index
 ] unit-test
 [ V{ float } ] [ [ { double-4 } declare norm-sq ] final-classes ] unit-test
 [ V{ float } ] [ [ { double-4 } declare norm ] final-classes ] unit-test
 [ double-4{ 12 12 12 12 } ] [
    12 [ double-4-with ] compile-call
 ] unit-test
 [ double-4{ 1 2 3 4 } ] [
    1 2 3 4 [ double-4-boa ] compile-call
 ] unit-test
 [ double-4{ 11 22 33 44 } ] [
    double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
    [ { double-4 double-4 } declare v+ ] compile-call
 ] unit-test
 [ double-4{ -9 -18 -27 -36 } ] [
    double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
    [ { double-4 double-4 } declare v- ] compile-call
 ] unit-test
 [ double-4{ 10 40 90 160 } ] [
    double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
    [ { double-4 double-4 } declare v* ] compile-call
 ] unit-test
 [ double-4{ 10 100 1000 10000 } ] [
    double-4{ 100 2000 30000 400000 } double-4{ 10 20 30 40 }
    [ { double-4 double-4 } declare v/ ] compile-call
 ] unit-test
 [ double-4{ -10 -20 -30 -40 } ] [
    double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 }
    [ { double-4 double-4 } declare vmin ] compile-call
 ] unit-test
 [ double-4{ 10 20 30 40 } ] [
    double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 }
    [ { double-4 double-4 } declare vmax ] compile-call
 ] unit-test
 [ 10.0 ] [
    double-4{ 1 2 3 4 }
    [ { double-4 } declare sum ] compile-call
 ] unit-test
 [ 13.0 ] [
    double-4{ 1 2 3 4 }
    [ { double-4 } declare sum 3.0 + ] compile-call
 ] unit-test
 [ 8.0 ] [
    double-4{ 1 2 3 4 } double-4{ 2 0 2 0 }
    [ { double-4 double-4 } declare v. ] compile-call
 ] unit-test
 [ double-4{ 5 10 15 20 } ] [
    5.0 double-4{ 1 2 3 4 }
    [ { float double-4 } declare n*v ] compile-call
 ] unit-test
 [ double-4{ 5 10 15 20 } ] [
    double-4{ 1 2 3 4 } 5.0
    [ { float double-4 } declare v*n ] compile-call
 ] unit-test
 [ double-4{ 10 5 2 5 } ] [
    10.0 double-4{ 1 2 5 2 }
    [ { float double-4 } declare n/v ] compile-call
 ] unit-test
 [ double-4{ 0.5 1 1.5 2 } ] [
    double-4{ 1 2 3 4 } 2
    [ { float double-4 } declare v/n ] compile-call
 ] unit-test
 [ double-4{ 1 0 0 0 } ] [
    double-4{ 10 0 0 0 }
    [ { double-4 } declare normalize ] compile-call
 ] unit-test
 [ 30.0 ] [
    double-4{ 1 2 3 4 }
    [ { double-4 } declare norm-sq ] compile-call
 ] unit-test
 [ t ] [
    double-4{ 1 0 0 0 }
    double-4{ 0 1 0 0 }
    [ { double-4 double-4 } declare distance ] compile-call
    2 sqrt 1.0e-6 ~
 ] unit-test
 [ float-8{ 0 0 0 0 0 0 0 0 } ] [ float-8 new ] unit-test
 [ float-8{ 0 0 0 0 0 0 0 0 } ] [ [ float-8 new ] compile-call ] unit-test
 [ float-8{ 1 1 1 1 1 1 1 1 } ] [ 1 float-8-with ] unit-test
 [ float-8{ 1 1 1 1 1 1 1 1 } ] [ [ 1 float-8-with ] compile-call ] unit-test
 [ float-8{ 1 2 3 4 5 6 7 8 } ] [ 1 2 3 4 5 6 7 8 float-8-boa ] unit-test
 [ float-8{ 1 2 3 4 5 6 7 8 } ] [ [ 1 2 3 4 5 6 7 8 float-8-boa ] compile-call ] unit-test
 [ float-8{ 3 6 9 12 15 18 21 24 } ] [
    float-8{ 1 2 3 4 5 6 7 8 }
    float-8{ 2 4 6 8 10 12 14 16 }
    [ { float-8 float-8 } declare v+ ] compile-call
 ] unit-test
 [ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
    float-8{ 1 2 3 4 5 6 7 8 }
    float-8{ 2 4 6 8 10 12 14 16 }
    [ { float-8 float-8 } declare v- ] compile-call
 ] unit-test
 [ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
    -0.5
    float-8{ 2 4 6 8 10 12 14 16 }
    [ { float float-8 } declare n*v ] compile-call
 ] unit-test
 [ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
    float-8{ 2 4 6 8 10 12 14 16 }
    -0.5
    [ { float-8 float } declare v*n ] compile-call
 ] unit-test
 [ float-8{ 256 128 64 32 16 8 4 2 } ] [
    256.0
    float-8{ 1 2 4 8 16 32 64 128 }
    [ { float float-8 } declare n/v ] compile-call
 ] unit-test
 [ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
    float-8{ 2 4 6 8 10 12 14 16 }
    -2.0
    [ { float-8 float } declare v/n ] compile-call
 ] unit-test
 ! Test puns; only on x86
 cpu x86? [
    [ double-2{ 4 1024 } ] [
@ -362,3 +46,179 @@ cpu x86? [
        [ { double-2 double-2 } declare v+ underlying>> 3.0 float* ] compile-call
    ] unit-test
 ] when
 ! Fuzz testing
 CONSTANT: simd-classes
    {
        char-16
        uchar-16
        char-32
        uchar-32
        short-8
        ushort-8
        short-16
        ushort-16
        int-4
        uint-4
        int-8
        uint-8
        float-4
        float-8
        double-2
        double-4
    }
 : with-ctors ( -- seq )
    simd-classes [ [ name>> "-with" append ] [ vocabulary>> ] bi lookup ] map ;
 : boa-ctors ( -- seq )
    simd-classes [ [ name>> "-boa" append ] [ vocabulary>> ] bi lookup ] map ;
 : check-optimizer ( seq inputs quot -- )
    [
        [ "print-mr" get [ nip test-mr mr. ] [ 2drop ] if ]
        [ [ call ] dip call ]
        [ [ call ] dip compile-call ] 2tri = not
    ] compose filter ; inline
 "== Checking -new constructors" print
 [ { } ] [
    simd-classes [ [ [ ] ] dip '[ _ new ] ] check-optimizer
 ] unit-test
 [ { } ] [
    simd-classes [ '[ _ new ] compile-call [ zero? ] all? not ] filter
 ] unit-test
 "== Checking -with constructors" print
 [ { } ] [
    with-ctors [
        [ 1000 random '[ _ ] ] dip '[ { fixnum } declare _ execute ]
    ] check-optimizer
 ] unit-test
 "== Checking -boa constructors" print
 [ { } ] [
    boa-ctors [
        dup stack-effect in>> length
        [ nip [ 1000 random ] [ ] replicate-as ]
        [ fixnum <array> swap '[ _ declare _ execute ] ]
        2bi
    ] check-optimizer
 ] unit-test
 "== Checking vector operations" print
 : random-vector ( class -- vec )
    new [ drop 1000 random ] map ;
 :: check-vector-op ( word inputs class elt-class -- inputs quot )
    inputs [
        [
            {
                { +vector+ [ class random-vector ] }
                { +scalar+ [ 1000 random elt-class float = [ >float ] when ] }
            } case
        ] [ ] map-as
    ] [
        [
            {
                { +vector+ [ class ] }
                { +scalar+ [ elt-class ] }
            } case
        ] map
    ] bi
    word '[ _ declare _ execute ] ;
 : ops-to-check ( elt-class -- alist )
    [ vector-words >alist ] dip float = [
        [ drop { n/v v/n v/ normalize } member? not ] assoc-filter
    ] unless ;
 : check-vector-ops ( class elt-class -- )
    [ nip ops-to-check ] 2keep
    '[ first2 inputs _ _ check-vector-op ] check-optimizer ; inline
 : simd-classes&reps ( -- alist )
    simd-classes [
        dup name>> [ "float" head? ] [ "double" head? ] bi or
        float fixnum ?
    ] { } map>assoc ;
 simd-classes&reps [
    [ [ { } ] ] 2dip '[ _ _ check-vector-ops ] unit-test
 ] assoc-each
 ! Other regressions
 [ 8000000 ] [
    int-8{ 1000 1000 1000 1000 1000 1000 1000 1000 }
    [ { int-8 } declare dup [ * ] [ + ] 2map-reduce ] compile-call
 ] unit-test
 ! Vector alien intrinsics
 [ float-4{ 1 2 3 4 } ] [
    [
        float-4{ 1 2 3 4 }
        underlying>> 0 float-4-rep alien-vector
    ] compile-call float-4 boa
 ] unit-test
 [ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [
    16 [ 1 ] B{ } replicate-as 16 <byte-array>
    [
        0 [
            { byte-array c-ptr fixnum } declare
            float-4-rep set-alien-vector
        ] compile-call
    ] keep
 ] unit-test
 [ float-array{ 1 2 3 4 } ] [
    [
        float-array{ 1 2 3 4 } underlying>>
        float-array{ 4 3 2 1 } clone
        [ underlying>> 0 float-4-rep set-alien-vector ] keep
    ] compile-call
 ] unit-test
 STRUCT: simd-struct
 { x float-4 }
 { y double-2 }
 { z double-4 }
 { w float-8 } ;
 [ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
 [
    float-4{ 1 2 3 4 }
    double-2{ 2 1 }
    double-4{ 4 3 2 1 }
    float-8{ 1 2 3 4 5 6 7 8 }
 ] [
    simd-struct <struct>
    float-4{ 1 2 3 4 } >>x
    double-2{ 2 1 } >>y
    double-4{ 4 3 2 1 } >>z
    float-8{ 1 2 3 4 5 6 7 8 } >>w
    { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
 ] unit-test
 [
    float-4{ 1 2 3 4 }
    double-2{ 2 1 }
    double-4{ 4 3 2 1 }
    float-8{ 1 2 3 4 5 6 7 8 }
 ] [
    [
        simd-struct <struct>
        float-4{ 1 2 3 4 } >>x
        double-2{ 2 1 } >>y
        double-4{ 4 3 2 1 } >>z
        float-8{ 1 2 3 4 5 6 7 8 } >>w
        { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
    ] compile-call
 ] unit-test
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@ -1,15 +1,32 @@
 ! Copyright (C) 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: alien.c-types cpu.architecture kernel
+USING: alien.c-types combinators fry kernel lexer math math.parser
-math.vectors.simd.functor vocabs.loader ;
+math.vectors.simd.functor sequences splitting vocabs.generated
-FROM: sequences => each ;
+vocabs.loader vocabs.parser words ;
 IN: math.vectors.simd
-<<
+ERROR: bad-vector-size bits ;
-{ double float char uchar short ushort int uint }
+<PRIVATE
 [ [ define-simd-128 ] [ define-simd-256 ] bi ] each
->>
+: simd-vocab ( type -- vocab )
    "math.vectors.simd.instances." prepend ;
-"math.vectors.simd.alien" require
+: parse-simd-name ( string -- c-type quot )
    "-" split1
    [ "alien.c-types" lookup dup heap-size ] [ string>number ] bi*
    * 8 * {
        { 128 [ [ define-simd-128 ] ] }
        { 256 [ [ define-simd-256 ] ] }
        [ bad-vector-size ]
    } case ;
 PRIVATE>
 : define-simd-vocab ( type -- vocab )
    [ simd-vocab ]
    [ '[ _ parse-simd-name call( type -- ) ] ] bi
    generate-vocab ;
 SYNTAX: SIMD:
    scan define-simd-vocab use-vocab ;
--- a/basis/specialized-arrays/specialized-arrays.factor
+++ b/basis/specialized-arrays/specialized-arrays.factor
@ -4,7 +4,7 @@ USING: accessors alien alien.c-types alien.data alien.parser assocs
 byte-arrays classes compiler.units functors kernel lexer libc math
 math.vectors.specialization namespaces parser prettyprint.custom
 sequences sequences.private strings summary vocabs vocabs.loader
-vocabs.parser words fry combinators ;
+vocabs.parser vocabs.generated words fry combinators ;
 IN: specialized-arrays
 MIXIN: specialized-array
@ -123,15 +123,6 @@ M: word (underlying-type) "c-type" word-prop ;
 PRIVATE>
 : generate-vocab ( vocab-name quot -- vocab )
    [ dup vocab [ ] ] dip '[
        [
            [
                 _ with-current-vocab
            ] with-compilation-unit
        ] keep
    ] ?if ; inline
 : define-array-vocab ( type -- vocab )
    underlying-type-name
    [ specialized-array-vocab ] [ '[ _ define-array ] ] bi
--- a/basis/specialized-vectors/specialized-vectors.factor
+++ b/basis/specialized-vectors/specialized-vectors.factor
@ -3,7 +3,7 @@
 USING: accessors alien.c-types assocs compiler.units functors
 growable kernel lexer namespaces parser prettyprint.custom
 sequences specialized-arrays specialized-arrays.private strings
-vocabs vocabs.parser fry ;
+vocabs vocabs.parser vocabs.generated fry ;
 QUALIFIED: vectors.functor
 IN: specialized-vectors
--- a/basis/math/vectors/simd/alien/authors.txt
+++ b/basis/math/vectors/simd/alien/authors.txt
--- a/basis/vocabs/generated/generated.factor
+++ b/basis/vocabs/generated/generated.factor
@ -0,0 +1,13 @@
 ! Copyright (C) 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: compiler.units fry kernel vocabs vocabs.parser ;
 IN: vocabs.generated
 : generate-vocab ( vocab-name quot -- vocab )
    [ dup vocab [ ] ] dip '[
        [
            [
                 _ with-current-vocab
            ] with-compilation-unit
        ] keep
    ] ?if ; inline
--- a/extra/benchmark/nbody-simd/nbody-simd.factor
+++ b/extra/benchmark/nbody-simd/nbody-simd.factor
@ -4,6 +4,7 @@ USING: accessors fry kernel locals math math.constants
 math.functions math.vectors math.vectors.simd prettyprint
 combinators.smart sequences hints classes.struct
 specialized-arrays ;
 SIMD: double-4
 IN: benchmark.nbody-simd
 : solar-mass ( -- x ) 4 pi sq * ; inline
--- a/extra/benchmark/raytracer-simd/raytracer-simd.factor
+++ b/extra/benchmark/raytracer-simd/raytracer-simd.factor
@ -5,6 +5,7 @@ USING: arrays accessors io io.files io.files.temp
 io.encodings.binary kernel math math.constants math.functions
 math.vectors math.vectors.simd math.parser make sequences
 sequences.private words hints classes.struct ;
 SIMD: double-4
 IN: benchmark.raytracer-simd
 ! parameters
--- a/extra/benchmark/simd-1/simd-1.factor
+++ b/extra/benchmark/simd-1/simd-1.factor
@ -2,6 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: kernel io math math.functions math.parser math.vectors
 math.vectors.simd sequences specialized-arrays ;
 SIMD: float-4
 SPECIALIZED-ARRAY: float-4
 IN: benchmark.simd-1