Merge branch 'master' of git://factorcode.org/git/factor

2009-12-06 02:28:46 -06:00 · 2009-12-06 02:28:46 -06:00 · 313f70dbd6
parent 50afb06993 6b9c4254ad
commit 313f70dbd6
23 changed files with 387 additions and 147 deletions
--- a/basis/compiler/cfg/instructions/instructions.factor
+++ b/basis/compiler/cfg/instructions/instructions.factor
@ -382,6 +382,16 @@ def: dst
 use: src1 src2
 literal: rep ;

+PURE-INSN: ##mul-high-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
+PURE-INSN: ##mul-horizontal-add-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
 PURE-INSN: ##saturated-mul-vector
 def: dst
 use: src1 src2
@ -402,11 +412,21 @@ def: dst
 use: src1 src2
 literal: rep ;

+PURE-INSN: ##avg-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
 PURE-INSN: ##dot-vector
 def: dst/scalar-rep
 use: src1 src2
 literal: rep ;

+PURE-INSN: ##sad-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
 PURE-INSN: ##horizontal-add-vector
 def: dst
 use: src1 src2
--- a/basis/compiler/cfg/intrinsics/simd/simd-tests.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd-tests.factor
@ -273,14 +273,6 @@ unit-test
 [ horizontal-cpu float-4-rep [ emit-simd-sum ] test-emit ]
 unit-test

-[ {
-    ##unpack-vector-head ##unpack-vector-tail ##add-vector
-    ##horizontal-add-vector
-    ##vector>scalar
-} ]
-[ horizontal-cpu int-4-rep [ emit-simd-sum ] test-emit ]
-unit-test
-
 [ {
    ##unpack-vector-head ##unpack-vector-tail ##add-vector
    ##horizontal-add-vector ##horizontal-add-vector
@ -514,22 +506,22 @@ unit-test

 ! with
 [ { ##scalar>vector ##shuffle-vector-imm } ]
-[ shuffle-imm-cpu int-4-rep [ emit-simd-with ] test-emit ]
+[ shuffle-imm-cpu float-4-rep [ emit-simd-with ] test-emit ]
 unit-test

 ! gather-2
 [ { ##gather-vector-2 } ]
-[ simple-ops-cpu longlong-2-rep [ emit-simd-gather-2 ] test-emit ]
+[ simple-ops-cpu double-2-rep [ emit-simd-gather-2 ] test-emit ]
 unit-test

 ! gather-4
 [ { ##gather-vector-4 } ]
-[ simple-ops-cpu int-4-rep [ emit-simd-gather-4 ] test-emit ]
+[ simple-ops-cpu float-4-rep [ emit-simd-gather-4 ] test-emit ]
 unit-test

 ! select
 [ { ##shuffle-vector-imm ##vector>scalar } ]
-[ shuffle-imm-cpu 1 int-4-rep [ emit-simd-select ] test-emit-literal ]
+[ shuffle-imm-cpu 1 float-4-rep [ emit-simd-select ] test-emit-literal ]
 unit-test

 ! test with nonliteral/invalid reps
--- a/basis/compiler/cfg/intrinsics/simd/simd.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd.factor
@ -57,6 +57,12 @@ IN: compiler.cfg.intrinsics.simd
        { longlong-2-rep [ longlong-array{ -1 0 } underlying>> ^^load-constant ] }
    } case ;

+: ^load-half-vector ( rep -- dst )
+    {
+        { float-4-rep  [ float-array{  0.5 0.5 0.5 0.5 } underlying>> ^^load-constant ] }
+        { double-2-rep [ double-array{ 0.5 0.5 }         underlying>> ^^load-constant ] }
+    } case ;
+
 : >variable-shuffle ( shuffle rep -- shuffle' )
    rep-component-type heap-size
    [ dup <repetition> >byte-array ]
@ -336,6 +342,16 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
        [ ^^mul-vector ]
    } emit-vv-vector-op ;

+: emit-simd-v*high ( node -- )
+    {
+        [ ^^mul-high-vector ]
+    } emit-vv-vector-op ;
+
+: emit-simd-v*hs+ ( node -- )
+    {
+        [ ^^mul-horizontal-add-vector ]
+    } emit-vv-vector-op ;
+
 : emit-simd-v/ ( node -- )
    {
        [ ^^div-vector ]
@ -359,12 +375,29 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
        ]
    } emit-vv-vector-op ;

+: emit-simd-vavg ( node -- )
+    {
+        [ ^^avg-vector ]
+        { float-vector-rep [| src1 src2 rep |
+            src1 src2 rep ^^add-vector
+            rep ^load-half-vector rep ^^mul-vector
+        ] }
+    } emit-vv-vector-op ;
+
 : emit-simd-v. ( node -- )
    {
        [ ^^dot-vector ]
        { float-vector-rep [ [ ^^mul-vector ] [ ^sum-vector ] bi ] }
    } emit-vv-vector-op ;

+: emit-simd-vsad ( node -- )
+    {
+        [
+            [ ^^sad-vector dup { 2 3 0 1 } int-4-rep ^^shuffle-vector-imm int-4-rep ^^add-vector ]
+            [ widen-vector-rep ^^vector>scalar ] bi
+        ]
+    } emit-vv-vector-op ;
+
 : emit-simd-vsqrt ( node -- )
    {
        [ ^^sqrt-vector ]
@ -580,10 +613,14 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
        { (simd-vs-)               [ emit-simd-vs-                 ] }
        { (simd-vs*)               [ emit-simd-vs*                 ] }
        { (simd-v*)                [ emit-simd-v*                  ] }
+        { (simd-v*high)            [ emit-simd-v*high              ] }
+        { (simd-v*hs+)             [ emit-simd-v*hs+               ] }
        { (simd-v/)                [ emit-simd-v/                  ] }
        { (simd-vmin)              [ emit-simd-vmin                ] }
        { (simd-vmax)              [ emit-simd-vmax                ] }
+        { (simd-vavg)              [ emit-simd-vavg                ] }
        { (simd-v.)                [ emit-simd-v.                  ] }
+        { (simd-vsad)              [ emit-simd-vsad                ] }
        { (simd-vsqrt)             [ emit-simd-vsqrt               ] }
        { (simd-sum)               [ emit-simd-sum                 ] }
        { (simd-vabs)              [ emit-simd-vabs                ] }
--- a/basis/compiler/codegen/codegen.factor
+++ b/basis/compiler/codegen/codegen.factor
@ -173,11 +173,15 @@ CODEGEN: ##add-sub-vector %add-sub-vector
 CODEGEN: ##sub-vector %sub-vector
 CODEGEN: ##saturated-sub-vector %saturated-sub-vector
 CODEGEN: ##mul-vector %mul-vector
+CODEGEN: ##mul-high-vector %mul-high-vector
+CODEGEN: ##mul-horizontal-add-vector %mul-horizontal-add-vector
 CODEGEN: ##saturated-mul-vector %saturated-mul-vector
 CODEGEN: ##div-vector %div-vector
 CODEGEN: ##min-vector %min-vector
 CODEGEN: ##max-vector %max-vector
+CODEGEN: ##avg-vector %avg-vector
 CODEGEN: ##dot-vector %dot-vector
+CODEGEN: ##sad-vector %sad-vector
 CODEGEN: ##sqrt-vector %sqrt-vector
 CODEGEN: ##horizontal-add-vector %horizontal-add-vector
 CODEGEN: ##horizontal-sub-vector %horizontal-sub-vector
--- a/basis/compiler/tree/propagation/simd/simd.factor
+++ b/basis/compiler/tree/propagation/simd/simd.factor
@ -16,9 +16,12 @@ CONSTANT: vector>vector-intrinsics
        (simd-vs-)
        (simd-vs*)
        (simd-v*)
+        (simd-v*high)
+        (simd-v*hs+)
        (simd-v/)
        (simd-vmin)
        (simd-vmax)
+        (simd-vavg)
        (simd-vsqrt)
        (simd-vabs)
        (simd-vbitand)
@ -60,6 +63,7 @@ CONSTANT: vector>vector-intrinsics
 CONSTANT: vector-other-intrinsics
    {
        (simd-v.)
+        (simd-vsad)
        (simd-sum)
        (simd-vany?)
        (simd-vall?)
--- a/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@ -283,11 +283,15 @@ HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
 HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
 HOOK: %saturated-sub-vector cpu ( dst src1 src2 rep -- )
 HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
+HOOK: %mul-high-vector cpu ( dst src1 src2 rep -- )
+HOOK: %mul-horizontal-add-vector cpu ( dst src1 src2 rep -- )
 HOOK: %saturated-mul-vector cpu ( dst src1 src2 rep -- )
 HOOK: %div-vector cpu ( dst src1 src2 rep -- )
 HOOK: %min-vector cpu ( dst src1 src2 rep -- )
 HOOK: %max-vector cpu ( dst src1 src2 rep -- )
+HOOK: %avg-vector cpu ( dst src1 src2 rep -- )
 HOOK: %dot-vector cpu ( dst src1 src2 rep -- )
+HOOK: %sad-vector cpu ( dst src1 src2 rep -- )
 HOOK: %sqrt-vector cpu ( dst src rep -- )
 HOOK: %horizontal-add-vector cpu ( dst src1 src2 rep -- )
 HOOK: %horizontal-sub-vector cpu ( dst src1 src2 rep -- )
@ -332,11 +336,15 @@ HOOK: %add-sub-vector-reps cpu ( -- reps )
 HOOK: %sub-vector-reps cpu ( -- reps )
 HOOK: %saturated-sub-vector-reps cpu ( -- reps )
 HOOK: %mul-vector-reps cpu ( -- reps )
+HOOK: %mul-high-vector-reps cpu ( -- reps )
+HOOK: %mul-horizontal-add-vector-reps cpu ( -- reps )
 HOOK: %saturated-mul-vector-reps cpu ( -- reps )
 HOOK: %div-vector-reps cpu ( -- reps )
 HOOK: %min-vector-reps cpu ( -- reps )
 HOOK: %max-vector-reps cpu ( -- reps )
+HOOK: %avg-vector-reps cpu ( -- reps )
 HOOK: %dot-vector-reps cpu ( -- reps )
+HOOK: %sad-vector-reps cpu ( -- reps )
 HOOK: %sqrt-vector-reps cpu ( -- reps )
 HOOK: %horizontal-add-vector-reps cpu ( -- reps )
 HOOK: %horizontal-sub-vector-reps cpu ( -- reps )
--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@ -1106,6 +1106,32 @@ M: x86 %mul-vector-reps
        { sse4.1? { int-4-rep uint-4-rep } }
    } available-reps ;

+M: x86 %mul-high-vector ( dst src1 src2 rep -- )
+    [ two-operand ] keep
+    {
+        { short-8-rep  [ PMULHW ] }
+        { ushort-8-rep [ PMULHUW ] }
+    } case ;
+
+M: x86 %mul-high-vector-reps
+    {
+        { sse2? { short-8-rep ushort-8-rep } }
+    } available-reps ;
+
+M: x86 %mul-horizontal-add-vector ( dst src1 src2 rep -- )
+    [ two-operand ] keep
+    {
+        { char-16-rep  [ PMADDUBSW ] }
+        { uchar-16-rep [ PMADDUBSW ] }
+        { short-8-rep  [ PMADDWD ] }
+    } case ;
+
+M: x86 %mul-horizontal-add-vector-reps
+    {
+        { sse2?  { short-8-rep } }
+        { ssse3? { char-16-rep uchar-16-rep } }
+    } available-reps ;
+
 M: x86 %div-vector ( dst src1 src2 rep -- )
    [ two-operand ] keep
    {
@ -1159,6 +1185,18 @@ M: x86 %max-vector-reps
        { sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
    } available-reps ;

+M: x86 %avg-vector ( dst src1 src2 rep -- )
+    [ two-operand ] keep
+    {
+        { uchar-16-rep [ PAVGB ] }
+        { ushort-8-rep [ PAVGW ] }
+    } case ;
+
+M: x86 %avg-vector-reps
+    {
+        { sse2? { uchar-16-rep ushort-8-rep } }
+    } available-reps ;
+
 M: x86 %dot-vector
    [ two-operand ] keep
    {
@ -1171,6 +1209,17 @@ M: x86 %dot-vector-reps
        { sse4.1? { float-4-rep double-2-rep } }
    } available-reps ;

+M: x86 %sad-vector
+    [ two-operand ] keep
+    {
+        { uchar-16-rep [ PSADBW ] }
+    } case ;
+
+M: x86 %sad-vector-reps
+    {
+        { sse2? { uchar-16-rep } }
+    } available-reps ;
+
 M: x86 %horizontal-add-vector ( dst src1 src2 rep -- )
    [ two-operand ] keep
    signed-rep {
@ -1323,7 +1372,7 @@ M: x86 %shr-vector-imm-reps %shr-vector-reps ;

 M: x86 %integer>scalar drop MOVD ;

-M:: x86 %scalar>integer ( dst src rep -- )
+:: %scalar>integer-32 ( dst src rep -- )
    rep {
        { int-scalar-rep [
            dst 32-bit-version-of src MOVD
@ -1359,6 +1408,14 @@ M:: x86 %scalar>integer ( dst src rep -- )
        ] }
    } case ;

+M: x86.32 %scalar>integer ( dst src rep -- ) %scalar>integer-32 ;
+M: x86.64 %scalar>integer ( dst src rep -- )
+    {
+        { longlong-scalar-rep  [ MOVD ] }
+        { ulonglong-scalar-rep [ MOVD ] }
+        [ %scalar>integer-32 ]
+    } case ;
+
 M: x86 %vector>scalar %copy ;
 M: x86 %scalar>vector %copy ;

--- a/basis/math/combinatorics/combinatorics.factor
+++ b/basis/math/combinatorics/combinatorics.factor
@ -1,7 +1,7 @@
 ! Copyright (c) 2007-2009 Slava Pestov, Doug Coleman, Aaron Schaefer.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors assocs binary-search fry kernel locals math math.order
-    math.ranges mirrors namespaces sequences sorting ;
+    math.ranges namespaces sequences sorting ;
 IN: math.combinatorics

 <PRIVATE
--- a/basis/math/vectors/simd/intrinsics/intrinsics.factor
+++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor
@ -1,8 +1,9 @@
 ! (c)2009 Slava Pestov, Joe Groff bsd license
 USING: accessors alien alien.c-types alien.data combinators
-sequences.cords cpu.architecture fry generalizations kernel
-libc locals math math.libm math.order math.ranges math.vectors
-sequences sequences.private specialized-arrays vocabs.loader ;
+sequences.cords cpu.architecture fry generalizations grouping
+kernel libc locals math math.libm math.order math.ranges
+math.vectors sequences sequences.private specialized-arrays
+vocabs.loader ;
 QUALIFIED-WITH: alien.c-types c
 SPECIALIZED-ARRAYS:
    c:char c:short c:int c:longlong
@ -141,13 +142,31 @@ PRIVATE>
 : (simd-vs*)               ( a b rep -- c )
    dup rep-component-type '[ * _ c-type-clamp ] components-2map ;
 : (simd-v*)                ( a b rep -- c ) [ * ] components-2map ;
+: (simd-v*high)            ( a b rep -- c )
+    dup rep-component-type heap-size -8 * '[ * _ shift ] components-2map ;
+:: (simd-v*hs+)            ( a b rep -- c )
+    rep { char-16-rep uchar-16-rep } member-eq?
+    [ uchar-16-rep char-16-rep ]
+    [ rep rep ] if :> ( a-rep b-rep )
+    b-rep widen-vector-rep signed-rep :> wide-rep
+    wide-rep rep-component-type :> wide-type
+    a a-rep >rep-array 2 <groups> :> a'
+    b b-rep >rep-array 2 <groups> :> b'
+    a' b' [
+        [ [ first  ] bi@ * ]
+        [ [ second ] bi@ * ] 2bi +
+        wide-type c-type-clamp
+    ] wide-rep <rep-array> 2map-as underlying>> ;
 : (simd-v/)                ( a b rep -- c ) [ / ] components-2map ;
+: (simd-vavg)              ( a b rep -- c )
+    [ + dup integer? [ 1 + -1 shift ] [ 0.5 * ] if ] components-2map ;
 : (simd-vmin)              ( a b rep -- c ) [ min ] components-2map ;
 : (simd-vmax)              ( a b rep -- c ) [ max ] components-2map ;
 : (simd-v.)                ( a b rep -- n )
    [ 2>rep-array [ [ first ] bi@ * ] 2keep ] keep
    1 swap rep-length [a,b) [ '[ _ swap nth-unsafe ] bi@ * + ] with with each ;
 : (simd-vsqrt)             ( a   rep -- c ) [ fsqrt ] components-map ;
+: (simd-vsad)              ( a b rep -- c ) 2>rep-array [ - abs ] [ + ] 2map-reduce ;
 : (simd-sum)               ( a   rep -- n ) [ + ] components-reduce ;
 : (simd-vabs)              ( a   rep -- c ) [ abs ] components-map ;
 : (simd-vbitand)           ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
--- a/basis/math/vectors/simd/simd-tests.factor
+++ b/basis/math/vectors/simd/simd-tests.factor
@ -69,6 +69,8 @@ CONSTANT: vector-words
        { v* { +vector+ +vector+ -> +vector+ } }
        { vs* { +vector+ +vector+ -> +vector+ } }
        { v*n { +vector+ +scalar+ -> +vector+ } }
+        { v*high { +vector+ +vector+ -> +vector+ } }
+        { v*hs+ { +vector+ +vector+ -> +vector+ } }
        { v+ { +vector+ +vector+ -> +vector+ } }
        { vs+ { +vector+ +vector+ -> +vector+ } }
        { v+- { +vector+ +vector+ -> +vector+ } }
@ -78,12 +80,14 @@ CONSTANT: vector-words
        { vs- { +vector+ +vector+ -> +vector+ } }
        { v-n { +vector+ +scalar+ -> +vector+ } }
        { v. { +vector+ +vector+ -> +scalar+ } }
+        { vsad { +vector+ +vector+ -> +scalar+ } }
        { v/ { +vector+ +vector+ -> +vector+ } }
        { v/n { +vector+ +scalar+ -> +vector+ } }
        { vceiling { +vector+ -> +vector+ } }
        { vfloor { +vector+ -> +vector+ } }
        { vmax { +vector+ +vector+ -> +vector+ } }
        { vmin { +vector+ +vector+ -> +vector+ } }
+        { vavg { +vector+ +vector+ -> +vector+ } }
        { vneg { +vector+ -> +vector+ } }
        { vtruncate { +vector+ -> +vector+ } }
        { sum { +vector+ -> +scalar+ } }
@ -197,7 +201,7 @@ CONSTANT: vector-words
    { vsqrt n/v v/n v/ normalize } unique assoc-diff ;

 : remove-integer-words ( alist -- alist' )
-    { vlshift vrshift } unique assoc-diff ;
+    { vlshift vrshift v*high v*hs+ } unique assoc-diff ;

 : boolean-ops ( -- words )
    { vand vandn vor vxor vnot } ;
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@ -9,6 +9,8 @@ IN: math.vectors.simd

 ERROR: bad-simd-length got expected ;

+ERROR: bad-simd-vector obj ;
+
 <<
 <PRIVATE
 ! Primitive SIMD constructors
@ -48,6 +50,7 @@ TUPLE: simd-128

 GENERIC: simd-element-type ( obj -- c-type )
 GENERIC: simd-rep ( simd -- rep )
+GENERIC: simd-with ( n exemplar -- v )

 M: object simd-element-type drop f ;
 M: object simd-rep drop f ;
@ -99,6 +102,131 @@ PRIVATE>
 >>

 <<
+
+! SIMD vectors as sequences
+
+M: simd-128 hashcode* underlying>> hashcode* ; inline
+M: simd-128 clone [ clone ] change-underlying ; inline
+M: simd-128 c:byte-length drop 16 ; inline
+
+M: simd-128 new-sequence
+    2dup length =
+    [ nip [ 16 (byte-array) ] make-underlying ]
+    [ length bad-simd-length ] if ; inline
+
+M: simd-128 equal?
+    dup simd-rep [ drop v= vall? ] [ 3drop f ] if-both-vectors-match ; inline
+
+! SIMD primitive operations
+
+M: simd-128 v+
+    dup simd-rep [ (simd-v+)                ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 v-
+    dup simd-rep [ (simd-v-)                ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vneg
+    dup simd-rep [ (simd-vneg)              ] [ call-next-method ] v->v-op  ; inline
+M: simd-128 v+-
+    dup simd-rep [ (simd-v+-)               ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vs+
+    dup simd-rep [ (simd-vs+)               ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vs-
+    dup simd-rep [ (simd-vs-)               ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vs*
+    dup simd-rep [ (simd-vs*)               ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 v*
+    dup simd-rep [ (simd-v*)                ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 v*high
+    dup simd-rep [ (simd-v*high)            ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 v/
+    dup simd-rep [ (simd-v/)                ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vavg
+    dup simd-rep [ (simd-vavg)              ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vmin
+    dup simd-rep [ (simd-vmin)              ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vmax
+    dup simd-rep [ (simd-vmax)              ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 v.
+    dup simd-rep [ (simd-v.)                ] [ call-next-method ] vv->n-op ; inline
+M: simd-128 vsad
+    dup simd-rep [ (simd-vsad)              ] [ call-next-method ] vv->n-op ; inline
+M: simd-128 vsqrt
+    dup simd-rep [ (simd-vsqrt)             ] [ call-next-method ] v->v-op  ; inline
+M: simd-128 sum
+    dup simd-rep [ (simd-sum)               ] [ call-next-method ] v->n-op  ; inline
+M: simd-128 vabs
+    dup simd-rep [ (simd-vabs)              ] [ call-next-method ] v->v-op  ; inline
+M: simd-128 vbitand
+    dup simd-rep [ (simd-vbitand)           ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vbitandn
+    dup simd-rep [ (simd-vbitandn)          ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vbitor
+    dup simd-rep [ (simd-vbitor)            ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vbitxor
+    dup simd-rep [ (simd-vbitxor)           ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vbitnot
+    dup simd-rep [ (simd-vbitnot)           ] [ call-next-method ] v->v-op  ; inline
+M: simd-128 vand
+    dup simd-rep [ (simd-vand)              ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vandn
+    dup simd-rep [ (simd-vandn)             ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vor
+    dup simd-rep [ (simd-vor)               ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vxor
+    dup simd-rep [ (simd-vxor)              ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vnot
+    dup simd-rep [ (simd-vnot)              ] [ call-next-method ] v->v-op  ; inline
+M: simd-128 vlshift
+    over simd-rep [ (simd-vlshift)           ] [ call-next-method ] vn->v-op ; inline
+M: simd-128 vrshift
+    over simd-rep [ (simd-vrshift)           ] [ call-next-method ] vn->v-op ; inline
+M: simd-128 hlshift
+    over simd-rep [ (simd-hlshift)           ] [ call-next-method ] vn->v-op ; inline
+M: simd-128 hrshift
+    over simd-rep [ (simd-hrshift)           ] [ call-next-method ] vn->v-op ; inline
+M: simd-128 vshuffle-elements
+    over simd-rep [ (simd-vshuffle-elements) ] [ call-next-method ] vn->v-op ; inline
+M: simd-128 vshuffle-bytes
+    dup simd-rep [ (simd-vshuffle-bytes)    ] [ call-next-method ] vv'->v-op ; inline
+M: simd-128 (vmerge-head)
+    dup simd-rep [ (simd-vmerge-head)       ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 (vmerge-tail)
+    dup simd-rep [ (simd-vmerge-tail)       ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 v<=
+    dup simd-rep [ (simd-v<=)               ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 v<
+    dup simd-rep [ (simd-v<)                ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 v=
+    dup simd-rep [ (simd-v=)                ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 v>
+    dup simd-rep [ (simd-v>)                ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 v>=
+    dup simd-rep [ (simd-v>=)               ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vunordered?
+    dup simd-rep [ (simd-vunordered?)       ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vany?
+    dup simd-rep [ (simd-vany?)             ] [ call-next-method ] v->n-op  ; inline
+M: simd-128 vall?
+    dup simd-rep [ (simd-vall?)             ] [ call-next-method ] v->n-op  ; inline
+M: simd-128 vnone?
+    dup simd-rep [ (simd-vnone?)            ] [ call-next-method ] v->n-op  ; inline
+
+! SIMD high-level specializations
+
+M: simd-128 vbroadcast swap [ nth ] [ simd-with ] bi ; inline
+M: simd-128 n+v [ simd-with ] keep v+ ; inline
+M: simd-128 n-v [ simd-with ] keep v- ; inline
+M: simd-128 n*v [ simd-with ] keep v* ; inline
+M: simd-128 n/v [ simd-with ] keep v/ ; inline
+M: simd-128 v+n over simd-with v+ ; inline
+M: simd-128 v-n over simd-with v- ; inline
+M: simd-128 v*n over simd-with v* ; inline
+M: simd-128 v/n over simd-with v/ ; inline
+M: simd-128 norm-sq dup v. assert-positive ; inline
+M: simd-128 distance v- norm ; inline
+
+M: simd-128 >pprint-sequence ;
+M: simd-128 pprint* pprint-object ;
+
 <PRIVATE

 ! SIMD concrete type functor
@ -128,7 +256,10 @@ TUPLE: A < simd-128 ;
 M: A new-underlying    drop \ A boa ; inline
 M: A simd-rep          drop A-rep ; inline
 M: A simd-element-type drop ELT ; inline
+M: A simd-with         drop A-with ; inline

+M: A nth-unsafe
+    swap \ A-rep [ (simd-select) ] [ call-next-method ] vn->n-op ; inline
 M: A set-nth-unsafe
    [ ELT boolean>element ] 2dip
    underlying>> SET-NTH call ; inline
@ -140,84 +271,7 @@ M: A like drop dup \ A instance? [ >A ] unless ; inline
 : A-with ( n -- v ) COERCER call \ A-rep (simd-with) \ A boa ; inline
 : A-cast ( v -- v' ) underlying>> \ A boa ; inline

-! SIMD vectors as sequences
-
-M: A hashcode* underlying>> hashcode* ; inline
-M: A clone [ clone ] change-underlying ; inline
 M: A length drop N ; inline
-M: A nth-unsafe
-    swap \ A-rep [ (simd-select) ] [ call-next-method ] vn->n-op ; inline
-M: A c:byte-length drop 16 ; inline
-
-M: A new-sequence
-    2dup length =
-    [ nip [ 16 (byte-array) ] make-underlying ]
-    [ length bad-simd-length ] if ; inline
-
-M: A equal?
-    \ A-rep [ drop v= vall? ] [ 3drop f ] if-both-vectors-match ; inline
-
-! SIMD primitive operations
-
-M: A v+                \ A-rep [ (simd-v+)                ] [ call-next-method ] vv->v-op ; inline
-M: A v-                \ A-rep [ (simd-v-)                ] [ call-next-method ] vv->v-op ; inline
-M: A vneg              \ A-rep [ (simd-vneg)              ] [ call-next-method ] v->v-op  ; inline
-M: A v+-               \ A-rep [ (simd-v+-)               ] [ call-next-method ] vv->v-op ; inline
-M: A vs+               \ A-rep [ (simd-vs+)               ] [ call-next-method ] vv->v-op ; inline
-M: A vs-               \ A-rep [ (simd-vs-)               ] [ call-next-method ] vv->v-op ; inline
-M: A vs*               \ A-rep [ (simd-vs*)               ] [ call-next-method ] vv->v-op ; inline
-M: A v*                \ A-rep [ (simd-v*)                ] [ call-next-method ] vv->v-op ; inline
-M: A v/                \ A-rep [ (simd-v/)                ] [ call-next-method ] vv->v-op ; inline
-M: A vmin              \ A-rep [ (simd-vmin)              ] [ call-next-method ] vv->v-op ; inline
-M: A vmax              \ A-rep [ (simd-vmax)              ] [ call-next-method ] vv->v-op ; inline
-M: A v.                \ A-rep [ (simd-v.)                ] [ call-next-method ] vv->n-op ; inline
-M: A vsqrt             \ A-rep [ (simd-vsqrt)             ] [ call-next-method ] v->v-op  ; inline
-M: A sum               \ A-rep [ (simd-sum)               ] [ call-next-method ] v->n-op  ; inline
-M: A vabs              \ A-rep [ (simd-vabs)              ] [ call-next-method ] v->v-op  ; inline
-M: A vbitand           \ A-rep [ (simd-vbitand)           ] [ call-next-method ] vv->v-op ; inline
-M: A vbitandn          \ A-rep [ (simd-vbitandn)          ] [ call-next-method ] vv->v-op ; inline
-M: A vbitor            \ A-rep [ (simd-vbitor)            ] [ call-next-method ] vv->v-op ; inline
-M: A vbitxor           \ A-rep [ (simd-vbitxor)           ] [ call-next-method ] vv->v-op ; inline
-M: A vbitnot           \ A-rep [ (simd-vbitnot)           ] [ call-next-method ] v->v-op  ; inline
-M: A vand              \ A-rep [ (simd-vand)              ] [ call-next-method ] vv->v-op ; inline
-M: A vandn             \ A-rep [ (simd-vandn)             ] [ call-next-method ] vv->v-op ; inline
-M: A vor               \ A-rep [ (simd-vor)               ] [ call-next-method ] vv->v-op ; inline
-M: A vxor              \ A-rep [ (simd-vxor)              ] [ call-next-method ] vv->v-op ; inline
-M: A vnot              \ A-rep [ (simd-vnot)              ] [ call-next-method ] v->v-op  ; inline
-M: A vlshift           \ A-rep [ (simd-vlshift)           ] [ call-next-method ] vn->v-op ; inline
-M: A vrshift           \ A-rep [ (simd-vrshift)           ] [ call-next-method ] vn->v-op ; inline
-M: A hlshift           \ A-rep [ (simd-hlshift)           ] [ call-next-method ] vn->v-op ; inline
-M: A hrshift           \ A-rep [ (simd-hrshift)           ] [ call-next-method ] vn->v-op ; inline
-M: A vshuffle-elements \ A-rep [ (simd-vshuffle-elements) ] [ call-next-method ] vn->v-op ; inline
-M: A vshuffle-bytes    \ A-rep [ (simd-vshuffle-bytes)    ] [ call-next-method ] vv'->v-op ; inline
-M: A (vmerge-head)     \ A-rep [ (simd-vmerge-head)       ] [ call-next-method ] vv->v-op ; inline
-M: A (vmerge-tail)     \ A-rep [ (simd-vmerge-tail)       ] [ call-next-method ] vv->v-op ; inline
-M: A v<=               \ A-rep [ (simd-v<=)               ] [ call-next-method ] vv->v-op ; inline
-M: A v<                \ A-rep [ (simd-v<)                ] [ call-next-method ] vv->v-op ; inline
-M: A v=                \ A-rep [ (simd-v=)                ] [ call-next-method ] vv->v-op ; inline
-M: A v>                \ A-rep [ (simd-v>)                ] [ call-next-method ] vv->v-op ; inline
-M: A v>=               \ A-rep [ (simd-v>=)               ] [ call-next-method ] vv->v-op ; inline
-M: A vunordered?       \ A-rep [ (simd-vunordered?)       ] [ call-next-method ] vv->v-op ; inline
-M: A vany?             \ A-rep [ (simd-vany?)             ] [ call-next-method ] v->n-op  ; inline
-M: A vall?             \ A-rep [ (simd-vall?)             ] [ call-next-method ] v->n-op  ; inline
-M: A vnone?            \ A-rep [ (simd-vnone?)            ] [ call-next-method ] v->n-op  ; inline
-
-! SIMD high-level specializations
-
-M: A vbroadcast swap nth A-with ; inline
-M: A n+v [ A-with ] dip v+ ; inline
-M: A n-v [ A-with ] dip v- ; inline
-M: A n*v [ A-with ] dip v* ; inline
-M: A n/v [ A-with ] dip v/ ; inline
-M: A v+n A-with v+ ; inline
-M: A v-n A-with v- ; inline
-M: A v*n A-with v* ; inline
-M: A v/n A-with v/ ; inline
-M: A norm-sq dup v. assert-positive ; inline
-M: A distance v- norm ; inline
-
-M: A >pprint-sequence ;
-M: A pprint* pprint-object ;

 \ A-boa
 [ COERCER N napply ] N {
@ -230,11 +284,16 @@ BOA-EFFECT define-inline
 M: A pprint-delims drop \ A{ \ } ;
 SYNTAX: A{ \ } [ >A ] parse-literal ;

+INSTANCE: A sequence
+
 c:<c-type>
    byte-array >>class
    A >>boxed-class
    { A-rep alien-vector A boa } >quotation >>getter
-    { [ underlying>> ] 2dip A-rep set-alien-vector } >quotation >>setter
+    {
+        [ dup simd-128? [ bad-simd-vector ] unless underlying>> ] 2dip
+        A-rep set-alien-vector
+    } >quotation >>setter
    16 >>size
    16 >>align
    A-rep >>rep
@ -249,8 +308,6 @@ PRIVATE>

 >>

-INSTANCE: simd-128 sequence
-
 ! SIMD instances

 SIMD-128: char-16
@ -269,6 +326,19 @@ SIMD-128: double-2
 M: simd-128 vshuffle ( u perm -- v )
    vshuffle-bytes ; inline

+M: uchar-16 v*hs+
+    uchar-16-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op short-8-cast ; inline
+M: ushort-8 v*hs+
+    ushort-8-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op uint-4-cast ; inline
+M: uint-4 v*hs+
+    uint-4-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op ulonglong-2-cast ; inline
+M: char-16 v*hs+
+    char-16-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op short-8-cast ; inline
+M: short-8 v*hs+
+    short-8-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op int-4-cast ; inline
+M: int-4 v*hs+
+    int-4-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op longlong-2-cast ; inline
+
 "mirrors" vocab [
    "math.vectors.simd.mirrors" require
 ] when
--- a/basis/math/vectors/vectors-docs.factor
+++ b/basis/math/vectors/vectors-docs.factor
@ -125,8 +125,6 @@ ARTICLE: "math-vectors-simd-logic" "Componentwise logic with SIMD vectors"
 "Processor SIMD units supported by the " { $vocab-link "math.vectors.simd" } " vocabulary represent boolean values as bitmasks, where a true result's binary representation is all ones and a false representation is all zeroes. This is the format in which results from comparison words such as " { $link v= } " return their results and in which logic and test words such as " { $link vand } " and " { $link vall? } " take their inputs when working with SIMD types. For a float vector, false will manifest itself as " { $snippet "0.0" } " and true as a " { $link POSTPONE: NAN: } " literal with a string of set bits in its payload:"
 { $example
 """USING: math.vectors math.vectors.simd prettyprint ;
-FROM: alien.c-types => float ;
-SIMD: float

 float-4{ 1.0 2.0 3.0 0/0. } float-4{ 1.0 -2.0 3.0 0/0. } v= ."""
 """float-4{ NAN: fffffe0000000 0.0 NAN: fffffe0000000 0.0 }"""
@ -134,8 +132,6 @@ float-4{ 1.0 2.0 3.0 0/0. } float-4{ 1.0 -2.0 3.0 0/0. } v= ."""
 "For an integer vector, false will manifest as " { $snippet "0" } " and true as " { $snippet "-1" } " (for signed vectors) or the largest representable value of the element type (for unsigned vectors):"
 { $example
 """USING: math.vectors math.vectors.simd prettyprint alien.c-types ;
-SIMD: int
-SIMD: uchar

 int-4{ 1 2 3 0 } int-4{ 1 -2 3 4 } v=
 uchar-16{  0  1  2  3  4  5 6 7 8 9 10 11 12 13 14 15 }
@ -147,7 +143,6 @@ uchar-16{ 255 255 255 255 255 255 255 255 0 0 0 0 0 0 0 0 }"""
 "This differs from Factor's native representation of boolean values, where " { $link f } " is false and every other value (including " { $snippet "0" } " and " { $snippet "0.0" } ") is true. To make it easy to construct literal SIMD masks, " { $link t } " and " { $link f } " are accepted inside SIMD literal syntax and expand to the proper true or false representation for the underlying type:"
 { $example
 """USING: math.vectors math.vectors.simd prettyprint alien.c-types ;
-SIMD: int

 int-4{ f f t f } ."""
 """int-4{ 0 0 -1 0 }""" }
@ -216,36 +211,36 @@ HELP: vtruncate
 { $description "Truncates each element of " { $snippet "u" } "." } ;

 HELP: n+v
-{ $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } }
+{ $values { "n" "a number" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
 { $description "Adds " { $snippet "n" } " to each element of " { $snippet "u" } "." } ;

 HELP: v+n
-{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "v" "a sequence of numbers" } }
+{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "w" "a sequence of numbers" } }
 { $description "Adds " { $snippet "n" } " to each element of " { $snippet "u" } "." } ;

 HELP: n-v
-{ $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } }
+{ $values { "n" "a number" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
 { $description "Subtracts each element of " { $snippet "u" } " from " { $snippet "n" } "." } ;

 HELP: v-n
-{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "v" "a sequence of numbers" } }
+{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "w" "a sequence of numbers" } }
 { $description "Subtracts " { $snippet "n" } " from each element of " { $snippet "u" } "." } ;

 HELP: n*v
-{ $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } }
+{ $values { "n" "a number" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
 { $description "Multiplies each element of " { $snippet "u" } " by " { $snippet "n" } "." } ;

 HELP: v*n
-{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "v" "a sequence of numbers" } }
+{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "w" "a sequence of numbers" } }
 { $description "Multiplies each element of " { $snippet "u" } " by " { $snippet "n" } "." } ;

 HELP: n/v
-{ $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } }
+{ $values { "n" "a number" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
 { $description "Divides " { $snippet "n" } " by each element of " { $snippet "u" } "." }
 { $errors "May throw an error if a division by zero occurs; see " { $link "division-by-zero" } "." } ;

 HELP: v/n
-{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "v" "a sequence of numbers" } }
+{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "w" "a sequence of numbers" } }
 { $description "Divides each element of " { $snippet "u" } " by " { $snippet "n" } "." }
 { $errors "May throw an error if a division by zero occurs; see " { $link "division-by-zero" } "." } ;

@ -259,7 +254,7 @@ HELP: v-

 HELP: v+-
 { $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
-{ $description "Adds and subtracts alternate elements of " { $snippet "v" } " and " { $snippet "u" } " component-wise." }
+{ $description "Adds and subtracts alternate elements of " { $snippet "v" } " and " { $snippet "u" } " component-wise. Elements at even indexes are subtracted, while elements at odd indexes are added." }
 { $examples
    { $example
        "USING: math.vectors prettyprint ;"
@ -413,7 +408,6 @@ HELP: vbroadcast
 { $examples
    { $example
        "USING: alien.c-types math.vectors math.vectors.simd" "prettyprint ;"
-        "SIMD: int"
        "int-4{ 69 42 911 13 } 2 vbroadcast ."
        "int-4{ 911 911 911 911 }"
    }
@ -429,7 +423,6 @@ HELP: vshuffle
 { $examples
    { $example
        "USING: alien.c-types math.vectors math.vectors.simd" "prettyprint ;"
-        "SIMD: int"
        "int-4{ 69 42 911 13 } { 1 3 2 3 } vshuffle ."
        "int-4{ 42 13 911 13 }"
    }
--- a/basis/math/vectors/vectors.factor
+++ b/basis/math/vectors/vectors.factor
@ -1,7 +1,7 @@
 ! Copyright (C) 2005, 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: arrays alien.c-types assocs kernel sequences math math.functions
-hints math.order math.libm math.floats.private fry combinators
+grouping hints math.order math.libm math.floats.private fry combinators
 byte-arrays accessors locals ;
 QUALIFIED-WITH: alien.c-types c
 IN: math.vectors
@ -9,7 +9,7 @@ IN: math.vectors
 GENERIC: vneg ( u -- v )
 M: object vneg [ neg ] map ;

-GENERIC# v+n 1 ( u n -- v )
+GENERIC# v+n 1 ( u n -- w )
 M: object v+n [ + ] curry map ;

 GENERIC: n+v ( n v -- w )
@ -21,13 +21,13 @@ M: object v-n [ - ] curry map ;
 GENERIC: n-v ( n v -- w )
 M: object n-v [ - ] with map ;

-GENERIC# v*n 1 ( u n -- v )
+GENERIC# v*n 1 ( u n -- w )
 M: object v*n [ * ] curry map ;

 GENERIC: n*v ( n v -- w )
 M: object n*v [ * ] with map ;

-GENERIC# v/n 1 ( u n -- v )
+GENERIC# v/n 1 ( u n -- w )
 M: object v/n [ / ] curry map ;

 GENERIC: n/v ( n v -- w )
@ -45,6 +45,16 @@ M: object [v-] [ [-] ] 2map ;
 GENERIC: v* ( u v -- w )
 M: object v* [ * ] 2map ;

+GENERIC: v*high ( u v -- w )
+
+<PRIVATE
+: (h+) ( u -- w ) 2 <groups> [ first2 + ] map ;
+: (h-) ( u -- w ) 2 <groups> [ first2 - ] map ;
+PRIVATE>
+
+GENERIC: v*hs+ ( u v -- w )
+M: object v*hs+ [ * ] 2map (h+) ;
+
 GENERIC: v/ ( u v -- w )
 M: object v/ [ / ] 2map ;

@ -55,6 +65,9 @@ M: object v/ [ / ] 2map ;

 PRIVATE>

+GENERIC: vavg ( u v -- w )
+M: object vavg [ + 2 / ] 2map ;
+
 GENERIC: vmax ( u v -- w )
 M: object vmax [ [ float-max ] [ max ] if-both-floats ] 2map ;

@ -82,6 +95,9 @@ M: object vabs [ abs ] map ;
 GENERIC: vsqrt ( u -- v )
 M: object vsqrt [ >float fsqrt ] map ;

+GENERIC: vsad ( u v -- n )
+M: object vsad [ - abs ] [ + ] 2map-reduce ;
+
 <PRIVATE

 : bitandn ( x y -- z ) [ bitnot ] dip bitand ; inline
--- a/basis/specialized-arrays/specialized-arrays-tests.factor
+++ b/basis/specialized-arrays/specialized-arrays-tests.factor
@ -10,8 +10,6 @@ FROM: alien.c-types => float ;
 SPECIALIZED-ARRAY: int
 SPECIALIZED-ARRAYS: bool ushort char uint float ulonglong ;

-[ ulonglong ] [ ulonglong-array{ } element-type ] unit-test
-
 [ t ] [ { 1 2 3 } >int-array int-array? ] unit-test

 [ t ] [ int-array{ 1 2 3 } int-array? ] unit-test
--- a/basis/specialized-arrays/specialized-arrays.factor
+++ b/basis/specialized-arrays/specialized-arrays.factor
@ -104,6 +104,12 @@ SYNTAX: A@ scan-object scan-object <direct-A> suffix! ;

 INSTANCE: A specialized-array

+M: A vs+ [ + \ T c-type-clamp ] 2map ;
+M: A vs- [ - \ T c-type-clamp ] 2map ;
+M: A vs* [ * \ T c-type-clamp ] 2map ;
+
+M: A v*high [ * \ T heap-size neg shift ] 2map ;
+
 ;FUNCTOR

 GENERIC: (underlying-type) ( c-type -- c-type' )
--- a/basis/tools/deploy/shaker/shaker.factor
+++ b/basis/tools/deploy/shaker/shaker.factor
@ -180,7 +180,6 @@ IN: tools.deploy.shaker
                "slots"
                "special"
                "specializer"
-                "specializations"
                "struct-slots"
                ! UI needs this
                ! "superclass"
--- a/vm/contexts.hpp
+++ b/vm/contexts.hpp
@ -55,11 +55,30 @@ struct context {
 #define rs_bot (ctx->retainstack_region->start)
 #define rs_top (ctx->retainstack_region->end)

-DEFPUSHPOP(d,ds)
-DEFPUSHPOP(r,rs)
+inline cell dpeek()
+{
+	return *(cell *)ds;
+}
+
+inline void drepl(cell tagged)
+{
+	*(cell *)ds = tagged;
+}
+
+inline cell dpop()
+{
+	cell value = dpeek();
+	ds -= sizeof(cell);
+	return value;
+}
+
+inline void dpush(cell tagged)
+{
+	ds += sizeof(cell);
+	drepl(tagged);
+}

 VM_C_API void nest_stacks(stack_frame *magic_frame, factor_vm *vm);
 VM_C_API void unnest_stacks(factor_vm *vm);

 }
-
--- a/vm/data_heap.cpp
+++ b/vm/data_heap.cpp
@ -98,11 +98,16 @@ void data_heap::reset_generation(tenured_space *gen)
 	clear_decks(gen);
 }

-bool data_heap::low_memory_p()
+bool data_heap::high_fragmentation_p()
 {
 	return (tenured->largest_free_block() <= nursery->size + aging->size);
 }

+bool data_heap::low_memory_p()
+{
+	return (tenured->free_space() <= nursery->size + aging->size);
+}
+
 void data_heap::mark_all_cards()
 {
 	memset(cards,-1,cards_end - cards);
--- a/vm/data_heap.hpp
+++ b/vm/data_heap.hpp
@ -29,6 +29,7 @@ struct data_heap {
 	void reset_generation(nursery_space *gen);
 	void reset_generation(aging_space *gen);
 	void reset_generation(tenured_space *gen);
+	bool high_fragmentation_p();
 	bool low_memory_p();
 	void mark_all_cards();
 };
--- a/vm/full_collector.cpp
+++ b/vm/full_collector.cpp
@ -143,12 +143,20 @@ void factor_vm::collect_full(bool trace_contexts_p)
 {
 	collect_mark_impl(trace_contexts_p);
 	collect_sweep_impl();
+
 	if(data->low_memory_p())
+	{
+		current_gc->op = collect_growing_heap_op;
+		current_gc->event->op = collect_growing_heap_op;
+		collect_growing_heap(0,trace_contexts_p);
+	}
+	else if(data->high_fragmentation_p())
 	{
 		current_gc->op = collect_compact_op;
 		current_gc->event->op = collect_compact_op;
 		collect_compact_impl(trace_contexts_p);
 	}
+
 	code->flush_icache();
 }

--- a/vm/gc.cpp
+++ b/vm/gc.cpp
@ -152,7 +152,7 @@ void factor_vm::gc(gc_op op, cell requested_bytes, bool trace_contexts_p)
 		break;
 	case collect_aging_op:
 		collect_aging();
-		if(data->low_memory_p())
+		if(data->high_fragmentation_p())
 		{
 			current_gc->op = collect_full_op;
 			current_gc->event->op = collect_full_op;
@ -161,7 +161,7 @@ void factor_vm::gc(gc_op op, cell requested_bytes, bool trace_contexts_p)
 		break;
 	case collect_to_tenured_op:
 		collect_to_tenured();
-		if(data->low_memory_p())
+		if(data->high_fragmentation_p())
 		{
 			current_gc->op = collect_full_op;
 			current_gc->event->op = collect_full_op;
--- a/vm/master.hpp
+++ b/vm/master.hpp
@ -40,7 +40,6 @@ namespace factor
 #include "layouts.hpp"
 #include "platform.hpp"
 #include "primitives.hpp"
-#include "stacks.hpp"
 #include "segments.hpp"
 #include "contexts.hpp"
 #include "run.hpp"
--- a/vm/stacks.hpp
+++ b/vm/stacks.hpp
@ -1,19 +0,0 @@
-namespace factor
-{
-
-#define DEFPUSHPOP(prefix,ptr) \
-	inline cell prefix##peek() { return *(cell *)ptr; } \
-	inline void prefix##repl(cell tagged) { *(cell *)ptr = tagged; } \
-	inline cell prefix##pop() \
-	{ \
-		cell value = prefix##peek(); \
-		ptr -= sizeof(cell); \
-		return value; \
-	} \
-	inline void prefix##push(cell tagged) \
-	{ \
-		ptr += sizeof(cell); \
-		prefix##repl(tagged); \
-	}
-
-}